Parser.php 20KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. <?php
  2. /*
  3. * This file is part of the Symfony package.
  4. * (c) Fabien Potencier <fabien@symfony.com>
  5. *
  6. * For the full copyright and license information, please view the LICENSE
  7. * file that was distributed with this source code.
  8. */
  9. namespace Symfony\Component\Yaml;
  10. use Symfony\Component\Yaml\Exception\ParseException;
  11. /**
  12. * Parser parses YAML strings to convert them to PHP arrays.
  13. *
  14. * @author Fabien Potencier <fabien@symfony.com>
  15. */
  16. class Parser
  17. {
  18. private $offset = 0;
  19. private $lines = array();
  20. private $currentLineNb = -1;
  21. private $currentLine = '';
  22. private $refs = array();
  23. /**
  24. * Constructor
  25. *
  26. * @param integer $offset The offset of YAML document (used for line numbers in error messages)
  27. */
  28. public function __construct($offset = 0)
  29. {
  30. $this->offset = $offset;
  31. }
  32. /**
  33. * Parses a YAML string to a PHP value.
  34. *
  35. * @param string $value A YAML string
  36. * @param Boolean $exceptionOnInvalidType true if an exception must be thrown on invalid types (a PHP resource or object), false otherwise
  37. * @param Boolean $objectSupport true if object support is enabled, false otherwise
  38. *
  39. * @return mixed A PHP value
  40. *
  41. * @throws ParseException If the YAML is not valid
  42. */
  43. public function parse($value, $exceptionOnInvalidType = false, $objectSupport = false)
  44. {
  45. $this->currentLineNb = -1;
  46. $this->currentLine = '';
  47. $this->lines = explode("\n", $this->cleanup($value));
  48. if (function_exists('mb_detect_encoding') && false === mb_detect_encoding($value, 'UTF-8', true)) {
  49. throw new ParseException('The YAML value does not appear to be valid UTF-8.');
  50. }
  51. if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
  52. $mbEncoding = mb_internal_encoding();
  53. mb_internal_encoding('UTF-8');
  54. }
  55. $data = array();
  56. while ($this->moveToNextLine()) {
  57. if ($this->isCurrentLineEmpty()) {
  58. continue;
  59. }
  60. // tab?
  61. if ("\t" === $this->currentLine[0]) {
  62. throw new ParseException('A YAML file cannot contain tabs as indentation.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
  63. }
  64. $isRef = $isInPlace = $isProcessed = false;
  65. if (preg_match('#^\-((?P<leadspaces>\s+)(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
  66. if (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
  67. $isRef = $matches['ref'];
  68. $values['value'] = $matches['value'];
  69. }
  70. // array
  71. if (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
  72. $c = $this->getRealCurrentLineNb() + 1;
  73. $parser = new Parser($c);
  74. $parser->refs =& $this->refs;
  75. $data[] = $parser->parse($this->getNextEmbedBlock(), $exceptionOnInvalidType, $objectSupport);
  76. } else {
  77. if (isset($values['leadspaces'])
  78. && ' ' == $values['leadspaces']
  79. && preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\{\[].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $values['value'], $matches)
  80. ) {
  81. // this is a compact notation element, add to next block and parse
  82. $c = $this->getRealCurrentLineNb();
  83. $parser = new Parser($c);
  84. $parser->refs =& $this->refs;
  85. $block = $values['value'];
  86. if (!$this->isNextLineIndented()) {
  87. $block .= "\n".$this->getNextEmbedBlock($this->getCurrentLineIndentation() + 2);
  88. }
  89. $data[] = $parser->parse($block, $exceptionOnInvalidType, $objectSupport);
  90. } else {
  91. $data[] = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport);
  92. }
  93. }
  94. } elseif (preg_match('#^(?P<key>'.Inline::REGEX_QUOTED_STRING.'|[^ \'"\[\{].*?) *\:(\s+(?P<value>.+?))?\s*$#u', $this->currentLine, $values)) {
  95. // force correct settings
  96. Inline::parse(null, $exceptionOnInvalidType, $objectSupport);
  97. try {
  98. $key = Inline::parseScalar($values['key']);
  99. } catch (ParseException $e) {
  100. $e->setParsedLine($this->getRealCurrentLineNb() + 1);
  101. $e->setSnippet($this->currentLine);
  102. throw $e;
  103. }
  104. if ('<<' === $key) {
  105. if (isset($values['value']) && 0 === strpos($values['value'], '*')) {
  106. $isInPlace = substr($values['value'], 1);
  107. if (!array_key_exists($isInPlace, $this->refs)) {
  108. throw new ParseException(sprintf('Reference "%s" does not exist.', $isInPlace), $this->getRealCurrentLineNb() + 1, $this->currentLine);
  109. }
  110. } else {
  111. if (isset($values['value']) && $values['value'] !== '') {
  112. $value = $values['value'];
  113. } else {
  114. $value = $this->getNextEmbedBlock();
  115. }
  116. $c = $this->getRealCurrentLineNb() + 1;
  117. $parser = new Parser($c);
  118. $parser->refs =& $this->refs;
  119. $parsed = $parser->parse($value, $exceptionOnInvalidType, $objectSupport);
  120. $merged = array();
  121. if (!is_array($parsed)) {
  122. throw new ParseException('YAML merge keys used with a scalar value instead of an array.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
  123. } elseif (isset($parsed[0])) {
  124. // Numeric array, merge individual elements
  125. foreach (array_reverse($parsed) as $parsedItem) {
  126. if (!is_array($parsedItem)) {
  127. throw new ParseException('Merge items must be arrays.', $this->getRealCurrentLineNb() + 1, $parsedItem);
  128. }
  129. $merged = array_merge($parsedItem, $merged);
  130. }
  131. } else {
  132. // Associative array, merge
  133. $merged = array_merge($merged, $parsed);
  134. }
  135. $isProcessed = $merged;
  136. }
  137. } elseif (isset($values['value']) && preg_match('#^&(?P<ref>[^ ]+) *(?P<value>.*)#u', $values['value'], $matches)) {
  138. $isRef = $matches['ref'];
  139. $values['value'] = $matches['value'];
  140. }
  141. if ($isProcessed) {
  142. // Merge keys
  143. $data = $isProcessed;
  144. // hash
  145. } elseif (!isset($values['value']) || '' == trim($values['value'], ' ') || 0 === strpos(ltrim($values['value'], ' '), '#')) {
  146. // if next line is less indented or equal, then it means that the current value is null
  147. if ($this->isNextLineIndented()) {
  148. $data[$key] = null;
  149. } else {
  150. $c = $this->getRealCurrentLineNb() + 1;
  151. $parser = new Parser($c);
  152. $parser->refs =& $this->refs;
  153. $data[$key] = $parser->parse($this->getNextEmbedBlock(), $exceptionOnInvalidType, $objectSupport);
  154. }
  155. } else {
  156. if ($isInPlace) {
  157. $data = $this->refs[$isInPlace];
  158. } else {
  159. $data[$key] = $this->parseValue($values['value'], $exceptionOnInvalidType, $objectSupport);
  160. }
  161. }
  162. } else {
  163. // 1-liner followed by newline
  164. if (2 == count($this->lines) && empty($this->lines[1])) {
  165. try {
  166. $value = Inline::parse($this->lines[0], $exceptionOnInvalidType, $objectSupport);
  167. } catch (ParseException $e) {
  168. $e->setParsedLine($this->getRealCurrentLineNb() + 1);
  169. $e->setSnippet($this->currentLine);
  170. throw $e;
  171. }
  172. if (is_array($value)) {
  173. $first = reset($value);
  174. if (is_string($first) && 0 === strpos($first, '*')) {
  175. $data = array();
  176. foreach ($value as $alias) {
  177. $data[] = $this->refs[substr($alias, 1)];
  178. }
  179. $value = $data;
  180. }
  181. }
  182. if (isset($mbEncoding)) {
  183. mb_internal_encoding($mbEncoding);
  184. }
  185. return $value;
  186. }
  187. switch (preg_last_error()) {
  188. case PREG_INTERNAL_ERROR:
  189. $error = 'Internal PCRE error.';
  190. break;
  191. case PREG_BACKTRACK_LIMIT_ERROR:
  192. $error = 'pcre.backtrack_limit reached.';
  193. break;
  194. case PREG_RECURSION_LIMIT_ERROR:
  195. $error = 'pcre.recursion_limit reached.';
  196. break;
  197. case PREG_BAD_UTF8_ERROR:
  198. $error = 'Malformed UTF-8 data.';
  199. break;
  200. case PREG_BAD_UTF8_OFFSET_ERROR:
  201. $error = 'Offset doesn\'t correspond to the begin of a valid UTF-8 code point.';
  202. break;
  203. default:
  204. $error = 'Unable to parse.';
  205. }
  206. throw new ParseException($error, $this->getRealCurrentLineNb() + 1, $this->currentLine);
  207. }
  208. if ($isRef) {
  209. $this->refs[$isRef] = end($data);
  210. }
  211. }
  212. if (isset($mbEncoding)) {
  213. mb_internal_encoding($mbEncoding);
  214. }
  215. return empty($data) ? null : $data;
  216. }
  217. /**
  218. * Returns the current line number (takes the offset into account).
  219. *
  220. * @return integer The current line number
  221. */
  222. private function getRealCurrentLineNb()
  223. {
  224. return $this->currentLineNb + $this->offset;
  225. }
  226. /**
  227. * Returns the current line indentation.
  228. *
  229. * @return integer The current line indentation
  230. */
  231. private function getCurrentLineIndentation()
  232. {
  233. return strlen($this->currentLine) - strlen(ltrim($this->currentLine, ' '));
  234. }
  235. /**
  236. * Returns the next embed block of YAML.
  237. *
  238. * @param integer $indentation The indent level at which the block is to be read, or null for default
  239. *
  240. * @return string A YAML string
  241. *
  242. * @throws ParseException When indentation problem are detected
  243. */
  244. private function getNextEmbedBlock($indentation = null)
  245. {
  246. $this->moveToNextLine();
  247. if (null === $indentation) {
  248. $newIndent = $this->getCurrentLineIndentation();
  249. if (!$this->isCurrentLineEmpty() && 0 == $newIndent) {
  250. throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
  251. }
  252. } else {
  253. $newIndent = $indentation;
  254. }
  255. $data = array(substr($this->currentLine, $newIndent));
  256. while ($this->moveToNextLine()) {
  257. if ($this->isCurrentLineEmpty()) {
  258. if ($this->isCurrentLineBlank()) {
  259. $data[] = substr($this->currentLine, $newIndent);
  260. }
  261. continue;
  262. }
  263. $indent = $this->getCurrentLineIndentation();
  264. if (preg_match('#^(?P<text> *)$#', $this->currentLine, $match)) {
  265. // empty line
  266. $data[] = $match['text'];
  267. } elseif ($indent >= $newIndent) {
  268. $data[] = substr($this->currentLine, $newIndent);
  269. } elseif (0 == $indent) {
  270. $this->moveToPreviousLine();
  271. break;
  272. } else {
  273. throw new ParseException('Indentation problem.', $this->getRealCurrentLineNb() + 1, $this->currentLine);
  274. }
  275. }
  276. return implode("\n", $data);
  277. }
  278. /**
  279. * Moves the parser to the next line.
  280. *
  281. * @return Boolean
  282. */
  283. private function moveToNextLine()
  284. {
  285. if ($this->currentLineNb >= count($this->lines) - 1) {
  286. return false;
  287. }
  288. $this->currentLine = $this->lines[++$this->currentLineNb];
  289. return true;
  290. }
  291. /**
  292. * Moves the parser to the previous line.
  293. */
  294. private function moveToPreviousLine()
  295. {
  296. $this->currentLine = $this->lines[--$this->currentLineNb];
  297. }
  298. /**
  299. * Parses a YAML value.
  300. *
  301. * @param string $value A YAML value
  302. *
  303. * @return mixed A PHP value
  304. *
  305. * @throws ParseException When reference does not exist
  306. */
  307. private function parseValue($value, $exceptionOnInvalidType, $objectSupport)
  308. {
  309. if (0 === strpos($value, '*')) {
  310. if (false !== $pos = strpos($value, '#')) {
  311. $value = substr($value, 1, $pos - 2);
  312. } else {
  313. $value = substr($value, 1);
  314. }
  315. if (!array_key_exists($value, $this->refs)) {
  316. throw new ParseException(sprintf('Reference "%s" does not exist.', $value), $this->currentLine);
  317. }
  318. return $this->refs[$value];
  319. }
  320. if (preg_match('/^(?P<separator>\||>)(?P<modifiers>\+|\-|\d+|\+\d+|\-\d+|\d+\+|\d+\-)?(?P<comments> +#.*)?$/', $value, $matches)) {
  321. $modifiers = isset($matches['modifiers']) ? $matches['modifiers'] : '';
  322. return $this->parseFoldedScalar($matches['separator'], preg_replace('#\d+#', '', $modifiers), intval(abs($modifiers)));
  323. }
  324. try {
  325. return Inline::parse($value, $exceptionOnInvalidType, $objectSupport);
  326. } catch (ParseException $e) {
  327. $e->setParsedLine($this->getRealCurrentLineNb() + 1);
  328. $e->setSnippet($this->currentLine);
  329. throw $e;
  330. }
  331. }
  332. /**
  333. * Parses a folded scalar.
  334. *
  335. * @param string $separator The separator that was used to begin this folded scalar (| or >)
  336. * @param string $indicator The indicator that was used to begin this folded scalar (+ or -)
  337. * @param integer $indentation The indentation that was used to begin this folded scalar
  338. *
  339. * @return string The text value
  340. */
  341. private function parseFoldedScalar($separator, $indicator = '', $indentation = 0)
  342. {
  343. $separator = '|' == $separator ? "\n" : ' ';
  344. $text = '';
  345. $notEOF = $this->moveToNextLine();
  346. while ($notEOF && $this->isCurrentLineBlank()) {
  347. $text .= "\n";
  348. $notEOF = $this->moveToNextLine();
  349. }
  350. if (!$notEOF) {
  351. return '';
  352. }
  353. if (!preg_match('#^(?P<indent>'.($indentation ? str_repeat(' ', $indentation) : ' +').')(?P<text>.*)$#u', $this->currentLine, $matches)) {
  354. $this->moveToPreviousLine();
  355. return '';
  356. }
  357. $textIndent = $matches['indent'];
  358. $previousIndent = 0;
  359. $text .= $matches['text'].$separator;
  360. while ($this->currentLineNb + 1 < count($this->lines)) {
  361. $this->moveToNextLine();
  362. if (preg_match('#^(?P<indent> {'.strlen($textIndent).',})(?P<text>.+)$#u', $this->currentLine, $matches)) {
  363. if (' ' == $separator && $previousIndent != $matches['indent']) {
  364. $text = substr($text, 0, -1)."\n";
  365. }
  366. $previousIndent = $matches['indent'];
  367. $text .= str_repeat(' ', $diff = strlen($matches['indent']) - strlen($textIndent)).$matches['text'].($diff ? "\n" : $separator);
  368. } elseif (preg_match('#^(?P<text> *)$#', $this->currentLine, $matches)) {
  369. $text .= preg_replace('#^ {1,'.strlen($textIndent).'}#', '', $matches['text'])."\n";
  370. } else {
  371. $this->moveToPreviousLine();
  372. break;
  373. }
  374. }
  375. if (' ' == $separator) {
  376. // replace last separator by a newline
  377. $text = preg_replace('/ (\n*)$/', "\n$1", $text);
  378. }
  379. switch ($indicator) {
  380. case '':
  381. $text = preg_replace('#\n+$#s', "\n", $text);
  382. break;
  383. case '+':
  384. break;
  385. case '-':
  386. $text = preg_replace('#\n+$#s', '', $text);
  387. break;
  388. }
  389. return $text;
  390. }
  391. /**
  392. * Returns true if the next line is indented.
  393. *
  394. * @return Boolean Returns true if the next line is indented, false otherwise
  395. */
  396. private function isNextLineIndented()
  397. {
  398. $currentIndentation = $this->getCurrentLineIndentation();
  399. $notEOF = $this->moveToNextLine();
  400. while ($notEOF && $this->isCurrentLineEmpty()) {
  401. $notEOF = $this->moveToNextLine();
  402. }
  403. if (false === $notEOF) {
  404. return false;
  405. }
  406. $ret = false;
  407. if ($this->getCurrentLineIndentation() <= $currentIndentation) {
  408. $ret = true;
  409. }
  410. $this->moveToPreviousLine();
  411. return $ret;
  412. }
  413. /**
  414. * Returns true if the current line is blank or if it is a comment line.
  415. *
  416. * @return Boolean Returns true if the current line is empty or if it is a comment line, false otherwise
  417. */
  418. private function isCurrentLineEmpty()
  419. {
  420. return $this->isCurrentLineBlank() || $this->isCurrentLineComment();
  421. }
  422. /**
  423. * Returns true if the current line is blank.
  424. *
  425. * @return Boolean Returns true if the current line is blank, false otherwise
  426. */
  427. private function isCurrentLineBlank()
  428. {
  429. return '' == trim($this->currentLine, ' ');
  430. }
  431. /**
  432. * Returns true if the current line is a comment line.
  433. *
  434. * @return Boolean Returns true if the current line is a comment line, false otherwise
  435. */
  436. private function isCurrentLineComment()
  437. {
  438. //checking explicitly the first char of the trim is faster than loops or strpos
  439. $ltrimmedLine = ltrim($this->currentLine, ' ');
  440. return $ltrimmedLine[0] === '#';
  441. }
  442. /**
  443. * Cleanups a YAML string to be parsed.
  444. *
  445. * @param string $value The input YAML string
  446. *
  447. * @return string A cleaned up YAML string
  448. */
  449. private function cleanup($value)
  450. {
  451. $value = str_replace(array("\r\n", "\r"), "\n", $value);
  452. if (!preg_match("#\n$#", $value)) {
  453. $value .= "\n";
  454. }
  455. // strip YAML header
  456. $count = 0;
  457. $value = preg_replace('#^\%YAML[: ][\d\.]+.*\n#su', '', $value, -1, $count);
  458. $this->offset += $count;
  459. // remove leading comments
  460. $trimmedValue = preg_replace('#^(\#.*?\n)+#s', '', $value, -1, $count);
  461. if ($count == 1) {
  462. // items have been removed, update the offset
  463. $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
  464. $value = $trimmedValue;
  465. }
  466. // remove start of the document marker (---)
  467. $trimmedValue = preg_replace('#^\-\-\-.*?\n#s', '', $value, -1, $count);
  468. if ($count == 1) {
  469. // items have been removed, update the offset
  470. $this->offset += substr_count($value, "\n") - substr_count($trimmedValue, "\n");
  471. $value = $trimmedValue;
  472. // remove end of the document marker (...)
  473. $value = preg_replace('#\.\.\.\s*$#s', '', $value);
  474. }
  475. return $value;
  476. }
  477. }