Grammar.php 6.6KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. <?php
  2. /*
  3. * This file is part of SwiftMailer.
  4. * (c) 2004-2009 Chris Corbyn
  5. *
  6. * For the full copyright and license information, please view the LICENSE
  7. * file that was distributed with this source code.
  8. */
  9. /**
  10. * Defines the grammar to use for validation; implements the RFC 2822 (and friends) ABNF grammar definitions.
  11. *
  12. * @package Swift
  13. * @subpackage Mime
  14. * @author Fabien Potencier
  15. * @author Chris Corbyn
  16. */
  class Swift_Mime_Grammar
  {
      /**
       * Special characters used in the syntax which need to be escaped.
       *
       * Shared by every instance; filled in once by init().
       *
       * @var string[]
       */
      private static $_specials = array();

      /**
       * Tokens defined in RFC 2822 (and some related RFCs).
       *
       * Maps the ABNF token name to a PCRE fragment (without delimiters).
       * Shared by every instance; filled in once by init().
       *
       * @var string[]
       */
      private static $_grammar = array();

      /**
       * Initialize some RFC 2822 (and friends) ABNF grammar definitions.
       */
      public function __construct()
      {
          $this->init();
      }

      /**
       * Re-run the initialization when an instance is unserialized.
       *
       * The tables are static, so they are not carried by the serialized
       * object; init() rebuilds them if they are still empty.
       */
      public function __wakeup()
      {
          $this->init();
      }

      /**
       * Populate the static specials list and grammar table.
       *
       * Does nothing when the specials list has already been filled, so the
       * tables are built at most once no matter how many instances exist.
       * Definitions are built bottom-up: every fragment only refers to
       * fragments assigned on earlier lines.
       */
      protected function init()
      {
          // Already initialized by an earlier instance.
          if (count(self::$_specials) > 0) {
              return;
          }

          self::$_specials = array(
              '(', ')', '<', '>', '[', ']',
              ':', ';', '@', ',', '.', '"'
          );

          /*** Refer to RFC 2822 for ABNF grammar ***/

          // All basic building blocks

          // RFC 2822 section 3.2.1: %d1-8 / %d11 / %d12 / %d14-31 / %d127.
          // Decimal 31 is \x1F; the upper bound used to be written \x19, which
          // wrongly excluded the control characters \x1A-\x1F.
          self::$_grammar['NO-WS-CTL'] = '[\x01-\x08\x0B\x0C\x0E-\x1F\x7F]';
          self::$_grammar['WSP'] = '[ \t]';
          self::$_grammar['CRLF'] = '(?:\r\n)';
          self::$_grammar['FWS'] = '(?:(?:' . self::$_grammar['WSP'] . '*' .
              self::$_grammar['CRLF'] . ')?' . self::$_grammar['WSP'] . ')';
          // NOTE(review): RFC 2822 "text" also admits HTAB (%d9); it is left
          // excluded here exactly as before -- confirm before widening.
          self::$_grammar['text'] = '[\x00-\x08\x0B\x0C\x0E-\x7F]';
          self::$_grammar['quoted-pair'] = '(?:\\\\' . self::$_grammar['text'] . ')';
          self::$_grammar['ctext'] = '(?:' . self::$_grammar['NO-WS-CTL'] .
              '|[\x21-\x27\x2A-\x5B\x5D-\x7E])';
          // Uses recursive PCRE (?1) -- could be a weak point: (?1) recurses into
          // capture group 1 of the complete pattern this fragment ends up in,
          // which is only the comment group when that group is opened first.
          self::$_grammar['ccontent'] = '(?:' . self::$_grammar['ctext'] . '|' .
              self::$_grammar['quoted-pair'] . '|(?1))';
          // The outer parentheses form the capture group targeted by (?1) above.
          self::$_grammar['comment'] = '(\((?:' . self::$_grammar['FWS'] . '|' .
              self::$_grammar['ccontent'] . ')*' . self::$_grammar['FWS'] . '?\))';
          self::$_grammar['CFWS'] = '(?:(?:' . self::$_grammar['FWS'] . '?' .
              self::$_grammar['comment'] . ')*(?:(?:' . self::$_grammar['FWS'] . '?' .
              self::$_grammar['comment'] . ')|' . self::$_grammar['FWS'] . '))';
          self::$_grammar['qtext'] = '(?:' . self::$_grammar['NO-WS-CTL'] .
              '|[\x21\x23-\x5B\x5D-\x7E])';
          self::$_grammar['qcontent'] = '(?:' . self::$_grammar['qtext'] . '|' .
              self::$_grammar['quoted-pair'] . ')';
          self::$_grammar['quoted-string'] = '(?:' . self::$_grammar['CFWS'] . '?"' .
              '(' . self::$_grammar['FWS'] . '?' . self::$_grammar['qcontent'] . ')*' .
              self::$_grammar['FWS'] . '?"' . self::$_grammar['CFWS'] . '?)';
          self::$_grammar['atext'] = '[a-zA-Z0-9!#\$%&\'\*\+\-\/=\?\^_`\{\}\|~]';
          self::$_grammar['atom'] = '(?:' . self::$_grammar['CFWS'] . '?' .
              self::$_grammar['atext'] . '+' . self::$_grammar['CFWS'] . '?)';
          self::$_grammar['dot-atom-text'] = '(?:' . self::$_grammar['atext'] . '+' .
              '(\.' . self::$_grammar['atext'] . '+)*)';
          self::$_grammar['dot-atom'] = '(?:' . self::$_grammar['CFWS'] . '?' .
              self::$_grammar['dot-atom-text'] . '+' . self::$_grammar['CFWS'] . '?)';
          self::$_grammar['word'] = '(?:' . self::$_grammar['atom'] . '|' .
              self::$_grammar['quoted-string'] . ')';
          self::$_grammar['phrase'] = '(?:' . self::$_grammar['word'] . '+?)';
          self::$_grammar['no-fold-quote'] = '(?:"(?:' . self::$_grammar['qtext'] .
              '|' . self::$_grammar['quoted-pair'] . ')*")';
          self::$_grammar['dtext'] = '(?:' . self::$_grammar['NO-WS-CTL'] .
              '|[\x21-\x5A\x5E-\x7E])';
          self::$_grammar['no-fold-literal'] = '(?:\[(?:' . self::$_grammar['dtext'] .
              '|' . self::$_grammar['quoted-pair'] . ')*\])';

          // Message IDs
          self::$_grammar['id-left'] = '(?:' . self::$_grammar['dot-atom-text'] . '|' .
              self::$_grammar['no-fold-quote'] . ')';
          self::$_grammar['id-right'] = '(?:' . self::$_grammar['dot-atom-text'] . '|' .
              self::$_grammar['no-fold-literal'] . ')';

          // Addresses, mailboxes and paths
          self::$_grammar['local-part'] = '(?:' . self::$_grammar['dot-atom'] . '|' .
              self::$_grammar['quoted-string'] . ')';
          self::$_grammar['dcontent'] = '(?:' . self::$_grammar['dtext'] . '|' .
              self::$_grammar['quoted-pair'] . ')';
          self::$_grammar['domain-literal'] = '(?:' . self::$_grammar['CFWS'] . '?\[(' .
              self::$_grammar['FWS'] . '?' . self::$_grammar['dcontent'] . ')*?' .
              self::$_grammar['FWS'] . '?\]' . self::$_grammar['CFWS'] . '?)';
          self::$_grammar['domain'] = '(?:' . self::$_grammar['dot-atom'] . '|' .
              self::$_grammar['domain-literal'] . ')';
          self::$_grammar['addr-spec'] = '(?:' . self::$_grammar['local-part'] . '@' .
              self::$_grammar['domain'] . ')';
      }

      /**
       * Get the grammar defined for $name token.
       *
       * @param string $name exactly as written in the RFC
       *
       * @return string the PCRE fragment (no delimiters) for the token
       *
       * @throws Swift_RfcComplianceException if no token called $name is defined
       */
      public function getDefinition($name)
      {
          if (array_key_exists($name, self::$_grammar)) {
              return self::$_grammar[$name];
          }

          throw new Swift_RfcComplianceException(
              "No such grammar '" . $name . "' defined."
          );
      }

      /**
       * Returns the tokens defined in RFC 2822 (and some related RFCs).
       *
       * @return array token name => PCRE fragment
       */
      public function getGrammarDefinitions()
      {
          return self::$_grammar;
      }

      /**
       * Returns the current special characters used in the syntax which need to be escaped.
       *
       * @return array
       */
      public function getSpecials()
      {
          return self::$_specials;
      }

      /**
       * Escape special characters in a string (convert to quoted-pairs).
       *
       * The backslash is always escaped, and it is handled first so that the
       * backslashes inserted for the remaining characters are not escaped again.
       *
       * @param string   $token
       * @param string[] $include additional chars to escape
       * @param string[] $exclude chars from escaping
       *
       * @return string
       */
      public function escapeSpecials($token, $include = array(), $exclude = array())
      {
          $chars = array_merge(
              array('\\'),
              array_diff(self::$_specials, $exclude),
              $include
          );

          foreach ($chars as $char) {
              $token = str_replace($char, '\\' . $char, $token);
          }

          return $token;
      }
  }