SqlFormatter.php 30KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788
  1. <?php
  /**
   * SQL Formatter is a collection of utilities for debugging SQL queries.
   * It includes methods for formatting, syntax highlighting, removing comments, etc.
   *
   * @package SqlFormatter
   * @author Jeremy Dorn <jeremy@jeremydorn.com>
   * @author Florin Patan <florinpatan@gmail.com>
   * @copyright 2013 Jeremy Dorn
   * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
   * @link http://github.com/jdorn/sql-formatter
   * @version 1.2.5
   */
  class SqlFormatter
  {
      // Token type identifiers, stored under the TOKEN_TYPE key of every token
      const TOKEN_TYPE_WHITESPACE = 0;
      const TOKEN_TYPE_WORD = 1;
      const TOKEN_TYPE_QUOTE = 2;
      const TOKEN_TYPE_BACKTICK_QUOTE = 3;
      const TOKEN_TYPE_RESERVED = 4;
      const TOKEN_TYPE_SPECIAL_RESERVED = 5;
      const TOKEN_TYPE_BOUNDARY = 6;
      const TOKEN_TYPE_COMMENT = 7;
      const TOKEN_TYPE_BLOCK_COMMENT = 8;
      const TOKEN_TYPE_NUMBER = 9;
      const TOKEN_TYPE_ERROR = 10;

      // Array keys of the two components of a token
      const TOKEN_TYPE = 0;
      const TOKEN_VALUE = 1;

      // Reserved words (for syntax highlighting). Sorted longest-first by init().
      protected static $reserved = array(
          'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'AS', 'ASC',
          'AUTOCOMMIT', 'AUTO_INCREMENT', 'BACKUP', 'BEGIN', 'BETWEEN', 'BINLOG', 'BOTH', 'CASCADE', 'CASE', 'CHANGE', 'CHANGED',
          'CHARSET', 'CHECK', 'CHECKSUM', 'COLLATE', 'COLLATION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMMITTED', 'COMPRESSED', 'CONCURRENT',
          'CONSTRAINT', 'CONTAINS', 'CONVERT', 'COUNT', 'CREATE', 'CROSS', 'CURRENT_TIMESTAMP', 'DATABASE', 'DATABASES', 'DAY', 'DAY_HOUR', 'DAY_MINUTE',
          'DAY_SECOND', 'DEFAULT', 'DEFINER', 'DELAYED', 'DELETE', 'DESC', 'DESCRIBE', 'DETERMINISTIC', 'DISTINCT', 'DISTINCTROW', 'DIV',
          'DO', 'DROP', 'DUMPFILE', 'DUPLICATE', 'DYNAMIC', 'ELSE', 'ENCLOSED', 'END', 'ENGINE', 'ENGINE_TYPE', 'ENGINES', 'ESCAPE', 'ESCAPED', 'EVENTS', 'EXECUTE',
          'EXISTS', 'EXPLAIN', 'EXTENDED', 'FAST', 'FIELDS', 'FILE', 'FIRST', 'FIXED', 'FLUSH', 'FOR', 'FORCE', 'FOREIGN', 'FULL', 'FULLTEXT',
          'FUNCTION', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP_CONCAT', 'HEAP', 'HIGH_PRIORITY', 'HOSTS', 'HOUR', 'HOUR_MINUTE',
          'HOUR_SECOND', 'IDENTIFIED', 'IF', 'IGNORE', 'IN', 'INDEX', 'INDEXES', 'INFILE', 'INSERT', 'INSERT_ID', 'INSERT_METHOD', 'INTERVAL',
          'INTO', 'INVOKER', 'IS', 'ISOLATION', 'KEY', 'KEYS', 'KILL', 'LAST_INSERT_ID', 'LEADING', 'LEVEL', 'LIKE', 'LINEAR',
          'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE',
          'MATCH', 'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY',
          'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW', 'NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', 'OR',
          'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE',
          'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'READ', 'READ_ONLY',
          'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT',
          'RETURN', 'RETURNS', 'REVOKE', 'RLIKE', 'ROLLBACK', 'ROW', 'ROWS', 'ROW_FORMAT', 'SECOND', 'SECURITY', 'SEPARATOR',
          'SERIALIZABLE', 'SESSION', 'SET', 'SHARE', 'SHOW', 'SHUTDOWN', 'SLAVE', 'SONAME', 'SOUNDS', 'SQL',
          'SQL_CACHE', 'SQL_NO_CACHE', 'START', 'STARTING', 'STATUS', 'STOP', 'STORAGE',
          'STRAIGHT_JOIN', 'STRING', 'SUPER', 'TABLE', 'TABLES', 'TEMPORARY', 'TERMINATED', 'THEN', 'TO', 'TRAILING', 'TRANSACTIONAL',
          'TRUNCATE', 'TYPE', 'TYPES', 'UNCOMMITTED', 'UNIQUE', 'UNLOCK', 'UNSIGNED', 'USAGE', 'USE', 'USING', 'VARIABLES',
          'VIEW', 'WHEN', 'WITH', 'WORK', 'WRITE', 'XOR', 'YEAR_MONTH'
      );

      // For SQL formatting
      // These keywords will all be on their own line.
      // Multi-word entries are listed before their single-word prefixes (e.g. 'UNION ALL' before 'UNION').
      protected static $special_reserved = array(
          'SELECT', 'FROM', 'WHERE', 'SET', 'ORDER BY', 'GROUP BY', 'LEFT JOIN', 'OUTER JOIN', 'INNER JOIN', 'RIGHT JOIN', 'JOIN', 'LIMIT',
          'VALUES', 'UPDATE', 'HAVING', 'ADD', 'AFTER', 'ALTER TABLE', 'DELETE FROM', 'UNION ALL', 'UNION', 'EXCEPT', 'INTERSECT'
      );

      // Punctuation that can be used as a boundary between other tokens
      protected static $boundaries = array(',', ';', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&');

      // For syntax highlighting
      // HTML attributes placed on the <span> (or <pre>) wrapping each token type
      public static $quote_attributes = 'style="color: blue;"';
      public static $backtick_quote_attributes = 'style="color: purple;"';
      public static $reserved_attributes = 'style="font-weight:bold;"';
      public static $boundary_attributes = '';
      public static $number_attributes = 'style="color: green;"';
      public static $word_attributes = 'style="color: #333;"';
      public static $error_attributes = 'style="background-color: red;"';
      public static $comment_attributes = 'style="color: #aaa;"';
      public static $pre_attributes = 'style="color: black; background-color: white;"';

      // The string substituted for each level of indentation when formatting SQL
      public static $tab = ' ';

      // This flag tells us if SqlFormatter has been initialized
      protected static $init;

      // Regular expressions for tokenizing (built by init())
      protected static $regex_boundaries;
      protected static $regex_reserved;
      protected static $regex_special_reserved;

      // Cache variables
      // Only tokens shorter than this size will be cached. Somewhere between 10 and 20 seems to work well for most cases.
      public static $max_cachekey_size = 15;
      protected static $token_cache = array();
      protected static $cache_hits = 0;
      protected static $cache_misses = 0;

      /**
       * Get stats about the token cache
       *
       * @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes
       *               ('size' is the length of the serialized cache)
       */
      public static function getCacheStats()
      {
          return array(
              'hits' => self::$cache_hits,
              'misses' => self::$cache_misses,
              'entries' => count(self::$token_cache),
              'size' => strlen(serialize(self::$token_cache))
          );
      }

      /**
       * Stuff that only needs to be done once. Builds regular expressions and sorts the reserved words.
       */
      protected static function init()
      {
          if (self::$init) {
              return;
          }

          // Sort reserved word list from longest word to shortest so longer words win in the alternation
          usort(self::$reserved, array(__CLASS__, 'sortLength'));

          // Set up regular expressions
          $quote = array(__CLASS__, 'quote_regex');
          self::$regex_boundaries = '(' . implode('|', array_map($quote, self::$boundaries)) . ')';
          self::$regex_reserved = '(' . implode('|', array_map($quote, self::$reserved)) . ')';
          // A space inside a special reserved word ("ORDER BY") may be any run of whitespace in the query
          self::$regex_special_reserved = str_replace(
              ' ',
              '\\s+',
              '(' . implode('|', array_map($quote, self::$special_reserved)) . ')'
          );

          self::$init = true;
      }

      /**
       * Return the next token and token type in a SQL string.
       * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
       *
       * Relies on the regular expressions built by init(); $string must not be empty.
       *
       * @param String $string The SQL string
       * @param array $previous The result of the previous getNextToken() call
       *
       * @return Array An associative array containing the type and value of the token.
       */
      protected static function getNextToken($string, $previous = null)
      {
          // Whitespace
          if (preg_match('/^\s+/', $string, $matches)) {
              return array(
                  self::TOKEN_VALUE => $matches[0],
                  self::TOKEN_TYPE => self::TOKEN_TYPE_WHITESPACE
              );
          }

          // Comment
          // The second character is read through isset() so a trailing "-" or "/" never
          // touches an undefined string offset.
          $first = $string[0];
          $second = isset($string[1]) ? $string[1] : '';
          $is_line_comment = ($first === '#' || ($first === '-' && $second === '-'));
          $is_block_comment = ($first === '/' && $second === '*');

          if ($is_line_comment || $is_block_comment) {
              if ($is_line_comment) {
                  // Comment until end of line (the newline itself is not part of the token)
                  $last = strpos($string, "\n");
                  $type = self::TOKEN_TYPE_COMMENT;
              } else {
                  // Comment until closing comment tag. The false check must happen before
                  // adding 2, otherwise an unterminated comment would yield just "/*".
                  $close = strpos($string, "*/", 2);
                  $last = ($close === false) ? false : $close + 2;
                  $type = self::TOKEN_TYPE_BLOCK_COMMENT;
              }

              // No terminator found: the comment runs to the end of the string
              if ($last === false) {
                  $last = strlen($string);
              }

              return array(
                  self::TOKEN_VALUE => substr($string, 0, $last),
                  self::TOKEN_TYPE => $type
              );
          }

          // Quoted String
          if ($first === '"' || $first === '\'' || $first === '`') {
              // This checks for the following patterns:
              // 1. backtick quoted string using `` to escape
              // 2. double quoted string using "" or \" to escape
              // 3. single quoted string using '' or \' to escape
              // An unterminated string runs to the end of the input. Each quote style uses a
              // single alternation so no character can be matched by two branches.
              if (preg_match('/^((`(?:[^`]|``)*($|`))|("(?:[^"\\\\]|""|\\\\.)*($|"))|(\'(?:[^\'\\\\]|\'\'|\\\\.)*($|\')))/', $string, $matches)) {
                  return array(
                      self::TOKEN_VALUE => $matches[1],
                      self::TOKEN_TYPE => ($first === '`') ? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE
                  );
              }
          }

          // Number (must be followed by end of string, whitespace, a quote character or a boundary)
          if (preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|["\'`]|' . self::$regex_boundaries . ')/', $string, $matches)) {
              return array(
                  self::TOKEN_VALUE => $matches[1],
                  self::TOKEN_TYPE => self::TOKEN_TYPE_NUMBER
              );
          }

          // Boundary Character (punctuation and symbols)
          if (preg_match('/^(' . self::$regex_boundaries . ')/', $string, $matches)) {
              return array(
                  self::TOKEN_VALUE => $matches[1],
                  self::TOKEN_TYPE => self::TOKEN_TYPE_BOUNDARY
              );
          }

          // A reserved word cannot be preceded by a '.'
          // this makes it so in "mytable.from", "from" is not considered a reserved word
          if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
              $upper = strtoupper($string);

              // Special Reserved Word
              if (preg_match('/^(' . self::$regex_special_reserved . ')($|\s|' . self::$regex_boundaries . ')/', $upper, $matches)) {
                  // Take the value from the original string to preserve the query's letter case
                  return array(
                      self::TOKEN_TYPE => self::TOKEN_TYPE_SPECIAL_RESERVED,
                      self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
                  );
              }

              // Other Reserved Word
              if (preg_match('/^(' . self::$regex_reserved . ')($|\s|' . self::$regex_boundaries . ')/', $upper, $matches)) {
                  return array(
                      self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED,
                      self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
                  );
              }
          }

          // Non reserved word: everything up to the next whitespace, quote or boundary
          preg_match('/^(.*?)($|\s|["\'`]|' . self::$regex_boundaries . ')/', $string, $matches);

          return array(
              self::TOKEN_VALUE => $matches[1],
              self::TOKEN_TYPE => self::TOKEN_TYPE_WORD
          );
      }

      /**
       * Takes a SQL string and breaks it into tokens.
       * Each token is an associative array with type and value.
       *
       * If the tokenizer stops making progress, the unprocessed remainder is appended
       * as a single TOKEN_TYPE_ERROR token and tokenizing stops.
       *
       * @param String $string The SQL string
       *
       * @return Array An array of tokens.
       */
      protected static function tokenize($string)
      {
          self::init();

          $tokens = array();

          // Used to make sure the string keeps shrinking on each iteration
          $old_string_len = strlen($string) + 1;

          $token = null;
          $current_length = strlen($string);

          // Keep processing the string until it is empty
          while ($current_length) {
              // If the string stopped shrinking, there was a problem
              if ($old_string_len <= $current_length) {
                  $tokens[] = array(
                      self::TOKEN_VALUE => $string,
                      self::TOKEN_TYPE => self::TOKEN_TYPE_ERROR
                  );

                  return $tokens;
              }
              $old_string_len = $current_length;

              // The token that follows a '.' depends on that context (reserved words are not
              // recognised there), so it must neither be read from nor written to the cache,
              // which is keyed on the upcoming text only.
              $after_dot = ($token !== null && $token[self::TOKEN_VALUE] === '.');

              // Determine if we can use caching
              if (!$after_dot && $current_length >= self::$max_cachekey_size) {
                  $cacheKey = substr($string, 0, self::$max_cachekey_size);
              } else {
                  $cacheKey = false;
              }

              if ($cacheKey !== false && isset(self::$token_cache[$cacheKey])) {
                  // Retrieve from cache
                  $token = self::$token_cache[$cacheKey];
                  $token_length = strlen($token[self::TOKEN_VALUE]);
                  self::$cache_hits++;
              } else {
                  // Get the next token and the token type
                  $token = self::getNextToken($string, $token);
                  $token_length = strlen($token[self::TOKEN_VALUE]);
                  self::$cache_misses++;

                  // If the token is shorter than the max length, store it in cache
                  if ($cacheKey !== false && $token_length < self::$max_cachekey_size) {
                      self::$token_cache[$cacheKey] = $token;
                  }
              }

              $tokens[] = $token;

              // Advance the string
              $string = substr($string, $token_length);
              $current_length -= $token_length;
          }

          return $tokens;
      }

      /**
       * Format the whitespace in a SQL string to make it easier to read.
       *
       * @param String $string The SQL string
       * @param boolean $highlight If true, syntax highlighting will also be performed
       *
       * @return String The formatted SQL. When $highlight is true the result contains HTML
       *                styles and is wrapped in a <pre> tag; otherwise it is plain text.
       */
      public static function format($string, $highlight=true)
      {
          // This variable will be populated with formatted html
          $return = '';

          // Use an actual tab while formatting and then switch out with self::$tab at the end
          $tab = "\t";

          $indent_level = 0;
          $newline = false;
          $inline_parentheses = false;
          $increase_special_indent = false;
          $increase_block_indent = false;
          // Stack of open indents ('special' or 'block'), most recent first
          $indent_types = array();
          $added_newline = false;

          // Tokenize String
          $tokens = self::tokenize($string);

          // Format token by token
          foreach ($tokens as $i => $token) {
              $type = $token[self::TOKEN_TYPE];
              $value = $token[self::TOKEN_VALUE];

              // Don't process whitespace
              if ($type === self::TOKEN_TYPE_WHITESPACE) {
                  continue;
              }

              // Get highlighted token if doing syntax highlighting, raw text otherwise
              if ($highlight) {
                  $highlighted = self::highlightToken($token);
              } else {
                  $highlighted = $value;
              }

              // If we are increasing the special indent level now
              if ($increase_special_indent) {
                  $indent_level++;
                  $increase_special_indent = false;
                  array_unshift($indent_types, 'special');
              }

              // If we are increasing the block indent level now
              if ($increase_block_indent) {
                  $indent_level++;
                  $increase_block_indent = false;
                  array_unshift($indent_types, 'block');
              }

              // Display comments directly where they appear in the source
              if ($type === self::TOKEN_TYPE_COMMENT || $type === self::TOKEN_TYPE_BLOCK_COMMENT) {
                  if ($type === self::TOKEN_TYPE_BLOCK_COMMENT) {
                      $return .= "\n" . str_repeat($tab, $indent_level);
                  }

                  $return .= $highlighted;
                  $newline = true;
                  continue;
              }

              // If we need a new line before the token
              if ($newline) {
                  $return .= "\n" . str_repeat($tab, $indent_level);
                  $newline = false;
                  $added_newline = true;
              } else {
                  $added_newline = false;
              }

              if ($value === '(') {
                  // Opening parentheses increase the block indent level and start a new line

                  // First check if this should be an inline parentheses block
                  // Examples are "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2)
                  // Allow up to 3 non-whitespace tokens inside inline parentheses
                  $nonwhitespace = 0;
                  for ($j = 1; $j <= 8; $j++) {
                      // Reached end of string
                      if (!isset($tokens[$i + $j])) {
                          break;
                      }

                      $next = $tokens[$i + $j];

                      // Ignore whitespace
                      if ($next[self::TOKEN_TYPE] === self::TOKEN_TYPE_WHITESPACE) {
                          continue;
                      }

                      // Reached closing parentheses
                      if ($next[self::TOKEN_VALUE] === ')') {
                          $inline_parentheses = true;
                          break;
                      }

                      // Reached an invalid token for inline parentheses
                      if ($next[self::TOKEN_VALUE] === ';' || $next[self::TOKEN_VALUE] === '(') {
                          break;
                      }

                      // Reached an invalid token type for inline parentheses
                      if ($next[self::TOKEN_TYPE] === self::TOKEN_TYPE_SPECIAL_RESERVED
                          || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_COMMENT
                          || $next[self::TOKEN_TYPE] === self::TOKEN_TYPE_BLOCK_COMMENT
                      ) {
                          break;
                      }

                      // Too many tokens for inline parentheses
                      if ($nonwhitespace >= 3) {
                          break;
                      }

                      $nonwhitespace++;
                  }

                  // Take out the preceding space unless there was whitespace there in the original query
                  if (isset($tokens[$i - 1]) && $tokens[$i - 1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
                      $return = rtrim($return, ' ');
                  }

                  if (!$inline_parentheses) {
                      $increase_block_indent = true;
                      // Add a newline after the parentheses
                      $newline = true;
                  }
              } elseif ($value === ')') {
                  // Closing parentheses decrease the block indent level

                  // Remove whitespace before the closing parentheses
                  $return = rtrim($return, ' ');

                  if ($inline_parentheses) {
                      // End of an inline parentheses section
                      $inline_parentheses = false;
                  } else {
                      $indent_level--;

                      // Reset indent level: drop every 'special' indent opened inside this block,
                      // then the block entry itself
                      while ($indent_type = array_shift($indent_types)) {
                          if ($indent_type === 'special') {
                              $indent_level--;
                          } else {
                              break;
                          }
                      }

                      if ($indent_level < 0) {
                          // This is an error (more closing than opening parentheses)
                          $indent_level = 0;

                          if ($highlight) {
                              $return .= "\n" . self::highlightError($value);
                              continue;
                          }
                      }

                      // Add a newline before the closing parentheses (if not already added)
                      if (!$added_newline) {
                          $return .= "\n" . str_repeat($tab, $indent_level);
                      }
                  }
              } elseif ($value === ',' && !$inline_parentheses) {
                  // Commas start a new line (unless within inline parentheses)
                  $newline = true;
              } elseif ($type === self::TOKEN_TYPE_SPECIAL_RESERVED) {
                  // Special reserved words start a new line and increase the special indent level
                  $increase_special_indent = true;

                  // If the last indent type was 'special', decrease the special indent for this round
                  reset($indent_types);
                  if (current($indent_types) === 'special') {
                      $indent_level--;
                      array_shift($indent_types);
                  }

                  // Add a newline after the special reserved word
                  $newline = true;

                  if (!$added_newline) {
                      // Add a newline before the special reserved word (if not already added)
                      $return .= "\n" . str_repeat($tab, $indent_level);
                  } else {
                      // If we already added a newline, redo the indentation since it may be different now
                      $return = rtrim($return, $tab) . str_repeat($tab, $indent_level);
                  }

                  // If the token may have extra whitespace, collapse it to single spaces
                  if (strpos($value, ' ') !== false || strpos($value, "\n") !== false || strpos($value, "\t") !== false) {
                      $highlighted = preg_replace('/\s+/', ' ', $highlighted);
                  }
              }

              // If the token shouldn't have a space before it
              if ($value === '.' || $value === ',' || $value === ';') {
                  $return = rtrim($return, ' ');
              }

              $return .= $highlighted . ' ';

              // If the token shouldn't have a space after it
              if ($value === '(' || $value === '.') {
                  $return = rtrim($return, ' ');
              }
          }

          // If there are unmatched parentheses
          if ($highlight && in_array('block', $indent_types, true)) {
              $return .= "\n" . self::highlightError("WARNING: unclosed parentheses or section");
          }

          // Replace tab characters with the configuration tab character
          $return = trim(str_replace("\t", self::$tab, $return));

          if ($highlight) {
              $return = "<pre " . self::$pre_attributes . ">" . $return . "</pre>";
          }

          return $return;
      }

      /**
       * Add syntax highlighting to a SQL string
       *
       * @param String $string The SQL string
       *
       * @return String The SQL string with HTML styles applied, wrapped in a <pre> tag
       */
      public static function highlight($string)
      {
          $return = '';

          foreach (self::tokenize($string) as $token) {
              $return .= self::highlightToken($token);
          }

          return "<pre " . self::$pre_attributes . ">" . trim($return) . "</pre>";
      }

      /**
       * Split a SQL string into multiple queries.
       * Uses ";" as a query delimiter.
       *
       * Queries that contain only whitespace and comments are dropped.
       *
       * @param String $string The SQL string
       *
       * @return Array An array of trimmed query strings. Every query that was terminated by ";"
       *               keeps that semicolon; a final unterminated query is returned without one.
       */
      public static function splitQuery($string)
      {
          $queries = array();
          $current_query = '';
          $empty = true;

          foreach (self::tokenize($string) as $token) {
              $type = $token[self::TOKEN_TYPE];

              // If this is a query separator
              if ($token[self::TOKEN_VALUE] === ';') {
                  if (!$empty) {
                      $queries[] = trim($current_query) . ';';
                  }
                  $current_query = '';
                  $empty = true;
                  continue;
              }

              // Anything other than whitespace or a comment makes the query non-empty
              if ($type !== self::TOKEN_TYPE_WHITESPACE
                  && $type !== self::TOKEN_TYPE_COMMENT
                  && $type !== self::TOKEN_TYPE_BLOCK_COMMENT
              ) {
                  $empty = false;
              }

              $current_query .= $token[self::TOKEN_VALUE];
          }

          if (!$empty) {
              $queries[] = trim($current_query);
          }

          return $queries;
      }

      /**
       * Remove all comments from a SQL string
       *
       * Only the comment tokens are dropped; surrounding whitespace is left untouched.
       *
       * @param String $string The SQL string
       *
       * @return String The SQL string without comments
       */
      public static function removeComments($string)
      {
          $result = '';

          foreach (self::tokenize($string) as $token) {
              $type = $token[self::TOKEN_TYPE];

              // Skip comment tokens
              if ($type === self::TOKEN_TYPE_COMMENT || $type === self::TOKEN_TYPE_BLOCK_COMMENT) {
                  continue;
              }

              $result .= $token[self::TOKEN_VALUE];
          }

          return $result;
      }

      /**
       * Highlights a token depending on its type.
       *
       * The token value is HTML-escaped first. Whitespace and error tokens are returned
       * escaped but without a wrapping element.
       *
       * @param Array $token An associative array containing type and value.
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightToken($token)
      {
          $type = $token[self::TOKEN_TYPE];

          // Pass flags and charset explicitly so the output does not depend on the PHP
          // version's defaults; substitute invalid byte sequences where supported instead
          // of letting the whole token collapse to an empty string.
          $flags = ENT_COMPAT;
          if (defined('ENT_SUBSTITUTE')) {
              $flags = $flags | ENT_SUBSTITUTE;
          }
          $value = htmlentities($token[self::TOKEN_VALUE], $flags, 'UTF-8');

          if ($type === self::TOKEN_TYPE_BOUNDARY) {
              return self::highlightBoundary($value);
          }
          if ($type === self::TOKEN_TYPE_WORD) {
              return self::highlightWord($value);
          }
          if ($type === self::TOKEN_TYPE_BACKTICK_QUOTE) {
              return self::highlightBacktickQuote($value);
          }
          if ($type === self::TOKEN_TYPE_QUOTE) {
              return self::highlightQuote($value);
          }
          if ($type === self::TOKEN_TYPE_RESERVED || $type === self::TOKEN_TYPE_SPECIAL_RESERVED) {
              return self::highlightReservedWord($value);
          }
          if ($type === self::TOKEN_TYPE_NUMBER) {
              return self::highlightNumber($value);
          }
          if ($type === self::TOKEN_TYPE_COMMENT || $type === self::TOKEN_TYPE_BLOCK_COMMENT) {
              return self::highlightComment($value);
          }

          return $value;
      }

      /**
       * Highlights a quoted string
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightQuote($value)
      {
          return '<span ' . self::$quote_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a backtick quoted string
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightBacktickQuote($value)
      {
          return '<span ' . self::$backtick_quote_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a reserved word
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightReservedWord($value)
      {
          return '<span ' . self::$reserved_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a boundary token
       *
       * Parentheses are returned as-is, without a wrapping span.
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightBoundary($value)
      {
          if ($value === '(' || $value === ')') {
              return $value;
          }

          return '<span ' . self::$boundary_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a number
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightNumber($value)
      {
          return '<span ' . self::$number_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights an error
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightError($value)
      {
          return '<span ' . self::$error_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a comment
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightComment($value)
      {
          return '<span ' . self::$comment_attributes . '>' . $value . '</span>';
      }

      /**
       * Highlights a word token
       *
       * @param String $value The token's value
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightWord($value)
      {
          return '<span ' . self::$word_attributes . '>' . $value . '</span>';
      }

      /**
       * Helper function for sorting the list of reserved words by length (longest first)
       *
       * @param String $a The first string
       * @param String $b The second string
       *
       * @return int The comparison of the string lengths
       */
      private static function sortLength($a, $b)
      {
          return strlen($b) - strlen($a);
      }

      /**
       * Helper function for building regular expressions for reserved words and boundary characters
       *
       * @param String $a The string to be quoted
       *
       * @return String The quoted string, safe to embed in a "/"-delimited pattern
       */
      private static function quote_regex($a)
      {
          return preg_quote($a, '/');
      }
  }