SqlFormatter.php 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706
  1. <?php
  2. /**
  3. * SQL Formatter is a collection of utilities for debugging SQL queries.
  4. * It includes methods for formatting, syntax highlighting, removing comments, etc.
  5. *
  6. * @package SqlFormatter
  7. * @author Jeremy Dorn <jeremy@jeremydorn.com>
  8. * @copyright 2012 Jeremy Dorn
  9. * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
  10. * @link http://github.com/jdorn/sql-formatter
  11. * @version 1.2.0
  12. */
  class SqlFormatter
  {
      // Reserved words (for syntax highlighting).
      // Multi-word entries such as 'ORDER BY' only match when the words are separated by a single space.
      protected static $reserved = array(
          'ACCESSIBLE', 'ACTION', 'ADD', 'AFTER', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'AS', 'ASC',
          'AUTOCOMMIT', 'AUTO_INCREMENT', 'AVG_ROW_LENGTH', 'BACKUP', 'BEGIN', 'BETWEEN', 'BINLOG', 'BOTH', 'BY', 'CASCADE', 'CASE', 'CHANGE', 'CHANGED',
          'CHARSET', 'CHECK', 'CHECKSUM', 'COLLATE', 'COLLATION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMMITTED', 'COMPRESSED', 'CONCURRENT',
          'CONSTRAINT', 'CONTAINS', 'CONVERT', 'COUNT', 'CREATE', 'CROSS', 'CURRENT_TIMESTAMP', 'DATABASE', 'DATABASES', 'DAY', 'DAY_HOUR', 'DAY_MINUTE',
          'DAY_SECOND', 'DEFINER', 'DELAYED', 'DELAY_KEY_WRITE', 'DELETE', 'DESC', 'DESCRIBE', 'DETERMINISTIC', 'DISTINCT', 'DISTINCTROW', 'DIV',
          'DO', 'DROP', 'DUMPFILE', 'DUPLICATE', 'DYNAMIC', 'ELSE', 'ENCLOSED', 'END', 'ENGINE', 'ENGINES', 'ESCAPE', 'ESCAPED', 'EVENTS', 'EXECUTE',
          'EXISTS', 'EXPLAIN', 'EXTENDED', 'FAST', 'FIELDS', 'FILE', 'FIRST', 'FIXED', 'FLUSH', 'FOR', 'FORCE', 'FOREIGN', 'FROM', 'FULL', 'FULLTEXT',
          'FUNCTION', 'GEMINI', 'GEMINI_SPIN_RETRIES', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP', 'GROUP_CONCAT', 'GROUP BY', 'HAVING', 'HEAP', 'HIGH_PRIORITY', 'HOSTS', 'HOUR', 'HOUR_MINUTE',
          'HOUR_SECOND', 'IDENTIFIED', 'IF', 'IGNORE', 'IN', 'INDEX', 'INDEXES', 'INFILE', 'INNER', 'INNER JOIN', 'INSERT', 'INSERT_ID', 'INSERT_METHOD', 'INTERVAL',
          'INTO', 'INVOKER', 'IS', 'ISOLATION', 'JOIN', 'KEY', 'KEYS', 'KILL', 'LAST_INSERT_ID', 'LEADING', 'LEFT', 'LEFT JOIN', 'LEVEL', 'LIKE', 'LIMIT', 'LINEAR',
          'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE',
          'MASTER_LOG_POS', 'MASTER_PASSWORD', 'MASTER_PORT', 'MASTER_USER', 'MATCH', 'MAX_CONNECTIONS_PER_HOUR', 'MAX_QUERIES_PER_HOUR',
          'MAX_ROWS', 'MAX_UPDATES_PER_HOUR', 'MAX_USER_CONNECTIONS', 'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY',
          'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW', 'NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', 'OR',
          'ORDER', 'ORDER BY', 'OUTER', 'OUTER JOIN', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE',
          'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RAID0', 'RAID_CHUNKS', 'RAID_CHUNKSIZE', 'RAID_TYPE', 'RANGE', 'READ', 'READ_ONLY',
          'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT',
          'RETURN', 'RETURNS', 'REVOKE', 'RIGHT', 'RIGHT JOIN', 'RLIKE', 'ROLLBACK', 'ROW', 'ROWS', 'ROW_FORMAT', 'SECOND', 'SECURITY', 'SELECT', 'SEPARATOR',
          'SERIALIZABLE', 'SESSION', 'SET', 'SHARE', 'SHOW', 'SHUTDOWN', 'SLAVE', 'SONAME', 'SOUNDS', 'SQL', 'SQL_AUTO_IS_NULL', 'SQL_BIG_RESULT',
          'SQL_BIG_SELECTS', 'SQL_BIG_TABLES', 'SQL_BUFFER_RESULT', 'SQL_CACHE', 'SQL_CALC_FOUND_ROWS', 'SQL_LOG_BIN', 'SQL_LOG_OFF',
          'SQL_LOG_UPDATE', 'SQL_LOW_PRIORITY_UPDATES', 'SQL_MAX_JOIN_SIZE', 'SQL_NO_CACHE', 'SQL_QUOTE_SHOW_CREATE', 'SQL_SAFE_UPDATES',
          'SQL_SELECT_LIMIT', 'SQL_SLAVE_SKIP_COUNTER', 'SQL_SMALL_RESULT', 'SQL_WARNINGS', 'START', 'STARTING', 'STATUS', 'STOP', 'STORAGE',
          'STRAIGHT_JOIN', 'STRING', 'STRIPED', 'SUPER', 'TABLE', 'TABLES', 'TEMPORARY', 'TERMINATED', 'THEN', 'TO', 'TRAILING', 'TRANSACTIONAL',
          'TRUNCATE', 'TYPE', 'TYPES', 'UNCOMMITTED', 'UNION', 'UNIQUE', 'UNLOCK', 'UPDATE', 'USAGE', 'USE', 'USING', 'VALUES', 'VARIABLES',
          'VIEW', 'WHEN', 'WHERE', 'WITH', 'WORK', 'WRITE', 'XOR', 'YEAR_MONTH'
      );

      // For SQL formatting.
      // Each of these keywords is placed on its own line by format().
      protected static $special_reserved = array(
          'SELECT', 'FROM', 'WHERE', 'SET', 'ORDER BY', 'GROUP BY', 'LEFT JOIN', 'OUTER JOIN', 'INNER JOIN', 'RIGHT JOIN', 'JOIN', 'LIMIT', 'VALUES', 'UPDATE', 'HAVING'
      );

      // Punctuation that can be used as a boundary between other tokens
      protected static $boundaries = array(',', ';', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&');

      // White space characters. These can also be used as a boundary between other tokens
      protected static $whitespace = array(' ', "\n", "\t", "\r");

      // Characters that start (and end) a quoted string
      protected static $quotes = array('"', "'", '`');

      // For syntax highlighting.
      // Inline CSS applied to the different token types.
      public static $quote_style = 'color: blue;';
      public static $backtick_quote_style = 'color: purple;';
      public static $reserved_style = 'color:black; font-weight:bold;';
      public static $boundary_style = 'color:black;';
      public static $number_style = 'color: green;';
      public static $default_style = 'color: #333;';
      public static $error_style = 'background-color: red; color: black;';
      public static $comment_style = 'color: #aaa;';

      // The string used for one level of indentation when formatting SQL
      public static $tab = ' ';

      // Set to true once the lazily built lookup data (sorted reserved words, merged boundaries) is ready
      protected static $init;

      // Combination of all the boundary characters and all the whitespace characters (built lazily)
      protected static $all_boundaries;

      // Cache variables.
      // The cache key is the first $max_cachekey_size bytes of the remaining input, and only tokens
      // shorter than that are stored, so the key always contains the token plus the character after it.
      // Somewhere between 10 and 20 seems to work well for most cases.
      public static $max_cachekey_size = 15;
      protected static $token_cache = array();
      protected static $cache_hits = 0;
      protected static $cache_misses = 0;

      /**
       * Get stats about the token cache
       *
       * @return Array An array containing the keys 'hits', 'misses', 'entries', and 'size' in bytes
       *               ('size' is the length of the serialized cache).
       */
      public static function getCacheStats()
      {
          return array(
              'hits' => self::$cache_hits,
              'misses' => self::$cache_misses,
              'entries' => count(self::$token_cache),
              'size' => strlen(serialize(self::$token_cache))
          );
      }

      /**
       * Return the next token and token type in a SQL string.
       * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
       *
       * Possible types: 'comment', 'block comment', 'quote', 'backtick quote', 'word', 'number',
       * 'whitespace', 'boundary', 'reserved', 'special reserved', '(', ')', '.' and ','.
       *
       * @param String $string   The SQL string (the not yet consumed remainder of the query)
       * @param array  $previous The result of the previous getNextToken() call
       *
       * @return Array An associative array containing a 'token' and 'type' key.
       */
      protected static function getNextToken($string, $previous = null)
      {
          // The look-ahead calls below can pass an empty remainder (substr() past the end of the
          // string gives '' or false depending on the PHP version). Report it as an empty word
          // instead of reading $string[0], which raises "Uninitialized string offset".
          if ($string === '' || $string === false || $string === null) {
              return array(
                  'token' => '',
                  'type' => 'word'
              );
          }

          $first = $string[0];
          $first_two = substr($string, 0, 2);

          // If the next token is a comment
          if ($first === '#' || $first_two === '--' || $first_two === '/*') {
              if ($first === '/') {
                  // Comment until closing comment tag.
                  // strpos() must be checked for false BEFORE adding the tag length,
                  // otherwise an unterminated comment would be cut off after "/*".
                  $close = strpos($string, '*/', 2);
                  $last = ($close === false) ? false : $close + 2;
                  $type = 'block comment';
              } else {
                  // Comment until end of line (the newline itself is not part of the token)
                  $last = strpos($string, "\n");
                  $type = 'comment';
              }

              // Unterminated comment: it runs to the end of the input
              if ($last === false) {
                  $last = strlen($string);
              }

              return array(
                  'token' => substr($string, 0, $last),
                  'type' => $type
              );
          }

          // If the next item is a string
          if (in_array($first, self::$quotes, true)) {
              $quote = $first;

              for ($i = 1, $length = strlen($string); $i < $length; $i++) {
                  if ($quote !== '`' && $string[$i] === '\\') {
                      // Backslash escape: skip the escaped character
                      $i++;
                  } elseif ($string[$i] === $quote) {
                      if (isset($string[$i + 1]) && $string[$i + 1] === $quote) {
                          // Doubled quote character ('' or "" or ``) is an escaped quote, not the end
                          $i++;
                      } else {
                          break;
                      }
                  }
              }

              // If the closing quote is missing, $i is past the end and the whole rest is the token
              return array(
                  'token' => substr($string, 0, $i + 1),
                  'type' => ($quote === '`') ? 'backtick quote' : 'quote'
              );
          }

          // Separators
          if (in_array($first, self::$boundaries, true)) {
              // If it is a simple string or empty between the parentheses, just count as a word
              // this makes it so we don't split things like NOW() or COUNT(*) into separate lines
              if ($first === '(') {
                  // "()"
                  if (isset($string[1]) && $string[1] === ')') {
                      return array(
                          'token' => '()',
                          'type' => 'word'
                      );
                  }

                  // "(word/whitespace/boundary)"
                  $next_token = self::getNextToken(substr($string, 1));
                  $length = strlen($next_token['token']);
                  if (isset($string[$length + 1]) && $string[$length + 1] === ')') {
                      if ($next_token['type'] === 'word' || $next_token['type'] === 'whitespace' || $next_token['type'] === 'boundary') {
                          return array(
                              'token' => '(' . $next_token['token'] . ')',
                              'type' => 'word'
                          );
                      }
                  }
              }

              // Return single parentheses as their own token
              if ($first === '(' || $first === ')') {
                  return array(
                      'token' => $first,
                      'type' => $first
                  );
              }

              // A statement separator always stands alone, so that splitQuery() and format()
              // can recognise it even when it is directly followed by other punctuation.
              if ($first === ';') {
                  return array(
                      'token' => ';',
                      'type' => 'boundary'
                  );
              }

              // If there are 1 or more boundary characters together, return as a single word
              // (but never swallow a following ';')
              $next_token = self::getNextToken(substr($string, 1));
              if ($next_token['type'] === 'boundary' && $next_token['token'][0] !== ';') {
                  return array(
                      'token' => $first . $next_token['token'],
                      'type' => 'boundary'
                  );
              }

              // Otherwise, just return the single boundary character.
              // '.' and ',' get their own type because format() treats them specially.
              if ($first === '.' || $first === ',') {
                  $type = $first;
              } else {
                  $type = 'boundary';
              }

              return array(
                  'token' => $first,
                  'type' => $type
              );
          }

          // Whitespace: consume the whole run
          if (in_array($first, self::$whitespace, true)) {
              for ($i = 1, $length = strlen($string); $i < $length; $i++) {
                  if (!in_array($string[$i], self::$whitespace, true)) {
                      break;
                  }
              }

              return array(
                  'token' => substr($string, 0, $i),
                  'type' => 'whitespace'
              );
          }

          if (!self::$init) {
              // Sort reserved word list from longest word to shortest so the longest match wins
              usort(self::$reserved, array('SqlFormatter', 'sortLength'));

              // Combine boundary characters and whitespace
              self::$all_boundaries = array_merge(self::$boundaries, self::$whitespace);

              self::$init = true;
          }

          // A reserved word cannot be preceded by a '.'
          // this makes it so in "mytable.from", "from" is not considered a reserved word
          if (!$previous || !isset($previous['token']) || $previous['token'] !== '.') {
              // Reserved word (matched case-insensitively)
              $test = strtoupper($string);
              foreach (self::$reserved as $word) {
                  $length = strlen($word);
                  if (substr($test, 0, $length) === $word) {
                      // The match must end at a boundary (or at the end of the input)
                      if (isset($string[$length]) && !in_array($string[$length], self::$all_boundaries, true)) {
                          continue;
                      }

                      if (in_array($word, self::$special_reserved, true)) {
                          $type = 'special reserved';
                      } else {
                          $type = 'reserved';
                      }

                      // Return the text as written in the query, not the upper-cased copy
                      return array(
                          'token' => substr($string, 0, $length),
                          'type' => $type
                      );
                  }
              }
          }

          // Plain word: everything up to the first word separator
          for ($i = 1, $length = strlen($string); $i < $length; $i++) {
              if (in_array($string[$i], self::$all_boundaries, true)) {
                  break;
              }
          }

          $ret = substr($string, 0, $i);

          return array(
              'token' => $ret,
              'type' => is_numeric($ret) ? 'number' : 'word'
          );
      }

      /**
       * Takes a SQL string and breaks it into tokens.
       * Each token is an associative array with a 'token' and 'type' key.
       *
       * @param String $string The SQL string
       *
       * @throws Exception when there is a problem tokenizing the input string
       *
       * @return Array An array of tokens.
       */
      protected static function tokenize($string)
      {
          $tokens = array();

          // Used for debugging if there is an error while tokenizing the string
          $original_length = strlen($string);

          // Used to make sure the string keeps shrinking on each iteration
          $old_string_len = strlen($string) + 1;

          $token = null;

          $current_length = strlen($string);

          // Keep processing the string until it is empty
          while ($current_length) {
              // If the string stopped shrinking, there was a problem
              if ($old_string_len <= $current_length) {
                  throw new Exception("SQL Parse Error - Unable to tokenize string at character ".($original_length - $old_string_len));
              }
              $old_string_len = $current_length;

              // Determine if we can use caching.
              // The result of getNextToken() also depends on whether the previous token was a '.'
              // ("table.from" is a word, "from" alone is a keyword), which the cache key does not
              // capture, so the cache is neither read nor written directly after a '.'.
              $after_dot = ($token !== null && $token['token'] === '.');
              if (!$after_dot && $current_length >= self::$max_cachekey_size) {
                  $cacheKey = substr($string, 0, self::$max_cachekey_size);
              } else {
                  $cacheKey = false;
              }

              if ($cacheKey && isset(self::$token_cache[$cacheKey])) {
                  // Retrieve from cache
                  $token = self::$token_cache[$cacheKey];
                  $token_length = strlen($token['token']);
                  self::$cache_hits++;
              } else {
                  // Get the next token and the token type
                  $token = self::getNextToken($string, $token);
                  $token_length = strlen($token['token']);
                  self::$cache_misses++;

                  // If the token is shorter than the max length, store it in cache
                  if ($cacheKey && $token_length < self::$max_cachekey_size) {
                      self::$token_cache[$cacheKey] = $token;
                  }
              }

              $tokens[] = $token;

              // Advance the string
              $string = substr($string, $token_length);
              $current_length -= $token_length;
          }

          return $tokens;
      }

      /**
       * Format the whitespace in a SQL string to make it easier to read.
       *
       * @param String  $string    The SQL string
       * @param boolean $highlight If true, syntax highlighting will also be performed
       *
       * @throws Exception when there is a problem tokenizing the input string
       *
       * @return String The formatted SQL. With $highlight it is HTML wrapped in a <pre> tag,
       *                otherwise plain text.
       */
      public static function format($string, $highlight=true)
      {
          // This variable will be populated with formatted html
          $return = '';

          // Configuration values
          $tab = self::$tab;

          // Starting values
          $indent = 1;
          $newline = false;
          $indented = false;
          $extra_indent = 0;

          // Tokenize String
          $tokens = self::tokenize($string);

          foreach ($tokens as $i=>$token) {
              // Get highlighted token if doing syntax highlighting
              if ($highlight) {
                  $highlighted = self::highlightToken($token);
              } else { // If returning raw text
                  $highlighted = $token['token'];
              }

              // Don't process whitespace
              if ($token['type'] === 'whitespace') {
                  continue;
              }

              // Display comments directly where they appear in the source
              if ($token['type'] === 'comment' || $token['type'] === 'block comment') {
                  if ($token['type'] === 'block comment') {
                      $return .= "\n" . str_repeat($tab, $indent);
                  }

                  $return .= $highlighted;
                  $newline = true;
                  continue;
              }

              // If this token decreases the indent level
              if ($token['type'] === 'special reserved' || $token['type'] === ')') {
                  if ($indented) {
                      // The previous token just increased the indent; remember the surplus level
                      // so the matching ')' can remove it again
                      ++$extra_indent;
                  } elseif ($indent && ($token['type'] === 'special reserved' || $indent > 1)) {
                      --$indent;

                      if ($token['type'] === ')' && $extra_indent) {
                          $indent -= $extra_indent;
                          $extra_indent = 0;
                      }
                  } else { // If there are mismatched parentheses
                      if ($highlight) {
                          $return .= self::highlightError(htmlentities($token['token'])).' ';
                      } else {
                          $return .= $highlighted;
                      }
                      continue;
                  }
              }

              // If we need a new line before the token
              if ($newline || ($token['type'] === ')' || $token['type'] === 'special reserved')) {
                  $newline = false;
                  $return .= "\n" . str_repeat($tab, $indent);
              }

              // If we need a new line after the token
              if ($token['type'] === ',' || $token['type'] === '(' || $token['type'] === 'special reserved') {
                  $newline = true;
              }

              // If this token increases the indent level
              if ($token['type'] === 'special reserved' || $token['type'] === '(') {
                  ++$indent;
                  $indented = true;
              } else {
                  $indented = false;
              }

              // If the token shouldn't have a space before it
              if ($token['token'] === '.' || $token['token'] === ',' || $token['token'] === ';') {
                  $return = rtrim($return, ' ');
              }

              // If this is an opening parentheses, take out the preceding space unless there was
              // whitespace there in the original query
              if ($token['token'][0] === '(' && isset($tokens[$i-1]) && $tokens[$i-1]['type'] !== 'whitespace') {
                  $return = rtrim($return, ' ');
              }

              $return .= $highlighted.' ';

              // If the token shouldn't have a space after it
              if ($token['token'] === '(' || $token['token'] === '.') {
                  $return = rtrim($return, ' ');
              }
          }

          // If there are unmatched parentheses
          if ($highlight && $indent !== 1) {
              $return .= "\n".self::highlightError("WARNING: unclosed parentheses or section");
          }

          if ($highlight) {
              return "<pre style='background:white;'>" . trim($return) . "</pre>";
          }

          return trim($return);
      }

      /**
       * Add syntax highlighting to a SQL string
       *
       * @param String $string The SQL string
       *
       * @throws Exception when there is a problem tokenizing the input string
       *
       * @return String The SQL string with HTML styles applied, wrapped in a <pre> tag
       */
      public static function highlight($string)
      {
          $tokens = self::tokenize($string);

          $return = '';

          foreach ($tokens as $token) {
              $return .= self::highlightToken($token);
          }

          return "<pre style='background:white;'>" . trim($return) . "</pre>";
      }

      /**
       * Split a SQL string into multiple queries.
       * Uses ";" as a query delimiter.
       *
       * @param String $string The SQL string
       *
       * @throws Exception when there is a problem tokenizing the input string
       *
       * @return Array An array of individual query strings without trailing semicolons
       */
      public static function splitQuery($string)
      {
          // Comments between queries cause problems, so remove them first
          $string = self::removeComments($string);

          $queries = array();
          $current_query = '';

          $tokens = self::tokenize($string);

          foreach ($tokens as $token) {
              // If this is a query separator
              if ($token['token'] === ';') {
                  // Compare against '' explicitly: a query consisting of "0" is falsy but not empty
                  if (trim($current_query) !== '') {
                      $queries[] = trim($current_query);
                  }
                  $current_query = '';
                  continue;
              }

              $current_query .= $token['token'];
          }

          // Last query, if the input did not end with a separator
          if (trim($current_query) !== '') {
              $queries[] = trim($current_query);
          }

          return $queries;
      }

      /**
       * Remove all comments from a SQL string
       *
       * @param String $string The SQL string
       *
       * @throws Exception when there is a problem tokenizing the input string
       *
       * @return String The SQL string without comments (surrounding whitespace is kept as is)
       */
      public static function removeComments($string)
      {
          $result = '';

          $tokens = self::tokenize($string);

          foreach ($tokens as $token) {
              // Skip comment tokens
              if ($token['type'] === 'comment' || $token['type'] === 'block comment') {
                  continue;
              }

              $result .= $token['token'];
          }

          return $result;
      }

      /**
       * Highlights a token depending on its type.
       *
       * @param Array $token An associative array containing 'token' and 'type' keys.
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightToken($token)
      {
          $type = $token['type'];
          // Escape the raw SQL text before it is embedded in HTML
          $token = htmlentities($token['token']);

          switch ($type) {
              case 'backtick quote':
              case 'quote':
                  return self::highlightQuote($token, $type);
              case 'reserved':
              case 'special reserved':
                  return self::highlightReservedWord($token);
              case '(':
              case ')':
                  // Parentheses are emitted without a wrapping span
                  return $token;
              case 'number':
                  return self::highlightNumber($token);
              case 'boundary':
              case '.':
              case ',':
                  return self::highlightBoundary($token);
              case 'comment':
              case 'block comment':
                  return self::highlightComment($token);
              default:
                  return self::highlightDefault($token);
          }
      }

      /**
       * Highlights a quoted string
       *
       * @param String $value The token's value (already HTML-escaped)
       * @param String $type  The token's type ('quote' or 'backtick quote')
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightQuote($value, $type)
      {
          if ($type === 'backtick quote') {
              return "<span style='" . self::$backtick_quote_style . "'>" . $value . "</span>";
          }

          return "<span style='" . self::$quote_style . "'>" . $value . "</span>";
      }

      /**
       * Highlights a reserved word
       *
       * @param String $value The token's value (already HTML-escaped)
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightReservedWord($value)
      {
          return "<span style='" . self::$reserved_style . "'>" . $value . "</span>";
      }

      /**
       * Highlights a boundary token
       *
       * @param String $value The token's value (already HTML-escaped)
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightBoundary($value)
      {
          return "<span style='" . self::$boundary_style . "'>" . $value . "</span>";
      }

      /**
       * Highlights a number
       *
       * @param String $value The token's value (already HTML-escaped)
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightNumber($value)
      {
          return "<span style='" . self::$number_style . "'>" . $value . "</span>";
      }

      /**
       * Highlights an error
       *
       * @param String $value The text to mark as an error (inserted as is, so it must already be safe HTML)
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightError($value)
      {
          return "<span style='" . self::$error_style . "'>" . $value . "</span>";
      }

      /**
       * Highlights a comment
       *
       * @param String $value The token's value (already HTML-escaped)
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightComment($value)
      {
          return "<span style='" . self::$comment_style . "'>" . $value . "</span>";
      }

      /**
       * Highlights a generic token
       *
       * @param String $value The token's value (already HTML-escaped)
       *
       * @return String HTML code of the highlighted token.
       */
      protected static function highlightDefault($value)
      {
          return "<span style='" . self::$default_style . "'>" . $value . "</span>";
      }

      /**
       * Helper function for sorting the list of reserved words by length (longest first)
       *
       * @param String $a The first string
       * @param String $b The second string
       *
       * @return int The comparison of the string lengths
       */
      private static function sortLength($a, $b)
      {
          return strlen($b) - strlen($a);
      }
  }