12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788 |
- <?php
- /**
- * SQL Formatter is a collection of utilities for debugging SQL queries.
- * It includes methods for formatting, syntax highlighting, removing comments, etc.
- *
- * @package SqlFormatter
- * @author Jeremy Dorn <jeremy@jeremydorn.com>
- * @author Florin Patan <florinpatan@gmail.com>
- * @copyright 2013 Jeremy Dorn
- * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
- * @link http://github.com/jdorn/sql-formatter
- * @version 1.2.5
- */
- class SqlFormatter
- {
/**
 * Token type identifiers.
 * One of these values is stored under the TOKEN_TYPE key of every token array.
 */
const TOKEN_TYPE_WHITESPACE = 0;
const TOKEN_TYPE_WORD = 1;
const TOKEN_TYPE_QUOTE = 2;
const TOKEN_TYPE_BACKTICK_QUOTE = 3;
const TOKEN_TYPE_RESERVED = 4;
const TOKEN_TYPE_SPECIAL_RESERVED = 5;
const TOKEN_TYPE_BOUNDARY = 6;
const TOKEN_TYPE_COMMENT = 7;
const TOKEN_TYPE_BLOCK_COMMENT = 8;
const TOKEN_TYPE_NUMBER = 9;
const TOKEN_TYPE_ERROR = 10;

/**
 * Array keys used to address the two components of a token array.
 */
const TOKEN_TYPE = 0;
const TOKEN_VALUE = 1;
-
/**
 * Reserved words, used for syntax highlighting.
 * init() re-sorts this list from longest to shortest before building the regex.
 */
protected static $reserved = array(
    'ACCESSIBLE', 'ACTION', 'AGAINST', 'AGGREGATE', 'ALGORITHM', 'ALL', 'ALTER', 'ANALYSE', 'ANALYZE', 'AND', 'AS', 'ASC',
    'AUTOCOMMIT', 'AUTO_INCREMENT', 'BACKUP', 'BEGIN', 'BETWEEN', 'BINLOG', 'BOTH', 'CASCADE', 'CASE', 'CHANGE', 'CHANGED',
    'CHARSET', 'CHECK', 'CHECKSUM', 'COLLATE', 'COLLATION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMMITTED', 'COMPRESSED', 'CONCURRENT',
    'CONSTRAINT', 'CONTAINS', 'CONVERT', 'COUNT', 'CREATE', 'CROSS', 'CURRENT_TIMESTAMP', 'DATABASE', 'DATABASES', 'DAY', 'DAY_HOUR', 'DAY_MINUTE',
    'DAY_SECOND', 'DEFAULT', 'DEFINER', 'DELAYED', 'DELETE', 'DESC', 'DESCRIBE', 'DETERMINISTIC', 'DISTINCT', 'DISTINCTROW', 'DIV',
    'DO', 'DROP', 'DUMPFILE', 'DUPLICATE', 'DYNAMIC', 'ELSE', 'ENCLOSED', 'END', 'ENGINE', 'ENGINE_TYPE', 'ENGINES', 'ESCAPE', 'ESCAPED', 'EVENTS', 'EXECUTE',
    'EXISTS', 'EXPLAIN', 'EXTENDED', 'FAST', 'FIELDS', 'FILE', 'FIRST', 'FIXED', 'FLUSH', 'FOR', 'FORCE', 'FOREIGN', 'FULL', 'FULLTEXT',
    'FUNCTION', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP_CONCAT', 'HEAP', 'HIGH_PRIORITY', 'HOSTS', 'HOUR', 'HOUR_MINUTE',
    'HOUR_SECOND', 'IDENTIFIED', 'IF', 'IGNORE', 'IN', 'INDEX', 'INDEXES', 'INFILE', 'INSERT', 'INSERT_ID', 'INSERT_METHOD', 'INTERVAL',
    'INTO', 'INVOKER', 'IS', 'ISOLATION', 'KEY', 'KEYS', 'KILL', 'LAST_INSERT_ID', 'LEADING', 'LEVEL', 'LIKE', 'LINEAR',
    'LINES', 'LOAD', 'LOCAL', 'LOCK', 'LOCKS', 'LOGS', 'LOW_PRIORITY', 'MARIA', 'MASTER', 'MASTER_CONNECT_RETRY', 'MASTER_HOST', 'MASTER_LOG_FILE',
    'MATCH', 'MEDIUM', 'MERGE', 'MINUTE', 'MINUTE_SECOND', 'MIN_ROWS', 'MODE', 'MODIFY',
    'MONTH', 'MRG_MYISAM', 'MYISAM', 'NAMES', 'NATURAL', 'NOT', 'NOW', 'NULL', 'OFFSET', 'ON', 'OPEN', 'OPTIMIZE', 'OPTION', 'OPTIONALLY', 'OR',
    'ON UPDATE', 'ON DELETE', 'OUTFILE', 'PACK_KEYS', 'PAGE', 'PARTIAL', 'PARTITION', 'PARTITIONS', 'PASSWORD', 'PRIMARY', 'PRIVILEGES', 'PROCEDURE',
    'PROCESS', 'PROCESSLIST', 'PURGE', 'QUICK', 'RANGE', 'READ', 'READ_ONLY',
    'READ_WRITE', 'REFERENCES', 'REGEXP', 'RELOAD', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICATION', 'RESET', 'RESTORE', 'RESTRICT',
    'RETURN', 'RETURNS', 'REVOKE', 'RLIKE', 'ROLLBACK', 'ROW', 'ROWS', 'ROW_FORMAT', 'SECOND', 'SECURITY', 'SEPARATOR',
    'SERIALIZABLE', 'SESSION', 'SET', 'SHARE', 'SHOW', 'SHUTDOWN', 'SLAVE', 'SONAME', 'SOUNDS', 'SQL',
    'SQL_CACHE', 'SQL_NO_CACHE', 'START', 'STARTING', 'STATUS', 'STOP', 'STORAGE',
    'STRAIGHT_JOIN', 'STRING', 'SUPER', 'TABLE', 'TABLES', 'TEMPORARY', 'TERMINATED', 'THEN', 'TO', 'TRAILING', 'TRANSACTIONAL',
    'TRUNCATE', 'TYPE', 'TYPES', 'UNCOMMITTED', 'UNIQUE', 'UNLOCK', 'UNSIGNED', 'USAGE', 'USE', 'USING', 'VARIABLES',
    'VIEW', 'WHEN', 'WITH', 'WORK', 'WRITE', 'XOR', 'YEAR_MONTH'
);

/**
 * Special reserved words, used for SQL formatting.
 * format() places each of these on its own line.
 * Longer phrases are listed before the shorter words they contain (e.g. 'UNION ALL' before 'UNION').
 */
protected static $special_reserved = array(
    'SELECT', 'FROM', 'WHERE', 'SET', 'ORDER BY', 'GROUP BY', 'LEFT JOIN', 'OUTER JOIN', 'INNER JOIN', 'RIGHT JOIN', 'JOIN', 'LIMIT',
    'VALUES', 'UPDATE', 'HAVING', 'ADD', 'AFTER', 'ALTER TABLE', 'DELETE FROM', 'UNION ALL', 'UNION', 'EXCEPT', 'INTERSECT'
);

/**
 * Punctuation characters that act as a boundary between other tokens.
 */
protected static $boundaries = array(',', ';', ')', '(', '.', '=', '<', '>', '+', '-', '*', '/', '!', '^', '%', '|', '&');
-
/**
 * Syntax highlighting configuration.
 * Each value is inserted verbatim as the attribute list of the <span> (or <pre>)
 * element that wraps the corresponding kind of token.
 */
public static $quote_attributes = 'style="color: blue;"';
public static $backtick_quote_attributes = 'style="color: purple;"';
public static $reserved_attributes = 'style="font-weight:bold;"';
public static $boundary_attributes = '';
public static $number_attributes = 'style="color: green;"';
public static $word_attributes = 'style="color: #333;"';
public static $error_attributes = 'style="background-color: red;"';
public static $comment_attributes = 'style="color: #aaa;"';
public static $pre_attributes = 'style="color: black; background-color: white;"';

/**
 * The string substituted for each tab character in the output of format().
 */
public static $tab = ' ';

/**
 * Set to true once init() has run, so SqlFormatter is only initialized once.
 */
protected static $init;

/**
 * Regular expression fragments used by the tokenizer; populated by init().
 */
protected static $regex_boundaries;
protected static $regex_reserved;
protected static $regex_special_reserved;

/**
 * Token cache.
 * Only tokens shorter than $max_cachekey_size are cached.
 * Somewhere between 10 and 20 seems to work well for most cases.
 */
public static $max_cachekey_size = 15;
protected static $token_cache = array();
protected static $cache_hits = 0;
protected static $cache_misses = 0;
-
/**
 * Report usage statistics for the token cache.
 *
 * @return Array An array with the keys 'hits', 'misses', 'entries' (number of cached
 *               tokens) and 'size' (length in bytes of the serialized cache)
 */
public static function getCacheStats()
{
    $stats = array();

    $stats['hits'] = self::$cache_hits;
    $stats['misses'] = self::$cache_misses;
    $stats['entries'] = count(self::$token_cache);
    $stats['size'] = strlen(serialize(self::$token_cache));

    return $stats;
}
-
/**
 * One-time setup: sorts the reserved words and builds the tokenizer's regular expressions.
 * Calls after the first one return immediately.
 */
protected static function init()
{
    if (self::$init) {
        return;
    }

    // Sort reserved word list from longest word to shortest
    usort(self::$reserved, array('SqlFormatter', 'sortLength'));

    $quote = array('SqlFormatter', 'quote_regex');

    // Each pattern is a single group of regex-escaped alternatives
    $boundaries = '(' . implode('|', array_map($quote, self::$boundaries)) . ')';
    $reserved = '(' . implode('|', array_map($quote, self::$reserved)) . ')';
    $special_reserved = '(' . implode('|', array_map($quote, self::$special_reserved)) . ')';

    self::$regex_boundaries = $boundaries;

    // Multi-word phrases ('ON UPDATE', 'ORDER BY', ...) must match no matter how the
    // words are separated (several spaces, tabs, newlines), so every literal space in
    // the pattern becomes \s+. This is applied to both reserved lists; otherwise
    // "ON<tab>UPDATE" would be split and UPDATE treated as a special reserved word.
    self::$regex_reserved = str_replace(' ', '\\s+', $reserved);
    self::$regex_special_reserved = str_replace(' ', '\\s+', $special_reserved);

    self::$init = true;
}
-
/**
 * Return the next token and token type in a SQL string.
 * Quoted strings, comments, reserved words, whitespace, and punctuation are all their own tokens.
 *
 * Relies on the regular expressions built by init(), so init() must have run first.
 *
 * @param String $string   The SQL string; the token is read from its start
 * @param array  $previous The result of the previous getNextToken() call
 *
 * @return Array An array holding the token's type (key TOKEN_TYPE) and value (key TOKEN_VALUE).
 *               The value may be empty when nothing can be consumed; tokenize() treats that as an error.
 */
protected static function getNextToken($string, $previous = null)
{
    // Nothing to read: report an empty word instead of indexing into an empty string
    if (!isset($string[0])) {
        return array(
            self::TOKEN_VALUE => '',
            self::TOKEN_TYPE => self::TOKEN_TYPE_WORD
        );
    }

    // Whitespace
    if (preg_match('/^\s+/', $string, $matches)) {
        return array(
            self::TOKEN_VALUE => $matches[0],
            self::TOKEN_TYPE => self::TOKEN_TYPE_WHITESPACE
        );
    }

    // Comment: "#..." or "--..." until end of line, or "/* ... */".
    // The second character is only inspected when it exists.
    $has_second = isset($string[1]);
    $line_comment = $string[0] === '#' || ($has_second && $string[0] === '-' && $string[1] === '-');
    $block_comment = $has_second && $string[0] === '/' && $string[1] === '*';

    if ($line_comment || $block_comment) {
        if ($line_comment) {
            // Comment until end of line
            $last = strpos($string, "\n");
            $type = self::TOKEN_TYPE_COMMENT;
        } else {
            // Comment until closing comment tag. Start searching after the opening
            // "/*" so that "/*/" is not mistaken for a complete comment, and test for
            // false BEFORE adding the tag length (false + 2 would silently become 2).
            $close = strpos($string, "*/", 2);
            $last = ($close === false) ? false : $close + 2;
            $type = self::TOKEN_TYPE_BLOCK_COMMENT;
        }

        // Unterminated comment: it runs to the end of the string
        if ($last === false) {
            $last = strlen($string);
        }

        return array(
            self::TOKEN_VALUE => substr($string, 0, $last),
            self::TOKEN_TYPE => $type
        );
    }

    // Quoted String
    if ($string[0] === '"' || $string[0] === '\'' || $string[0] === '`') {
        // This checks for the following patterns:
        // 1. backtick quoted string using `` to escape
        // 2. double quoted string using "" or \" to escape
        // 3. single quoted string using '' or \' to escape
        // A quote left open at the end of the string is accepted as well.
        if (preg_match('/^((`(?:[^`]|``)*($|`))|("((?:[^"\\\\]|"")|(?:[^"\\\\]|\\\\.))*($|"))|(\'((?:[^\'\\\\]|\'\')|(?:[^\'\\\\]|\\\\.))*($|\')))/', $string, $matches)) {
            $quote_type = ($string[0] === '`') ? self::TOKEN_TYPE_BACKTICK_QUOTE : self::TOKEN_TYPE_QUOTE;

            return array(
                self::TOKEN_VALUE => $matches[1],
                self::TOKEN_TYPE => $quote_type
            );
        }
    }

    // Number: digits with an optional fraction, followed by the end of the string,
    // whitespace, a quote character or a boundary character
    if (preg_match('/^([0-9]+(\.[0-9]+)?)($|\s|["\'`]|'.self::$regex_boundaries.')/', $string, $matches)) {
        return array(
            self::TOKEN_VALUE => $matches[1],
            self::TOKEN_TYPE => self::TOKEN_TYPE_NUMBER
        );
    }

    // Boundary Character (punctuation and symbols)
    if (preg_match('/^('.self::$regex_boundaries.')/', $string, $matches)) {
        return array(
            self::TOKEN_VALUE => $matches[1],
            self::TOKEN_TYPE => self::TOKEN_TYPE_BOUNDARY
        );
    }

    // A reserved word cannot be preceded by a '.'
    // this makes it so in "mytable.from", "from" is not considered a reserved word
    if (!$previous || !isset($previous[self::TOKEN_VALUE]) || $previous[self::TOKEN_VALUE] !== '.') {
        // Match case-insensitively, but return the text exactly as it was written
        $upper = strtoupper($string);

        // Special Reserved Word
        if (preg_match('/^('.self::$regex_special_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper, $matches)) {
            return array(
                self::TOKEN_TYPE => self::TOKEN_TYPE_SPECIAL_RESERVED,
                self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
            );
        }

        // Other Reserved Word
        if (preg_match('/^('.self::$regex_reserved.')($|\s|'.self::$regex_boundaries.')/', $upper, $matches)) {
            return array(
                self::TOKEN_TYPE => self::TOKEN_TYPE_RESERVED,
                self::TOKEN_VALUE => substr($string, 0, strlen($matches[1]))
            );
        }
    }

    // Non reserved word: everything up to the next whitespace, quote or boundary character
    preg_match('/^(.*?)($|\s|["\'`]|'.self::$regex_boundaries.')/', $string, $matches);

    return array(
        self::TOKEN_VALUE => $matches[1],
        self::TOKEN_TYPE => self::TOKEN_TYPE_WORD
    );
}
-
/**
 * Takes a SQL string and breaks it into tokens.
 * Each token is an array holding a type (key TOKEN_TYPE) and a value (key TOKEN_VALUE).
 *
 * If a step fails to consume any input, the unprocessed remainder is appended as a
 * single TOKEN_TYPE_ERROR token and tokenizing stops.
 *
 * @param String $string The SQL string
 *
 * @return Array An array of tokens.
 */
protected static function tokenize($string)
{
    self::init();

    $tokens = array();

    // The previously produced token (null before the first one)
    $token = null;

    $current_length = strlen($string);

    // Used to make sure the string keeps shrinking on each iteration
    $old_string_len = $current_length + 1;

    // Keep processing the string until it is empty
    while ($current_length) {
        // If the string stopped shrinking, there was a problem
        if ($old_string_len <= $current_length) {
            $tokens[] = array(
                self::TOKEN_VALUE => $string,
                self::TOKEN_TYPE => self::TOKEN_TYPE_ERROR
            );

            return $tokens;
        }
        $old_string_len = $current_length;

        // getNextToken() never reports a reserved word directly after a '.', so the
        // result for that position depends on context the cache key (the leading
        // characters of the string) does not capture. Neither read from nor write to
        // the cache there, or "mytable.from" and a plain "from" would share an entry.
        $after_dot = $token !== null && $token[self::TOKEN_VALUE] === '.';

        // Determine if we can use caching
        if (!$after_dot && $current_length >= self::$max_cachekey_size) {
            $cacheKey = substr($string, 0, self::$max_cachekey_size);
        } else {
            $cacheKey = false;
        }

        if ($cacheKey && isset(self::$token_cache[$cacheKey])) {
            // Retrieve from cache
            $token = self::$token_cache[$cacheKey];
            $token_length = strlen($token[self::TOKEN_VALUE]);
            self::$cache_hits++;
        } else {
            // Get the next token and the token type
            $token = self::getNextToken($string, $token);
            $token_length = strlen($token[self::TOKEN_VALUE]);
            self::$cache_misses++;

            // If the token is shorter than the max length, store it in cache
            if ($cacheKey && $token_length < self::$max_cachekey_size) {
                self::$token_cache[$cacheKey] = $token;
            }
        }

        $tokens[] = $token;

        // Advance the string
        $string = substr($string, $token_length);

        $current_length -= $token_length;
    }

    return $tokens;
}
-
/**
 * Re-flow the whitespace of a SQL string so that it is easier to read.
 *
 * @param String  $string    The SQL string
 * @param boolean $highlight If true, syntax highlighting is applied as well
 *
 * @return String The formatted SQL string. With $highlight enabled it contains HTML
 *                styles and is wrapped in a <pre> tag; otherwise it is plain text.
 */
public static function format($string, $highlight=true)
{
    // Collects the formatted output
    $output = '';

    // Indent with a real tab while formatting; it is swapped for self::$tab at the end
    $tab = "\t";

    $depth = 0;                 // current indentation level
    $break_pending = false;     // a newline must precede the next token
    $inline_parens = false;     // currently inside a short "(...)" kept on one line
    $pending_special = false;   // raise the indent for a special reserved word at the next token
    $pending_block = false;     // raise the indent for an opening parenthesis at the next token
    $indent_stack = array();    // kinds of the open indents, most recent first
    $broke_line = false;        // a newline was emitted just before the current token

    $tokens = self::tokenize($string);

    foreach ($tokens as $index => $token) {
        $type = $token[self::TOKEN_TYPE];
        $value = $token[self::TOKEN_VALUE];

        // Whitespace from the input is never copied over
        if ($type === self::TOKEN_TYPE_WHITESPACE) {
            continue;
        }

        // The text to emit: highlighted markup, or the raw token value
        $rendered = $highlight ? self::highlightToken($token) : $value;

        // Apply the indent increases requested by the previous token
        if ($pending_special) {
            $depth++;
            $pending_special = false;
            array_unshift($indent_stack, 'special');
        }
        if ($pending_block) {
            $depth++;
            $pending_block = false;
            array_unshift($indent_stack, 'block');
        }

        // Comments are emitted where they appear; block comments get their own line
        $is_block_comment = ($type === self::TOKEN_TYPE_BLOCK_COMMENT);
        if ($is_block_comment || $type === self::TOKEN_TYPE_COMMENT) {
            if ($is_block_comment) {
                $output .= "\n" . str_repeat($tab, $depth);
            }

            $output .= $rendered;
            $break_pending = true;
            continue;
        }

        // Emit the newline requested earlier, if any
        $broke_line = $break_pending;
        if ($break_pending) {
            $output .= "\n" . str_repeat($tab, $depth);
            $break_pending = false;
        }

        if ($value === '(') {
            // Opening parentheses increase the block indent level and start a new line,
            // unless they open a short inline group such as
            // "NOW()", "COUNT(*)", "int(10)", key(`somecolumn`), DECIMAL(7,2).
            // Allow up to 3 non-whitespace tokens inside inline parentheses.
            $seen = 0;
            for ($ahead = 1; $ahead <= 8; $ahead++) {
                // Reached end of string
                if (!isset($tokens[$index + $ahead])) {
                    break;
                }

                $next = $tokens[$index + $ahead];
                $next_type = $next[self::TOKEN_TYPE];
                $next_value = $next[self::TOKEN_VALUE];

                // Ignore whitespace
                if ($next_type === self::TOKEN_TYPE_WHITESPACE) {
                    continue;
                }

                // Reached closing parentheses
                if ($next_value === ')') {
                    $inline_parens = true;
                    break;
                }

                // Reached an invalid token for inline parentheses
                if ($next_value === ';' || $next_value === '(') {
                    break;
                }

                // Reached an invalid token type for inline parentheses
                if ($next_type === self::TOKEN_TYPE_SPECIAL_RESERVED
                    || $next_type === self::TOKEN_TYPE_COMMENT
                    || $next_type === self::TOKEN_TYPE_BLOCK_COMMENT
                ) {
                    break;
                }

                // Too many tokens for inline parentheses
                if ($seen >= 3) {
                    break;
                }

                $seen++;
            }

            // Take out the preceding space unless there was whitespace there in the original query
            if (isset($tokens[$index - 1]) && $tokens[$index - 1][self::TOKEN_TYPE] !== self::TOKEN_TYPE_WHITESPACE) {
                $output = rtrim($output, ' ');
            }

            if (!$inline_parens) {
                $pending_block = true;
                // Add a newline after the parentheses
                $break_pending = true;
            }
        } elseif ($value === ')') {
            // Closing parentheses decrease the block indent level.
            // Remove whitespace before the closing parentheses first.
            $output = rtrim($output, ' ');

            if ($inline_parens) {
                // End of an inline parentheses section
                $inline_parens = false;
            } else {
                $depth--;

                // Drop the special indents opened inside this block, then the entry after them
                while ($kind = array_shift($indent_stack)) {
                    if ($kind !== 'special') {
                        break;
                    }
                    $depth--;
                }

                if ($depth < 0) {
                    // This is an error
                    $depth = 0;

                    if ($highlight) {
                        $output .= "\n" . self::highlightError($value);
                        continue;
                    }
                }

                // Add a newline before the closing parentheses (if not already added)
                if (!$broke_line) {
                    $output .= "\n" . str_repeat($tab, $depth);
                }
            }
        } elseif ($value === ',' && !$inline_parens) {
            // Commas start a new line (unless within inline parentheses)
            $break_pending = true;
        } elseif ($type === self::TOKEN_TYPE_SPECIAL_RESERVED) {
            // Special reserved words start a new line and increase the special indent level
            $pending_special = true;

            // If the last indent type was 'special', decrease the special indent for this round
            if (isset($indent_stack[0]) && $indent_stack[0] === 'special') {
                $depth--;
                array_shift($indent_stack);
            }

            // Add a newline after the special reserved word
            $break_pending = true;

            if ($broke_line) {
                // A newline was already added; redo the indentation since it may be different now
                $output = rtrim($output, $tab) . str_repeat($tab, $depth);
            } else {
                // Add a newline before the special reserved word
                $output .= "\n" . str_repeat($tab, $depth);
            }

            // If the token may have extra whitespace, collapse it
            if (strpos($value, ' ') !== false || strpos($value, "\n") !== false || strpos($value, "\t") !== false) {
                $rendered = preg_replace('/\s+/', ' ', $rendered);
            }
        }

        // If the token shouldn't have a space before it
        if ($value === '.' || $value === ',' || $value === ';') {
            $output = rtrim($output, ' ');
        }

        $output .= $rendered . ' ';

        // If the token shouldn't have a space after it
        if ($value === '(' || $value === '.') {
            $output = rtrim($output, ' ');
        }
    }

    // If there are unmatched parentheses
    if ($highlight && in_array('block', $indent_stack)) {
        $output .= "\n" . self::highlightError("WARNING: unclosed parentheses or section");
    }

    // Replace tab characters with the configuration tab character
    $output = trim(str_replace("\t", self::$tab, $output));

    if ($highlight) {
        $output = "<pre " . self::$pre_attributes . ">" . $output . "</pre>";
    }

    return $output;
}
-
/**
 * Apply syntax highlighting to a SQL string without changing its layout.
 *
 * @param String $string The SQL string
 *
 * @return String The SQL string with HTML styles applied, wrapped in a <pre> tag
 */
public static function highlight($string)
{
    $pieces = array();

    foreach (self::tokenize($string) as $token) {
        $pieces[] = self::highlightToken($token);
    }

    $markup = trim(implode('', $pieces));

    return "<pre " . self::$pre_attributes . ">" . $markup . "</pre>";
}
-
/**
 * Split a SQL string into multiple queries.
 * Uses ";" as a query delimiter.
 *
 * Queries that consist only of whitespace and comments are skipped. Every returned
 * query is trimmed of surrounding whitespace. A query that was terminated by ";" in
 * the input keeps that ";" at its end; a final query with no terminator is returned
 * without one.
 *
 * @param String $string The SQL string
 *
 * @return Array An array of individual query strings
 */
public static function splitQuery($string)
{
    $queries = array();
    $current_query = '';

    // True while the current query holds nothing but whitespace and comments
    $empty = true;

    $tokens = self::tokenize($string);

    foreach ($tokens as $token) {
        // If this is a query separator
        if ($token[self::TOKEN_VALUE] === ';') {
            if (!$empty) {
                // Trim here as well, so every query gets the same treatment as the last one
                $queries[] = trim($current_query) . ';';
            }
            $current_query = '';
            $empty = true;
            continue;
        }

        // Anything other than whitespace or a comment makes the query worth keeping
        $type = $token[self::TOKEN_TYPE];
        if ($type !== self::TOKEN_TYPE_WHITESPACE
            && $type !== self::TOKEN_TYPE_COMMENT
            && $type !== self::TOKEN_TYPE_BLOCK_COMMENT
        ) {
            $empty = false;
        }

        $current_query .= $token[self::TOKEN_VALUE];
    }

    // Trailing query without a terminating ";"
    if (!$empty) {
        $queries[] = trim($current_query);
    }

    return $queries;
}
-
/**
 * Strip every comment from a SQL string.
 * All other tokens, whitespace included, are kept exactly as written.
 *
 * @param String $string The SQL string
 *
 * @return String The SQL string without comments
 */
public static function removeComments($string)
{
    $kept = array();

    foreach (self::tokenize($string) as $token) {
        $type = $token[self::TOKEN_TYPE];
        $is_comment = ($type === self::TOKEN_TYPE_COMMENT || $type === self::TOKEN_TYPE_BLOCK_COMMENT);

        if (!$is_comment) {
            $kept[] = $token[self::TOKEN_VALUE];
        }
    }

    return implode('', $kept);
}
-
/**
 * Highlights a token depending on its type.
 *
 * The token value is HTML-escaped first. Token types that have no dedicated style
 * (such as whitespace) are returned escaped but unwrapped.
 *
 * @param Array $token A token array holding a type (key TOKEN_TYPE) and a value (key TOKEN_VALUE).
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightToken($token)
{
    $type = $token[self::TOKEN_TYPE];

    // Name the charset explicitly so that escaping does not depend on the PHP
    // version's default (ISO-8859-1 before PHP 5.4, UTF-8 afterwards)
    $value = htmlentities($token[self::TOKEN_VALUE], ENT_COMPAT, 'UTF-8');

    switch ($type) {
        case self::TOKEN_TYPE_BOUNDARY:
            return self::highlightBoundary($value);

        case self::TOKEN_TYPE_WORD:
            return self::highlightWord($value);

        case self::TOKEN_TYPE_BACKTICK_QUOTE:
            return self::highlightBacktickQuote($value);

        case self::TOKEN_TYPE_QUOTE:
            return self::highlightQuote($value);

        case self::TOKEN_TYPE_RESERVED:
        case self::TOKEN_TYPE_SPECIAL_RESERVED:
            return self::highlightReservedWord($value);

        case self::TOKEN_TYPE_NUMBER:
            return self::highlightNumber($value);

        case self::TOKEN_TYPE_COMMENT:
        case self::TOKEN_TYPE_BLOCK_COMMENT:
            return self::highlightComment($value);

        case self::TOKEN_TYPE_ERROR:
            // tokenize() emits this type for input it could not process
            return self::highlightError($value);
    }

    return $value;
}
-
/**
 * Wraps a quoted string in a span carrying self::$quote_attributes.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightQuote($value)
{
    return sprintf('<span %s>%s</span>', self::$quote_attributes, $value);
}
-
/**
 * Wraps a backtick quoted string in a span carrying self::$backtick_quote_attributes.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightBacktickQuote($value)
{
    return sprintf('<span %s>%s</span>', self::$backtick_quote_attributes, $value);
}
-
/**
 * Wraps a reserved word in a span carrying self::$reserved_attributes.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightReservedWord($value)
{
    return sprintf('<span %s>%s</span>', self::$reserved_attributes, $value);
}
-
/**
 * Wraps a boundary token in a span carrying self::$boundary_attributes.
 * Parentheses are returned as they are, without a span.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightBoundary($value)
{
    if (in_array($value, array('(', ')'), true)) {
        return $value;
    }

    return sprintf('<span %s>%s</span>', self::$boundary_attributes, $value);
}
-
/**
 * Wraps a number in a span carrying self::$number_attributes.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightNumber($value)
{
    return sprintf('<span %s>%s</span>', self::$number_attributes, $value);
}
-
/**
 * Wraps an error message or erroneous token in a span carrying self::$error_attributes.
 *
 * @param String $value The text to mark as an error
 *
 * @return String HTML code of the highlighted text.
 */
protected static function highlightError($value)
{
    return sprintf('<span %s>%s</span>', self::$error_attributes, $value);
}
-
/**
 * Wraps a comment in a span carrying self::$comment_attributes.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightComment($value)
{
    return sprintf('<span %s>%s</span>', self::$comment_attributes, $value);
}
-
/**
 * Wraps a word token in a span carrying self::$word_attributes.
 *
 * @param String $value The token's value
 *
 * @return String HTML code of the highlighted token.
 */
protected static function highlightWord($value)
{
    return sprintf('<span %s>%s</span>', self::$word_attributes, $value);
}
-
/**
 * Comparison callback that orders strings from longest to shortest.
 * Used by init() to sort the list of reserved words.
 *
 * @param String $a The first string
 * @param String $b The second string
 *
 * @return int Negative when $a is longer than $b, positive when it is shorter, 0 when equal in length
 */
private static function sortLength($a, $b)
{
    $length_a = strlen($a);
    $length_b = strlen($b);

    return $length_b - $length_a;
}
-
/**
 * Escapes a string for literal use inside a regular expression delimited by "/".
 * Used by init() when building the patterns for reserved words and boundary characters.
 *
 * @param String $a The string to be quoted
 *
 * @return String The quoted string
 */
private static function quote_regex($a)
{
    $delimiter = '/';

    return preg_quote($a, $delimiter);
}
- }
|