From 8ea0cd8285c84d8414c376c1714e1ec9503e141d Mon Sep 17 00:00:00 2001 From: eelke Date: Thu, 9 Feb 2017 21:54:31 +0100 Subject: [PATCH] Improved parser for syntax highlighting. Also added even more keywords (all from appendix C of the postgresql docs are included now) --- SqlSyntaxHighlighter.cpp | 174 ++++++++++++++++++++++++++------------- SqlSyntaxHighlighter.h | 1 + 2 files changed, 117 insertions(+), 58 deletions(-) diff --git a/SqlSyntaxHighlighter.cpp b/SqlSyntaxHighlighter.cpp index 3194659..8af2203 100644 --- a/SqlSyntaxHighlighter.cpp +++ b/SqlSyntaxHighlighter.cpp @@ -24,7 +24,7 @@ namespace { class Lexer { private: QString m_block; - int m_pos = -1; + int m_pos = 0; LexerState m_state; public: Lexer(const QString &block, LexerState currentstate) @@ -35,22 +35,21 @@ namespace { QChar nextChar() { QChar result = QChar::Null; - if (m_pos+1 < m_block.size()) { - result = m_block.at(++m_pos); - } - else { - ++m_pos; + if (m_pos < m_block.size()) { + result = m_block.at(m_pos++); } return result; } + QChar peekChar() { QChar result = QChar::Null; - if (m_pos+1 < m_block.size()) { - result = m_block.at(m_pos+1); + if (m_pos < m_block.size()) { + result = m_block.at(m_pos); } return result; } + /** * @brief NextBasicToken * @param in @@ -65,67 +64,75 @@ namespace { // it does also recognize comments and quoted strings/identifiers bool result = false; while (true) { + startpos = m_pos; QChar c = nextChar(); if (c.isSpace()) { // Just skip whitespace } else if (c == '-' && peekChar() == '-') { // two dashes, start of comment - startpos = m_pos; // Loop till end of line or end of block c = nextChar(); - while (c != QChar::Null && c != '\n') { - c = nextChar(); + for (;;) { + c = peekChar(); + if (c != QChar::Null && c != '\n') + nextChar(); + else + break; } length = m_pos - startpos; tokentype = BasicTokenType::Comment; return true; } else if (c == '\'') { - startpos = m_pos; // Single quoted string so it's an SQL text literal while (true) { - c = nextChar(); + 
c = peekChar(); if (c == QChar::Null || c == '\n') { // unexpected end, pretend nothings wrong length = m_pos - startpos; tokentype = BasicTokenType::QuotedString; return true; } - else if (c == '\'') { - // maybe end of string literal - if (peekChar() == '\'') { - // Nope, just double quote to escape quote - nextChar(); // eat it - } - else { - length = m_pos - startpos; - tokentype = BasicTokenType::QuotedString; - return true; + else { + nextChar(); + if (c == '\'') { + // maybe end of string literal + if (peekChar() == '\'') { + // Nope, just double quote to escape quote + nextChar(); // eat it + } + else { + length = m_pos - startpos; + tokentype = BasicTokenType::QuotedString; + return true; + } } } } } else if (c == '"') { - startpos = m_pos; // Double quoted identifier while (true) { - c = nextChar(); + c = peekChar(); if (c == QChar::Null || c == '\n') { // unexpected end, pretend nothings wrong length = m_pos - startpos; tokentype = BasicTokenType::QuotedIdentifier; return true; } - else if (c == '"') { - // maybe end of string literal - if (peekChar() == '"') { - // Nope, just double quote to escape quote - nextChar(); // eat it - } - else { - length = m_pos - startpos; - tokentype = BasicTokenType::QuotedIdentifier; - return true; + else { + nextChar(); + if (c == '"') { + // maybe end of string literal + if (peekChar() == '"') { + // Nope, just double quote to escape quote + nextChar(); // eat it + } + else { + length = m_pos - startpos; + tokentype = BasicTokenType::QuotedIdentifier; + return true; + } } } } @@ -134,10 +141,13 @@ namespace { break; } else { - startpos = m_pos; // Undetermined symbol - while (!c.isSpace() && c != QChar::Null) { - c = nextChar(); + for (;;) { + c = peekChar(); + if (c.isLetterOrNumber() || c == '_') + nextChar(); + else + break; } length = m_pos - startpos; tokentype = BasicTokenType::Symbol; @@ -157,7 +167,7 @@ namespace { "admin", "after", "aggregate", "all", "allocate", "also", "alter", "analyse", "analyze", "and", 
"any", "are", "array", "array_agg", "array_max_cardinality", "as", "asc", "asensitive", "assetion", "assignment", "asymmetric", "at", "atomic", "attribute", "attributes", "authorization", "avg", - "backward", "base64", "before", "begin", "begin_frame", "begin_partition", "bernoulli", "between", "binary", + "backward", "base64", "before", "begin", "begin_frame", "begin_partition", "bernoulli", "between", "bigint", "binary", "bit", "bit_length", "blob", "blocked", "bom", "boolean", "both", "breadth", "buffer", "by", "c", "cache", "call", "called", "cardinality", "cascade", "cascaded", "case", "cast", "catalog", "catalog_name", "ceil", "ceiling", "chain", "char", "character", "characteristics", @@ -179,25 +189,69 @@ namespace { "deref", "derived", "desc", "describe", "descriptor", "deterministic", "diagnostics", "dictionary", "disable", "discard", "disconnect", "dispatch", "distinct", "dlnewcopy", "dlpreviouscopy", "dlurlcomplete", "dlurlcompleteonly", "dlurlcompletewrite", "dlurlpatch", "dlurlpathonly", "dlurlpathwrite", "dlurlscheme", - "dlurlserver", "do", "domain", "drop", - "elif", "end", "event", "exclude", "execute", "exists", "extenstion", - "fetch", "first", "foreign", "from", "function", "full", - "global", "grant", "group", - "having", - "if", "ilike", "immediate", "in", "index", "inherits", "initially", "inner", "insert", "into", "is", + "dlurlserver", "dlvalue", "do", "document", "domain", "double", "drop", "dynamic", "dynamic_function", + "dynamic_function_code", + "each", "element", "else", "empty", "enable", "encoding", "encrypted", "end", "end-exec", "end_frame", + "end_partition", "enforced", "enum", "equals", "escape", "event", "every", "except", "exception", "exclude", + "excluding", "exclusive", "exec", "execute", "exists", "exp", "explain", "expression", "extension", + "external", "extract", "false", "family", "fetch", "file", "filter", "final", "first", "first_value", + "flag", "float", "floor", "following", "for", "force", "foreign",
"fortran", "forward", "found", + "frame_row", "free", "freeze", "from", "fs", "full", "function", "functions", "fusion", + "g", "general", "generated", "get", "global", "go", "goto", "grant", "granted", "greatest", "group", + "grouping", "groups", "handler", "having", "header", "hex", "hierarchy", "hold", "hour", "id", "identity", + "if", "ignore", "ilike", "immediate", "immediately", "immutable", "implementation", "implicit", "import", "in", + "including", "increment", "indent", "index", "indexes", "indicator", "inherit", "inherits", "initially", "inline", + "inner", "inout", "input", "insensitive", "insert", "instance", "instantiable", "instead", "int", "integer", + "integrity", "intersect", "intersection", "interval", "into", "invoker", "is", "isnull", "isolation", "join", - "key", - "language", "last", "left", "like", "limit", "listen", "local", "lock", - "match", - "natural", "not", "null", "nulls", - "offset", "oids", "on", "or", "order", "outer", "over", - "partial", "partition", "prepare", "preserve", "primary", "privileges", "public", - "references", "refresh", "reindex", "release", "replace", "reset", "restrict", "revoke", "right", "role", "rollback", "row", "rows", "rule", - "savepoint", "schema", "select", "sequence", "server", "set", "show", "simple", "statement", - "table", "tablespace", "temp", "temporary", "trigger", "truncate", - "unique", "unlisten", "unlogged", "update", "user", "using", - "vacuum", "values", "view", "volatile", - "when", "where", "with", "wrapper" + "k", "key", "key_member", "key_type", + "label", "lag", "language", "large", "last", "last_value", "lateral", "lead", "leading", "leakproof", + "least", "left", "length", "level", "library", "like", "like_regex", "limit", "link", "listen", "ln", "load", "local", + "localtime", "localtimestamp", "location", "locator", "lock", "locked", "logged", "lower", + "m", "map", "mapping", "match", "matched", "materialized", "max", "maxvalue", "max_cardinality", "member", + "merge",
"message_length", "message_octet_length", "message_text", "method", "min", "minute", "minvalue", + "mod", "mode", "modifies", "module", "month", "more", "move", "multiset", "mumps", + "name", "namespace", "national", "natural", "nchar", "nclob", "nesting", "new", "next", "nfc", "nfd", "nfkc", "nfkd", + "nil", "no", "none", "normalize", "normalized", "not", "nothing", "notify", "notnull", "nowait", "nth_value", "ntile", + "null", "nullable", "nullif", "nulls", "number", "numeric", + "object", "occurrences_regex", "octets", "octet_length", "of", "off", "offset", "oids", "old", "on", "only", "open", + "operator", "option", "options", "or", "order", "ordering", "ordinality", "others", "out", "outer", "output", "over", + "overlaps", "overlay", "overriding", "owned", "owner", + "p", "pad", "parallel", "parameter", "parameter_mode", "parameter_name", "parameter_specific_catalog", + "parameter_specific_name", "parameter_specific_schema", "parser", + "partial", "partition", "pascal", "passing", "passthrough", "password", "path", "percent", "percentile_cont", + "percentile_disc", "percent_rank", "period", "permission", "placing", "plans", "pli", "policy", "portion", + "position", "position_regex", "power", "precedes", "preceding", "precision", "prepare", "prepared", "preserve", + "primary", "prior", "privileges", "procedural", "procedure", "program", "public", + "quote", "range", "rank", "read", "reads", "real", "reassign", "recheck", "recovery", "recursive", "ref", + "references", "referencing", "refresh", "regr_avgx", "regr_avgy", "regr_count", "regr_intercept", "regr_r2", + "regr_slope", "regr_sxx", "regr_sxy", "regr_syy", "reindex", "relative", "release", "rename", "repeatable", + "replace", "replica", "requiring", "reset", "respect", "restart", "restore", "restrict", "result", "return", + "returned_cardinality", "returned_length", "returned_octet_length", "returned_sqlstate", "returning", "returns", + "revoke", "right", "role", "rollback", "rollup", "routine",
"routine_catalog", "routine_name", "routine_schema", + "row", "rows", "row_count", "row_number", "rule", + "savepoint", "scale", "schema", "schema_name", "scope", "scope_catalog", "scope_name", "scope_schema", "scroll", + "search", "second", "section", "security", "select", "selective", "self", "sensitive", "sequence", "sequences", + "serializable", "server", "server_name", "session", "session_user", "set", "setof", "sets", "share", "show", + "similar", "simple", "size", "skip", "smallint", "snapshot", "some", "source", "space", "specific", "specifictype", + "specific_name", "sql", "sqlcode", "sqlerror", "sqlexception", "sqlstate", "sqlwarning", "sqrt", "stable", + "standalone", "start", "state", "statement", "static", "statistics", "stddev_pop", "stddev_samp", "stdin", "stdout", + "storage", "strict", "strip", "structure", "style", "subclass_origin", "submultiset", "substring", "substring_regex", + "succeeds", "sum", "symmetric", "sysid", "system", "system_time", "system_user", + "t", "table", "tables", "tablesample", "tablespace", "table_name", "temp", "template", "temporary", "text", "then", + "ties", "time", "timestamp", "timezone_hour", "timezone_minute", "to", "token", "top_level_count", "trailing", + "transaction", "transactions_committed", "transactions_rolled_back", "transaction_active", "transform", "transforms", + "translate", "translate_regex", "translation", "treat", "trigger", "trigger_catalog", "trigger_name", "trigger_schema", + "trim", "trim_array", "true", "truncate", "trusted", "type", "types", "uescape", "unbounded", "uncommitted", "under", + "unencrypted", "union", "unique", "unknown", "unlink", "unlisten", "unlogged", "unnamed", "unnest", "until", "untyped", + "update", "upper", "uri", "usage", "user", "user_defined_type_catalog", "user_defined_type_code", + "user_defined_type_name", "user_defined_type_schema", "using", + "vacuum", "valid", "validate", "validator", "value", "values", "value_of", "varbinary", "varchar", "variadic", + "varying",
"var_pop", "var_samp", "verbose", "version", "versioning", "view", "views", "volatile", + "when", "whenever", "where", "whitespace", "width_bucket", "window", "with", "within", "without", "work", "wrapper", + "write", "xml", "xmlagg", "xmlattributes", "xmlbinary", "xmlcast", "xmlcomment", "xmlconcat", "xmldeclaration", + "xmldocument", "xmlelement", "xmlexists", "xmlforest", "xmliterate", "xmlnamespaces", "xmlparse", "xmlpi", + "xmlquery", "xmlroot", "xmlschema", "xmlserialize", "xmltable", "xmltext", "xmlvalidate", "year", "yes", "zone" }; //"bigint", @@ -211,9 +265,12 @@ SqlSyntaxHighlighter::SqlSyntaxHighlighter(QTextDocument *parent) m_keywordFormat.setForeground(QColor(32, 32, 192)); m_keywordFormat.setFontWeight(QFont::Bold); - m_commentFormat.setForeground(QColor(64, 64, 64)); + m_commentFormat.setForeground(QColor(128, 128, 128)); + m_quotedStringFormat.setForeground(QColor(192, 32, 192)); + m_quotedIdentifierFormat.setForeground(QColor(192, 128, 32)); + m_typeFormat.setForeground(QColor(32, 192, 32)); m_typeFormat.setFontWeight(QFont::Bold); } @@ -257,6 +314,7 @@ void SqlSyntaxHighlighter::highlightBlock(const QString &text) setFormat(startpos, length, m_quotedStringFormat); break; case BasicTokenType::QuotedIdentifier: + setFormat(startpos, length, m_quotedIdentifierFormat); break; } } diff --git a/SqlSyntaxHighlighter.h b/SqlSyntaxHighlighter.h index 238a678..877a8b8 100644 --- a/SqlSyntaxHighlighter.h +++ b/SqlSyntaxHighlighter.h @@ -27,6 +27,7 @@ private: QTextCharFormat m_commentFormat; QTextCharFormat m_quotedStringFormat; QTextCharFormat m_typeFormat; + QTextCharFormat m_quotedIdentifierFormat; t_SymbolSet m_typeNames; };