From 914d2fe9fad215c6da120648211b4daa6473c911 Mon Sep 17 00:00:00 2001
From: eelke
Date: Mon, 5 Feb 2018 22:23:28 +0100
Subject: [PATCH] Lexical analyzer should now be less confused by dots and
 commas and an assortment of other single character symbols.

---
Review notes (ignored by git am):
 * isSelf() gets its template parameter list back; without it the helper
   does not compile.
 * The "::" check no longer assigns the peeked character to c, so a lone
   ':' is classified by its own value instead of by the character after it.
 * The operator loop no longer declares a second c shadowing the outer one.
 * Hunk sizes are unchanged. Indentation and blank context lines were
   reconstructed, so apply with --ignore-whitespace if context fails to
   match. The index ids below are those of the original patch.

 core/SqlLexer.cpp                 | 69 +++++++++++++++++++++++++------
 core/SqlLexer.h                   |  2 +
 tests/pglabtests/tst_SqlLexer.cpp | 26 ++++++++++++
 3 files changed, 85 insertions(+), 12 deletions(-)

diff --git a/core/SqlLexer.cpp b/core/SqlLexer.cpp
index ecb94e6..a25c55a 100644
--- a/core/SqlLexer.cpp
+++ b/core/SqlLexer.cpp
@@ -24,6 +24,17 @@ QChar SqlLexer::peekChar()
 }
 
 
+
+// Counterpart of "self" in PostgreSQL's scan.l: [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
+template <typename C>
+inline bool isSelf(C c)
+{
+	return c == ',' || c == '(' || c == ')' || c == '[' || c == ']' || c == '.'
+		|| c == ';' || c == ':' || c == '+' || c == '-' || c == '*' || c == '/'
+		|| c == '%' || c == '^' || c == '<' || c == '>' || c == '=';
+}
+
+
 //+ - * / < > = ~ ! @ # % ^ & | ` ?
 //There are a few restrictions on your choice of name:
 // -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
@@ -40,6 +51,16 @@ inline bool isOperatorChar(C c)
 		|| c == '|' || c == '`' || c == '?';
 }
 
+// Multi-character tokens defined in PostgreSQL's scan.l (see
+// src/backend/parser/scan.l in the PostgreSQL 9.6 source tree):
+//   typecast        "::"   IMPLEMENTED
+//   dot_dot         \.\.   TODO
+//   colon_equals    ":="   TODO
+//   equals_greater  "=>"   TODO
+//   less_equals     "<="   TODO
+//   greater_equals  ">="   TODO
+//   less_greater    "<>"   TODO
+//   not_equals      "!="   TODO
 
 /**
  * @brief NextBasicToken
@@ -60,8 +81,9 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
 //		if (LexerState::Null == m_state) {
 			if (c.isSpace()) {
 				// Just skip whitespace
+				continue;
 			}
-			else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
+			if (c == '-' && peekChar() == '-') { // two dashes, start of comment
 				// Loop till end of line or end of block
 				c = nextChar();
 				for (;;) {
@@ -75,6 +97,40 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
 				tokentype = BasicTokenType::Comment;
 				return true;
 			}
+			// "::" typecast. Only peek at the next character: c must keep its
+			// own value so that a lone ':' is still classified by isSelf() below.
+			if (c == ':' && peekChar() == ':') {
+				nextChar();
+				length = m_pos - startpos;
+				tokentype = BasicTokenType::Cast;
+				QStringRef sr(&m_block, startpos, length);
+				out = sr.toString();
+				return true;
+			}
+			// Single characters that form a token on their own.
+			if (isSelf(c)) {
+				length = m_pos - startpos;
+				tokentype = BasicTokenType::Self;
+				QStringRef sr(&m_block, startpos, length);
+				out = sr.toString();
+				return true;
+			}
+			if (isOperatorChar(c)) {
+				// Greedily consume the whole run of operator characters; the
+				// token ends at the first character that is not one, which is
+				// assumed to include whatever peekChar() yields at the end of
+				// the block.
+				// NOTE: the rule quoted above isOperatorChar() (no "--" or "/*"
+				// inside an operator name) is not enforced here.
+				while (isOperatorChar(peekChar())) {
+					nextChar();
+				}
+				length = m_pos - startpos;
+				tokentype = BasicTokenType::Operator;
+				QStringRef sr(&m_block, startpos, length);
+				out = sr.toString();
+				return true;
+			}
 			else if (c == '\'') {
 				// Single quoted string so it's an SQL text literal
 				return parseSingleQuotedString(startpos, length, tokentype);
@@ -93,17 +149,6 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
 			else if (c == '$') {
 				return parseDollarQuote(startpos, length, tokentype, out);
 			}
-			else if (c == ':') {
-				c = peekChar();
-				if (c == ':') {
-					nextChar();
-					length = m_pos - startpos;
-					tokentype = BasicTokenType::Cast;
-					QStringRef sr(&m_block, startpos, length);
-					out = sr.toString();
-					return true;
-				}
-			}
 			else {
 				// Undetermined symbol
 				for (;;) {
diff --git a/core/SqlLexer.h b/core/SqlLexer.h
index c9684d8..dc23db8 100644
--- a/core/SqlLexer.h
+++ b/core/SqlLexer.h
@@ -14,6 +14,8 @@ enum class BasicTokenType {
 	DollarQuote, // Return the dollar quote tag, do not consume the entire string (potentially long)
 	QuotedIdentifier,
 	Parameter,
+	Operator, // run of operator characters
+	Self, // single char representing itself
 	Cast
 };
 
diff --git a/tests/pglabtests/tst_SqlLexer.cpp b/tests/pglabtests/tst_SqlLexer.cpp
index 76dd1f7..421933a 100644
--- a/tests/pglabtests/tst_SqlLexer.cpp
+++ b/tests/pglabtests/tst_SqlLexer.cpp
@@ -36,6 +36,32 @@ TEST(SqlLexer, lexer_quote_in_string)
 	ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString));
 }
 
+TEST(SqlLexer, lexer_comma_handling)
+{
+	QString input = "abc,def";
+	SqlLexer lexer(input, LexerState::Null);
+
+	int startpos, length;
+	BasicTokenType tokentype;
+	QString out;
+
+	lexer.nextBasicToken(startpos, length, tokentype, out);
+	ASSERT_THAT(startpos, Eq(0));
+	ASSERT_THAT(length, Eq(3));
+	ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol));
+
+	lexer.nextBasicToken(startpos, length, tokentype, out);
+	ASSERT_THAT(startpos, Eq(3));
+	ASSERT_THAT(length, Eq(1));
+	ASSERT_THAT(tokentype, Eq(BasicTokenType::Self));
+	ASSERT_THAT(out, Eq(QString(",")));
+
+	lexer.nextBasicToken(startpos, length, tokentype, out);
+	ASSERT_THAT(startpos, Eq(4));
+	ASSERT_THAT(length, Eq(3));
+	ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol));
+}
+
 TEST(SqlLexer, lexer_cast)
 {
 	QString input = "'1'::integer";