Lexical analyzer should now be less confused by dots, commas and an assortment of other single-character symbols.

2018-02-05 22:23:28 +01:00 · 2018-02-05 22:23:28 +01:00 · 914d2fe9fa
commit 914d2fe9fa
parent 44326da564
3 changed files with 85 additions and 12 deletions
--- a/core/SqlLexer.cpp
+++ b/core/SqlLexer.cpp
@ -24,6 +24,17 @@ QChar SqlLexer::peekChar()
 }
 //self			,()\[\].;\:\+\-\*\/\%\^\<\>\=
 /**
  * @brief Tells whether a character is one of the single-character "self" tokens.
  *
  * The accepted set is: , ( ) [ ] . ; : + - * / % ^ < > =
  *
  * @param c Character to classify; any type that can be compared to a char with ==.
  * @return true when c equals one of the characters listed above, false otherwise.
  */
 template <typename C>
 inline bool isSelf(C c)
 {
 	// Kept as a flat table so the accepted set can be read at a glance.
 	static const char selfChars[] = {
 		',', '(', ')', '[', ']', '.',
 		';', ':', '+', '-', '*', '/',
 		'%', '^', '<', '>', '='
 	};
 	for (const char candidate : selfChars) {
 		if (c == candidate) {
 			return true;
 		}
 	}
 	return false;
 }
 //+ - * / < > = ~ ! @ # % ^ & | ` ?
 //There are a few restrictions on your choice of name:
 //   -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
@ -40,6 +51,16 @@ inline bool isOperatorChar(C c)
 		|| c == '|' || c == '`' || c == '?';
 }
 //typecast		"::" IMPLEMENTED
 //dot_dot			\.\. TODO
 //colon_equals	":="  TODO
 //equals_greater	"=>"  TODO
 //less_equals		"<="  TODO
 //greater_equals	">="  TODO
 //less_greater	"<>"  TODO
 //not_equals		"!="  TODO
 // See also C:\Prog\postgresql-9.6.4\src\backend\parser\scan.l
 /**
 * @brief NextBasicToken
@ -60,8 +81,9 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
 //		if (LexerState::Null == m_state) {
 			if (c.isSpace()) {
 				// Just skip whitespace
 				continue;
 			}
-			else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
+			if (c == '-' && peekChar() == '-') { // two dashes, start of comment
 				// Loop till end of line or end of block
 				c = nextChar();
 				for (;;) {
@ -75,6 +97,40 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
 				tokentype = BasicTokenType::Comment;
 				return true;
 			}
 			if (c == ':') {
 				c = peekChar();
 				if (c == ':') {
 					nextChar();
 					length = m_pos - startpos;
 					tokentype = BasicTokenType::Cast;
 					QStringRef sr(&m_block, startpos, length);
 					out = sr.toString();
 					return true;
 				}
 			}
 			if (isSelf(c)) {
 				length = m_pos - startpos;
 				tokentype = BasicTokenType::Self;
 				QStringRef sr(&m_block, startpos, length);
 				out = sr.toString();
 				return true;
 			}
 			if (isOperatorChar(c)) {
 				while (true) {
 					QChar c = peekChar();
 					if (isOperatorChar(c)) {
 						nextChar();
 					}
 					else {
 						// unexpected end, pretend nothings wrong
 						length = m_pos - startpos;
 						tokentype = BasicTokenType::Operator;
 						QStringRef sr(&m_block, startpos, length);
 						out = sr.toString();
 						return true;
 					}
 				}
 			}
 			else if (c == '\'') {
 				// Single quoted string so it's an SQL text literal
                return parseSingleQuotedString(startpos, length, tokentype);
@ -93,17 +149,6 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
 			else if (c == '$') {
                return parseDollarQuote(startpos, length, tokentype, out);                
            }
 			else if (c == ':') {
 				c = peekChar();
 				if (c == ':') {
 					nextChar();
 					length = m_pos - startpos;
 					tokentype = BasicTokenType::Cast;
 					QStringRef sr(&m_block, startpos, length);
 					out = sr.toString();
 					return true;
 				}
 			}
 			else {
 				// Undetermined symbol
 				for (;;) {
--- a/core/SqlLexer.h
+++ b/core/SqlLexer.h
@ -14,6 +14,8 @@ enum class BasicTokenType {
 	DollarQuote, // Return the dollar quote tag, do not consume the entire string (potentially long)
 	QuotedIdentifier,
 	Parameter,
 	Operator,
 	Self, // single char representing it self
 	Cast
 };
--- a/tests/pglabtests/tst_SqlLexer.cpp
+++ b/tests/pglabtests/tst_SqlLexer.cpp
@ -36,6 +36,32 @@ TEST(SqlLexer, lexer_quote_in_string)
 	ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString));
 }
 // Checks that "abc,def" is split into three tokens: a Symbol, a Self token
 // holding the comma, and another Symbol.
 TEST(SqlLexer, lexer_comma_handling)
 {
 	QString sql("abc,def");
 	SqlLexer lexer(sql, LexerState::Null);
 	// Output slots that every nextBasicToken call overwrites.
 	int tokenStart;
 	int tokenLength;
 	BasicTokenType kind;
 	QString text;
 	// First token: "abc" at offset 0.
 	lexer.nextBasicToken(tokenStart, tokenLength, kind, text);
 	ASSERT_THAT(tokenStart, Eq(0));
 	ASSERT_THAT(tokenLength, Eq(3));
 	ASSERT_THAT(kind, Eq(BasicTokenType::Symbol));
 	// Second token: the comma on its own, at offset 3.
 	lexer.nextBasicToken(tokenStart, tokenLength, kind, text);
 	ASSERT_THAT(tokenStart, Eq(3));
 	ASSERT_THAT(tokenLength, Eq(1));
 	ASSERT_THAT(kind, Eq(BasicTokenType::Self));
 	ASSERT_THAT(text, Eq(QString(",")));
 	// Third token: "def" starting right after the comma.
 	lexer.nextBasicToken(tokenStart, tokenLength, kind, text);
 	ASSERT_THAT(tokenStart, Eq(4));
 	ASSERT_THAT(tokenLength, Eq(3));
 	ASSERT_THAT(kind, Eq(BasicTokenType::Symbol));
 }
 TEST(SqlLexer, lexer_cast)
 {
 	QString input = "'1'::integer";