Lexical analyzer should now be less confused by dots, commas, and an assortment of other single-character symbols.

This commit is contained in:
eelke 2018-02-05 22:23:28 +01:00
parent 44326da564
commit 914d2fe9fa
3 changed files with 85 additions and 12 deletions

View file

@ -24,6 +24,17 @@ QChar SqlLexer::peekChar()
}
//self ,()\[\].;\:\+\-\*\/\%\^\<\>\=
/**
 * @brief Checks whether a character is one of the single-character "self" symbols.
 *
 * The recognised characters are: , ( ) [ ] . ; : + - * / % ^ < > =
 *
 * @tparam C Any character type that can be compared to a plain char with ==.
 * @param c The character to classify.
 * @return true when c equals one of the characters listed above, false otherwise.
 */
template <typename C>
inline bool isSelf(C c)
{
    // Kept in the same order as the lexer rule so the two are easy to compare.
    static constexpr char selfChars[] = {
        ',', '(', ')', '[', ']', '.',
        ';', ':', '+', '-', '*', '/',
        '%', '^', '<', '>', '='
    };

    for (const char candidate : selfChars) {
        if (c == candidate) {
            return true;
        }
    }
    return false;
}
//+ - * / < > = ~ ! @ # % ^ & | ` ?
//There are a few restrictions on your choice of name:
// -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
@ -40,6 +51,16 @@ inline bool isOperatorChar(C c)
|| c == '|' || c == '`' || c == '?';
}
//typecast "::" IMPLEMENTED
//dot_dot \.\. TODO
//colon_equals ":=" TODO
//equals_greater "=>" TODO
//less_equals "<=" TODO
//greater_equals ">=" TODO
//less_greater "<>" TODO
//not_equals "!=" TODO
// See also C:\Prog\postgresql-9.6.4\src\backend\parser\main.l
/**
* @brief NextBasicToken
@ -60,8 +81,9 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
// if (LexerState::Null == m_state) {
if (c.isSpace()) {
// Just skip whitespace
continue;
}
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar();
for (;;) {
@ -75,6 +97,40 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
tokentype = BasicTokenType::Comment;
return true;
}
if (c == ':') {
c = peekChar();
if (c == ':') {
nextChar();
length = m_pos - startpos;
tokentype = BasicTokenType::Cast;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
if (isSelf(c)) {
length = m_pos - startpos;
tokentype = BasicTokenType::Self;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
if (isOperatorChar(c)) {
while (true) {
QChar c = peekChar();
if (isOperatorChar(c)) {
nextChar();
}
else {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::Operator;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
}
else if (c == '\'') {
// Single quoted string so it's an SQL text literal
return parseSingleQuotedString(startpos, length, tokentype);
@ -93,17 +149,6 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
else if (c == '$') {
return parseDollarQuote(startpos, length, tokentype, out);
}
else if (c == ':') {
c = peekChar();
if (c == ':') {
nextChar();
length = m_pos - startpos;
tokentype = BasicTokenType::Cast;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
else {
// Undetermined symbol
for (;;) {

View file

@ -14,6 +14,8 @@ enum class BasicTokenType {
DollarQuote, // Return the dollar quote tag, do not consume the entire string (potentially long)
QuotedIdentifier,
Parameter,
Operator,
Self, // single char representing it self
Cast
};

View file

@ -36,6 +36,32 @@ TEST(SqlLexer, lexer_quote_in_string)
ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString));
}
// Checks that a comma between two symbols is reported as its own
// single-character Self token and does not get merged into either symbol.
TEST(SqlLexer, lexer_comma_handling)
{
    QString sql = "abc,def";
    SqlLexer lexer(sql, LexerState::Null);

    int tokenStart;
    int tokenLength;
    BasicTokenType kind;
    QString text;

    // First token: the symbol "abc" at offset 0.
    lexer.nextBasicToken(tokenStart, tokenLength, kind, text);
    ASSERT_THAT(tokenStart, Eq(0));
    ASSERT_THAT(tokenLength, Eq(3));
    ASSERT_THAT(kind, Eq(BasicTokenType::Symbol));

    // Second token: the comma by itself at offset 3.
    lexer.nextBasicToken(tokenStart, tokenLength, kind, text);
    ASSERT_THAT(tokenStart, Eq(3));
    ASSERT_THAT(tokenLength, Eq(1));
    ASSERT_THAT(kind, Eq(BasicTokenType::Self));
    ASSERT_THAT(text, Eq(QString(",")));

    // Third token: the symbol "def" at offset 4.
    lexer.nextBasicToken(tokenStart, tokenLength, kind, text);
    ASSERT_THAT(tokenStart, Eq(4));
    ASSERT_THAT(tokenLength, Eq(3));
    ASSERT_THAT(kind, Eq(BasicTokenType::Symbol));
}
TEST(SqlLexer, lexer_cast)
{
QString input = "'1'::integer";