The lexical analyzer should now be less confused by dots, commas, and an assortment of other single-character symbols.
This commit is contained in:
parent
44326da564
commit
914d2fe9fa
3 changed files with 85 additions and 12 deletions
|
|
@ -24,6 +24,17 @@ QChar SqlLexer::peekChar()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//self ,()\[\].;\:\+\-\*\/\%\^\<\>\=
|
||||||
|
template <typename C>
|
||||||
|
inline bool isSelf(C c)
|
||||||
|
{
|
||||||
|
return c == ',' || c == '(' || c == ')' || c == '[' || c == ']' || c == '.'
|
||||||
|
|| c == ';' || c == ':' || c == '+' || c == '-' || c == '*' || c == '/'
|
||||||
|
|| c == '%' || c == '^' || c == '<' || c == '>' || c == '=';
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//+ - * / < > = ~ ! @ # % ^ & | ` ?
|
//+ - * / < > = ~ ! @ # % ^ & | ` ?
|
||||||
//There are a few restrictions on your choice of name:
|
//There are a few restrictions on your choice of name:
|
||||||
// -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
|
// -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
|
||||||
|
|
@ -40,6 +51,16 @@ inline bool isOperatorChar(C c)
|
||||||
|| c == '|' || c == '`' || c == '?';
|
|| c == '|' || c == '`' || c == '?';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//typecast "::" IMPLEMENTED
|
||||||
|
//dot_dot \.\. TODO
|
||||||
|
//colon_equals ":=" TODO
|
||||||
|
//equals_greater "=>" TODO
|
||||||
|
//less_equals "<=" TODO
|
||||||
|
//greater_equals ">=" TODO
|
||||||
|
//less_greater "<>" TODO
|
||||||
|
//not_equals "!=" TODO
|
||||||
|
|
||||||
|
// See also C:\Prog\postgresql-9.6.4\src\backend\parser\main.l
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief NextBasicToken
|
* @brief NextBasicToken
|
||||||
|
|
@ -60,8 +81,9 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
|
||||||
// if (LexerState::Null == m_state) {
|
// if (LexerState::Null == m_state) {
|
||||||
if (c.isSpace()) {
|
if (c.isSpace()) {
|
||||||
// Just skip whitespace
|
// Just skip whitespace
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
|
if (c == '-' && peekChar() == '-') { // two dashes, start of comment
|
||||||
// Loop till end of line or end of block
|
// Loop till end of line or end of block
|
||||||
c = nextChar();
|
c = nextChar();
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|
@ -75,6 +97,40 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
|
||||||
tokentype = BasicTokenType::Comment;
|
tokentype = BasicTokenType::Comment;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (c == ':') {
|
||||||
|
c = peekChar();
|
||||||
|
if (c == ':') {
|
||||||
|
nextChar();
|
||||||
|
length = m_pos - startpos;
|
||||||
|
tokentype = BasicTokenType::Cast;
|
||||||
|
QStringRef sr(&m_block, startpos, length);
|
||||||
|
out = sr.toString();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isSelf(c)) {
|
||||||
|
length = m_pos - startpos;
|
||||||
|
tokentype = BasicTokenType::Self;
|
||||||
|
QStringRef sr(&m_block, startpos, length);
|
||||||
|
out = sr.toString();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (isOperatorChar(c)) {
|
||||||
|
while (true) {
|
||||||
|
QChar c = peekChar();
|
||||||
|
if (isOperatorChar(c)) {
|
||||||
|
nextChar();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Next char does not continue the operator, so the token ends here
|
||||||
|
length = m_pos - startpos;
|
||||||
|
tokentype = BasicTokenType::Operator;
|
||||||
|
QStringRef sr(&m_block, startpos, length);
|
||||||
|
out = sr.toString();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (c == '\'') {
|
else if (c == '\'') {
|
||||||
// Single quoted string so it's an SQL text literal
|
// Single quoted string so it's an SQL text literal
|
||||||
return parseSingleQuotedString(startpos, length, tokentype);
|
return parseSingleQuotedString(startpos, length, tokentype);
|
||||||
|
|
@ -93,17 +149,6 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
|
||||||
else if (c == '$') {
|
else if (c == '$') {
|
||||||
return parseDollarQuote(startpos, length, tokentype, out);
|
return parseDollarQuote(startpos, length, tokentype, out);
|
||||||
}
|
}
|
||||||
else if (c == ':') {
|
|
||||||
c = peekChar();
|
|
||||||
if (c == ':') {
|
|
||||||
nextChar();
|
|
||||||
length = m_pos - startpos;
|
|
||||||
tokentype = BasicTokenType::Cast;
|
|
||||||
QStringRef sr(&m_block, startpos, length);
|
|
||||||
out = sr.toString();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
else {
|
||||||
// Undetermined symbol
|
// Undetermined symbol
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,8 @@ enum class BasicTokenType {
|
||||||
DollarQuote, // Return the dollar quote tag, do not consume the entire string (potentially long)
|
DollarQuote, // Return the dollar quote tag, do not consume the entire string (potentially long)
|
||||||
QuotedIdentifier,
|
QuotedIdentifier,
|
||||||
Parameter,
|
Parameter,
|
||||||
|
Operator,
|
||||||
|
Self, // single char representing it self
|
||||||
Cast
|
Cast
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -36,6 +36,32 @@ TEST(SqlLexer, lexer_quote_in_string)
|
||||||
ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString));
|
ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST(SqlLexer, lexer_comma_handling)
|
||||||
|
{
|
||||||
|
QString input = "abc,def";
|
||||||
|
SqlLexer lexer(input, LexerState::Null);
|
||||||
|
|
||||||
|
int startpos, length;
|
||||||
|
BasicTokenType tokentype;
|
||||||
|
QString out;
|
||||||
|
|
||||||
|
lexer.nextBasicToken(startpos, length, tokentype, out);
|
||||||
|
ASSERT_THAT(startpos, Eq(0));
|
||||||
|
ASSERT_THAT(length, Eq(3));
|
||||||
|
ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol));
|
||||||
|
|
||||||
|
lexer.nextBasicToken(startpos, length, tokentype, out);
|
||||||
|
ASSERT_THAT(startpos, Eq(3));
|
||||||
|
ASSERT_THAT(length, Eq(1));
|
||||||
|
ASSERT_THAT(tokentype, Eq(BasicTokenType::Self));
|
||||||
|
ASSERT_THAT(out, Eq(QString(",")));
|
||||||
|
|
||||||
|
lexer.nextBasicToken(startpos, length, tokentype, out);
|
||||||
|
ASSERT_THAT(startpos, Eq(4));
|
||||||
|
ASSERT_THAT(length, Eq(3));
|
||||||
|
ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol));
|
||||||
|
}
|
||||||
|
|
||||||
TEST(SqlLexer, lexer_cast)
|
TEST(SqlLexer, lexer_cast)
|
||||||
{
|
{
|
||||||
QString input = "'1'::integer";
|
QString input = "'1'::integer";
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue