Lexer improvements:
- Convert unquoted idents to lowercase. - Recognize quoted idents. - Allow all Unicode whitespace characters. - Added UnexpectedSymbol token for unexpected input (otherwise it is just ignored). - Handle mixed-case keywords in the lexer file instead of filtering the stream.
This commit is contained in:
parent
81f27a6a18
commit
0da32b916c
6 changed files with 132 additions and 104 deletions
|
|
@ -1,83 +0,0 @@
|
|||
#pragma once
|
||||
|
||||
#include "antlr4-runtime.h"
|
||||
#include <QChar>
|
||||
|
||||
/// Helper stream for antlr, the lexer does not need to be case sensitive
|
||||
/// this is achieved by changing the case of the chars in LA; however,
|
||||
/// when the text of a recognized token is captured the getText function
|
||||
/// is used, which does no case conversion, so the parser will receive the original
|
||||
/// case.
|
||||
class CaseChangingCharStream: public antlr4::CharStream
|
||||
{
|
||||
public:
|
||||
/// Stores the raw pointer to the stream being wrapped and the case
/// direction: upper == true makes LA() upper-case symbols, false makes it
/// lower-case them. The pointer is not checked for null.
CaseChangingCharStream(antlr4::CharStream *stream, bool upper)
|
||||
: stream(stream)
|
||||
, upper(upper)
|
||||
{}
|
||||
|
||||
/// Empty body: the wrapped stream is not deleted here.
virtual ~CaseChangingCharStream()
|
||||
{}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual void consume() override
|
||||
{
|
||||
stream->consume();
|
||||
}
|
||||
|
||||
/// Returns the look-ahead symbol of the wrapped stream with its case
/// changed according to the `upper` flag.
virtual size_t LA(ssize_t i) override
|
||||
{
|
||||
// The size_t result of the wrapped LA() is narrowed to int here.
int c = stream->LA(i);
|
||||
// Non-positive values are passed through without case conversion
// (presumably this covers the end-of-input marker; TODO confirm against
// the antlr4 runtime's EOF constant).
if (c <= 0)
|
||||
return c;
|
||||
|
||||
if (upper)
|
||||
return QChar::toUpper(c);
|
||||
|
||||
return QChar::toLower(c);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream, so the returned text has not gone
/// through the case conversion done in LA().
virtual std::string getText(const antlr4::misc::Interval &interval) override
|
||||
{
|
||||
return stream->getText(interval);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual std::string toString() const override
|
||||
{
|
||||
return stream->toString();
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual ssize_t mark() override
|
||||
{
|
||||
return stream->mark();
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual void release(ssize_t marker) override
|
||||
{
|
||||
stream->release(marker);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual size_t index() override
|
||||
{
|
||||
return stream->index();
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual void seek(size_t index) override
|
||||
{
|
||||
stream->seek(index);
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual size_t size() override
|
||||
{
|
||||
return stream->size();
|
||||
}
|
||||
|
||||
/// Forwards to the wrapped stream.
virtual std::string getSourceName() const override
|
||||
{
|
||||
return stream->getSourceName();
|
||||
}
|
||||
|
||||
private:
|
||||
// Wrapped stream that every call is delegated to; never deleted by this class.
antlr4::CharStream *stream;
|
||||
// true: LA() upper-cases symbols; false: LA() lower-cases them.
bool upper;
|
||||
|
||||
};
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
lexer grammar PgsqlLexer;
|
||||
|
||||
@lexer::preinclude {
|
||||
#include <memory>
|
||||
#include <QString>
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -11,21 +11,61 @@ Dot: '.';
|
|||
OpenParen: '(';
|
||||
CloseParen: ')';
|
||||
|
||||
As: 'AS';
|
||||
By: 'BY';
|
||||
From: 'FROM';
|
||||
Full: 'FULL';
|
||||
Group: 'GROUP';
|
||||
Having: 'HAVING';
|
||||
Join: 'JOIN';
|
||||
Left : 'LEFT';
|
||||
Order : 'ORDER';
|
||||
Right : 'RIGHT';
|
||||
Select: 'SELECT';
|
||||
Where: 'WHERE';
|
||||
fragment A : 'a' | 'A';
|
||||
fragment B : 'B' | 'b';
|
||||
fragment C : 'C' | 'c';
|
||||
fragment D : 'D' | 'd';
|
||||
fragment E : 'E' | 'e';
|
||||
fragment F : 'F' | 'f';
|
||||
fragment G : 'G' | 'g';
|
||||
fragment H : 'H' | 'h';
|
||||
fragment I : 'I' | 'i';
|
||||
fragment J : 'J' | 'j';
|
||||
fragment K : 'K' | 'k';
|
||||
fragment L : 'L' | 'l';
|
||||
fragment M : 'M' | 'm';
|
||||
fragment N : 'N' | 'n';
|
||||
fragment O : 'O' | 'o';
|
||||
fragment P : 'P' | 'p';
|
||||
fragment Q : 'Q' | 'q';
|
||||
fragment R : 'R' | 'r';
|
||||
fragment S : 'S' | 's';
|
||||
fragment T : 'T' | 't';
|
||||
fragment U : 'U' | 'u';
|
||||
fragment V : 'V' | 'v';
|
||||
fragment W : 'W' | 'w';
|
||||
fragment X : 'X' | 'x';
|
||||
fragment Y : 'Y' | 'y';
|
||||
fragment Z : 'Z' | 'z';
|
||||
|
||||
Ident: [A-Za-z_][A-Za-z_0-9]* ; // match identifiers: a letter (either case) or underscore, then letters, digits or underscores
|
||||
As: A S;
|
||||
By: B Y;
|
||||
From: F R O M;
|
||||
Full: F U L L;
|
||||
Group: G R O U P;
|
||||
Having: H A V I N G;
|
||||
Join: J O I N;
|
||||
Left : L E F T;
|
||||
Order : O R D E R;
|
||||
Right : R I G H T;
|
||||
Select: S E L E C T;
|
||||
Where: W H E R E;
|
||||
|
||||
// Identifier token.
//  - Unquoted form: starts with an alphabetic character or underscore and
//    continues with alphabetic characters, underscores, ASCII digits or '$'
//    (the characters PostgreSQL allows in an unquoted identifier). The token
//    text is folded to lowercase.
//    The continuation set is an explicit whitelist on purpose: the earlier
//    ~[\p{White_Space}]* consumed everything up to the next whitespace, so
//    input such as a.b or count(x) became a single Ident and the Dot,
//    OpenParen and CloseParen tokens were never produced after a name.
//  - Quoted form: "..." with at least one character between the quotes; the
//    surrounding quotes are stripped and the case is left untouched.
Ident: [\p{Alpha}_] [\p{Alpha}_0-9$]*
    {
        setText(QString::fromStdString(getText()).toLower().toStdString());
    }
  | '"' ~["]+ '"'
    {
        // Drop the opening and closing double quote, keep the inner text as written.
        std::string s = getText();
        s = s.substr(1, s.length() - 2);
        setText(s);
    };
|
||||
// Unsigned integer literal: one or more ASCII digits.
// The earlier pattern [1-9][0-9]* could not match a lone 0 (or digits with
// leading zeros), so such input was not recognised as an integer.
IntegerLiteral: [0-9]+;
|
||||
StringLiteral: '\'' ('\'\'' | ~ ('\''))* '\'' { setText(getText().substr(1, getText().length()-2)); };
|
||||
// Single-quoted string literal; a doubled quote ('') inside the literal is
// accepted as part of the body. The body is ( ... )* rather than ( ... )+ so
// that the empty literal '' is matched as well. The action strips only the
// outer quotes; doubled quotes inside are left exactly as written.
StringLiteral: '\'' ('\'\'' | ~['])* '\'' { setText(getText().substr(1, getText().length()-2)); };
|
||||
|
||||
Whitespace : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
|
||||
Whitespace: [\p{White_Space}] -> skip ; // skip any character with the Unicode White_Space property, one character per match
|
||||
|
||||
UnexpectedSymbol: .;
|
||||
|
|
@ -104,7 +104,6 @@ SOURCES += \
|
|||
catalog/PgSequenceContainer.cpp
|
||||
|
||||
HEADERS += \
|
||||
CaseChangingCharStream.h \
|
||||
Pglablib.h \
|
||||
ASyncDBConnection.h \
|
||||
ConnectionConfig.h \
|
||||
|
|
|
|||
|
|
@ -8,8 +8,7 @@ Parser::Parser(const std::string &input_string)
|
|||
|
||||
Parser::Parser(std::unique_ptr<antlr4::CharStream> stream)
|
||||
: InputStream(std::move(stream))
|
||||
, CaseFilter(InputStream.get(), true)
|
||||
, Lexer(&CaseFilter)
|
||||
, Lexer(InputStream.get())
|
||||
, TokenStream(&Lexer)
|
||||
, AParser(&TokenStream)
|
||||
{
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
|
||||
#include ".generated/PgsqlLexer.h"
|
||||
#include ".generated/PgsqlParser.h"
|
||||
#include "CaseChangingCharStream.h"
|
||||
#include "ErrorListener.h"
|
||||
|
||||
class Parser
|
||||
|
|
@ -19,7 +18,6 @@ public:
|
|||
}
|
||||
private:
|
||||
std::unique_ptr<antlr4::CharStream> InputStream;
|
||||
CaseChangingCharStream CaseFilter;
|
||||
PgsqlLexer Lexer;
|
||||
antlr4::CommonTokenStream TokenStream;
|
||||
PgsqlParser AParser;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue