Start of new ANTLR4 based parser.

Very simple tests pass.
This commit is contained in:
eelke 2022-04-03 12:27:35 +02:00
parent 03b4194193
commit fbbe832a05
44 changed files with 860 additions and 8 deletions

2
.gitignore vendored
View file

@ -11,3 +11,5 @@ srcdoc/
pglabAll.pro.user.4.8-pre1
*.user
/pglabAll.pro.user*
.antlr/
**/.generated/*

View file

@ -5,8 +5,6 @@ error( "Use local.pri.sample to create your own local.pri" )
LIBS += -lUser32 -lws2_32 -llibpq
CONFIG += c++17
QMAKE_CXXFLAGS += /std:c++17
# The following define makes your compiler emit warnings if you use
# any feature of Qt which as been marked as deprecated (the exact warnings
# depend on your compiler). Please consult the documentation of the

View file

@ -19,7 +19,9 @@ enum class BasicTokenType {
Comma,
Cast,
WhiteSpace,
NewLine
NewLine,
LastLexerToken
};
enum class LexerState {

View file

@ -26,7 +26,24 @@ Keyword isKeyword(const QString &symbol)
return Keyword::NotAKeyword;
}
/*
Put tokens on a stack.
Every time something is put on the stack, check whether it matches a rule.
The stack needs to contain both tokens from the lexical analyzer and tokens for reductions done by the parser.
Matching rules: because we need to match against the top of the stack, the rules should be matched from end to start.
Meaning that if we have A B C on the stack, we need to consider rules ending with a C.
*/
/// A single stack entry; presumably an element of the token stack described
/// in the comment above (TODO confirm once the stack itself exists).
class StackItem {
public:
    /// Token identifier of this entry. Zero-initialised so that a
    /// default-constructed item never holds an indeterminate value.
    int Token = 0;
};
SqlParser::SqlParser(SqlLexer &lexer)
: lexer(lexer)

View file

@ -0,0 +1,83 @@
#pragma once
#include "antlr4-runtime.h"
#include <QChar>
/// Helper stream for ANTLR that lets the lexer work case-insensitively.
///
/// Only the look-ahead (LA) is case converted. getText() forwards to the
/// wrapped stream without any conversion, so the text captured for a
/// recognized token keeps the case of the original input.
class CaseChangingCharStream: public antlr4::CharStream
{
public:
    /// @param stream the wrapped stream; stored as a raw pointer, this class never deletes it
    /// @param upper  true converts the look-ahead to upper case, false to lower case
    CaseChangingCharStream(antlr4::CharStream *stream, bool upper)
        : stream(stream)
        , upper(upper)
    {}

    virtual ~CaseChangingCharStream()
    {}

    /// Forwards to the wrapped stream.
    virtual void consume() override
    {
        stream->consume();
    }

    /// Look-ahead with case conversion applied.
    ///
    /// Values that are zero or negative once stored in an int are returned
    /// unchanged; everything else goes through QChar::toUpper or
    /// QChar::toLower depending on the `upper` flag.
    virtual size_t LA(ssize_t i) override
    {
        const int c = stream->LA(i);
        if (c > 0)
            return upper ? QChar::toUpper(c) : QChar::toLower(c);
        return c;
    }

    /// Returns the text exactly as the wrapped stream has it (no case conversion).
    virtual std::string getText(const antlr4::misc::Interval &interval) override
    {
        return stream->getText(interval);
    }

    /// Forwards to the wrapped stream.
    virtual std::string toString() const override
    {
        return stream->toString();
    }

    /// Forwards to the wrapped stream.
    virtual ssize_t mark() override
    {
        return stream->mark();
    }

    /// Forwards to the wrapped stream.
    virtual void release(ssize_t marker) override
    {
        stream->release(marker);
    }

    /// Forwards to the wrapped stream.
    virtual size_t index() override
    {
        return stream->index();
    }

    /// Forwards to the wrapped stream.
    virtual void seek(size_t index) override
    {
        stream->seek(index);
    }

    /// Forwards to the wrapped stream.
    virtual size_t size() override
    {
        return stream->size();
    }

    /// Forwards to the wrapped stream.
    virtual std::string getSourceName() const override
    {
        return stream->getSourceName();
    }

private:
    antlr4::CharStream *stream; ///< wrapped stream, not owned
    bool upper;                 ///< direction of the case conversion done in LA
};

View file

@ -36,6 +36,7 @@ class ConnectionConfig;
class ConnectionNode {
public:
virtual ~ConnectionNode() = default;
};
class ConnectionGroup: public ConnectionNode {
@ -115,7 +116,10 @@ public:
bool dirty() const;
void clean();
bool operator==(QUuid id) const { return m_uuid == id; }
bool operator==(const ConnectionConfig &rhs) const
{
return m_uuid == rhs.m_uuid;
}
QString makeLongDescription() const;
QByteArray encodedPassword() const;

31
pglablib/PgsqlLexer.g4 Normal file
View file

@ -0,0 +1,31 @@
lexer grammar PgsqlLexer;
@lexer::preinclude {
#include <memory>
}
// Punctuation tokens.
SemiColon: ';';
Comma: ',';
Dot: '.';
OpenParen: '(';
CloseParen: ')';
// Keyword tokens. They are spelled in upper case only: Parser feeds the lexer
// through a CaseChangingCharStream constructed with upper = true, so the
// look-ahead the lexer sees is already upper case.
// Keep these above Ident; ANTLR resolves equal-length matches in favour of the
// rule that is listed first.
As: 'AS';
By: 'BY';
From: 'FROM';
Full: 'FULL';
Group: 'GROUP';
Having: 'HAVING';
Join: 'JOIN';
Left : 'LEFT';
Order : 'ORDER';
Right : 'RIGHT';
Select: 'SELECT';
Where: 'WHERE';
Ident: [A-Za-z_][A-Za-z_0-9]* ; // identifier: a letter or underscore followed by letters, digits or underscores (either case)
IntegerLiteral: [0-9]+; // one or more digits; the previous [1-9][0-9]* could not match the literal 0
// Single-quoted string constant; a quote inside the string is written as two quotes ('').
// The action removes the surrounding quotes and collapses every doubled quote into a
// single one, so the token text is the value of the string rather than its source form.
StringLiteral: '\'' ('\'\'' | ~ ('\''))* '\''
    {
        std::string text = getText().substr(1, getText().length() - 2);
        for (std::string::size_type pos = 0; (pos = text.find("''", pos)) != std::string::npos; ++pos)
            text.erase(pos, 1);
        setText(text);
    };
Whitespace : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines

101
pglablib/PgsqlParser.g4 Normal file
View file

@ -0,0 +1,101 @@
// Define a grammar called postgresql
parser grammar PgsqlParser;
options {
tokenVocab = PgsqlLexer;
}
@parser::preinclude {
#include "sqlast/sqlast.h"
}
@parser::includes {
}
@parser::members {
}
// Entry rule: parses a statement list and moves the resulting AST into $program.
// NOTE(review): the rule does not end with EOF, so input after the last recognised
// statement is presumably left unconsumed instead of being reported - confirm this is intended.
main returns [std::unique_ptr<sqlast::StatementList> program]
: statement_list { $program = std::move($statement_list.result); }
;
// List of statements separated by ';'.
// The StatementList is created before anything is matched; every parsed statement is
// moved into it. Empty statements (a lone ';') are accepted but add nothing.
// The last element is mandatory, and only for that last statement the ';' is optional.
statement_list returns [std::unique_ptr<sqlast::StatementList> result]
: { $result = std::make_unique<sqlast::StatementList>(); }
(statement SemiColon { $result->Add(std::move($statement.result)); } | empty_statement)*
(statement SemiColon? { $result->Add(std::move($statement.result)); } | empty_statement )
;
// A single statement; select_stmt is the only alternative so far.
statement returns [std::unique_ptr<sqlast::Statement> result]
: select_stmt { $result = std::move($select_stmt.result); }
;
// A ';' without a statement in front of it; produces no AST node.
empty_statement
: SemiColon
;
// SELECT statement.
// Only the select list is stored in the AST; the other clauses are recognised but
// have no actions yet, so their content is discarded.
// The optional clauses are listed in SQL order: FROM, WHERE, GROUP BY, HAVING, ORDER BY.
// HAVING belongs to GROUP BY and therefore has to come before ORDER BY.
select_stmt returns [std::unique_ptr<sqlast::SelectStatement> result]
    : Select select_list
      {
        $result = std::make_unique<sqlast::SelectStatement>();
        $result->SetSelectList(std::move($select_list.result));
      }
      (From from_item (Comma from_item)* )?
      (Where condition)?
      (Group By group_by)?
      (Having having)?
      (Order By order_by)?
    ;
// One source in the FROM clause: a dotted name (Ident Dot Ident), a plain name, or a
// parenthesised select. The alias is optional for names and required for the sub-select.
// There is no action code, so nothing is stored in the AST yet.
from_item
: Ident Dot Ident from_alias?
| Ident from_alias?
| OpenParen select_stmt CloseParen from_alias
;
// Alias of a from_item: optional AS, the alias name, and an optional parenthesised
// list of identifiers (presumably column aliases - TODO confirm).
from_alias
: As? Ident (OpenParen Ident (Comma Ident)* CloseParen)?
;
// Condition of the WHERE clause; currently a plain expr whose result is not used.
condition
: expr
;
// Placeholder rules: group_by, order_by and having are still empty, so in select_stmt
// only the keywords in front of them are matched.
group_by
:
;
order_by
:
;
having
:
;
// Comma separated list of select items.
// The Add action for the items after a comma is placed inside the loop so that every
// item is added exactly once. With the action behind the loop only the last item was
// added, and a list with a single item added that item a second time after it had
// already been moved from.
// The empty alternative accepts a missing select list; $result is left unset in that case.
select_list returns [std::unique_ptr<sqlast::SelectList> result]
    : select_item
      {
        $result = std::make_unique<sqlast::SelectList>();
        $result->Add(std::move($select_item.result));
      }
      (Comma select_item { $result->Add(std::move($select_item.result)); })*
    |
    ;
// One entry of the select list: an expression followed by an optional alias.
// The AS keyword in front of the alias may be omitted.
select_item returns [std::unique_ptr<sqlast::SelectItem> result]
: expr { $result = std::make_unique<sqlast::SelectItem>(std::move($expr.result)); }
(As? Ident { $result->SetAlias($Ident.text); })?
;
// Expression.
// Only the value alternative has an action; for the two identifier alternatives
// $result is never assigned, so callers receive an empty pointer for them.
expr returns [std::unique_ptr<sqlast::Expression> result]
: expr Dot Ident
| Ident
| value { $result = std::move($value.result); }
;
// Literal value.
// Only string literals create an AST node; the IntegerLiteral alternative has no
// action, so $result stays empty for integers.
value returns [std::unique_ptr<sqlast::Literal> result]
: IntegerLiteral
| StringLiteral { $result = std::make_unique<sqlast::StringLiteral>($StringLiteral.text); }
;

1
pglablib/build-cpp.bat Normal file
View file

@ -0,0 +1 @@
rem Runs the ANTLR tool on PgsqlLexer.g4 and PgsqlParser.g4 and writes the generated
rem C++ sources to sqlparser/.generated (without a listener).
rem NOTE(review): extra arguments are forwarded starting with the second one, the first
rem argument of this script is not passed on - confirm this is intended.
java -Xmx500M -cp "c:\prog\antlr\antlr-4.9.2-complete.jar;%CLASSPATH%" org.antlr.v4.Tool -Dlanguage=Cpp PgsqlLexer.g4 PgsqlParser.g4 -o sqlparser/.generated -no-listener %2 %3 %4 %5 %6 %7

View file

@ -4,11 +4,11 @@
#
#-------------------------------------------------
QT += widgets core
QT += widgets core concurrent
TARGET = pglablib
TEMPLATE = lib
CONFIG += staticlib
CONFIG += staticlib no_keywords
! include( ../common.pri ) {
error( "Couldn't find the common.pri file!" )
@ -45,6 +45,23 @@ SOURCES += \
catalog/PgConstraintContainer.cpp \
ParamListJson.cpp \
ParamListModel.cpp \
sqlast/ColumnDefinition.cpp \
sqlast/CreateTable.cpp \
sqlast/Expression.cpp \
sqlast/Literal.cpp \
sqlast/Node.cpp \
sqlast/SelectItem.cpp \
sqlast/SelectList.cpp \
sqlast/SelectStatement.cpp \
sqlast/Statement.cpp \
sqlast/StatementList.cpp \
sqlast/StringLiteral.cpp \
sqlast/TypeSpecification.cpp \
sqlast/Visitor.cpp \
sqlparser/.generated/PgsqlLexer.cpp \
sqlparser/.generated/PgsqlParser.cpp \
sqlparser/ErrorListener.cpp \
sqlparser/Parser.cpp \
util.cpp \
SqlFormattingUtils.cpp \
catalog/PgKeywordList.cpp \
@ -87,6 +104,7 @@ SOURCES += \
catalog/PgSequenceContainer.cpp
HEADERS += \
CaseChangingCharStream.h \
Pglablib.h \
ASyncDBConnection.h \
ConnectionConfig.h \
@ -113,6 +131,24 @@ HEADERS += \
catalog/PgConstraintContainer.h \
ParamListJson.h \
ParamListModel.h \
sqlast/ColumnDefinition.h \
sqlast/CreateTable.h \
sqlast/Expression.h \
sqlast/Literal.h \
sqlast/Node.h \
sqlast/SelectItem.h \
sqlast/SelectList.h \
sqlast/SelectStatement.h \
sqlast/Statement.h \
sqlast/StatementList.h \
sqlast/StringLiteral.h \
sqlast/TypeSpecification.h \
sqlast/Visitor.h \
sqlast/sqlast.h \
sqlparser/.generated/PgsqlLexer.h \
sqlparser/.generated/PgsqlParser.h \
sqlparser/ErrorListener.h \
sqlparser/Parser.h \
util.h \
SqlFormattingUtils.h \
catalog/PgCatalogTypes.h \
@ -170,6 +206,10 @@ else:unix:!macx: LIBS += -L$$OUT_PWD/../core/ -lcore
INCLUDEPATH += $$PWD/../core
DEPENDPATH += $$PWD/../core
INCLUDEPATH += C:\Prog\include\antlr
win32:CONFIG(debug, debug|release): LIBS += -lantlr4-runtimed
else:win32:CONFIG(release, debug|release): LIBS += -lantlr4-runtime
win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../core/release/libcore.a
else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../core/debug/libcore.a
else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../core/release/core.lib

View file

@ -0,0 +1,9 @@
#include "ColumnDefinition.h"
#include "TypeSpecification.h"
using namespace sqlast;
/// Default constructor; does nothing beyond default member initialisation.
ColumnDefinition::ColumnDefinition() = default;

View file

@ -0,0 +1,28 @@
#pragma once
#include "Node.h"
#include <QString>
#include <memory>
namespace sqlast {

class TypeSpecification;

/// Describes a single column of a table.
///
/// Constraints are deliberately not stored here: a constraint can apply to
/// multiple columns and we want to keep all of them in one place. The UI and
/// the SQL generator are still allowed to display column specific constraints
/// together with the column they belong to.
class ColumnDefinition : public Node
{
public:
    ColumnDefinition();

private:
    /// Name of the column.
    QString name;
    /// Type of the column including its parameters.
    std::unique_ptr<TypeSpecification> typeName;
    /// Defaults to true.
    bool notNull = true;
};

}

View file

@ -0,0 +1,8 @@
#include "CreateTable.h"
using namespace sqlast;
/// Default constructor; there is no state to initialise yet.
CreateTable::CreateTable() = default;

View file

@ -0,0 +1,21 @@
#pragma once
#include "Statement.h"
#include <memory>
namespace sqlast {

// Forward declarations; not used by the class below yet.
class ColumnDefinition;
class TableConstraint;

/// AST node for a CREATE TABLE statement. It has no members yet.
class CreateTable: public Statement
{
public:
    CreateTable();

private:
};

}

View file

@ -0,0 +1,8 @@
#include "Expression.h"
using namespace sqlast;
/// Default constructor; Expression has no state of its own.
Expression::Expression() = default;

View file

@ -0,0 +1,13 @@
#pragma once
#include "Node.h"
namespace sqlast {

/// Base class for expression nodes in the AST.
class Expression: public Node
{
public:
    Expression();
};

}

View file

@ -0,0 +1,8 @@
#include "Literal.h"
using namespace sqlast;
/// Default constructor; Literal has no state of its own.
Literal::Literal() = default;

13
pglablib/sqlast/Literal.h Normal file
View file

@ -0,0 +1,13 @@
#pragma once
#include "Expression.h"
namespace sqlast {

/// Base class for literal values; a literal is an Expression.
class Literal: public Expression
{
public:
    Literal();
};

}

8
pglablib/sqlast/Node.cpp Normal file
View file

@ -0,0 +1,8 @@
#include "Node.h"
using namespace sqlast;
/// Default constructor; Node has no state.
Node::Node() = default;

24
pglablib/sqlast/Node.h Normal file
View file

@ -0,0 +1,24 @@
#pragma once
namespace sqlast {

/// Common base class of the AST classes.
class Node {
public:
    Node();

    /// Virtual so that derived nodes can be destroyed through a Node pointer.
    virtual ~Node() = default;
};

}
/*
- Node
- INSERT
- UPDATE
- DELETE
- SELECT
- WITH
- CTE
*/

View file

@ -0,0 +1,13 @@
#include "SelectItem.h"
#include "Expression.h"
using namespace sqlast;
SelectItem::SelectItem(std::unique_ptr<sqlast::Expression> expr)
: expression(std::move(expr))
{}
/// Stores a copy of the given alias, replacing any previous one.
void SelectItem::SetAlias(const std::string &alias)
{
    this->alias.assign(alias);
}

View file

@ -0,0 +1,25 @@
#pragma once
#include "Node.h"
#include <memory>
#include <string>
namespace sqlast {

class Expression;

/// One entry of a select list: an expression plus an optional alias.
class SelectItem : public Node
{
public:
    /// Takes ownership of the expression.
    explicit SelectItem(std::unique_ptr<sqlast::Expression> expr);

    /// Returns the expression of this item.
    /// The stored pointer is dereferenced without a null check.
    Expression& GetExpression()
    {
        return *expression;
    }

    void SetAlias(const std::string &alias);

    /// Returns a copy of the alias; empty when no alias was set.
    std::string GetAlias() const
    {
        return alias;
    }

private:
    std::unique_ptr<Expression> expression;
    std::string alias;
};

}

View file

@ -0,0 +1,18 @@
#include "SelectList.h"
#include "SelectItem.h"
using namespace sqlast;
/// Creates an empty select list.
SelectList::SelectList() = default;
/// Appends an item to the end of the list and takes ownership of it.
void SelectList::Add(std::unique_ptr<SelectItem> select_item)
{
    list.emplace_back(std::move(select_item));
}
/// Number of items in the list, converted to int.
int SelectList::Count() const
{
    const auto item_count = list.size();
    return static_cast<int>(item_count);
}

View file

@ -0,0 +1,29 @@
#pragma once
#include "Node.h"
#include <memory>
#include <vector>
namespace sqlast {

class SelectItem;

/// The list of items that follows the SELECT keyword.
class SelectList : public Node
{
public:
    SelectList();

    /// Appends an item and takes ownership of it.
    void Add(std::unique_ptr<SelectItem> select_item);

    /// Number of items in the list.
    int Count() const;

    /// Returns the item at the given position.
    /// Uses the bounds-checked at() of the underlying vector.
    SelectItem& Get(int index)
    {
        auto &entry = list.at(index);
        return *entry;
    }

private:
    using List = std::vector<std::unique_ptr<SelectItem>>;

    List list;
};

}

View file

@ -0,0 +1,19 @@
#include "SelectStatement.h"
#include "SelectList.h"
using namespace sqlast;
/// Creates a select statement without a select list.
SelectStatement::SelectStatement() = default;
/// Returns the select list, or nullptr when none has been set.
/// Ownership stays with the statement.
SelectList* SelectStatement::GetSelectList()
{
    SelectList *current = selectList.get();
    return current;
}
void SelectStatement::SetSelectList(std::unique_ptr<SelectList> value)
{
selectList = std::move(value);
}

View file

@ -0,0 +1,22 @@
#pragma once
#include "Statement.h"
#include <memory>
namespace sqlast {

class SelectList;

/// AST node for a SELECT statement; currently it only holds the select list.
class SelectStatement: public Statement
{
public:
    SelectStatement();

    /// Returns the select list (may be nullptr); ownership is not transferred.
    SelectList* GetSelectList();

    /// Replaces the select list and takes ownership of the new one.
    void SetSelectList(std::unique_ptr<SelectList> value);

private:
    std::unique_ptr<SelectList> selectList;
};

}

View file

@ -0,0 +1,10 @@
#include "Statement.h"
using namespace sqlast;
/// Default constructor; Statement has no state of its own.
Statement::Statement() = default;

View file

@ -0,0 +1,13 @@
#pragma once
#include "Node.h"
namespace sqlast {

/// Base class for statement nodes in the AST.
class Statement: public Node
{
public:
    Statement();
};

}

View file

@ -0,0 +1,24 @@
#include "StatementList.h"
#include "Statement.h"
using namespace sqlast;
/// Creates an empty statement list.
StatementList::StatementList() = default;
/// Appends a statement to the end of the list and takes ownership of it.
void StatementList::Add(std::unique_ptr<Statement> &&statement)
{
    statements.emplace_back(std::move(statement));
}
/// Returns the statement at the given position.
///
/// Uses the bounds-checked at(), like SelectList::Get does, so an index outside
/// the list throws std::out_of_range instead of being undefined behaviour.
/// A negative index converts to a very large unsigned value and throws as well.
Statement &StatementList::Get(int index)
{
    return *statements.at(index);
}
/// Number of statements in the list, converted to int.
int StatementList::Count() const
{
    const auto statement_count = statements.size();
    return static_cast<int>(statement_count);
}

View file

@ -0,0 +1,27 @@
#pragma once
#include "Node.h"
#include <memory>
#include <vector>
namespace sqlast {

class Statement;

/// Ordered list of statements; the root node the parser returns.
class StatementList: public Node
{
public:
    StatementList();

    /// Appends a statement and takes ownership of it.
    void Add(std::unique_ptr<Statement> &&statement);

    /// Returns the statement at the given position.
    Statement &Get(int index);

    /// Number of statements in the list.
    int Count() const;

private:
    using Statements = std::vector<std::unique_ptr<Statement>>;

    Statements statements;
};

}

View file

@ -0,0 +1,7 @@
#include "StringLiteral.h"
using namespace sqlast;
/// Converts the given std::string to a QString and stores it as the value.
StringLiteral::StringLiteral(const std::string s)
    : value(QString::fromStdString(s))
{
}

View file

@ -0,0 +1,19 @@
#pragma once
#include "Literal.h"
#include <QString>
#include <string>
namespace sqlast {

/// Literal holding a string value.
class StringLiteral : public Literal
{
public:
    /// @param s the text of the literal; it is converted to a QString
    explicit StringLiteral(const std::string s);

    /// Returns a copy of the stored value.
    QString GetValue() const
    {
        return value;
    }

private:
    QString value;
};

}

View file

@ -0,0 +1,8 @@
#include "TypeSpecification.h"
using namespace sqlast;
/// Default constructor; baseType starts out as a default-constructed QString.
TypeSpecification::TypeSpecification() = default;

View file

@ -0,0 +1,23 @@
#pragma once
#include "Node.h"
#include <QString>
namespace sqlast {

/// Describes a type as used in a script: not only the base type but also the
/// parameters used with it, for example the precision of numeric, the maximum
/// length of char, or whether it is an array.
class TypeSpecification : public Node
{
public:
    TypeSpecification();

private:
    /// Name of the base type.
    /// The PgType from the catalog is not used here because the type might be
    /// defined inside the script and not be present in the catalog yet.
    QString baseType;
    // is_array
};

}

View file

@ -0,0 +1,8 @@
#include "Visitor.h"
using namespace sqlast;
/// Default constructor; Visitor has no state.
Visitor::Visitor() = default;

13
pglablib/sqlast/Visitor.h Normal file
View file

@ -0,0 +1,13 @@
#pragma once
namespace sqlast {

/// Currently an empty class with only a constructor.
class Visitor
{
public:
    Visitor();
};

}

12
pglablib/sqlast/sqlast.h Normal file
View file

@ -0,0 +1,12 @@
#pragma once
#include "Node.h"
#include "SelectStatement.h"
#include "SelectItem.h"
#include "SelectList.h"
#include "Statement.h"
#include "StatementList.h"
#include "StringLiteral.h"
#undef emit

View file

@ -0,0 +1,22 @@
#include "ErrorListener.h"
void ErrorListener::syntaxError(antlr4::Recognizer *recognizer, antlr4::Token *offendingSymbol, size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr e)
{
++errors;
}
void ErrorListener::reportAmbiguity(antlr4::Parser *recognizer, const antlr4::dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, const antlrcpp::BitSet &ambigAlts, antlr4::atn::ATNConfigSet *configs)
{
++errors;
}
void ErrorListener::reportAttemptingFullContext(antlr4::Parser *recognizer, const antlr4::dfa::DFA &dfa, size_t startIndex, size_t stopIndex, const antlrcpp::BitSet &conflictingAlts, antlr4::atn::ATNConfigSet *configs)
{
++errors;
}
void ErrorListener::reportContextSensitivity(antlr4::Parser *recognizer, const antlr4::dfa::DFA &dfa, size_t startIndex, size_t stopIndex, size_t prediction, antlr4::atn::ATNConfigSet *configs)
{
++errors;
}

View file

@ -0,0 +1,21 @@
#pragma once
#include "antlr4-runtime.h"
class ErrorListener : public antlr4::ANTLRErrorListener
{
// ANTLRErrorListener interface
public:
virtual void syntaxError(antlr4::Recognizer *recognizer, antlr4::Token *offendingSymbol, size_t line, size_t charPositionInLine, const std::string &msg, std::exception_ptr e) override;
virtual void reportAmbiguity(antlr4::Parser *recognizer, const antlr4::dfa::DFA &dfa, size_t startIndex, size_t stopIndex, bool exact, const antlrcpp::BitSet &ambigAlts, antlr4::atn::ATNConfigSet *configs) override;
virtual void reportAttemptingFullContext(antlr4::Parser *recognizer, const antlr4::dfa::DFA &dfa, size_t startIndex, size_t stopIndex, const antlrcpp::BitSet &conflictingAlts, antlr4::atn::ATNConfigSet *configs) override;
virtual void reportContextSensitivity(antlr4::Parser *recognizer, const antlr4::dfa::DFA &dfa, size_t startIndex, size_t stopIndex, size_t prediction, antlr4::atn::ATNConfigSet *configs) override;
public:
int errorCount() const
{
return errors;
}
private:
int errors = 0;
};

View file

@ -0,0 +1,21 @@
#include "Parser.h"
#include "antlr4-runtime.h"
/// Builds the complete parse pipeline for the given SQL text:
/// input stream -> case filter -> lexer -> token stream -> parser.
///
/// The case filter is created with upper = true, so the lexer sees upper case
/// look-ahead while the token text keeps the original case.
///
/// The default error listeners of both the lexer and the parser are replaced
/// by the counting listener. Without doing this for the lexer as well, errors
/// found while tokenizing would not show up in errorCount().
///
/// @param input_string the SQL text to parse
Parser::Parser(const std::string &input_string)
    : InputStream(std::make_unique<antlr4::ANTLRInputStream>(input_string))
    , CaseFilter(InputStream.get(), true)
    , Lexer(&CaseFilter)
    , TokenStream(&Lexer)
    , AParser(&TokenStream)
{
    Lexer.removeErrorListeners();
    Lexer.addErrorListener(&Errors);

    AParser.removeErrorListeners();
    AParser.addErrorListener(&Errors);
}
/// Runs the `main` rule of the generated parser and hands the statement list it
/// produced to the caller. The list is moved out of the parse context.
std::unique_ptr<sqlast::StatementList> Parser::Parse()
{
    auto *root = AParser.main();
    return std::move(root->program);
}

View file

@ -0,0 +1,27 @@
#pragma once
#include ".generated/PgsqlLexer.h"
#include ".generated/PgsqlParser.h"
#include "CaseChangingCharStream.h"
#include "ErrorListener.h"
/// Bundles the ANTLR input stream, case filter, lexer, token stream and
/// generated parser that are needed to parse one SQL string.
class Parser
{
public:
    /// @param input_string the SQL text to parse
    Parser(const std::string &input_string);

    /// Runs the parser and returns the resulting statement list.
    std::unique_ptr<sqlast::StatementList> Parse();

    /// Number of errors counted by the error listener.
    int errorCount() const
    {
        const int count = Errors.errorCount();
        return count;
    }

private:
    // The constructor hands each of these members a pointer to the one before
    // it, so the declaration order must not be changed.
    std::unique_ptr<antlr4::CharStream> InputStream;
    CaseChangingCharStream CaseFilter;
    PgsqlLexer Lexer;
    antlr4::CommonTokenStream TokenStream;
    PgsqlParser AParser;
    ErrorListener Errors;
};

View file

@ -17,7 +17,7 @@ QString msfloatToHumanReadableString(float ms)
if (ms < 1.0f) {
val = ms * 1000.f;
//result = QString::asprintf("%0.3f", ms * 1000.0f);
unit = u8"μs";
unit = QString::fromUtf8("μs");
}
else if (ms >= 1000.0) {
val = ms / 1000.0f;

View file

@ -6,7 +6,7 @@ include(gtest_dependency.pri)
TEMPLATE = app
CONFIG += console
CONFIG -= app_bundle
CONFIG += thread
CONFIG += thread no_keywords
CONFIG += qt
QT += core widgets
@ -20,6 +20,7 @@ SOURCES += main.cpp \
tst_escapeConnectionStringValue.cpp \
tst_expected.cpp \
tst_SqlLexer.cpp \
tst_newParser.cpp \
tst_scopeguard.cpp \
tst_CsvWriter.cpp \
tst_PasswordManager.cpp \
@ -39,6 +40,10 @@ DEPENDPATH += $$PWD/../../core
win32:CONFIG(debug, debug|release): LIBS += -lbotand
else:win32:CONFIG(release, debug|release): LIBS += -lbotan
INCLUDEPATH += C:\Prog\include\antlr
win32:CONFIG(debug, debug|release): LIBS += -lantlr4-runtimed
else:win32:CONFIG(release, debug|release): LIBS += -lantlr4-runtime
win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../../core/release/libcore.a
else:win32-g++:CONFIG(debug, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../../core/debug/libcore.a
else:win32:!win32-g++:CONFIG(release, debug|release): PRE_TARGETDEPS += $$OUT_PWD/../../core/release/core.lib

View file

@ -0,0 +1,47 @@
#include <gtest/gtest.h>
#include <gmock/gmock-matchers.h>
#include "PrintTo_Qt.h"
#include "sqlparser/Parser.h"
using namespace testing;
using namespace sqlast;
// Two statements written in mixed case, each followed by ';', must give two statements.
TEST(NewSqlParser, statementList)
{
    const std::string sql = "SEleCT 1; Select 2;";
    Parser parser(sql);

    auto program = parser.Parse();

    ASSERT_TRUE(program != nullptr);
    ASSERT_EQ(2, program->Count());
}
// Input that is not a statement must be counted as exactly one error.
TEST(NewSqlParser, missingSemi)
{
    const std::string sql = "1";
    Parser parser(sql);

    auto program = parser.Parse();

    ASSERT_EQ(1, parser.errorCount());
}
// A select list with two items: the second one must be the string literal
// with its quotes removed.
TEST(NewSqlParser, selectList)
{
    const std::string sql = "SEleCT 1, 'Tekst'";
    Parser parser(sql);

    auto program = parser.Parse();
    ASSERT_TRUE(program != nullptr);
    ASSERT_EQ(1, program->Count());

    // dynamic_cast to a reference throws when the node has a different type.
    auto &select = dynamic_cast<SelectStatement&>(program->Get(0));
    SelectList* sl = select.GetSelectList();
    ASSERT_TRUE(sl != nullptr);
    ASSERT_EQ(2, sl->Count());

    auto &string_literal = dynamic_cast<StringLiteral&>(sl->Get(1).GetExpression());
    ASSERT_EQ("Tekst", string_literal.GetValue());
}