Sketched rough parser code construction including some SQL AST classes.

This commit is contained in:
eelke 2018-06-19 19:52:56 +02:00
parent f3898599fd
commit 5b20f900fc
15 changed files with 459 additions and 4 deletions

13
core/SqlAstExpression.cpp Normal file
View file

@ -0,0 +1,13 @@
#include "SqlAstExpression.h"
using namespace SqlAst;
std::shared_ptr<SqlAst::Expression> parseExpression(SqlParser &parser)
{
// get token, what is it?
// number
// symbol
// left parenthesis
}

19
core/SqlAstExpression.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef SQLASTEXPRESSION_H
#define SQLASTEXPRESSION_H
#include "SqlAstNode.h"
class SqlParser;
namespace SqlAst {

/// Common base class for the parts an expression is built from, such as
/// calculations, comparisons, function calls etc...
class Expression : public Node {};

/// Entry point for parsing an expression; takes the parser by reference and
/// hands back the resulting node as a shared pointer.
std::shared_ptr<Expression> parseExpression(SqlParser &parser);

} // namespace SqlAst
#endif // SQLASTEXPRESSION_H

8
core/SqlAstNode.cpp Normal file
View file

@ -0,0 +1,8 @@
#include "SqlAstNode.h"
using namespace SqlAst;
/// Out-of-line default constructor; the base node has nothing to initialise.
Node::Node() = default;

58
core/SqlAstNode.h Normal file
View file

@ -0,0 +1,58 @@
#ifndef SQLASTNODE_H
#define SQLASTNODE_H
#include <memory>
#include <string>
namespace SqlAst {

/// Root of the SQL AST class hierarchy; every AST node derives from it.
class Node {
public:
    Node();

    /// Nodes are handled through pointers to base classes, so the destructor
    /// is virtual to make destruction through a base pointer well defined.
    virtual ~Node() = default;

    //virtual Node* Clone() const = 0;
};

/// Base class for data definition (DDL) statements.
class DDLNode: public Node {
};

/// Node for a CREATE TABLE statement.
/// NOTE(review): derives from Node directly and not from DDLNode - confirm this is intended.
class CreateTable: public Node {
};

// Is there a benefit for having a common base for crud operations???
class CrudNode: public Node {
};

/// Node for an INSERT statement.
class Insert: public CrudNode {
};

/** Class for representing an identifier.
 *
 * This can still be multiple things like:
 * - name of alias, schema, table and or column
 * - name of function
 * - predefined symbol like LOCAL_TIME
 *
 * During parsing this cannot always be determined, as an alias might be involved that hasn't been parsed yet,
 * so we put an Identifier in the AST; a follow-up pass could determine what it actually is and act appropriately.
 *
 * An identifier can consist of the following fields
 * - [[schema.]table.]column
 * - [alias.]column
 * - [schema.]table.function (for function taking a row of type table)
 * - alias.function (for function taking a row of type table)
 * - schema.function
 * - sql symbol like CURRENT_DATE
 */
class Identifier: public Node {
};

}
#endif // SQLASTNODE_H

17
core/SqlAstSelect.cpp Normal file
View file

@ -0,0 +1,17 @@
#include "SqlAstSelect.h"
#include "SqlAstSelectList.h"
#include "SqlParser.h"
using namespace SqlAst;
std::shared_ptr<SqlAst::Select> parseSelect(SqlParser &parser)
{
std::shared_ptr<SqlAst::Select> ast_select = std::make_shared<SqlAst::Select>();
// parse select list of expression + aliasses, required
auto select_list = parseSelectList(parser);
ast_select->setSelectList(select_list);
// parse optional from list
return ast_select;
}

39
core/SqlAstSelect.h Normal file
View file

@ -0,0 +1,39 @@
#ifndef SQLASTSELECT_H
#define SQLASTSELECT_H
#include "SqlAstNode.h"
//#include "SqlAstSelectList.h"
#include <memory>
class SqlParser;
namespace SqlAst {

// The clauses are only referred to through shared pointers in this header,
// so forward declarations are sufficient.
class SelectList;
class From;
class Where;
class GroupBy;
class Having;
class OrderBy;

/// AST node for a SELECT statement; holds one shared pointer per clause.
class Select : public CrudNode {
public:
    /// Stores the select list of this statement.
    void setSelectList(std::shared_ptr<SelectList> list)
    {
        select = list;
    }

    /// Returns a copy of the shared pointer to the select list.
    std::shared_ptr<SelectList> getSelectList() const
    {
        return select;
    }

    /// Stores the from clause of this statement.
    void setFrom(std::shared_ptr<From> f)
    {
        from = f;
    }

private:
    std::shared_ptr<SelectList> select;
    std::shared_ptr<From> from;
    std::shared_ptr<Where> where;
    std::shared_ptr<GroupBy> groupBy;
    std::shared_ptr<Having> having;
    std::shared_ptr<OrderBy> orderBy;
};

/// Entry point for parsing a SELECT statement; returns the node as a shared pointer.
std::shared_ptr<Select> parseSelect(SqlParser &parser);

} // namespace SqlAst
#endif // SQLASTSELECT_H

27
core/SqlAstSelectList.cpp Normal file
View file

@ -0,0 +1,27 @@
#include "SqlAstSelectList.h"
#include "SqlAstSelectListEntry.h"
#include "SqlParser.h"
using namespace SqlAst;
std::shared_ptr<SqlAst::SelectList> parseSelectList(SqlParser &parser)
{
// parse select element
// whats next?
// comma -> parse an element
// something else return and let caller figure it out
auto ast_select_list = std::make_shared<SelectList>();
while (1) {
auto ast_select_elem = parseSelectListEntry(parser);
if (ast_select_elem) {
ast_select_list->add(ast_select_elem);
}
else {
// todo error
}
if (!parser.expectToken(BasicTokenType::Comma)) {
break;
}
}
return ast_select_list;
}

33
core/SqlAstSelectList.h Normal file
View file

@ -0,0 +1,33 @@
#ifndef SQLASTSELECTLIST_H
#define SQLASTSELECTLIST_H
#include "SqlAstNode.h"
#include <memory>
#include <vector>
class SqlParser;
namespace SqlAst {

class SelectListEntry;

/// AST node holding the entries of a select list in the order they were added.
class SelectList : public Node {
public:
    /// Shared pointer type used for the entries of the list.
    using EntrySPtr = std::shared_ptr<SelectListEntry>;

    /// Appends @p entry to the end of the list.
    void add(EntrySPtr entry) { entryList.push_back(entry); }

private:
    using EntryList = std::vector<EntrySPtr>;

    EntryList entryList;
};

/// Entry point for parsing a select list; returns the node as a shared pointer.
std::shared_ptr<SelectList> parseSelectList(SqlParser &parser);

} // namespace SqlAst
#endif // SQLASTSELECTLIST_H

View file

@ -0,0 +1,34 @@
#include "SqlAstSelectListEntry.h"
#include "SqlAstExpression.h"
#include "SqlParser.h"
using namespace SqlAst;
std::shared_ptr<SelectListEntry> parseSelectListEntry(SqlParser &parser)
{
// parse expression
auto ast_expr = parseExpression(parser);
if (parser.consumeOptionalKeyword(Keyword::As)) {
// alias required!
auto token = parser.expectSymbol();
}
else {
// optional alias
}
// can come three groups of things:
// - AS keyword, alias should follow, followed by comma
// - a symbol ie an alias followed by comma
// - something else ie comma, FROM or INTO or... is something one of the callers should figure out as long as we have valid
// selectElem we are happy
auto ast = std::make_shared<SelectListEntry>();
return ast;
}

View file

@ -0,0 +1,29 @@
#ifndef SQLASTSELECTLISTENTRY_H
#define SQLASTSELECTLISTENTRY_H
#include "SqlAstNode.h"
#include <QString>
#include <memory>
#include <vector>
class SqlParser;
namespace SqlAst {

class Expression;

/// One entry of a select list: an expression together with an alias.
class SelectListEntry: public Node {
public:
    /// Stores the expression of this entry.
    void setExpression(std::shared_ptr<Expression> expr) { this->expression = expr; }

    /// Stores the alias of this entry.
    /// The parameter has to be assigned here; assigning the member to itself
    /// would silently drop the alias.
    void setAlias(QString a) { this->alias = a; }

private:
    std::shared_ptr<Expression> expression;
    QString alias;
};

/// Entry point for parsing a single select list entry; returns the node as a shared pointer.
std::shared_ptr<SqlAst::SelectListEntry> parseSelectListEntry(SqlParser &parser);

}
#endif // SQLASTSELECTLISTENTRY_H

View file

@ -110,7 +110,11 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
} }
if (isSelf(c)) { if (isSelf(c)) {
length = m_pos - startpos; length = m_pos - startpos;
tokentype = BasicTokenType::Self; if (c == ',')
tokentype = BasicTokenType::Comma;
else
tokentype = BasicTokenType::Self;
QStringRef sr(&m_block, startpos, length); QStringRef sr(&m_block, startpos, length);
out = sr.toString(); out = sr.toString();
return true; return true;

View file

@ -15,7 +15,8 @@ enum class BasicTokenType {
QuotedIdentifier, QuotedIdentifier,
Parameter, Parameter,
Operator, Operator,
Self, // single char representing it self Self, // single char representing it self, maybe remove this and replace with token for each possibility
Comma,
Cast Cast
}; };
@ -25,6 +26,14 @@ enum class LexerState {
InBlockComment InBlockComment
}; };
/// Bundles the values produced for one token by SqlLexer::nextBasicToken().
///
/// All members have default initialisers so that a default constructed token
/// never holds indeterminate values.
class SqlToken {
public:
    /// Result flag of the scan that produced this token.
    bool ok = false;
    /// Start position of the token as reported by the lexer.
    int startPos = 0;
    /// Length of the token as reported by the lexer.
    int length = 0;
    /// Kind of token; value-initialised, i.e. the enumerator with value zero.
    BasicTokenType tokenType{};
    /// Text of the token as reported by the lexer.
    QString out;
};
class SqlLexer { class SqlLexer {
public: public:
@ -40,6 +49,12 @@ public:
* @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover * @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover
*/ */
bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out); bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out);
/** Convenience overload returning the result of a scan packed into a SqlToken.
 *
 * @return token whose `ok` member is true when the out-parameter overload
 *         reported success and false when it reported invalid input.
 */
SqlToken nextBasicToken()
{
    SqlToken token;
    // The out-parameter overload returns false when the input seems invalid,
    // so its result maps directly onto `ok`; negating it would invert the flag.
    token.ok = nextBasicToken(token.startPos, token.length, token.tokenType, token.out);
    return token;
}
LexerState currentState() const { return m_state; } LexerState currentState() const { return m_state; }
private: private:

75
core/SqlParser.cpp Normal file
View file

@ -0,0 +1,75 @@
#include "SqlParser.h"
#include "SqlAstSelect.h"
#include <unordered_map>
using namespace SqlAst;
/// Looks up whether @p symbol is one of the known SQL keywords.
///
/// The symbol is lower cased before the lookup, so the match is case insensitive.
///
/// @param symbol text of the symbol to classify.
/// @return the matching Keyword, or Keyword::NotAKeyword when there is no match.
Keyword isKeyword(QString symbol)
{
    // Keys are the lower case spelling of each keyword.
    static const std::unordered_map<std::string, Keyword> keywords = {
        { "as", Keyword::As },
        { "by", Keyword::By },
        { "delete", Keyword::Delete },
        { "from", Keyword::From },
        { "group", Keyword::Group },
        { "insert", Keyword::Insert },
        { "order", Keyword::Order },
        { "select", Keyword::Select },
        { "update", Keyword::Update },
        { "where", Keyword::Where }
    };

    const auto utf8 = symbol.toLower().toUtf8();
    const auto match = keywords.find(utf8.data());
    return match == keywords.end() ? Keyword::NotAKeyword : match->second;
}
/// Creates a parser that reads its tokens from @p lexer.
/// Only a reference is stored, the lexer itself is not copied.
SqlParser::SqlParser(SqlLexer &lexer) : lexer(lexer) {}
void SqlParser::parse()
{
// Basic algo:
// LOOP
// GET token
// IF NOT try_reduce(token)
// THEN SHIFT
// END LOOP
while (1) {
SqlToken token = lexer.nextBasicToken();
if (token.ok) {
if (token.tokenType == BasicTokenType::Symbol) {
Keyword kw = isKeyword(token.out);
switch (kw) {
case Keyword::Select:
parseSelect(*this);
break;
case Keyword::NotAKeyword:
default:
// unexpected
break;
}
}
}
else {
// error during lexical analysis, need to recover
}
}
}
//bool try_reduce(SqkToken token)
//{
// // what state are we in? what are we expecting
//}

72
core/SqlParser.h Normal file
View file

@ -0,0 +1,72 @@
#ifndef SQLPARSER_H
#define SQLPARSER_H
#include "SqlLexer.h"
#include <memory>
#include <optional>
/// SQL keywords the parser distinguishes.
enum class Keyword {
    NotAKeyword, ///< marker for "this symbol is not a keyword"

    As,          ///< AS
    By,          ///< BY
    Delete,      ///< DELETE
    From,        ///< FROM
    Group,       ///< GROUP
    Insert,      ///< INSERT
    Order,       ///< ORDER
    Select,      ///< SELECT
    Update,      ///< UPDATE
    Where        ///< WHERE
};
namespace SqlAst {
class Select;
class SelectList;
}
// The parsing works by calling functions that each know either a global part
// or a smaller specific part of the grammar, and is thus recursive. At certain points a function parsing something specific
// will either be in a valid terminal state and encounter something that cannot be a continuation
// of what it is parsing, and thus return successfully, or be in a non-terminal state and encounter something unexpected.
// In both cases it will return, hoping that one of the functions above can continue (and recover from the error if needed).
/// Parser that reads tokens from a SqlLexer and offers the helper functions
/// the parse functions of the AST classes use to inspect the token stream.
class SqlParser {
public:
    /// Creates a parser reading from @p lexer; only a reference to it is kept.
    explicit SqlParser(SqlLexer &lexer);

    /// Runs the parser on the input of the lexer.
    void parse();

    /** Checks to see if the next token is the expected keyword.
     *
     * If it is, the token is consumed.
     *
     * \return presumably the consumed token on a match and an empty optional
     * otherwise - the implementation is not part of this header, confirm there.
     */
    std::optional<SqlToken> expectKeyword(Keyword kw);

    /// Like expectKeyword() but, judging by the name, expects a symbol token.
    std::optional<SqlToken> expectSymbol();

    /// Like expectKeyword() but, judging by the name, expects a token of type @p tt.
    std::optional<SqlToken> expectToken(BasicTokenType tt);

    /** If the next token is Keyword kw consume it, otherwise do nothing.
     *
     * In some cases the return value is unimportant as the keyword is
     * completely optional. In other cases the presence of the optional
     * keyword might force the next token to be something specific.
     *
     * \return true if the token was found
     */
    bool consumeOptionalKeyword(Keyword kw);

private:
    //using TokenStack = std::stack<SqlToken>;
    //TokenStack tokenStack;

    /// Source of the tokens.
    SqlLexer &lexer;

    //bool try_reduce(SqkToken token);
};
#endif // SQLPARSER_H

View file

@ -39,7 +39,13 @@ SOURCES += my_boost_assert_handler.cpp \
QueuedBackgroundTask.cpp \ QueuedBackgroundTask.cpp \
ExplainTreeModelItem.cpp \ ExplainTreeModelItem.cpp \
jsoncpp.cpp \ jsoncpp.cpp \
WorkManager.cpp WorkManager.cpp \
SqlParser.cpp \
SqlAstNode.cpp \
SqlAstSelectList.cpp \
SqlAstSelectListEntry.cpp \
SqlAstSelect.cpp \
SqlAstExpression.cpp
HEADERS += PasswordManager.h \ HEADERS += PasswordManager.h \
SqlLexer.h \ SqlLexer.h \
@ -54,7 +60,13 @@ HEADERS += PasswordManager.h \
TaskControl.h \ TaskControl.h \
ControllableTask.h \ ControllableTask.h \
RunControllableTask.h \ RunControllableTask.h \
TaskExecutor.h TaskExecutor.h \
SqlParser.h \
SqlAstNode.h \
SqlAstSelectList.h \
SqlAstSelectListEntry.h \
SqlAstSelect.h \
SqlAstExpression.h
unix { unix {
target.path = /usr/lib target.path = /usr/lib