diff --git a/core/SqlAstExpression.cpp b/core/SqlAstExpression.cpp
new file mode 100644
index 0000000..618fce3
--- /dev/null
+++ b/core/SqlAstExpression.cpp
@@ -0,0 +1,17 @@
+#include "SqlAstExpression.h"
+
+using namespace SqlAst;
+
+/// Parses one expression from the parser's token stream.
+///
+/// Not implemented yet: returns nullptr so callers get a well-defined
+/// "no expression" result instead of flowing off the end of the function.
+std::shared_ptr<Expression> SqlAst::parseExpression(SqlParser &parser)
+{
+    // get token, what is it?
+    // number
+    // symbol
+    // left parenthesis
+
+    return nullptr;
+}
diff --git a/core/SqlAstExpression.h b/core/SqlAstExpression.h
new file mode 100644
index 0000000..6990fd5
--- /dev/null
+++ b/core/SqlAstExpression.h
@@ -0,0 +1,21 @@
+#ifndef SQLASTEXPRESSION_H
+#define SQLASTEXPRESSION_H
+
+#include "SqlAstNode.h"
+#include <memory>
+
+class SqlParser;
+
+namespace SqlAst {
+
+/// Base class for parts of expressions like calculations, comparisons, function calls etc...
+class Expression: public Node {
+
+};
+
+/// Parses one expression; returns nullptr when no expression could be built.
+std::shared_ptr<Expression> parseExpression(SqlParser &parser);
+
+}
+
+#endif // SQLASTEXPRESSION_H
diff --git a/core/SqlAstNode.cpp b/core/SqlAstNode.cpp
new file mode 100644
index 0000000..19c59b4
--- /dev/null
+++ b/core/SqlAstNode.cpp
@@ -0,0 +1,8 @@
+#include "SqlAstNode.h"
+
+using namespace SqlAst;
+
+Node::Node()
+{
+
+}
diff --git a/core/SqlAstNode.h b/core/SqlAstNode.h
new file mode 100644
index 0000000..3c40f41
--- /dev/null
+++ b/core/SqlAstNode.h
@@ -0,0 +1,61 @@
+#ifndef SQLASTNODE_H
+#define SQLASTNODE_H
+
+#include <memory>
+#include <vector>
+
+namespace SqlAst {
+
+/// Common base class of every node in the SQL abstract syntax tree.
+class Node {
+public:
+    Node();
+    /// Virtual so that nodes can be destroyed safely through a base class pointer.
+    virtual ~Node() = default;
+    //virtual Node* Clone() const = 0;
+};
+
+class DDLNode: public Node {
+
+};
+
+class CreateTable: public Node {
+
+};
+
+
+// Is there a benefit for having a common base for crud operations???
+class CrudNode: public Node {
+
+};
+
+
+class Insert: public CrudNode {
+
+};
+
+/** Class for representing an identifier.
+ *
+ * This can still be multiple things like:
+ * - name of alias, schema, table and or column
+ * - name of function
+ * - predefined symbol like LOCAL_TIME
+ *
+ * During parsing this cannot always be determined as an alias might be involved that hasn't been parsed yet
+ * so we put an Identifier in the AST; a follow up pass could determine what it actually is and act appropriately
+ *
+ * An identifier can consist of following fields
+ * - [[schema.]table.]column
+ * - [alias.]column
+ * - [schema.]table.function (for function taking a row of type table)
+ * - alias.function (for function taking a row of type table)
+ * - schema.function
+ * - sql symbol like CURRENT_DATE
+ */
+class Identifier: public Node {
+
+};
+
+}
+
+#endif // SQLASTNODE_H
diff --git a/core/SqlAstSelect.cpp b/core/SqlAstSelect.cpp
new file mode 100644
index 0000000..dcf7a5a
--- /dev/null
+++ b/core/SqlAstSelect.cpp
@@ -0,0 +1,18 @@
+#include "SqlAstSelect.h"
+#include "SqlAstSelectList.h"
+#include "SqlParser.h"
+
+using namespace SqlAst;
+
+/// Parses the part of a SELECT statement that follows the SELECT keyword.
+std::shared_ptr<Select> SqlAst::parseSelect(SqlParser &parser)
+{
+    auto ast_select = std::make_shared<Select>();
+    // parse select list of expression + aliases, required
+    auto select_list = parseSelectList(parser);
+    ast_select->setSelectList(select_list);
+
+    // parse optional from list
+
+    return ast_select;
+}
diff --git a/core/SqlAstSelect.h b/core/SqlAstSelect.h
new file mode 100644
index 0000000..d2b7232
--- /dev/null
+++ b/core/SqlAstSelect.h
@@ -0,0 +1,41 @@
+#ifndef SQLASTSELECT_H
+#define SQLASTSELECT_H
+
+#include "SqlAstNode.h"
+//#include "SqlAstSelectList.h"
+#include <memory>
+
+class SqlParser;
+
+namespace SqlAst {
+
+class SelectList;
+class From;
+class Where;
+class GroupBy;
+class Having;
+class OrderBy;
+
+/// AST node for a SELECT statement; each clause is stored as its own child node.
+class Select: public CrudNode {
+public:
+
+    void setSelectList(std::shared_ptr<SelectList> list) { select = list; }
+    auto getSelectList() const { return select; }
+    void setFrom(std::shared_ptr<From> f) { from = f; }
+
+private:
+    std::shared_ptr<SelectList> select;
+    std::shared_ptr<From> from;
+    std::shared_ptr<Where> where;
+    std::shared_ptr<GroupBy> groupBy;
+    std::shared_ptr<Having> having;
+    std::shared_ptr<OrderBy> orderBy;
+};
+
+/// Parses the part of a SELECT statement that follows the SELECT keyword.
+std::shared_ptr<Select> parseSelect(SqlParser &parser);
+
+}
+
+#endif // SQLASTSELECT_H
diff --git a/core/SqlAstSelectList.cpp b/core/SqlAstSelectList.cpp
new file mode 100644
index 0000000..65fec73
--- /dev/null
+++ b/core/SqlAstSelectList.cpp
@@ -0,0 +1,29 @@
+#include "SqlAstSelectList.h"
+#include "SqlAstSelectListEntry.h"
+#include "SqlParser.h"
+
+using namespace SqlAst;
+
+/// Parses a comma separated list of select list entries.
+/// Parsing stops as soon as an entry is not followed by a comma.
+std::shared_ptr<SelectList> SqlAst::parseSelectList(SqlParser &parser)
+{
+    // parse select element
+    // whats next?
+    // comma -> parse an element
+    // something else return and let caller figure it out
+    auto ast_select_list = std::make_shared<SelectList>();
+    while (true) {
+        auto ast_select_elem = parseSelectListEntry(parser);
+        if (ast_select_elem) {
+            ast_select_list->add(ast_select_elem);
+        }
+        else {
+            // todo error
+        }
+        if (!parser.expectToken(BasicTokenType::Comma)) {
+            break;
+        }
+    }
+    return ast_select_list;
+}
diff --git a/core/SqlAstSelectList.h b/core/SqlAstSelectList.h
new file mode 100644
index 0000000..4f092e0
--- /dev/null
+++ b/core/SqlAstSelectList.h
@@ -0,0 +1,36 @@
+#ifndef SQLASTSELECTLIST_H
+#define SQLASTSELECTLIST_H
+
+#include "SqlAstNode.h"
+#include <memory>
+#include <vector>
+
+class SqlParser;
+
+namespace SqlAst {
+
+class SelectListEntry;
+
+/// The list of entries that makes up the select list of a SELECT statement.
+class SelectList: public Node {
+public:
+    using EntrySPtr = std::shared_ptr<SelectListEntry>;
+
+    /// Appends entry to the end of the list.
+    void add(EntrySPtr entry)
+    {
+        entryList.push_back(entry);
+    }
+
+private:
+    using EntryList = std::vector<EntrySPtr>;
+
+    EntryList entryList;
+};
+
+/// Parses a comma separated list of select list entries.
+std::shared_ptr<SelectList> parseSelectList(SqlParser &parser);
+
+}
+
+#endif // SQLASTSELECTLIST_H
diff --git a/core/SqlAstSelectListEntry.cpp b/core/SqlAstSelectListEntry.cpp
new file mode 100644
index 0000000..dcf74a9
--- /dev/null
+++ b/core/SqlAstSelectListEntry.cpp
@@ -0,0 +1,38 @@
+#include "SqlAstSelectListEntry.h"
+#include "SqlAstExpression.h"
+#include "SqlParser.h"
+
+using namespace SqlAst;
+
+/// Parses one select list entry: an expression, optionally followed by AS and an alias.
+///
+/// An alias that is not introduced by AS is not handled yet.
+std::shared_ptr<SelectListEntry> SqlAst::parseSelectListEntry(SqlParser &parser)
+{
+    auto ast = std::make_shared<SelectListEntry>();
+
+    // parse expression
+    auto ast_expr = parseExpression(parser);
+    ast->setExpression(ast_expr);
+
+    if (parser.consumeOptionalKeyword(Keyword::As)) {
+        // alias required!
+        auto token = parser.expectSymbol();
+        if (token) {
+            ast->setAlias(token->out);
+        }
+        // todo error when the alias is missing
+    }
+    else {
+
+        // optional alias
+
+    }
+    // can come three groups of things:
+    // - AS keyword, alias should follow, followed by comma
+    // - a symbol ie an alias followed by comma
+    // - something else ie comma, FROM or INTO or... is something one of the callers should figure out as long as we have valid
+    // selectElem we are happy
+
+    return ast;
+}
diff --git a/core/SqlAstSelectListEntry.h b/core/SqlAstSelectListEntry.h
new file mode 100644
index 0000000..9ad7476
--- /dev/null
+++ b/core/SqlAstSelectListEntry.h
@@ -0,0 +1,31 @@
+#ifndef SQLASTSELECTLISTENTRY_H
+#define SQLASTSELECTLISTENTRY_H
+
+#include "SqlAstNode.h"
+#include <QString>
+#include <memory>
+#include <vector>
+
+class SqlParser;
+
+namespace SqlAst {
+
+class Expression;
+
+/// One entry of a select list: an expression and the alias it is selected as.
+class SelectListEntry: public Node {
+public:
+    void setExpression(std::shared_ptr<Expression> expr) { this->expression = expr; }
+    void setAlias(QString a) { this->alias = a; }
+private:
+    std::shared_ptr<Expression> expression;
+    QString alias;
+};
+
+/// Parses one select list entry.
+std::shared_ptr<SelectListEntry> parseSelectListEntry(SqlParser &parser);
+
+}
+
+
+#endif // SQLASTSELECTLISTENTRY_H
diff --git a/core/SqlLexer.cpp b/core/SqlLexer.cpp
index a25c55a..3a23dac 100644
--- a/core/SqlLexer.cpp
+++ b/core/SqlLexer.cpp
@@ -110,7 +110,11 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
             }
             if (isSelf(c)) {
                 length = m_pos - startpos;
-                tokentype = BasicTokenType::Self;
+                if (c == ',')
+                    tokentype = BasicTokenType::Comma;
+                else
+                    tokentype = BasicTokenType::Self;
+
                 QStringRef sr(&m_block, startpos, length);
                 out = sr.toString();
                 return true;
diff --git a/core/SqlLexer.h b/core/SqlLexer.h
index dc23db8..3641714 100644
--- a/core/SqlLexer.h
+++ b/core/SqlLexer.h
@@ -15,7 +15,8 @@ enum class BasicTokenType {
     QuotedIdentifier,
     Parameter,
     Operator,
-    Self, // single char representing it self
+    Self, // single char representing itself, maybe remove this and replace with token for each possibility
+    Comma,
     Cast
 };
 
@@ -25,6 +26,14 @@ enum class LexerState {
     InBlockComment
 };
 
+class SqlToken {
+public:
+    bool ok = false; ///< return value of the SqlLexer::nextBasicToken call that filled in this token
+    int startPos = 0;
+    int length = 0;
+    BasicTokenType tokenType{};
+    QString out;
+};
 
 class SqlLexer {
 public:
@@ -40,6 +49,12 @@ public:
      * @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover
      */
     bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out);
+    SqlToken nextBasicToken()
+    {
+        SqlToken token;
+        token.ok = nextBasicToken(token.startPos, token.length, token.tokenType, token.out); // false means the input seemed invalid
+        return token;
+    }
 
     LexerState currentState() const { return m_state; }
 private:
diff --git a/core/SqlParser.cpp b/core/SqlParser.cpp
new file mode 100644
index 0000000..8ff1ddf
--- /dev/null
+++ b/core/SqlParser.cpp
@@ -0,0 +1,80 @@
+#include "SqlParser.h"
+#include "SqlAstSelect.h"
+#include <unordered_map>
+
+using namespace SqlAst;
+
+/// Looks up symbol, ignoring case, in the table of SQL keywords known to the parser.
+///
+/// \return the matching Keyword, or Keyword::NotAKeyword when symbol is not in the table
+Keyword isKeyword(QString symbol)
+{
+    static const std::unordered_map<std::string, Keyword> lookup_map = {
+        { "as", Keyword::As },
+        { "by", Keyword::By },
+        { "delete", Keyword::Delete },
+        { "from", Keyword::From },
+        { "group", Keyword::Group },
+        { "insert", Keyword::Insert },
+        { "order", Keyword::Order },
+        { "select", Keyword::Select },
+        { "update", Keyword::Update },
+        { "where", Keyword::Where }
+    };
+
+    auto res = lookup_map.find(symbol.toLower().toUtf8().data());
+    if (res != lookup_map.end())
+        return res->second;
+    else
+        return Keyword::NotAKeyword;
+}
+
+
+SqlParser::SqlParser(SqlLexer &lexer)
+    : lexer(lexer)
+{
+
+}
+
+/// Reads tokens from the lexer in a loop and hands SELECT statements to parseSelect().
+void SqlParser::parse()
+{
+    // Basic algo:
+    // LOOP
+    // GET token
+    // IF NOT try_reduce(token)
+    // THEN SHIFT
+    // END LOOP
+    // NOTE(review): nothing below ever leaves this loop; an end-of-input check still has to be added.
+    while (1) {
+        SqlToken token = lexer.nextBasicToken();
+        if (token.ok) {
+            if (token.tokenType == BasicTokenType::Symbol) {
+                Keyword kw = isKeyword(token.out);
+                switch (kw) {
+                case Keyword::Select:
+                    parseSelect(*this);
+                    break;
+
+                case Keyword::NotAKeyword:
+                default:
+                    // unexpected
+                    break;
+                }
+            }
+        }
+        else {
+            // error during lexical analysis, need to recover
+
+        }
+    }
+}
+
+
+
+
+//bool try_reduce(SqkToken token)
+//{
+// // what state are we in? what are we expecting
+
+//}
diff --git a/core/SqlParser.h b/core/SqlParser.h
new file mode 100644
index 0000000..621675b
--- /dev/null
+++ b/core/SqlParser.h
@@ -0,0 +1,72 @@
+#ifndef SQLPARSER_H
+#define SQLPARSER_H
+
+#include "SqlLexer.h"
+#include <optional>
+#include <stack>
+
+enum class Keyword {
+    NotAKeyword,
+    As,
+    By,
+    Delete,
+    From,
+    Group,
+    Insert,
+    Order,
+    Select,
+    Update,
+    Where,
+
+};
+
+namespace SqlAst {
+
+    class Select;
+    class SelectList;
+
+}
+// The parsing works by calling functions that know either a global part
+// or a smaller specific part and is thus recursive. At certain points a function parsing something specific
+// will reach a point where it is in a valid terminal state and it encounters something that cannot be a continuation
+// of what it is parsing and thus returns successfully or it is in a non terminal state and encounters something unexpected
+// In both cases it will return hoping that one of the functions above can continue (and recover from the error if needed)
+
+class SqlParser
+{
+public:
+    explicit SqlParser(SqlLexer &lexer);
+
+    void parse();
+
+    /** Checks to see if the next token is the expected keyword.
+     *
+     * If it is, the token is consumed and a non-empty optional is returned.
+     * Otherwise the returned optional is empty.
+     */
+    std::optional<SqlToken> expectKeyword(Keyword kw);
+    std::optional<SqlToken> expectSymbol();
+    std::optional<SqlToken> expectToken(BasicTokenType tt);
+
+    /** If the next token is Keyword kw consume it otherwise do nothing.
+     * In some cases the return value is unimportant as the keyword is completely optional
+     * in other cases the optional keywords presence might force the next token to be something specific
+     *
+     * \return true if the token was found
+     */
+    bool consumeOptionalKeyword(Keyword kw);
+private:
+
+    //using TokenStack = std::stack<SqlToken>;
+
+    //TokenStack tokenStack;
+
+    SqlLexer &lexer;
+
+    //bool try_reduce(SqkToken token);
+
+
+};
+
+
+#endif // SQLPARSER_H
diff --git a/core/core.pro b/core/core.pro
index d716f57..7b165a1 100644
--- a/core/core.pro
+++ b/core/core.pro
@@ -39,7 +39,13 @@ SOURCES += my_boost_assert_handler.cpp \
     QueuedBackgroundTask.cpp \
     ExplainTreeModelItem.cpp \
     jsoncpp.cpp \
-    WorkManager.cpp
+    WorkManager.cpp \
+    SqlParser.cpp \
+    SqlAstNode.cpp \
+    SqlAstSelectList.cpp \
+    SqlAstSelectListEntry.cpp \
+    SqlAstSelect.cpp \
+    SqlAstExpression.cpp
 
 HEADERS += PasswordManager.h \
     SqlLexer.h \
@@ -54,7 +60,13 @@ HEADERS += PasswordManager.h \
     TaskControl.h \
     ControllableTask.h \
     RunControllableTask.h \
-    TaskExecutor.h
+    TaskExecutor.h \
+    SqlParser.h \
+    SqlAstNode.h \
+    SqlAstSelectList.h \
+    SqlAstSelectListEntry.h \
+    SqlAstSelect.h \
+    SqlAstExpression.h
 
 unix {
     target.path = /usr/lib