From c2e201f81379bfe4a6e48af58ad0c1ef94f630f0 Mon Sep 17 00:00:00 2001 From: eelke Date: Sat, 11 Feb 2017 08:03:10 +0100 Subject: [PATCH] Separated the lexer from the syntax highlighter for automated testing. --- SqlLexer.cpp | 133 ++++++++++++++++++++++++++++++++ SqlLexer.h | 43 +++++++++++ SqlSyntaxHighlighter.cpp | 160 +-------------------------------------- pglab.pro | 6 +- 4 files changed, 182 insertions(+), 160 deletions(-) create mode 100644 SqlLexer.cpp create mode 100644 SqlLexer.h diff --git a/SqlLexer.cpp b/SqlLexer.cpp new file mode 100644 index 0000000..525cc04 --- /dev/null +++ b/SqlLexer.cpp @@ -0,0 +1,133 @@ +#include "SqlLexer.h" + +SqlLexer::SqlLexer(const QString &block, LexerState currentstate) + : m_block(block) + , m_state(currentstate) +{} + +QChar SqlLexer::nextChar() +{ + QChar result = QChar::Null; + if (m_pos < m_block.size()) { + result = m_block.at(m_pos++); + } + return result; +} + +QChar SqlLexer::peekChar() +{ + QChar result = QChar::Null; + if (m_pos < m_block.size()) { + result = m_block.at(m_pos); + } + return result; +} + +/** + * @brief Scans the next basic token from the block, skipping any leading whitespace. + * @param startpos receives the offset in the block at which the token starts + * @param length receives the number of characters in the token + * @param tokentype receives the kind of token that was recognized + * @param out receives the token text, only assigned for Symbol tokens + * @return true when a token was produced, false when nextChar() returned QChar::Null (end of the block); length, tokentype and out are not assigned in that case + */ +bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out) +{ + // Basically chops based on whitespace + // it also recognizes comments and quoted strings/identifiers + bool result = false; + while (true) { + startpos = m_pos; + QChar c = nextChar(); + if (c.isSpace()) { + // Just skip whitespace + } + else if (c == '-' && peekChar() == '-') { // two dashes, start of comment + // Eat the second dash, then loop till end of line or end of block + c = nextChar(); + for (;;) { + c = peekChar(); + if (c != QChar::Null && c != '\n') + nextChar(); + else + break; + } + length = m_pos - startpos; + tokentype = BasicTokenType::Comment; + return true; + } + else if (c 
== '\'') { + // Single quoted string so it's an SQL text literal + while (true) { + c = peekChar(); + if (c == QChar::Null || c == '\n') { + // unterminated at end of line or block, report what we have as the token + length = m_pos - startpos; + tokentype = BasicTokenType::QuotedString; + return true; + } + else { + nextChar(); + if (c == '\'') { + // maybe end of string literal + if (peekChar() == '\'') { + // Nope, a doubled quote is an escaped quote + nextChar(); // eat it + } + else { + length = m_pos - startpos; + tokentype = BasicTokenType::QuotedString; + return true; + } + } + } + } + } + else if (c == '"') { + // Double quoted identifier + while (true) { + c = peekChar(); + if (c == QChar::Null || c == '\n') { + // unterminated at end of line or block, report what we have as the token + length = m_pos - startpos; + tokentype = BasicTokenType::QuotedIdentifier; + return true; + } + else { + nextChar(); + if (c == '"') { + // maybe end of quoted identifier + if (peekChar() == '"') { + // Nope, a doubled quote is an escaped quote + nextChar(); // eat it + } + else { + length = m_pos - startpos; + tokentype = BasicTokenType::QuotedIdentifier; + return true; + } + } + } + } + } + else if (c == QChar::Null) { + break; + } + else { + // Undetermined symbol: the first char plus any letters, digits or underscores following it + for (;;) { + c = peekChar(); + if (c.isLetterOrNumber() || c == '_') + nextChar(); + else + break; + } + length = m_pos - startpos; + tokentype = BasicTokenType::Symbol; + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + } + return false; +} diff --git a/SqlLexer.h b/SqlLexer.h new file mode 100644 index 0000000..488fee4 --- /dev/null +++ b/SqlLexer.h @@ -0,0 +1,43 @@ +#ifndef SQLLEXER_H +#define SQLLEXER_H + +#include <QString> + +enum class BasicTokenType { + None, + End, // End of input + Symbol, // can be many things, keyword, object name, operator, .. 
+ Comment, + QuotedString, + DollarQuotedString, + QuotedIdentifier +}; + +enum class LexerState { + Null, + InDollarQuotedString +}; + + +class SqlLexer { +public: + SqlLexer(const QString &block, LexerState currentstate); + QChar nextChar(); // returns the char at the current position and advances, QChar::Null when past the end + QChar peekChar(); // returns the char at the current position without advancing, QChar::Null when past the end + /** + * @brief Scans the next basic token from the block, skipping any leading whitespace. + * @param startpos receives the offset in the block at which the token starts + * @param length receives the number of characters in the token + * @param tokentype receives the kind of token that was recognized + * @param out receives the token text, only assigned for Symbol tokens + * @return true when a token was produced, false when nextChar() returned QChar::Null (end of the block); length, tokentype and out are not assigned in that case + */ + bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out); + +private: + QString m_block; + int m_pos = 0; + LexerState m_state; +}; + +#endif // SQLLEXER_H diff --git a/SqlSyntaxHighlighter.cpp b/SqlSyntaxHighlighter.cpp index 8af2203..3d0263b 100644 --- a/SqlSyntaxHighlighter.cpp +++ b/SqlSyntaxHighlighter.cpp @@ -1,167 +1,11 @@ #include "SqlSyntaxHighlighter.h" #include "pgtypecontainer.h" +#include "SqlLexer.h" namespace { - enum class BasicTokenType { - None, - End, // End of input - Symbol, // can be many things, keyword, object name, operator, .. 
- Comment, - QuotedString, - DollarQuotedString, - QuotedIdentifier - }; - - enum class LexerState { - Null, - InDollarQuotedString - }; - - - class Lexer { - private: - QString m_block; - int m_pos = 0; - LexerState m_state; - public: - Lexer(const QString &block, LexerState currentstate) - : m_block(block) - , m_state(currentstate) - {} - - QChar nextChar() - { - QChar result = QChar::Null; - if (m_pos < m_block.size()) { - result = m_block.at(m_pos++); - } - return result; - } - - QChar peekChar() - { - QChar result = QChar::Null; - if (m_pos < m_block.size()) { - result = m_block.at(m_pos); - } - return result; - } - - /** - * @brief NextBasicToken - * @param in - * @param ofs - * @param start - * @param length - * @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover - */ - bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out) - { - // Basically chops based on white space - // it does also recognize comments and quoted strings/identifiers - bool result = false; - while (true) { - startpos = m_pos; - QChar c = nextChar(); - if (c.isSpace()) { - // Just skip whitespace - } - else if (c == '-' && peekChar() == '-') { // two dashes, start of comment - // Loop till end of line or end of block - c = nextChar(); - for (;;) { - c = peekChar(); - if (c != QChar::Null && c != '\n') - nextChar(); - else - break; - } - length = m_pos - startpos; - tokentype = BasicTokenType::Comment; - return true; - } - else if (c == '\'') { - // Single quoted string so it's an SQL text literal - while (true) { - c = peekChar(); - if (c == QChar::Null || c == '\n') { - // unexpected end, pretend nothings wrong - length = m_pos - startpos; - tokentype = BasicTokenType::QuotedString; - return true; - } - else { - nextChar(); - if (c == '\'') { - // maybe end of string literal - if (peekChar() == '\'') { - // Nope, just double quote to escape quote - nextChar(); // eat it - } - 
else { - length = m_pos - startpos; - tokentype = BasicTokenType::QuotedString; - return true; - } - } - } - } - } - else if (c == '"') { - // Double quoted identifier - while (true) { - c = peekChar(); - if (c == QChar::Null || c == '\n') { - // unexpected end, pretend nothings wrong - length = m_pos - startpos; - tokentype = BasicTokenType::QuotedIdentifier; - return true; - } - else { - nextChar(); - if (c == '"') { - // maybe end of string literal - if (peekChar() == '"') { - // Nope, just double quote to escape quote - nextChar(); // eat it - } - else { - length = m_pos - startpos; - tokentype = BasicTokenType::QuotedIdentifier; - return true; - } - } - } - } - } - else if (c == QChar::Null) { - break; - } - else { - // Undetermined symbol - for (;;) { - c = peekChar(); - if (c.isLetterOrNumber() || c == '_') - nextChar(); - else - break; - } - length = m_pos - startpos; - tokentype = BasicTokenType::Symbol; - QStringRef sr(&m_block, startpos, length); - out = sr.toString(); - return true; - } - } - return false; - } - - }; - - t_SymbolSet g_Keywords = { "a", "abort", "abs", "absent", "absolute", "access", "according", "action", "ada", "add", "admin", "after", "aggregate", "all", "allocate", "also", "alter", "analyse", "analyze", "and", @@ -289,7 +133,7 @@ void SqlSyntaxHighlighter::setTypes(const PgTypeContainer *types) void SqlSyntaxHighlighter::highlightBlock(const QString &text) { - Lexer lexer(text, LexerState::Null); + SqlLexer lexer(text, LexerState::Null); int startpos, length; BasicTokenType tokentype; QString s; diff --git a/pglab.pro b/pglab.pro index 8926085..1034bd4 100644 --- a/pglab.pro +++ b/pglab.pro @@ -50,7 +50,8 @@ SOURCES += main.cpp\ OpenDatabase.cpp \ ParamListModel.cpp \ MainWindow.cpp \ - SqlSyntaxHighlighter.cpp + SqlSyntaxHighlighter.cpp \ + SqlLexer.cpp HEADERS += \ sqlparser.h \ @@ -87,7 +88,8 @@ HEADERS += \ OpenDatabase.h \ ParamListModel.h \ MainWindow.h \ - SqlSyntaxHighlighter.h + SqlSyntaxHighlighter.h \ + SqlLexer.h FORMS += 
mainwindow.ui \ databasewindow.ui \