Separated the lexer from the syntax highlighter for automated testing.

This commit is contained in:
eelke 2017-02-11 08:03:10 +01:00
parent 254cc82116
commit c2e201f813
4 changed files with 182 additions and 160 deletions

133
SqlLexer.cpp Normal file
View file

@ -0,0 +1,133 @@
#include "SqlLexer.h"
/// Sets up a lexer over a copy of @p block and remembers the state it starts in.
SqlLexer::SqlLexer(const QString &block, LexerState currentstate)
	: m_block(block), m_state(currentstate)
{
}
/// Consumes and returns the character at the read position.
/// Once the block is exhausted QChar::Null is returned and the position stays put.
QChar SqlLexer::nextChar()
{
	if (m_pos >= m_block.size())
		return QChar::Null;

	const QChar current = m_block.at(m_pos);
	++m_pos;
	return current;
}
/// Returns the character at the read position without consuming it,
/// or QChar::Null when the block is exhausted.
QChar SqlLexer::peekChar()
{
	const bool exhausted = m_pos >= m_block.size();
	return exhausted ? QChar(QChar::Null) : m_block.at(m_pos);
}
/**
* @brief NextBasicToken
* @param in
* @param ofs
* @param start
* @param length
* @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover
*/
bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
bool result = false;
while (true) {
startpos = m_pos;
QChar c = nextChar();
if (c.isSpace()) {
// Just skip whitespace
}
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar();
for (;;) {
c = peekChar();
if (c != QChar::Null && c != '\n')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Comment;
return true;
}
else if (c == '\'') {
// Single quoted string so it's an SQL text literal
while (true) {
c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
else {
nextChar();
if (c == '\'') {
// maybe end of string literal
if (peekChar() == '\'') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
}
}
}
}
else if (c == '"') {
// Double quoted identifier
while (true) {
c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
else {
nextChar();
if (c == '"') {
// maybe end of string literal
if (peekChar() == '"') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
}
}
}
}
else if (c == QChar::Null) {
break;
}
else {
// Undetermined symbol
for (;;) {
c = peekChar();
if (c.isLetterOrNumber() || c == '_')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Symbol;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
return false;
}

43
SqlLexer.h Normal file
View file

@ -0,0 +1,43 @@
#ifndef SQLLEXER_H
#define SQLLEXER_H
#include <QString>
/// Coarse classification of the tokens handed out by the lexer.
enum class BasicTokenType {
	None,               ///< no token type
	End,                ///< end of input
	Symbol,             ///< can be many things: keyword, object name, operator, ..
	Comment,            ///< comment introduced by two dashes
	QuotedString,       ///< single quoted SQL text literal
	DollarQuotedString, ///< dollar quoted string
	QuotedIdentifier,   ///< double quoted identifier
};
/// State a lexer is started in; handed to the lexer's constructor.
enum class LexerState {
	Null,                 ///< no special state
	InDollarQuotedString, ///< presumably: the block starts inside a dollar quoted string (TODO confirm with callers)
};
/**
 * @brief Splits one block of SQL text into basic tokens.
 *
 * The lexer keeps its own copy of the text and a read position that starts
 * at the beginning of the block.
 */
class SqlLexer {
public:
/// Creates a lexer over @p block that starts in state @p currentstate.
SqlLexer(const QString &block, LexerState currentstate);
/// Consumes and returns the next character, QChar::Null when the block is exhausted.
QChar nextChar();
/// Returns the next character without consuming it, QChar::Null when the block is exhausted.
QChar peekChar();
/**
 * @brief Extracts the next basic token from the block.
 *
 * Basically chops the input on white space but also recognizes comments
 * and quoted strings/identifiers.
 *
 * @param startpos  receives the index within the block where the token starts
 * @param length    receives the number of characters in the token
 * @param tokentype receives the kind of token that was found
 * @param out       receives the text of the token when it is a Symbol
 * @return true when a token was found, false when the end of the block was reached
 */
bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out);
private:
// Text being tokenized
QString m_block;
// Index in m_block of the next character to read
int m_pos = 0;
// State passed to the constructor
LexerState m_state;
};
#endif // SQLLEXER_H

View file

@ -1,167 +1,11 @@
#include "SqlSyntaxHighlighter.h" #include "SqlSyntaxHighlighter.h"
#include "pgtypecontainer.h" #include "pgtypecontainer.h"
#include "SqlLexer.h"
namespace { namespace {
/// Coarse classification of the tokens handed out by the lexer.
enum class BasicTokenType {
	None,               ///< no token type
	End,                ///< end of input
	Symbol,             ///< can be many things: keyword, object name, operator, ..
	Comment,            ///< comment introduced by two dashes
	QuotedString,       ///< single quoted SQL text literal
	DollarQuotedString, ///< dollar quoted string
	QuotedIdentifier,   ///< double quoted identifier
};
/// State a lexer is started in; handed to the lexer's constructor.
enum class LexerState {
	Null,                 ///< no special state
	InDollarQuotedString, ///< presumably: the block starts inside a dollar quoted string (TODO confirm with callers)
};
class Lexer {
private:
QString m_block;
int m_pos = 0;
LexerState m_state;
public:
Lexer(const QString &block, LexerState currentstate)
: m_block(block)
, m_state(currentstate)
{}
QChar nextChar()
{
QChar result = QChar::Null;
if (m_pos < m_block.size()) {
result = m_block.at(m_pos++);
}
return result;
}
QChar peekChar()
{
QChar result = QChar::Null;
if (m_pos < m_block.size()) {
result = m_block.at(m_pos);
}
return result;
}
/**
* @brief NextBasicToken
* @param in
* @param ofs
* @param start
* @param length
* @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover
*/
bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
bool result = false;
while (true) {
startpos = m_pos;
QChar c = nextChar();
if (c.isSpace()) {
// Just skip whitespace
}
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar();
for (;;) {
c = peekChar();
if (c != QChar::Null && c != '\n')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Comment;
return true;
}
else if (c == '\'') {
// Single quoted string so it's an SQL text literal
while (true) {
c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
else {
nextChar();
if (c == '\'') {
// maybe end of string literal
if (peekChar() == '\'') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
}
}
}
}
else if (c == '"') {
// Double quoted identifier
while (true) {
c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
else {
nextChar();
if (c == '"') {
// maybe end of string literal
if (peekChar() == '"') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
}
}
}
}
else if (c == QChar::Null) {
break;
}
else {
// Undetermined symbol
for (;;) {
c = peekChar();
if (c.isLetterOrNumber() || c == '_')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Symbol;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
return false;
}
};
t_SymbolSet g_Keywords = { t_SymbolSet g_Keywords = {
"a", "abort", "abs", "absent", "absolute", "access", "according", "action", "ada", "add", "a", "abort", "abs", "absent", "absolute", "access", "according", "action", "ada", "add",
"admin", "after", "aggregate", "all", "allocate", "also", "alter", "analyse", "analyze", "and", "admin", "after", "aggregate", "all", "allocate", "also", "alter", "analyse", "analyze", "and",
@ -289,7 +133,7 @@ void SqlSyntaxHighlighter::setTypes(const PgTypeContainer *types)
void SqlSyntaxHighlighter::highlightBlock(const QString &text) void SqlSyntaxHighlighter::highlightBlock(const QString &text)
{ {
Lexer lexer(text, LexerState::Null); SqlLexer lexer(text, LexerState::Null);
int startpos, length; int startpos, length;
BasicTokenType tokentype; BasicTokenType tokentype;
QString s; QString s;

View file

@ -50,7 +50,8 @@ SOURCES += main.cpp\
OpenDatabase.cpp \ OpenDatabase.cpp \
ParamListModel.cpp \ ParamListModel.cpp \
MainWindow.cpp \ MainWindow.cpp \
SqlSyntaxHighlighter.cpp SqlSyntaxHighlighter.cpp \
SqlLexer.cpp
HEADERS += \ HEADERS += \
sqlparser.h \ sqlparser.h \
@ -87,7 +88,8 @@ HEADERS += \
OpenDatabase.h \ OpenDatabase.h \
ParamListModel.h \ ParamListModel.h \
MainWindow.h \ MainWindow.h \
SqlSyntaxHighlighter.h SqlSyntaxHighlighter.h \
SqlLexer.h
FORMS += mainwindow.ui \ FORMS += mainwindow.ui \
databasewindow.ui \ databasewindow.ui \