Seperated the lexer from the syntax highlighter for automated testing.

This commit is contained in:
eelke 2017-02-11 08:03:10 +01:00
parent 254cc82116
commit c2e201f813
4 changed files with 182 additions and 160 deletions

View file

@ -1,167 +1,11 @@
#include "SqlSyntaxHighlighter.h"
#include "pgtypecontainer.h"
#include "SqlLexer.h"
namespace {
enum class BasicTokenType {
None,
End, // End of input
Symbol, // can be many things, keyword, object name, operator, ..
Comment,
QuotedString,
DollarQuotedString,
QuotedIdentifier
};
enum class LexerState {
Null,
InDollarQuotedString
};
class Lexer {
private:
QString m_block;
int m_pos = 0;
LexerState m_state;
public:
Lexer(const QString &block, LexerState currentstate)
: m_block(block)
, m_state(currentstate)
{}
QChar nextChar()
{
QChar result = QChar::Null;
if (m_pos < m_block.size()) {
result = m_block.at(m_pos++);
}
return result;
}
QChar peekChar()
{
QChar result = QChar::Null;
if (m_pos < m_block.size()) {
result = m_block.at(m_pos);
}
return result;
}
/**
* @brief NextBasicToken
* @param in
* @param ofs
* @param start
* @param length
* @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover
*/
bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
bool result = false;
while (true) {
startpos = m_pos;
QChar c = nextChar();
if (c.isSpace()) {
// Just skip whitespace
}
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar();
for (;;) {
c = peekChar();
if (c != QChar::Null && c != '\n')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Comment;
return true;
}
else if (c == '\'') {
// Single quoted string so it's an SQL text literal
while (true) {
c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
else {
nextChar();
if (c == '\'') {
// maybe end of string literal
if (peekChar() == '\'') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
}
}
}
}
else if (c == '"') {
// Double quoted identifier
while (true) {
c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
else {
nextChar();
if (c == '"') {
// maybe end of string literal
if (peekChar() == '"') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
}
}
}
}
else if (c == QChar::Null) {
break;
}
else {
// Undetermined symbol
for (;;) {
c = peekChar();
if (c.isLetterOrNumber() || c == '_')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Symbol;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
return false;
}
};
t_SymbolSet g_Keywords = {
"a", "abort", "abs", "absent", "absolute", "access", "according", "action", "ada", "add",
"admin", "after", "aggregate", "all", "allocate", "also", "alter", "analyse", "analyze", "and",
@ -289,7 +133,7 @@ void SqlSyntaxHighlighter::setTypes(const PgTypeContainer *types)
void SqlSyntaxHighlighter::highlightBlock(const QString &text)
{
Lexer lexer(text, LexerState::Null);
SqlLexer lexer(text, LexerState::Null);
int startpos, length;
BasicTokenType tokentype;
QString s;