pgLab/core/SqlLexer.cpp

220 lines
5.8 KiB
C++

#include "SqlLexer.h"
SqlLexer::SqlLexer(const QString &block, LexerState currentstate)
: m_block(block)
, m_state(currentstate)
{}
QChar SqlLexer::nextChar()
{
QChar result = QChar::Null;
if (m_pos < m_block.size()) {
result = m_block.at(m_pos++);
}
return result;
}
QChar SqlLexer::peekChar()
{
QChar result = QChar::Null;
if (m_pos < m_block.size()) {
result = m_block.at(m_pos);
}
return result;
}
/**
* @brief NextBasicToken
* @param in
* @param ofs
* @param start
* @param length
* @return false when input seems invalid, it will return what it did recognize but something
* wasn't right, parser should try to recover
*/
bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
while (true) {
startpos = m_pos;
QChar c = nextChar();
// if (LexerState::Null == m_state) {
if (c.isSpace()) {
// Just skip whitespace
}
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar();
for (;;) {
c = peekChar();
if (c != QChar::Null && c != '\n')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Comment;
return true;
}
else if (c == '\'') {
// Single quoted string so it's an SQL text literal
return parseSingleQuotedString(startpos, length, tokentype);
}
else if (c == '"') {
// Double quoted identifier
return parseDoubleQuotedIdentifier(startpos, length, tokentype);
}
// else if (c == '/' && peekChar() == '*') {
// nextChar();
// m_state = LexerState::InBlockComment;
// }
else if (c == QChar::Null) {
break;
}
else if (c == '$') {
return parseDollarQuote(startpos, length, tokentype, out);
}
else {
// Undetermined symbol
for (;;) {
c = peekChar();
if (c.isLetterOrNumber() || c == '_')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Symbol;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
// }
// else if (LexerState::InBlockComment == m_state) {
// if (c == QChar::Null) {
// // eof current buffer, we need to return state so
// if (m_pos == startpos) {
// break;
// }
// else {
// length = m_pos - startpos;
// tokentype = BasicTokenType::OpenBlockComment;
// return true;
// }
// }
// else if (c == '*') {
// nextChar();
// if (peekChar() == '/') {
// nextChar();
// length = m_pos - startpos;
// tokentype = BasicTokenType::BlockComment;
// m_state = LexerState::Null;
// return true;
// }
// }
// }
}
return false;
}
bool SqlLexer::parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype)
{
while (true) {
QChar c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
else {
nextChar();
if (c == '\'') {
// maybe end of string literal
if (peekChar() == '\'') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
}
}
}
}
bool SqlLexer::parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype)
{
while (true) {
QChar c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
else {
nextChar();
if (c == '"') {
// maybe end of string literal
if (peekChar() == '"') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
}
}
}
}
bool SqlLexer::parseDollarQuote(int startpos, int &length, BasicTokenType &tokentype, QString &out)
{
QChar c = nextChar();
if (c.isDigit()) {
for (;;) {
c = peekChar();
if (c.isDigit())
nextChar();
else
break;
}
tokentype = BasicTokenType::Parameter;
length = m_pos - startpos;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
else if (c.isLetter()) {
// is this a dollar quote?
while (true) {
c = nextChar();
if (c == '$') {
// Found valid dollar quote
tokentype = BasicTokenType::DollarQuote;
length = m_pos - startpos;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
else if (!c.isLetter()) {
// ERROR, unallowed character
tokentype = BasicTokenType::None;
length = m_pos - startpos;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return false;
}
}
}
return false;
}