// pgLab/core/SqlLexer.cpp

#include "SqlLexer.h"
/**
 * @brief Creates a lexer for one block of SQL text.
 * @param block        text to tokenize; taken by value and moved into m_block
 * @param currentstate lexer state to start from, stored in m_state
 *
 * NOTE(review): m_pos is not set here; presumably it has a default member
 * initializer in the class definition - confirm in SqlLexer.h.
 */
SqlLexer::SqlLexer(QString block, LexerState currentstate)
    : m_block(std::move(block)),
      m_state(currentstate)
{
}
/**
 * @brief Consumes and returns the character at the current position.
 * @return the character at m_pos (m_pos is then advanced by one), or
 *         QChar::Null when m_pos is at or past the end of the block, in which
 *         case m_pos is left unchanged.
 */
QChar SqlLexer::nextChar()
{
    if (m_pos >= m_block.size())
        return QChar(QChar::Null);

    const QChar current = m_block.at(m_pos);
    ++m_pos;
    return current;
}
/**
 * @brief Returns the character at the current position without consuming it.
 * @return the character at m_pos, or QChar::Null when m_pos is at or past the
 *         end of the block.
 */
QChar SqlLexer::peekChar()
{
    const bool atEnd = m_pos >= m_block.size();
    return atEnd ? QChar(QChar::Null) : QChar(m_block.at(m_pos));
}
/**
 * @brief Tells whether a character is one of the single-character "self" tokens:
 *        , ( ) [ ] . ; : + - * / % ^ < > =
 * @param c character to classify; any type comparable to a char with ==
 * @return true when c equals one of the characters listed above
 */
template <typename C>
inline bool isSelf(C c)
{
    static constexpr char kSelfChars[] = {
        ',', '(', ')', '[', ']', '.',
        ';', ':', '+', '-', '*', '/',
        '%', '^', '<', '>', '='
    };
    for (const char candidate : kSelfChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
// Characters allowed in a PostgreSQL operator name: + - * / < > = ~ ! @ # % ^ & | ` ?
// There are a few restrictions on the choice of operator name:
// -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
// A multicharacter operator name cannot end in + or -, unless the name also contains at least one of these characters:
// ~ ! @ # % ^ & | ` ?
// For example, @- is an allowed operator name, but *- is not. This restriction allows PostgreSQL to parse SQL-compliant commands without requiring spaces between tokens.
// The use of => as an operator name is deprecated. It may be disallowed altogether in a future release.
//The operator != is mapped to <> on input, so these two names are always equivalent.
/**
 * @brief Tells whether a character may be part of an operator name:
 *        + - * / < > = ~ ! @ # % ^ & | ` ?
 * @param c character to classify; any type comparable to a char with ==
 * @return true when c equals one of the characters listed above
 */
template <typename C>
inline bool isOperatorChar(C c)
{
    static constexpr char kOperatorChars[] = {
        '+', '-', '*', '/', '<', '>', '=',
        '~', '!', '@', '#', '%', '^', '&',
        '|', '`', '?'
    };
    for (const char candidate : kOperatorChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
//typecast "::" IMPLEMENTED
//dot_dot \.\. TODO
//colon_equals ":=" TODO
//equals_greater "=>" TODO
//less_equals "<=" TODO
//greater_equals ">=" TODO
//less_greater "<>" TODO
//not_equals "!=" TODO
// See also the PostgreSQL lexer source, src/backend/parser/scan.l
// (originally noted here as C:\Prog\postgresql-9.6.4\src\backend\parser\main.l)
/**
* @brief NextBasicToken
* @param in
* @param ofs
* @param start
* @param length
* @return false when input seems invalid, it will return what it did recognize but something
* wasn't right, parser should try to recover
*/
bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
while (true) {
startpos = m_pos;
QChar c = nextChar();
// if (LexerState::Null == m_state) {
if (c.isSpace()) {
// Just skip whitespace
continue;
}
if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar();
for (;;) {
c = peekChar();
if (c != QChar::Null && c != '\n')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Comment;
return true;
}
if (c == ':') {
c = peekChar();
if (c == ':') {
nextChar();
length = m_pos - startpos;
tokentype = BasicTokenType::Cast;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
if (isSelf(c)) {
length = m_pos - startpos;
if (c == ',')
tokentype = BasicTokenType::Comma;
else
tokentype = BasicTokenType::Self;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
if (isOperatorChar(c)) {
while (true) {
QChar c = peekChar();
if (isOperatorChar(c)) {
nextChar();
}
else {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::Operator;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
}
else if (c == '\'') {
// Single quoted string so it's an SQL text literal
return parseSingleQuotedString(startpos, length, tokentype);
}
else if (c == '"') {
// Double quoted identifier
return parseDoubleQuotedIdentifier(startpos, length, tokentype);
}
// else if (c == '/' && peekChar() == '*') {
// nextChar();
// m_state = LexerState::InBlockComment;
// }
else if (c == QChar::Null) {
break;
}
else if (c == '$') {
return parseDollarQuote(startpos, length, tokentype, out);
}
else {
// Undetermined symbol
for (;;) {
c = peekChar();
if (c.isLetterOrNumber() || c == '_')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::Symbol;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
// }
// else if (LexerState::InBlockComment == m_state) {
// if (c == QChar::Null) {
// // eof current buffer, we need to return state so
// if (m_pos == startpos) {
// break;
// }
// else {
// length = m_pos - startpos;
// tokentype = BasicTokenType::OpenBlockComment;
// return true;
// }
// }
// else if (c == '*') {
// nextChar();
// if (peekChar() == '/') {
// nextChar();
// length = m_pos - startpos;
// tokentype = BasicTokenType::BlockComment;
// m_state = LexerState::Null;
// return true;
// }
// }
// }
}
return false;
}
bool SqlLexer::parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype)
{
while (true) {
QChar c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
nextChar();
if (c == '\'') {
// maybe end of string literal
if (peekChar() == '\'') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedString;
return true;
}
}
}
}
bool SqlLexer::parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype)
{
while (true) {
QChar c = peekChar();
if (c == QChar::Null || c == '\n') {
// unexpected end, pretend nothings wrong
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
nextChar();
if (c == '"') {
// maybe end of string literal
if (peekChar() == '"') {
// Nope, just double quote to escape quote
nextChar(); // eat it
}
else {
length = m_pos - startpos;
tokentype = BasicTokenType::QuotedIdentifier;
return true;
}
}
}
}
bool SqlLexer::parseDollarQuote(int startpos, int &length, BasicTokenType &tokentype, QString &out)
{
QChar c = nextChar();
if (c.isDigit()) {
for (;;) {
c = peekChar();
if (c.isDigit())
nextChar();
else
break;
}
tokentype = BasicTokenType::Parameter;
length = m_pos - startpos;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
2018-12-16 15:38:32 +01:00
if (c.isLetter()) {
// is this a dollar quote?
while (true) {
c = nextChar();
if (c == '$') {
// Found valid dollar quote
tokentype = BasicTokenType::DollarQuote;
length = m_pos - startpos;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
2018-12-16 15:38:32 +01:00
if (!c.isLetter()) {
// ERROR, unallowed character
tokentype = BasicTokenType::None;
length = m_pos - startpos;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return false;
}
}
}
return false;
}