#include "SqlLexer.h"

/**
 * @brief Creates a lexer over one block of SQL text.
 * @param block         Text to tokenize; it is moved into the lexer.
 * @param currentstate  Lexer state to start in (stored in m_state).
 */
SqlLexer::SqlLexer(QString block, LexerState currentstate)
    : m_block(std::move(block))
    , m_state(currentstate)
{}

/**
 * @brief Consumes and returns the character at the read position.
 * @return The consumed character, or QChar::Null when the read position is at
 *         the end of the block (the position is not advanced in that case).
 */
QChar SqlLexer::nextChar()
{
    QChar result = QChar::Null;
    if (m_pos < m_block.size()) {
        result = m_block.at(m_pos++);
    }
    return result;
}

/**
 * @brief Returns the character at the read position without consuming it.
 * @return The next character, or QChar::Null at the end of the block.
 */
QChar SqlLexer::peekChar()
{
    QChar result = QChar::Null;
    if (m_pos < m_block.size()) {
        result = m_block.at(m_pos);
    }
    return result;
}

// "self" characters, each of which is returned as a single-character token:
//   , ( ) [ ] . ; : + - * / % ^ < > =
template <class C>
inline bool isSelf(C c)
{
    return c == ',' || c == '(' || c == ')' || c == '[' || c == ']'
        || c == '.' || c == ';' || c == ':' || c == '+' || c == '-'
        || c == '*' || c == '/' || c == '%' || c == '^' || c == '<'
        || c == '>' || c == '=';
}

// Characters that may appear in an operator name:
//   + - * / < > = ~ ! @ # % ^ & | ` ?
// There are a few restrictions on the choice of name:
//  - "--" and "/*" cannot appear anywhere in an operator name, since they will
//    be taken as the start of a comment.
//  - A multicharacter operator name cannot end in + or -, unless the name also
//    contains at least one of these characters:  ~ ! @ # % ^ & | ` ?
//    For example, @- is an allowed operator name, but *- is not. This
//    restriction allows PostgreSQL to parse SQL-compliant commands without
//    requiring spaces between tokens.
//  - The use of => as an operator name is deprecated. It may be disallowed
//    altogether in a future release.
// The operator != is mapped to <> on input, so these two names are always
// equivalent.
// NOTE: none of these restrictions are enforced by this lexer.
template <class C>
inline bool isOperatorChar(C c)
{
    return c == '+' || c == '-' || c == '*' || c == '/' || c == '<'
        || c == '>' || c == '=' || c == '~' || c == '!' || c == '@'
        || c == '#' || c == '%' || c == '^' || c == '&' || c == '|'
        || c == '`' || c == '?';
}

// Multi-character tokens and their status in this lexer:
//   typecast        "::"    IMPLEMENTED
//   dot_dot         \.\.    TODO
//   colon_equals    ":="    TODO
//   equals_greater  "=>"    TODO
//   less_equals     "<="    TODO
//   greater_equals  ">="    TODO
//   less_greater    "<>"    TODO
//   not_equals      "!="    TODO
// See also C:\Prog\postgresql-9.6.4\src\backend\parser\main.l

/**
 * @brief Reads the next basic token from the block.
 *
 * Splits the block on whitespace and recognizes "--" line comments, the "::"
 * cast, single-character "self" tokens, operator character runs, single-quoted
 * strings, double-quoted identifiers, "$"-prefixed parameters / dollar-quote
 * tags and generic symbols. The block-comment state machine is currently
 * disabled (see the commented-out code below).
 *
 * @param startpos   Receives the offset in the block where the token starts.
 * @param length     Receives the number of characters in the token.
 * @param tokentype  Receives the kind of token that was recognized.
 * @param out        Receives the token text (the characters
 *                   [startpos, startpos + length) of the block).
 * @return true when a token was produced. false when the end of the block was
 *         reached (only startpos is written in that case) or when the input
 *         seems invalid; in the latter case what was recognized is still
 *         reported and the caller should try to recover.
 */
bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
{
    // Basically chops based on white space;
    // it does also recognize comments and quoted strings/identifiers.
    while (true) {
        startpos = m_pos;
        const QChar c = nextChar();
//      if (LexerState::Null == m_state) {
        if (c.isSpace()) {
            // Just skip whitespace
            continue;
        }
        if (c == '-' && peekChar() == '-') {
            // Two dashes: start of a comment that runs to the end of the line
            // or the end of the block. The newline itself is not consumed.
            nextChar(); // eat the second dash
            for (;;) {
                const QChar next = peekChar();
                if (next == QChar::Null || next == '\n')
                    break;
                nextChar();
            }
            length = m_pos - startpos;
            tokentype = BasicTokenType::Comment;
            out = m_block.mid(startpos, length);
            return true;
        }
        // Only peek here: when the colon is NOT followed by a second colon,
        // `c` must still be ':' so that it is classified as a self token
        // below instead of the character that follows it.
        if (c == ':' && peekChar() == ':') {
            nextChar();
            length = m_pos - startpos;
            tokentype = BasicTokenType::Cast;
            out = m_block.mid(startpos, length);
            return true;
        }
        if (isSelf(c)) {
            // Checked before isOperatorChar, so characters that are in both
            // sets (+ - * / < > = % ^) always become one-character tokens.
            length = m_pos - startpos;
            tokentype = (c == ',') ? BasicTokenType::Comma : BasicTokenType::Self;
            out = m_block.mid(startpos, length);
            return true;
        }
        if (isOperatorChar(c)) {
            // Extend the operator over every following operator character.
            while (isOperatorChar(peekChar())) {
                nextChar();
            }
            length = m_pos - startpos;
            tokentype = BasicTokenType::Operator;
            out = m_block.mid(startpos, length);
            return true;
        }
        if (c == '\'') {
            // Single quoted string so it's an SQL text literal
            const bool ok = parseSingleQuotedString(startpos, length, tokentype);
            out = m_block.mid(startpos, length);
            return ok;
        }
        if (c == '"') {
            // Double quoted identifier
            const bool ok = parseDoubleQuotedIdentifier(startpos, length, tokentype);
            out = m_block.mid(startpos, length);
            return ok;
        }
//      else if (c == '/' && peekChar() == '*') {
//          nextChar();
//          m_state = LexerState::InBlockComment;
//      }
        if (c == QChar::Null) {
            // End of the block, nothing left to tokenize
            break;
        }
        if (c == '$') {
            return parseDollarQuote(startpos, length, tokentype, out);
        }

        // Undetermined symbol: the first character is taken as is, the token
        // then extends over the following letters, digits and underscores.
        for (;;) {
            const QChar next = peekChar();
            if (next.isLetterOrNumber() || next == '_')
                nextChar();
            else
                break;
        }
        length = m_pos - startpos;
        tokentype = BasicTokenType::Symbol;
        out = m_block.mid(startpos, length);
        return true;
//      }
//      else if (LexerState::InBlockComment == m_state) {
//          if (c == QChar::Null) {
//              // eof current buffer, we need to return state so
//              if (m_pos == startpos) {
//                  break;
//              }
//              else {
//                  length = m_pos - startpos;
//                  tokentype = BasicTokenType::OpenBlockComment;
//                  return true;
//              }
//          }
//          else if (c == '*') {
//              nextChar();
//              if (peekChar() == '/') {
//                  nextChar();
//                  length = m_pos - startpos;
//                  tokentype = BasicTokenType::BlockComment;
//                  m_state = LexerState::Null;
//                  return true;
//              }
//          }
//      }
    }
    return false;
}

/**
 * @brief Consumes the remainder of a single-quoted string literal.
 *
 * Expects the opening quote to have been consumed already. A doubled quote
 * ('') inside the literal is an escaped quote and does not end it. The literal
 * also ends, without error, at a newline or at the end of the block; the
 * newline is not consumed.
 *
 * @param startpos   Offset of the opening quote.
 * @param length     Receives the length of the literal, counted from startpos.
 * @param tokentype  Receives BasicTokenType::QuotedString.
 * @return Always true.
 */
bool SqlLexer::parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype)
{
    while (true) {
        const QChar c = peekChar();
        if (c == QChar::Null || c == '\n') {
            // Unterminated literal: pretend nothing is wrong
            break;
        }
        nextChar();
        if (c == '\'') {
            // Maybe the end of the string literal
            if (peekChar() == '\'') {
                // Nope, a doubled quote is an escaped quote
                nextChar(); // eat it
            }
            else {
                break;
            }
        }
    }
    length = m_pos - startpos;
    tokentype = BasicTokenType::QuotedString;
    return true;
}

/**
 * @brief Consumes the remainder of a double-quoted identifier.
 *
 * Expects the opening quote to have been consumed already. A doubled quote
 * ("") inside the identifier is an escaped quote and does not end it. The
 * identifier also ends, without error, at a newline or at the end of the
 * block; the newline is not consumed.
 *
 * @param startpos   Offset of the opening quote.
 * @param length     Receives the length of the identifier, counted from
 *                   startpos.
 * @param tokentype  Receives BasicTokenType::QuotedIdentifier.
 * @return Always true.
 */
bool SqlLexer::parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype)
{
    while (true) {
        const QChar c = peekChar();
        if (c == QChar::Null || c == '\n') {
            // Unterminated identifier: pretend nothing is wrong
            break;
        }
        nextChar();
        if (c == '"') {
            // Maybe the end of the quoted identifier
            if (peekChar() == '"') {
                // Nope, a doubled quote is an escaped quote
                nextChar(); // eat it
            }
            else {
                break;
            }
        }
    }
    length = m_pos - startpos;
    tokentype = BasicTokenType::QuotedIdentifier;
    return true;
}

/**
 * @brief Parses what follows a '$': a positional parameter or a dollar-quote tag.
 *
 * Expects the '$' to have been consumed already. Recognizes
 *  - "$" followed by digits as a parameter ($1, $23, ...),
 *  - "$$" as a dollar quote with an empty tag,
 *  - "$tag$" as a dollar quote, where the tag starts with a letter or
 *    underscore and continues with letters, digits and underscores.
 * Only the opening delimiter is consumed, not the quoted body.
 *
 * @param startpos   Offset of the '$'.
 * @param length     Receives the length of the token, counted from startpos.
 * @param tokentype  Receives Parameter, DollarQuote, or None on failure.
 * @param out        Receives the text consumed for the token.
 * @return true when a parameter or dollar-quote tag was recognized, false
 *         otherwise. On failure the offending character (if any) has been
 *         consumed and is included in the reported text.
 */
bool SqlLexer::parseDollarQuote(int startpos, int &length, BasicTokenType &tokentype, QString &out)
{
    // Reports everything consumed since startpos as a token of the given type.
    auto finish = [&](BasicTokenType type, bool ok) {
        tokentype = type;
        length = m_pos - startpos;
        out = m_block.mid(startpos, length);
        return ok;
    };

    QChar c = nextChar();
    if (c.isDigit()) {
        while (peekChar().isDigit()) {
            nextChar();
        }
        return finish(BasicTokenType::Parameter, true);
    }
    if (c == '$') {
        // "$$": dollar quote with an empty tag
        return finish(BasicTokenType::DollarQuote, true);
    }
    if (c.isLetter() || c == '_') {
        // Is this a dollar quote? Scan the tag up to the closing '$'.
        while (true) {
            c = nextChar();
            if (c == '$') {
                // Found valid dollar quote
                return finish(BasicTokenType::DollarQuote, true);
            }
            if (!c.isLetterOrNumber() && c != '_') {
                // ERROR, character not allowed in a tag (or end of block)
                return finish(BasicTokenType::None, false);
            }
        }
    }
    // '$' followed by something that starts neither a parameter nor a tag
    return finish(BasicTokenType::None, false);
}