323 lines
8.5 KiB
C++
323 lines
8.5 KiB
C++
#include "SqlLexer.h"
|
|
|
|
/**
 * @brief Creates a lexer over a single block of SQL text.
 *
 * @param block             text to tokenize; taken by value and moved into the lexer
 * @param currentstate      lexer state to start from, stored in m_state
 * @param return_whitespace when true, nextBasicToken() reports NewLine and
 *                          WhiteSpace tokens instead of skipping them
 *
 * NOTE(review): m_pos is not set here; presumably it has an in-class
 * initializer in SqlLexer.h - confirm.
 */
SqlLexer::SqlLexer(QString block, LexerState currentstate, bool return_whitespace)
    : m_block(std::move(block)),
      m_state(currentstate),
      m_returnWhitespace(return_whitespace)
{
}
|
|
|
|
/**
 * @brief Consumes and returns the character at the current position.
 *
 * @return the character at m_pos (the position is then advanced by one), or
 *         QChar::Null without advancing when m_pos is not below the block size.
 */
QChar SqlLexer::nextChar()
{
    if (m_pos >= m_block.size())
        return QChar::Null;

    const QChar current = m_block.at(m_pos);
    ++m_pos;
    return current;
}
|
|
|
|
/**
 * @brief Returns the character at the current position without consuming it.
 *
 * @return the character at m_pos, or QChar::Null when m_pos is not below the
 *         block size. The position is never changed.
 */
QChar SqlLexer::peekChar()
{
    const bool hasMore = m_pos < m_block.size();
    return hasMore ? m_block.at(m_pos) : QChar(QChar::Null);
}
|
|
|
|
|
|
|
|
// Scanner pattern this mirrors:
//self ,()\[\].;\:\+\-\*\/\%\^\<\>\=
//
// Tells whether c is one of the single characters that form a token on their
// own. C only has to be comparable with a plain char through operator==.
template <typename C>
inline bool isSelf(C c)
{
    static constexpr char kSelfChars[] = {
        ',', '(', ')', '[', ']', '.',
        ';', ':', '+', '-', '*', '/',
        '%', '^', '<', '>', '='
    };

    for (const char candidate : kSelfChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
|
|
|
|
|
|
// Characters that may appear in an operator name:
//   + - * / < > = ~ ! @ # % ^ & | ` ?
//
// There are a few restrictions on the choice of an operator name:
//  - "--" and "/*" cannot appear anywhere in an operator name, since they
//    will be taken as the start of a comment.
//  - A multicharacter operator name cannot end in + or -, unless the name
//    also contains at least one of these characters:
//        ~ ! @ # % ^ & | ` ?
//    For example, @- is an allowed operator name, but *- is not. This
//    restriction allows PostgreSQL to parse SQL-compliant commands without
//    requiring spaces between tokens.
//  - The use of => as an operator name is deprecated. It may be disallowed
//    altogether in a future release.
// The operator != is mapped to <> on input, so these two names are always
// equivalent.
//
// This helper only answers whether c belongs to the character set above; it
// does not check any of the naming restrictions.
template <typename C>
inline bool isOperatorChar(C c)
{
    static constexpr char kOperatorChars[] = {
        '+', '-', '*', '/', '<', '>', '=',
        '~', '!', '@', '#', '%', '^', '&',
        '|', '`', '?'
    };

    for (const char candidate : kOperatorChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
|
|
|
|
//typecast "::" IMPLEMENTED
|
|
//dot_dot \.\. TODO
|
|
//colon_equals ":=" TODO
|
|
//equals_greater "=>" TODO
|
|
//less_equals "<=" TODO
|
|
//greater_equals ">=" TODO
|
|
//less_greater "<>" TODO
|
|
//not_equals "!=" TODO
|
|
|
|
// See also C:\Prog\postgresql-9.6.4\src\backend\parser\main.l
|
|
|
|
/**
|
|
* @brief NextBasicToken
|
|
* @param in
|
|
* @param ofs
|
|
* @param start
|
|
* @param length
|
|
* @return false when input seems invalid, it will return what it did recognize but something
|
|
* wasn't right, parser should try to recover
|
|
*/
|
|
bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
|
|
{
|
|
// Basically chops based on white space
|
|
// it does also recognize comments and quoted strings/identifiers
|
|
while (true) {
|
|
startpos = m_pos;
|
|
QChar c = nextChar();
|
|
// if (LexerState::Null == m_state) {
|
|
if (c == '\n') {
|
|
if (m_returnWhitespace) {
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::NewLine;
|
|
out = "\n";
|
|
return true;
|
|
}
|
|
}
|
|
else if (c.isSpace()) {
|
|
// Just skip whitespace
|
|
if (m_returnWhitespace) {
|
|
for (;;) {
|
|
c = peekChar();
|
|
if (c != QChar::Null && c.isSpace() && c != '\n')
|
|
nextChar();
|
|
else
|
|
break;
|
|
}
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::WhiteSpace;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
}
|
|
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
|
|
// Loop till end of line or end of block
|
|
c = nextChar();
|
|
for (;;) {
|
|
c = peekChar();
|
|
if (c != QChar::Null && c != '\n')
|
|
nextChar();
|
|
else
|
|
break;
|
|
}
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::Comment;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
else if (c == ':') {
|
|
c = peekChar();
|
|
if (c == ':') {
|
|
nextChar();
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::Cast;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
}
|
|
else if (isSelf(c)) {
|
|
length = m_pos - startpos;
|
|
if (c == ',')
|
|
tokentype = BasicTokenType::Comma;
|
|
else
|
|
tokentype = BasicTokenType::Self;
|
|
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
else if (isOperatorChar(c)) {
|
|
while (true) {
|
|
QChar c = peekChar();
|
|
if (isOperatorChar(c)) {
|
|
nextChar();
|
|
}
|
|
else {
|
|
// unexpected end, pretend nothings wrong
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::Operator;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
else if (c == '\'') {
|
|
// Single quoted string so it's an SQL text literal
|
|
if (parseSingleQuotedString(startpos, length, tokentype)) {
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
else if (c == '"') {
|
|
// Double quoted identifier
|
|
if (parseDoubleQuotedIdentifier(startpos, length, tokentype)) {
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
// else if (c == '/' && peekChar() == '*') {
|
|
// nextChar();
|
|
// m_state = LexerState::InBlockComment;
|
|
// }
|
|
else if (c == QChar::Null) {
|
|
length = 0;
|
|
tokentype = BasicTokenType::End;
|
|
return true;
|
|
}
|
|
else if (c == '$') {
|
|
return parseDollarQuote(startpos, length, tokentype, out);
|
|
}
|
|
else {
|
|
// Undetermined symbol
|
|
for (;;) {
|
|
c = peekChar();
|
|
if (c.isLetterOrNumber() || c == '_')
|
|
nextChar();
|
|
else
|
|
break;
|
|
}
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::Symbol;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
// }
|
|
// else if (LexerState::InBlockComment == m_state) {
|
|
// if (c == QChar::Null) {
|
|
// // eof current buffer, we need to return state so
|
|
// if (m_pos == startpos) {
|
|
// break;
|
|
// }
|
|
// else {
|
|
// length = m_pos - startpos;
|
|
// tokentype = BasicTokenType::OpenBlockComment;
|
|
// return true;
|
|
// }
|
|
// }
|
|
// else if (c == '*') {
|
|
// nextChar();
|
|
// if (peekChar() == '/') {
|
|
// nextChar();
|
|
// length = m_pos - startpos;
|
|
// tokentype = BasicTokenType::BlockComment;
|
|
// m_state = LexerState::Null;
|
|
// return true;
|
|
// }
|
|
// }
|
|
// }
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
bool SqlLexer::parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype)
|
|
{
|
|
while (true) {
|
|
QChar c = peekChar();
|
|
if (c == QChar::Null || c == '\n') {
|
|
// unexpected end, pretend nothings wrong
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::QuotedString;
|
|
return true;
|
|
}
|
|
|
|
nextChar();
|
|
if (c == '\'') {
|
|
// maybe end of string literal
|
|
if (peekChar() == '\'') {
|
|
// Nope, just double quote to escape quote
|
|
nextChar(); // eat it
|
|
}
|
|
else {
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::QuotedString;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
bool SqlLexer::parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype)
|
|
{
|
|
while (true) {
|
|
QChar c = peekChar();
|
|
if (c == QChar::Null || c == '\n') {
|
|
// unexpected end, pretend nothings wrong
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::QuotedIdentifier;
|
|
return true;
|
|
}
|
|
|
|
nextChar();
|
|
if (c == '"') {
|
|
// maybe end of string literal
|
|
if (peekChar() == '"') {
|
|
// Nope, just double quote to escape quote
|
|
nextChar(); // eat it
|
|
}
|
|
else {
|
|
length = m_pos - startpos;
|
|
tokentype = BasicTokenType::QuotedIdentifier;
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool SqlLexer::parseDollarQuote(int startpos, int &length, BasicTokenType &tokentype, QString &out)
|
|
{
|
|
QChar c = nextChar();
|
|
if (c.isDigit()) {
|
|
for (;;) {
|
|
c = peekChar();
|
|
if (c.isDigit())
|
|
nextChar();
|
|
else
|
|
break;
|
|
}
|
|
tokentype = BasicTokenType::Parameter;
|
|
length = m_pos - startpos;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
|
|
if (c.isLetter()) {
|
|
// is this a dollar quote?
|
|
while (true) {
|
|
c = nextChar();
|
|
if (c == '$') {
|
|
// Found valid dollar quote
|
|
tokentype = BasicTokenType::DollarQuote;
|
|
length = m_pos - startpos;
|
|
out = m_block.mid(startpos, length);
|
|
return true;
|
|
}
|
|
|
|
if (!c.isLetter()) {
|
|
// ERROR, unallowed character
|
|
tokentype = BasicTokenType::None;
|
|
length = m_pos - startpos;
|
|
out = m_block.mid(startpos, length);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|