2017-02-26 19:29:50 +01:00
# include " SqlLexer.h "
2019-08-19 13:52:23 +02:00
/**
 * @brief Creates a lexer over one block of SQL text.
 *
 * @param block             text to tokenize; taken by value and moved into m_block
 * @param currentstate      initial value for m_state
 * @param return_whitespace stored in m_returnWhitespace; when true,
 *                          nextBasicToken() reports whitespace and newlines
 *                          as tokens instead of skipping them
 */
SqlLexer::SqlLexer(QString block, LexerState currentstate, bool return_whitespace)
    : m_block(std::move(block))
    , m_state(currentstate)
    , m_returnWhitespace(return_whitespace)
{
}
/**
 * @brief Consumes and returns the character at the current position.
 *
 * @return the character at m_pos (m_pos is then advanced by one), or
 *         QChar::Null when m_pos is at or past the end of the block, in
 *         which case m_pos is left unchanged.
 */
QChar SqlLexer::nextChar()
{
    if (m_pos >= m_block.size())
        return QChar::Null;

    const QChar current = m_block.at(m_pos);
    ++m_pos;
    return current;
}
/**
 * @brief Returns the character at the current position without consuming it.
 *
 * @return the character at m_pos, or QChar::Null when m_pos is at or past
 *         the end of the block. m_pos is never modified.
 */
QChar SqlLexer::peekChar()
{
    const bool exhausted = !(m_pos < m_block.size());
    if (exhausted)
        return QChar::Null;
    return m_block.at(m_pos);
}
2018-01-06 21:18:28 +01:00
2018-02-05 22:23:28 +01:00
/**
 * @brief Tells whether a character belongs to the "self" set.
 *
 * The set is , ( ) [ ] . ; : + - * / % ^ < > =
 * (pattern form: ,()\[\].;\:\+\-\*\/\%\^\<\>\= )
 *
 * @param c character to classify; any type comparable with a char literal
 * @return true when c equals one of the characters listed above
 */
template <typename C>
inline bool isSelf(C c)
{
    // Explicit element list: a string literal would add a trailing '\0'
    // and make the null character match.
    const char selfChars[] = {
        ',', '(', ')', '[', ']', '.',
        ';', ':', '+', '-', '*', '/',
        '%', '^', '<', '>', '='
    };
    for (const char candidate : selfChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
2018-01-06 21:18:28 +01:00
//+ - * / < > = ~ ! @ # % ^ & | ` ?
//There are a few restrictions on your choice of name:
// -- and /* cannot appear anywhere in an operator name, since they will be taken as the start of a comment.
// A multicharacter operator name cannot end in + or -, unless the name also contains at least one of these characters:
// ~ ! @ # % ^ & | ` ?
// For example, @- is an allowed operator name, but *- is not. This restriction allows PostgreSQL to parse SQL-compliant commands without requiring spaces between tokens.
// The use of => as an operator name is deprecated. It may be disallowed altogether in a future release.
//The operator != is mapped to <> on input, so these two names are always equivalent.
/**
 * @brief Tells whether a character may be part of an operator name.
 *
 * The accepted characters are + - * / < > = ~ ! @ # % ^ & | ` ?
 *
 * @param c character to classify; any type comparable with a char literal
 * @return true when c equals one of the characters listed above
 */
template <typename C>
inline bool isOperatorChar(C c)
{
    // Explicit element list: a string literal would add a trailing '\0'
    // and make the null character match.
    const char operatorChars[] = {
        '+', '-', '*', '/', '<', '>', '=',
        '~', '!', '@', '#', '%', '^', '&',
        '|', '`', '?'
    };
    for (const char candidate : operatorChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
2018-02-05 22:23:28 +01:00
//typecast "::" IMPLEMENTED
//dot_dot \.\. TODO
//colon_equals ":=" TODO
//equals_greater "=>" TODO
//less_equals "<=" TODO
//greater_equals ">=" TODO
//less_greater "<>" TODO
//not_equals "!=" TODO
// See also src/backend/parser/scan.l in the PostgreSQL source tree (e.g. postgresql-9.6.4)
2018-01-06 21:18:28 +01:00
2017-02-26 19:29:50 +01:00
/**
* @ brief NextBasicToken
* @ param in
* @ param ofs
* @ param start
* @ param length
2017-09-10 10:13:58 +02:00
* @ return false when input seems invalid , it will return what it did recognize but something
* wasn ' t right , parser should try to recover
2017-02-26 19:29:50 +01:00
*/
bool SqlLexer : : nextBasicToken ( int & startpos , int & length , BasicTokenType & tokentype , QString & out )
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
while ( true ) {
startpos = m_pos ;
QChar c = nextChar ( ) ;
// if (LexerState::Null == m_state) {
2019-08-19 13:52:23 +02:00
if ( c = = ' \n ' ) {
if ( m_returnWhitespace ) {
length = m_pos - startpos ;
tokentype = BasicTokenType : : NewLine ;
out = " \n " ;
return true ;
}
}
else if ( c . isSpace ( ) ) {
2017-02-26 19:29:50 +01:00
// Just skip whitespace
2019-08-19 13:52:23 +02:00
if ( m_returnWhitespace ) {
for ( ; ; ) {
c = peekChar ( ) ;
if ( c ! = QChar : : Null & & c . isSpace ( ) & & c ! = ' \n ' )
nextChar ( ) ;
else
break ;
}
length = m_pos - startpos ;
tokentype = BasicTokenType : : WhiteSpace ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
2017-02-26 19:29:50 +01:00
}
2019-08-19 13:52:23 +02:00
else if ( c = = ' - ' & & peekChar ( ) = = ' - ' ) { // two dashes, start of comment
2017-02-26 19:29:50 +01:00
// Loop till end of line or end of block
c = nextChar ( ) ;
for ( ; ; ) {
c = peekChar ( ) ;
if ( c ! = QChar : : Null & & c ! = ' \n ' )
nextChar ( ) ;
else
break ;
}
length = m_pos - startpos ;
tokentype = BasicTokenType : : Comment ;
2019-08-19 13:52:23 +02:00
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
2017-02-26 19:29:50 +01:00
return true ;
}
2019-08-19 13:52:23 +02:00
else if ( c = = ' : ' ) {
2018-02-05 22:23:28 +01:00
c = peekChar ( ) ;
if ( c = = ' : ' ) {
nextChar ( ) ;
length = m_pos - startpos ;
tokentype = BasicTokenType : : Cast ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
}
2019-08-19 13:52:23 +02:00
else if ( isSelf ( c ) ) {
2018-02-05 22:23:28 +01:00
length = m_pos - startpos ;
2018-06-19 19:52:56 +02:00
if ( c = = ' , ' )
tokentype = BasicTokenType : : Comma ;
else
tokentype = BasicTokenType : : Self ;
2018-02-05 22:23:28 +01:00
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
2019-08-19 13:52:23 +02:00
else if ( isOperatorChar ( c ) ) {
2018-02-05 22:23:28 +01:00
while ( true ) {
QChar c = peekChar ( ) ;
if ( isOperatorChar ( c ) ) {
nextChar ( ) ;
}
else {
// unexpected end, pretend nothings wrong
length = m_pos - startpos ;
tokentype = BasicTokenType : : Operator ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
}
}
2017-02-26 19:29:50 +01:00
else if ( c = = ' \' ' ) {
// Single quoted string so it's an SQL text literal
2019-08-19 13:52:23 +02:00
if ( parseSingleQuotedString ( startpos , length , tokentype ) ) {
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
return false ;
2017-02-26 19:29:50 +01:00
}
else if ( c = = ' " ' ) {
// Double quoted identifier
2019-08-19 13:52:23 +02:00
if ( parseDoubleQuotedIdentifier ( startpos , length , tokentype ) ) {
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
return false ;
2017-02-26 19:29:50 +01:00
}
// else if (c == '/' && peekChar() == '*') {
// nextChar();
// m_state = LexerState::InBlockComment;
// }
else if ( c = = QChar : : Null ) {
2019-01-28 20:52:39 +01:00
length = 0 ;
tokentype = BasicTokenType : : End ;
return true ;
2017-02-26 19:29:50 +01:00
}
2017-09-10 10:13:58 +02:00
else if ( c = = ' $ ' ) {
2017-12-26 07:32:52 +01:00
return parseDollarQuote ( startpos , length , tokentype , out ) ;
2017-09-10 10:13:58 +02:00
}
2017-02-26 19:29:50 +01:00
else {
// Undetermined symbol
for ( ; ; ) {
c = peekChar ( ) ;
if ( c . isLetterOrNumber ( ) | | c = = ' _ ' )
nextChar ( ) ;
else
break ;
}
length = m_pos - startpos ;
tokentype = BasicTokenType : : Symbol ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
// }
// else if (LexerState::InBlockComment == m_state) {
// if (c == QChar::Null) {
// // eof current buffer, we need to return state so
// if (m_pos == startpos) {
// break;
// }
// else {
// length = m_pos - startpos;
// tokentype = BasicTokenType::OpenBlockComment;
// return true;
// }
// }
// else if (c == '*') {
// nextChar();
// if (peekChar() == '/') {
// nextChar();
// length = m_pos - startpos;
// tokentype = BasicTokenType::BlockComment;
// m_state = LexerState::Null;
// return true;
// }
// }
// }
}
return false ;
}
2017-12-26 07:32:52 +01:00
bool SqlLexer : : parseSingleQuotedString ( int startpos , int & length , BasicTokenType & tokentype )
{
while ( true ) {
QChar c = peekChar ( ) ;
if ( c = = QChar : : Null | | c = = ' \n ' ) {
// unexpected end, pretend nothings wrong
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedString ;
return true ;
}
2018-12-09 21:10:35 +01:00
nextChar ( ) ;
if ( c = = ' \' ' ) {
// maybe end of string literal
if ( peekChar ( ) = = ' \' ' ) {
// Nope, just double quote to escape quote
nextChar ( ) ; // eat it
}
else {
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedString ;
return true ;
}
}
2017-12-26 07:32:52 +01:00
}
}
bool SqlLexer : : parseDoubleQuotedIdentifier ( int startpos , int & length , BasicTokenType & tokentype )
{
while ( true ) {
QChar c = peekChar ( ) ;
if ( c = = QChar : : Null | | c = = ' \n ' ) {
// unexpected end, pretend nothings wrong
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedIdentifier ;
return true ;
}
2018-12-09 21:10:35 +01:00
nextChar ( ) ;
if ( c = = ' " ' ) {
// maybe end of string literal
if ( peekChar ( ) = = ' " ' ) {
// Nope, just double quote to escape quote
nextChar ( ) ; // eat it
}
else {
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedIdentifier ;
return true ;
}
}
2017-12-26 07:32:52 +01:00
}
}
bool SqlLexer : : parseDollarQuote ( int startpos , int & length , BasicTokenType & tokentype , QString & out )
{
QChar c = nextChar ( ) ;
if ( c . isDigit ( ) ) {
for ( ; ; ) {
c = peekChar ( ) ;
if ( c . isDigit ( ) )
nextChar ( ) ;
else
break ;
}
tokentype = BasicTokenType : : Parameter ;
length = m_pos - startpos ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
2018-12-16 15:38:32 +01:00
if ( c . isLetter ( ) ) {
2017-12-26 07:32:52 +01:00
// is this a dollar quote?
while ( true ) {
c = nextChar ( ) ;
if ( c = = ' $ ' ) {
// Found valid dollar quote
tokentype = BasicTokenType : : DollarQuote ;
length = m_pos - startpos ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
2018-12-16 15:38:32 +01:00
if ( ! c . isLetter ( ) ) {
2017-12-26 07:32:52 +01:00
// ERROR, unallowed character
tokentype = BasicTokenType : : None ;
length = m_pos - startpos ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return false ;
}
}
}
return false ;
}