2017-02-26 19:29:50 +01:00
# include " SqlLexer.h "
/**
 * @brief Creates a lexer over one block of SQL text.
 * @param block         text to tokenize; stored in m_block
 * @param currentstate  lexer state to start from; stored in m_state
 *
 * NOTE(review): m_pos is not set here, presumably it has an in-class
 * initializer in the header -- confirm.
 */
SqlLexer::SqlLexer(const QString &block, LexerState currentstate)
    : m_block(block), m_state(currentstate)
{
}
/**
 * @brief Consumes and returns the character at the current position.
 * @return the character that was consumed, or QChar::Null when the position
 *         is at (or past) the end of the block; in that case the position
 *         is not advanced.
 */
QChar SqlLexer::nextChar()
{
    if (m_pos >= m_block.size())
        return QChar::Null;

    const QChar current = m_block.at(m_pos);
    m_pos++;
    return current;
}
/**
 * @brief Returns the character at the current position without consuming it.
 * @return the upcoming character, or QChar::Null when the position is at
 *         (or past) the end of the block.
 */
QChar SqlLexer::peekChar()
{
    const bool hasMore = m_pos < m_block.size();
    return hasMore ? m_block.at(m_pos) : QChar(QChar::Null);
}
2018-01-06 21:18:28 +01:00
/**
 * @brief Tells whether @p c is one of the characters an operator name can be
 *        built from:  + - * / < > = ~ ! @ # % ^ & | ` ?
 *
 * Only membership in that set is tested here. The additional naming rules
 * below are NOT enforced by this function:
 *  - "--" and the block comment opener (slash followed by star) cannot appear
 *    anywhere in an operator name, since they are taken as the start of a
 *    comment.
 *  - A multicharacter operator name cannot end in + or -, unless the name
 *    also contains at least one of:  ~ ! @ # % ^ & | ` ?
 *    For example, @- is an allowed operator name, but *- is not. This
 *    restriction allows PostgreSQL to parse SQL-compliant commands without
 *    requiring spaces between tokens.
 *  - The use of => as an operator name is deprecated. It may be disallowed
 *    altogether in a future release.
 *  - The operator != is mapped to <> on input, so these two names are always
 *    equivalent.
 *
 * @tparam C character type that can be compared to a plain char with ==
 * @param c  character to test
 * @return true when c equals one of the operator characters
 */
template <typename C>
inline bool isOperatorChar(C c)
{
    static const char operatorChars[] = {
        '+', '-', '*', '/', '<', '>', '=',
        '~', '!', '@', '#', '%', '^', '&',
        '|', '`', '?'
    };
    for (const char candidate : operatorChars) {
        if (c == candidate)
            return true;
    }
    return false;
}
2017-02-26 19:29:50 +01:00
/**
* @ brief NextBasicToken
* @ param in
* @ param ofs
* @ param start
* @ param length
2017-09-10 10:13:58 +02:00
* @ return false when input seems invalid , it will return what it did recognize but something
* wasn ' t right , parser should try to recover
2017-02-26 19:29:50 +01:00
*/
bool SqlLexer : : nextBasicToken ( int & startpos , int & length , BasicTokenType & tokentype , QString & out )
{
// Basically chops based on white space
// it does also recognize comments and quoted strings/identifiers
while ( true ) {
startpos = m_pos ;
QChar c = nextChar ( ) ;
// if (LexerState::Null == m_state) {
if ( c . isSpace ( ) ) {
// Just skip whitespace
}
else if ( c = = ' - ' & & peekChar ( ) = = ' - ' ) { // two dashes, start of comment
// Loop till end of line or end of block
c = nextChar ( ) ;
for ( ; ; ) {
c = peekChar ( ) ;
if ( c ! = QChar : : Null & & c ! = ' \n ' )
nextChar ( ) ;
else
break ;
}
length = m_pos - startpos ;
tokentype = BasicTokenType : : Comment ;
return true ;
}
else if ( c = = ' \' ' ) {
// Single quoted string so it's an SQL text literal
2017-12-26 07:32:52 +01:00
return parseSingleQuotedString ( startpos , length , tokentype ) ;
2017-02-26 19:29:50 +01:00
}
else if ( c = = ' " ' ) {
// Double quoted identifier
2017-12-26 07:32:52 +01:00
return parseDoubleQuotedIdentifier ( startpos , length , tokentype ) ;
2017-02-26 19:29:50 +01:00
}
// else if (c == '/' && peekChar() == '*') {
// nextChar();
// m_state = LexerState::InBlockComment;
// }
else if ( c = = QChar : : Null ) {
break ;
}
2017-09-10 10:13:58 +02:00
else if ( c = = ' $ ' ) {
2017-12-26 07:32:52 +01:00
return parseDollarQuote ( startpos , length , tokentype , out ) ;
2017-09-10 10:13:58 +02:00
}
2018-01-06 21:18:28 +01:00
else if ( c = = ' : ' ) {
c = peekChar ( ) ;
if ( c = = ' : ' ) {
nextChar ( ) ;
length = m_pos - startpos ;
tokentype = BasicTokenType : : Cast ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
}
2017-02-26 19:29:50 +01:00
else {
// Undetermined symbol
for ( ; ; ) {
c = peekChar ( ) ;
if ( c . isLetterOrNumber ( ) | | c = = ' _ ' )
nextChar ( ) ;
else
break ;
}
length = m_pos - startpos ;
tokentype = BasicTokenType : : Symbol ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
// }
// else if (LexerState::InBlockComment == m_state) {
// if (c == QChar::Null) {
// // eof current buffer, we need to return state so
// if (m_pos == startpos) {
// break;
// }
// else {
// length = m_pos - startpos;
// tokentype = BasicTokenType::OpenBlockComment;
// return true;
// }
// }
// else if (c == '*') {
// nextChar();
// if (peekChar() == '/') {
// nextChar();
// length = m_pos - startpos;
// tokentype = BasicTokenType::BlockComment;
// m_state = LexerState::Null;
// return true;
// }
// }
// }
}
return false ;
}
2017-12-26 07:32:52 +01:00
bool SqlLexer : : parseSingleQuotedString ( int startpos , int & length , BasicTokenType & tokentype )
{
while ( true ) {
QChar c = peekChar ( ) ;
if ( c = = QChar : : Null | | c = = ' \n ' ) {
// unexpected end, pretend nothings wrong
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedString ;
return true ;
}
else {
nextChar ( ) ;
if ( c = = ' \' ' ) {
// maybe end of string literal
if ( peekChar ( ) = = ' \' ' ) {
// Nope, just double quote to escape quote
nextChar ( ) ; // eat it
}
else {
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedString ;
return true ;
}
}
}
}
}
bool SqlLexer : : parseDoubleQuotedIdentifier ( int startpos , int & length , BasicTokenType & tokentype )
{
while ( true ) {
QChar c = peekChar ( ) ;
if ( c = = QChar : : Null | | c = = ' \n ' ) {
// unexpected end, pretend nothings wrong
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedIdentifier ;
return true ;
}
else {
nextChar ( ) ;
if ( c = = ' " ' ) {
// maybe end of string literal
if ( peekChar ( ) = = ' " ' ) {
// Nope, just double quote to escape quote
nextChar ( ) ; // eat it
}
else {
length = m_pos - startpos ;
tokentype = BasicTokenType : : QuotedIdentifier ;
return true ;
}
}
}
}
}
bool SqlLexer : : parseDollarQuote ( int startpos , int & length , BasicTokenType & tokentype , QString & out )
{
QChar c = nextChar ( ) ;
if ( c . isDigit ( ) ) {
for ( ; ; ) {
c = peekChar ( ) ;
if ( c . isDigit ( ) )
nextChar ( ) ;
else
break ;
}
tokentype = BasicTokenType : : Parameter ;
length = m_pos - startpos ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
else if ( c . isLetter ( ) ) {
// is this a dollar quote?
while ( true ) {
c = nextChar ( ) ;
if ( c = = ' $ ' ) {
// Found valid dollar quote
tokentype = BasicTokenType : : DollarQuote ;
length = m_pos - startpos ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return true ;
}
else if ( ! c . isLetter ( ) ) {
// ERROR, unallowed character
tokentype = BasicTokenType : : None ;
length = m_pos - startpos ;
QStringRef sr ( & m_block , startpos , length ) ;
out = sr . toString ( ) ;
return false ;
}
}
}
return false ;
}