Improved generation of C/C++ string literals from a query

Empty lines before and after the query are removed, and trailing whitespace
is stripped from the end of each line. SQL comments are converted to C++-style
(`//`) comments and are placed outside the string literal.

To achieve this, the function now uses SqlLexer to determine which parts of the input are comments.
This required giving the lexer the additional capability to return whitespace and newline tokens.
A few bugs in the lexer were fixed as well.
This commit is contained in:
eelke 2019-08-19 13:52:23 +02:00
parent fbd630489e
commit 48ac8c6bab
7 changed files with 247 additions and 34 deletions

View file

@ -1,8 +1,9 @@
#include "SqlLexer.h" #include "SqlLexer.h"
SqlLexer::SqlLexer(QString block, LexerState currentstate) SqlLexer::SqlLexer(QString block, LexerState currentstate, bool return_whitespace)
: m_block(std::move(block)) : m_block(std::move(block))
, m_state(currentstate) , m_state(currentstate)
, m_returnWhitespace(return_whitespace)
{} {}
QChar SqlLexer::nextChar() QChar SqlLexer::nextChar()
@ -79,11 +80,32 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
startpos = m_pos; startpos = m_pos;
QChar c = nextChar(); QChar c = nextChar();
// if (LexerState::Null == m_state) { // if (LexerState::Null == m_state) {
if (c.isSpace()) { if (c == '\n') {
// Just skip whitespace if (m_returnWhitespace) {
continue; length = m_pos - startpos;
tokentype = BasicTokenType::NewLine;
out = "\n";
return true;
}
} }
if (c == '-' && peekChar() == '-') { // two dashes, start of comment else if (c.isSpace()) {
// Just skip whitespace
if (m_returnWhitespace) {
for (;;) {
c = peekChar();
if (c != QChar::Null && c.isSpace() && c != '\n')
nextChar();
else
break;
}
length = m_pos - startpos;
tokentype = BasicTokenType::WhiteSpace;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
}
else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
// Loop till end of line or end of block // Loop till end of line or end of block
c = nextChar(); c = nextChar();
for (;;) { for (;;) {
@ -95,9 +117,11 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
} }
length = m_pos - startpos; length = m_pos - startpos;
tokentype = BasicTokenType::Comment; tokentype = BasicTokenType::Comment;
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true; return true;
} }
if (c == ':') { else if (c == ':') {
c = peekChar(); c = peekChar();
if (c == ':') { if (c == ':') {
nextChar(); nextChar();
@ -108,7 +132,7 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
return true; return true;
} }
} }
if (isSelf(c)) { else if (isSelf(c)) {
length = m_pos - startpos; length = m_pos - startpos;
if (c == ',') if (c == ',')
tokentype = BasicTokenType::Comma; tokentype = BasicTokenType::Comma;
@ -119,7 +143,7 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
out = sr.toString(); out = sr.toString();
return true; return true;
} }
if (isOperatorChar(c)) { else if (isOperatorChar(c)) {
while (true) { while (true) {
QChar c = peekChar(); QChar c = peekChar();
if (isOperatorChar(c)) { if (isOperatorChar(c)) {
@ -137,11 +161,21 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent
} }
else if (c == '\'') { else if (c == '\'') {
// Single quoted string so it's an SQL text literal // Single quoted string so it's an SQL text literal
return parseSingleQuotedString(startpos, length, tokentype); if (parseSingleQuotedString(startpos, length, tokentype)) {
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
return false;
} }
else if (c == '"') { else if (c == '"') {
// Double quoted identifier // Double quoted identifier
return parseDoubleQuotedIdentifier(startpos, length, tokentype); if (parseDoubleQuotedIdentifier(startpos, length, tokentype)) {
QStringRef sr(&m_block, startpos, length);
out = sr.toString();
return true;
}
return false;
} }
// else if (c == '/' && peekChar() == '*') { // else if (c == '/' && peekChar() == '*') {
// nextChar(); // nextChar();

View file

@ -17,7 +17,9 @@ enum class BasicTokenType {
Operator, Operator,
Self, // single char representing it self, maybe remove this and replace with token for each possibility Self, // single char representing it self, maybe remove this and replace with token for each possibility
Comma, Comma,
Cast Cast,
WhiteSpace,
NewLine
}; };
enum class LexerState { enum class LexerState {
@ -37,7 +39,7 @@ public:
class SqlLexer { class SqlLexer {
public: public:
SqlLexer(QString block, LexerState currentstate); SqlLexer(QString block, LexerState currentstate, bool return_whitespace=false);
QChar nextChar(); QChar nextChar();
QChar peekChar(); QChar peekChar();
/** /**
@ -61,6 +63,7 @@ private:
QString m_block; QString m_block;
int m_pos = 0; int m_pos = 0;
LexerState m_state; LexerState m_state;
bool m_returnWhitespace;
bool parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype); bool parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype);
bool parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype); bool parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype);

View file

@ -1,5 +1,6 @@
#include "util.h" #include "util.h"
#include "CsvWriter.h" #include "CsvWriter.h"
#include "SqlLexer.h"
#include <QApplication> #include <QApplication>
#include <QTextStream> #include <QTextStream>
#include <QClipboard> #include <QClipboard>
@ -106,32 +107,77 @@ void copySelectionToClipboard(const QTableView *view)
} }
} }
QString ConvertToMultiLineCString(const QString &in) QString ConvertToMultiLineCString(const QString &in_)
{ {
// We need to atleast escape " and \ and also any multi byte utf8 char // We need to atleast escape " and \ and also any multi byte utf8 char
QString out; // remove empty lines at start
out.append('"'); int last_nl_idx = 0;
QByteArray ba = in.toUtf8(); for (int idx = 0; idx < in_.length(); ++idx) {
for (auto c : ba) { QChar c = in_[idx];
if (c == '\\') { if (c == '\n') last_nl_idx = idx+1;
out.append("\\\\"); if (!c.isSpace()) {
} break;
else if (c == '"') { }
out.append("\\\""); }
} QString in = in_.right(in_.length() - last_nl_idx);
else if (uchar(c) > 127) { int idx;
out.append(QString("\\x%1").arg(uchar(c), 2, 16, QChar('0'))); for (idx = in.length() - 1; idx >= 0 && in[idx].isSpace(); --idx) ;
} ++idx;
else if (c == '\n') { in.truncate(idx);
// at end of line we add a space and a new line in the string then we put in the end quote go to the next line and put the open quote
out.append(" \\n\"\n\""); SqlLexer lexer(in, LexerState::Null, true);
} QString out;
else { QString line = "\"";
out.append(c); QString comment;
while (true) {
SqlToken token = lexer.nextBasicToken();
if (token.ok) {
if (token.tokenType == BasicTokenType::Comment) {
// save comment is seperate variable
comment = "//" + token.out.rightRef(token.out.length()-2);
// Trim whitespace on right
int idx;
for (idx = comment.length() - 1; idx >= 0 && comment[idx].isSpace(); --idx) ;
++idx;
comment.truncate(idx);
}
else if (token.tokenType == BasicTokenType::End || token.tokenType == BasicTokenType::NewLine) {
// trim right
{
int idx;
for (idx = line.length() - 1; idx >= 0 && line[idx].isSpace(); --idx) ;
++idx;
if (!comment.isEmpty()) {
// put the whitespace in front of the comment so it will be outside the contents of the string literal but alignment of comments is preserved
comment = line.rightRef(line.length() - (idx)) + comment;
}
line.truncate(idx);
}
out += line;
if (token.tokenType == BasicTokenType::End) {
out += "\"";
out += comment;
break;
}
else {
out += "\\n\"";
out += comment;
out += "\n";
line = "\"";
}
comment.clear();
}
else {
line += token.out;
}
}
else {
// error during lexical analysis, need to recover
throw std::runtime_error("Unrecognized input");
} }
} }
out.append('"');
return out; return out;
} }

View file

@ -15,6 +15,7 @@ HEADERS +=
SOURCES += main.cpp \ SOURCES += main.cpp \
tst_ConvertLangToSqlString.cpp \ tst_ConvertLangToSqlString.cpp \
tst_ConvertToMultiLineCString.cpp \
tst_ExplainJsonParser.cpp \ tst_ExplainJsonParser.cpp \
tst_expected.cpp \ tst_expected.cpp \
tst_SqlLexer.cpp \ tst_SqlLexer.cpp \

View file

@ -60,4 +60,3 @@ TEST(ConvertLangToSqlString, testSemiColon)
auto output = ConvertLangToSqlString(in); auto output = ConvertLangToSqlString(in);
ASSERT_EQ(output, expected); ASSERT_EQ(output, expected);
} }

View file

@ -0,0 +1,108 @@
#include <gtest/gtest.h>
#include <gmock/gmock-matchers.h>
#include "util.h"
#include "PrintTo_Qt.h"
using namespace testing;
// A single-line query is expected to come back wrapped in double quotes,
// with nothing else added.
TEST(ConvertToMultiLineCString, singleLine)
{
QString in(R"__(SELECT 1)__");
QString expected(R"__("SELECT 1")__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// Whitespace following the query text must not end up inside the
// generated string literal.
TEST(ConvertToMultiLineCString, singleLineTrimWhiteSpace)
{
QString in(R"__(SELECT 1 )__");
QString expected(R"__("SELECT 1")__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// An SQL "--" comment is expected to be emitted as a "//" comment placed
// after the closing quote, i.e. outside the string literal.
TEST(ConvertToMultiLineCString, singleLineWithComment)
{
QString in(R"__(SELECT 1 -- hello)__");
QString expected(R"__("SELECT 1" // hello)__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// Same as singleLineWithComment, but the input has whitespace after the
// comment text; the expected output ends directly after "hello".
TEST(ConvertToMultiLineCString, singleLineWithCommentTrimWhiteSpace)
{
// Check that whitespace at the end is removed but whitespace in between is kept
QString in(R"__(SELECT 1 -- hello )__");
QString expected(R"__("SELECT 1" // hello)__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// Every input line is expected to become its own quoted literal on its own
// output line. All but the last literal end with an escaped "\n"; the last
// one has no trailing newline escape.
TEST(ConvertToMultiLineCString, multiLine)
{
QString in(
R"__(SELECT kol
FROM table)__");
QString expected(
R"__("SELECT kol\n"
"FROM table")__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// With a comment on every line, each comment is expected to follow the
// closing quote of the literal generated for its own line.
TEST(ConvertToMultiLineCString, multiLineWithComment)
{
QString in(
R"__(SELECT kol -- eerste
FROM table -- tweede)__");
QString expected(
R"__("SELECT kol\n" // eerste
"FROM table" // tweede)__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// Test case for a discovered bug: a comment on the first line must be
// attached to that line only. The second line has no comment in the input,
// so the expected output has none after "FROM table" either.
TEST(ConvertToMultiLineCString, multiLineWithCommentNoErronousRepeat)
{
QString in(
R"__(SELECT kol -- eerste
FROM table)__");
QString expected(
R"__("SELECT kol\n" // eerste
"FROM table")__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// The raw string starts and ends with a line break around the query; the
// expected output contains only the query line, without any "\n" literal.
TEST(ConvertToMultiLineCString, trimExtraEmptyLines)
{
QString in(R"__(
SELECT 1
)__");
QString expected(R"__("SELECT 1")__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}
// Line breaks before the first and after the last query line are expected to
// be dropped, while an empty line inside the query is kept as its own "\n"
// literal.
// NOTE(review): the expected value contains a lone "\n" literal, so the input
// presumably has an empty line between "SELECT 1" and "FROM tab" - confirm
// that the raw string below really contains it.
TEST(ConvertToMultiLineCString, trimExtraEmptyLines2)
{
QString in(R"__(
SELECT 1
FROM tab
)__");
QString expected(R"__("SELECT 1\n"
"\n"
"FROM tab")__");
auto output = ConvertToMultiLineCString(in);
ASSERT_EQ(output, expected);
}

View file

@ -35,6 +35,27 @@ TEST(SqlLexer, lexer)
ASSERT_THAT( out, Eq(QString("SELECT")) ); ASSERT_THAT( out, Eq(QString("SELECT")) );
} }
// With the third constructor argument (return_whitespace) set to true, the
// lexer is expected to report the leading space as a WhiteSpace token of its
// own instead of skipping it, followed by the SELECT symbol.
TEST(SqlLexer, lexerWithWhiteSpace)
{
QString input = " SELECT ";
SqlLexer lexer(input, LexerState::Null, true);
int startpos, length;
BasicTokenType tokentype;
QString out;
// First token: the single space at position 0.
lexer.nextBasicToken(startpos, length, tokentype, out);
ASSERT_THAT(startpos, Eq(0));
ASSERT_THAT(length, Eq(1));
ASSERT_THAT(tokentype, Eq(BasicTokenType::WhiteSpace));
ASSERT_THAT(out, Eq(QString(" ")) );
// Second token: the six characters of SELECT, starting right after the space.
lexer.nextBasicToken(startpos, length, tokentype, out);
ASSERT_THAT(startpos, Eq(1));
ASSERT_THAT(length, Eq(6));
ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol));
ASSERT_THAT(out, Eq(QString("SELECT")) );
}
TEST(SqlLexer, lexer_quote_in_string) TEST(SqlLexer, lexer_quote_in_string)
{ {
QString input = " 'abc''def' "; QString input = " 'abc''def' ";
@ -48,6 +69,7 @@ TEST(SqlLexer, lexer_quote_in_string)
ASSERT_THAT(startpos, Eq(1)); ASSERT_THAT(startpos, Eq(1));
ASSERT_THAT(length, Eq(10)); ASSERT_THAT(length, Eq(10));
ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString)); ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString));
ASSERT_THAT(out, Eq(QString("'abc''def'")) );
} }
TEST(SqlLexer, lexer_comma_handling) TEST(SqlLexer, lexer_comma_handling)