From 48ac8c6bab0fda2c57867de312d5d3ea013a97b7 Mon Sep 17 00:00:00 2001 From: eelke Date: Mon, 19 Aug 2019 13:52:23 +0200 Subject: [PATCH] Improved generation of c/cpp string from query Extra lines before and after query are removed. Whitespace at end of line is removed. SQL comments are converted to cpp style comments and are outside the string literal. To achieve this the function now uses the SqlLexer to know what is a comment. This required the additional capability in the lexer to also return whitespace and newline tokens. Also a few bugs in the lexer were fixed. --- core/SqlLexer.cpp | 54 +++++++-- core/SqlLexer.h | 7 +- pglablib/util.cpp | 88 ++++++++++---- tests/pglabtests/pglabtests.pro | 1 + .../pglabtests/tst_ConvertLangToSqlString.cpp | 1 - .../tst_ConvertToMultiLineCString.cpp | 108 ++++++++++++++++++ tests/pglabtests/tst_SqlLexer.cpp | 22 ++++ 7 files changed, 247 insertions(+), 34 deletions(-) create mode 100644 tests/pglabtests/tst_ConvertToMultiLineCString.cpp diff --git a/core/SqlLexer.cpp b/core/SqlLexer.cpp index b6b7986..f309d38 100644 --- a/core/SqlLexer.cpp +++ b/core/SqlLexer.cpp @@ -1,8 +1,9 @@ #include "SqlLexer.h" -SqlLexer::SqlLexer(QString block, LexerState currentstate) +SqlLexer::SqlLexer(QString block, LexerState currentstate, bool return_whitespace) : m_block(std::move(block)) , m_state(currentstate) + , m_returnWhitespace(return_whitespace) {} QChar SqlLexer::nextChar() @@ -79,11 +80,32 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent startpos = m_pos; QChar c = nextChar(); // if (LexerState::Null == m_state) { - if (c.isSpace()) { - // Just skip whitespace - continue; + if (c == '\n') { + if (m_returnWhitespace) { + length = m_pos - startpos; + tokentype = BasicTokenType::NewLine; + out = "\n"; + return true; + } } - if (c == '-' && peekChar() == '-') { // two dashes, start of comment + else if (c.isSpace()) { + // Just skip whitespace + if (m_returnWhitespace) { + for (;;) { + c = peekChar(); 
+ if (c != QChar::Null && c.isSpace() && c != '\n') + nextChar(); + else + break; + } + length = m_pos - startpos; + tokentype = BasicTokenType::WhiteSpace; + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + } + else if (c == '-' && peekChar() == '-') { // two dashes, start of comment // Loop till end of line or end of block c = nextChar(); for (;;) { @@ -95,9 +117,11 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent } length = m_pos - startpos; tokentype = BasicTokenType::Comment; + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); return true; } - if (c == ':') { + else if (c == ':') { c = peekChar(); if (c == ':') { nextChar(); @@ -108,7 +132,7 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent return true; } } - if (isSelf(c)) { + else if (isSelf(c)) { length = m_pos - startpos; if (c == ',') tokentype = BasicTokenType::Comma; @@ -119,7 +143,7 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent out = sr.toString(); return true; } - if (isOperatorChar(c)) { + else if (isOperatorChar(c)) { while (true) { QChar c = peekChar(); if (isOperatorChar(c)) { @@ -137,11 +161,21 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent } else if (c == '\'') { // Single quoted string so it's an SQL text literal - return parseSingleQuotedString(startpos, length, tokentype); + if (parseSingleQuotedString(startpos, length, tokentype)) { + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + return false; } else if (c == '"') { // Double quoted identifier - return parseDoubleQuotedIdentifier(startpos, length, tokentype); + if (parseDoubleQuotedIdentifier(startpos, length, tokentype)) { + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + return false; } // else if (c == '/' && peekChar() == '*') { // nextChar(); diff --git 
a/core/SqlLexer.h b/core/SqlLexer.h index b9db567..acc5089 100644 --- a/core/SqlLexer.h +++ b/core/SqlLexer.h @@ -17,7 +17,9 @@ enum class BasicTokenType { Operator, Self, // single char representing it self, maybe remove this and replace with token for each possibility Comma, - Cast + Cast, + WhiteSpace, + NewLine }; enum class LexerState { @@ -37,7 +39,7 @@ public: class SqlLexer { public: - SqlLexer(QString block, LexerState currentstate); + SqlLexer(QString block, LexerState currentstate, bool return_whitespace=false); QChar nextChar(); QChar peekChar(); /** @@ -61,6 +63,7 @@ private: QString m_block; int m_pos = 0; LexerState m_state; + bool m_returnWhitespace; bool parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype); bool parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype); diff --git a/pglablib/util.cpp b/pglablib/util.cpp index 647f944..e3772ee 100644 --- a/pglablib/util.cpp +++ b/pglablib/util.cpp @@ -1,5 +1,6 @@ #include "util.h" #include "CsvWriter.h" +#include "SqlLexer.h" #include #include #include @@ -106,32 +107,77 @@ void copySelectionToClipboard(const QTableView *view) } } -QString ConvertToMultiLineCString(const QString &in) +QString ConvertToMultiLineCString(const QString &in_) { // We need to atleast escape " and \ and also any multi byte utf8 char - QString out; - out.append('"'); - QByteArray ba = in.toUtf8(); - for (auto c : ba) { - if (c == '\\') { - out.append("\\\\"); - } - else if (c == '"') { - out.append("\\\""); - } - else if (uchar(c) > 127) { - out.append(QString("\\x%1").arg(uchar(c), 2, 16, QChar('0'))); - } - else if (c == '\n') { - // at end of line we add a space and a new line in the string then we put in the end quote go to the next line and put the open quote - out.append(" \\n\"\n\""); - } - else { - out.append(c); + // remove empty lines at start + int last_nl_idx = 0; + for (int idx = 0; idx < in_.length(); ++idx) { + QChar c = in_[idx]; + if (c == '\n') last_nl_idx = 
idx+1; + if (!c.isSpace()) { + break; + } + } + QString in = in_.right(in_.length() - last_nl_idx); + int idx; + for (idx = in.length() - 1; idx >= 0 && in[idx].isSpace(); --idx) ; + ++idx; + in.truncate(idx); + + SqlLexer lexer(in, LexerState::Null, true); + QString out; + QString line = "\""; + QString comment; + while (true) { + SqlToken token = lexer.nextBasicToken(); + if (token.ok) { + if (token.tokenType == BasicTokenType::Comment) { + // save comment is seperate variable + comment = "//" + token.out.rightRef(token.out.length()-2); + // Trim whitespace on right + int idx; + for (idx = comment.length() - 1; idx >= 0 && comment[idx].isSpace(); --idx) ; + ++idx; + comment.truncate(idx); + } + else if (token.tokenType == BasicTokenType::End || token.tokenType == BasicTokenType::NewLine) { + // trim right + { + int idx; + for (idx = line.length() - 1; idx >= 0 && line[idx].isSpace(); --idx) ; + ++idx; + if (!comment.isEmpty()) { + // put the whitespace in front of the comment so it will be outside the contents of the string literal but alignment of comments is preserved + comment = line.rightRef(line.length() - (idx)) + comment; + } + line.truncate(idx); + } + + out += line; + if (token.tokenType == BasicTokenType::End) { + out += "\""; + out += comment; + break; + } + else { + out += "\\n\""; + out += comment; + out += "\n"; + line = "\""; + } + comment.clear(); + } + else { + line += token.out; + } + } + else { + // error during lexical analysis, need to recover + throw std::runtime_error("Unrecognized input"); } } - out.append('"'); return out; } diff --git a/tests/pglabtests/pglabtests.pro b/tests/pglabtests/pglabtests.pro index 2ab510b..8b795b5 100644 --- a/tests/pglabtests/pglabtests.pro +++ b/tests/pglabtests/pglabtests.pro @@ -15,6 +15,7 @@ HEADERS += SOURCES += main.cpp \ tst_ConvertLangToSqlString.cpp \ + tst_ConvertToMultiLineCString.cpp \ tst_ExplainJsonParser.cpp \ tst_expected.cpp \ tst_SqlLexer.cpp \ diff --git 
a/tests/pglabtests/tst_ConvertLangToSqlString.cpp b/tests/pglabtests/tst_ConvertLangToSqlString.cpp index 1fa28af..8310a7c 100644 --- a/tests/pglabtests/tst_ConvertLangToSqlString.cpp +++ b/tests/pglabtests/tst_ConvertLangToSqlString.cpp @@ -60,4 +60,3 @@ TEST(ConvertLangToSqlString, testSemiColon) auto output = ConvertLangToSqlString(in); ASSERT_EQ(output, expected); } - diff --git a/tests/pglabtests/tst_ConvertToMultiLineCString.cpp b/tests/pglabtests/tst_ConvertToMultiLineCString.cpp new file mode 100644 index 0000000..8d60bc4 --- /dev/null +++ b/tests/pglabtests/tst_ConvertToMultiLineCString.cpp @@ -0,0 +1,108 @@ +#include +#include +#include "util.h" +#include "PrintTo_Qt.h" + +using namespace testing; + + +TEST(ConvertToMultiLineCString, singleLine) +{ + QString in(R"__(SELECT 1)__"); + QString expected(R"__("SELECT 1")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, singleLineTrimWhiteSpace) +{ + QString in(R"__(SELECT 1 )__"); + QString expected(R"__("SELECT 1")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, singleLineWithComment) +{ + QString in(R"__(SELECT 1 -- hello)__"); + QString expected(R"__("SELECT 1" // hello)__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, singleLineWithCommentTrimWhiteSpace) +{ + // Check whitespace at end is removed but in between is kept + QString in(R"__(SELECT 1 -- hello )__"); + QString expected(R"__("SELECT 1" // hello)__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, multiLine) +{ + QString in( +R"__(SELECT kol +FROM table)__"); + QString expected( +R"__("SELECT kol\n" +"FROM table")__"); + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, multiLineWithComment) +{ + 
QString in( +R"__(SELECT kol -- eerste +FROM table -- tweede)__"); + QString expected( +R"__("SELECT kol\n" // eerste +"FROM table" // tweede)__"); + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +// Test case for a discovered bug +TEST(ConvertToMultiLineCString, multiLineWithCommentNoErronousRepeat) +{ + QString in( +R"__(SELECT kol -- eerste +FROM table)__"); + QString expected( +R"__("SELECT kol\n" // eerste +"FROM table")__"); + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, trimExtraEmptyLines) +{ + QString in(R"__( +SELECT 1 +)__"); + QString expected(R"__("SELECT 1")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, trimExtraEmptyLines2) +{ + QString in(R"__( +SELECT 1 + +FROM tab +)__"); + QString expected(R"__("SELECT 1\n" +"\n" +"FROM tab")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + diff --git a/tests/pglabtests/tst_SqlLexer.cpp b/tests/pglabtests/tst_SqlLexer.cpp index 72a1474..bcdb03e 100644 --- a/tests/pglabtests/tst_SqlLexer.cpp +++ b/tests/pglabtests/tst_SqlLexer.cpp @@ -35,6 +35,27 @@ TEST(SqlLexer, lexer) ASSERT_THAT( out, Eq(QString("SELECT")) ); } +TEST(SqlLexer, lexerWithWhiteSpace) +{ + QString input = " SELECT "; + SqlLexer lexer(input, LexerState::Null, true); + + int startpos, length; + BasicTokenType tokentype; + QString out; + lexer.nextBasicToken(startpos, length, tokentype, out); + ASSERT_THAT(startpos, Eq(0)); + ASSERT_THAT(length, Eq(1)); + ASSERT_THAT(tokentype, Eq(BasicTokenType::WhiteSpace)); + ASSERT_THAT(out, Eq(QString(" ")) ); + + lexer.nextBasicToken(startpos, length, tokentype, out); + ASSERT_THAT(startpos, Eq(1)); + ASSERT_THAT(length, Eq(6)); + ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol)); + ASSERT_THAT(out, Eq(QString("SELECT")) ); +} + TEST(SqlLexer, lexer_quote_in_string) { QString input = " 
'abc''def' "; @@ -48,6 +69,7 @@ TEST(SqlLexer, lexer_quote_in_string) ASSERT_THAT(startpos, Eq(1)); ASSERT_THAT(length, Eq(10)); ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString)); + ASSERT_THAT(out, Eq(QString("'abc''def'")) ); } TEST(SqlLexer, lexer_comma_handling)