diff --git a/core/SqlLexer.cpp b/core/SqlLexer.cpp index b6b7986..f309d38 100644 --- a/core/SqlLexer.cpp +++ b/core/SqlLexer.cpp @@ -1,8 +1,9 @@ #include "SqlLexer.h" -SqlLexer::SqlLexer(QString block, LexerState currentstate) +SqlLexer::SqlLexer(QString block, LexerState currentstate, bool return_whitespace) : m_block(std::move(block)) , m_state(currentstate) + , m_returnWhitespace(return_whitespace) {} QChar SqlLexer::nextChar() @@ -79,11 +80,32 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent startpos = m_pos; QChar c = nextChar(); // if (LexerState::Null == m_state) { - if (c.isSpace()) { - // Just skip whitespace - continue; + if (c == '\n') { + if (m_returnWhitespace) { + length = m_pos - startpos; + tokentype = BasicTokenType::NewLine; + out = "\n"; + return true; + } } - if (c == '-' && peekChar() == '-') { // two dashes, start of comment + else if (c.isSpace()) { + // Just skip whitespace + if (m_returnWhitespace) { + for (;;) { + c = peekChar(); + if (c != QChar::Null && c.isSpace() && c != '\n') + nextChar(); + else + break; + } + length = m_pos - startpos; + tokentype = BasicTokenType::WhiteSpace; + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + } + else if (c == '-' && peekChar() == '-') { // two dashes, start of comment // Loop till end of line or end of block c = nextChar(); for (;;) { @@ -95,9 +117,11 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent } length = m_pos - startpos; tokentype = BasicTokenType::Comment; + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); return true; } - if (c == ':') { + else if (c == ':') { c = peekChar(); if (c == ':') { nextChar(); @@ -108,7 +132,7 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent return true; } } - if (isSelf(c)) { + else if (isSelf(c)) { length = m_pos - startpos; if (c == ',') tokentype = BasicTokenType::Comma; @@ -119,7 +143,7 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent out = sr.toString(); return true; } - if (isOperatorChar(c)) { + else if (isOperatorChar(c)) { while (true) { QChar c = peekChar(); if (isOperatorChar(c)) { @@ -137,11 +161,21 @@ bool SqlLexer::nextBasicToken(int &startpos, int &length, BasicTokenType &tokent } else if (c == '\'') { // Single quoted string so it's an SQL text literal - return parseSingleQuotedString(startpos, length, tokentype); + if (parseSingleQuotedString(startpos, length, tokentype)) { + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + return false; } else if (c == '"') { // Double quoted identifier - return parseDoubleQuotedIdentifier(startpos, length, tokentype); + if (parseDoubleQuotedIdentifier(startpos, length, tokentype)) { + QStringRef sr(&m_block, startpos, length); + out = sr.toString(); + return true; + } + return false; } // else if (c == '/' && peekChar() == '*') { // nextChar(); diff --git a/core/SqlLexer.h b/core/SqlLexer.h index b9db567..acc5089 100644 --- a/core/SqlLexer.h +++ b/core/SqlLexer.h @@ -17,7 +17,9 @@ enum class BasicTokenType { Operator, Self, // single char representing it self, maybe remove this and replace with token for each possibility Comma, - Cast + Cast, + WhiteSpace, + NewLine }; enum class LexerState { @@ -37,7 +39,7 @@ public: class SqlLexer { public: - SqlLexer(QString block, LexerState currentstate); + SqlLexer(QString block, LexerState currentstate, bool return_whitespace=false); QChar nextChar(); QChar peekChar(); /** @@ -61,6 +63,7 @@ private: QString m_block; int m_pos = 0; LexerState m_state; + bool m_returnWhitespace; bool parseSingleQuotedString(int startpos, int &length, BasicTokenType &tokentype); bool parseDoubleQuotedIdentifier(int startpos, int &length, BasicTokenType &tokentype); diff --git a/pglablib/util.cpp b/pglablib/util.cpp index 647f944..e3772ee 100644 --- a/pglablib/util.cpp +++ b/pglablib/util.cpp @@ -1,5 +1,6 @@ #include "util.h" #include "CsvWriter.h" +#include "SqlLexer.h" #include #include #include @@ -106,32 +107,77 @@ void copySelectionToClipboard(const QTableView *view) } } -QString ConvertToMultiLineCString(const QString &in) +QString ConvertToMultiLineCString(const QString &in_) { // We need to atleast escape " and \ and also any multi byte utf8 char - QString out; - out.append('"'); - QByteArray ba = in.toUtf8(); - for (auto c : ba) { - if (c == '\\') { - out.append("\\\\"); - } - else if (c == '"') { - out.append("\\\""); - } - else if (uchar(c) > 127) { - out.append(QString("\\x%1").arg(uchar(c), 2, 16, QChar('0'))); - } - else if (c == '\n') { - // at end of line we add a space and a new line in the string then we put in the end quote go to the next line and put the open quote - out.append(" \\n\"\n\""); - } - else { - out.append(c); + // remove empty lines at start + int last_nl_idx = 0; + for (int idx = 0; idx < in_.length(); ++idx) { + QChar c = in_[idx]; + if (c == '\n') last_nl_idx = idx+1; + if (!c.isSpace()) { + break; + } + } + QString in = in_.right(in_.length() - last_nl_idx); + int idx; + for (idx = in.length() - 1; idx >= 0 && in[idx].isSpace(); --idx) ; + ++idx; + in.truncate(idx); + + SqlLexer lexer(in, LexerState::Null, true); + QString out; + QString line = "\""; + QString comment; + while (true) { + SqlToken token = lexer.nextBasicToken(); + if (token.ok) { + if (token.tokenType == BasicTokenType::Comment) { + // save comment is seperate variable + comment = "//" + token.out.rightRef(token.out.length()-2); + // Trim whitespace on right + int idx; + for (idx = comment.length() - 1; idx >= 0 && comment[idx].isSpace(); --idx) ; + ++idx; + comment.truncate(idx); + } + else if (token.tokenType == BasicTokenType::End || token.tokenType == BasicTokenType::NewLine) { + // trim right + { + int idx; + for (idx = line.length() - 1; idx >= 0 && line[idx].isSpace(); --idx) ; + ++idx; + if (!comment.isEmpty()) { + // put the whitespace in front of the comment so it will be outside the contents of the string literal but alignment of comments is preserved + comment = line.rightRef(line.length() - (idx)) + comment; + } + line.truncate(idx); + } + + out += line; + if (token.tokenType == BasicTokenType::End) { + out += "\""; + out += comment; + break; + } + else { + out += "\\n\""; + out += comment; + out += "\n"; + line = "\""; + } + comment.clear(); + } + else { + line += token.out; + } + } + else { + // error during lexical analysis, need to recover + throw std::runtime_error("Unrecognized input"); } } - out.append('"'); return out; } diff --git a/tests/pglabtests/pglabtests.pro b/tests/pglabtests/pglabtests.pro index 2ab510b..8b795b5 100644 --- a/tests/pglabtests/pglabtests.pro +++ b/tests/pglabtests/pglabtests.pro @@ -15,6 +15,7 @@ HEADERS += SOURCES += main.cpp \ tst_ConvertLangToSqlString.cpp \ + tst_ConvertToMultiLineCString.cpp \ tst_ExplainJsonParser.cpp \ tst_expected.cpp \ tst_SqlLexer.cpp \ diff --git a/tests/pglabtests/tst_ConvertLangToSqlString.cpp b/tests/pglabtests/tst_ConvertLangToSqlString.cpp index 1fa28af..8310a7c 100644 --- a/tests/pglabtests/tst_ConvertLangToSqlString.cpp +++ b/tests/pglabtests/tst_ConvertLangToSqlString.cpp @@ -60,4 +60,3 @@ TEST(ConvertLangToSqlString, testSemiColon) auto output = ConvertLangToSqlString(in); ASSERT_EQ(output, expected); } - diff --git a/tests/pglabtests/tst_ConvertToMultiLineCString.cpp b/tests/pglabtests/tst_ConvertToMultiLineCString.cpp new file mode 100644 index 0000000..8d60bc4 --- /dev/null +++ b/tests/pglabtests/tst_ConvertToMultiLineCString.cpp @@ -0,0 +1,108 @@ +#include +#include +#include "util.h" +#include "PrintTo_Qt.h" + +using namespace testing; + + +TEST(ConvertToMultiLineCString, singleLine) +{ + QString in(R"__(SELECT 1)__"); + QString expected(R"__("SELECT 1")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, singleLineTrimWhiteSpace) +{ + QString in(R"__(SELECT 1 )__"); + QString expected(R"__("SELECT 1")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, singleLineWithComment) +{ + QString in(R"__(SELECT 1 -- hello)__"); + QString expected(R"__("SELECT 1" // hello)__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, singleLineWithCommentTrimWhiteSpace) +{ + // Check whitespace at end is removed but in between is kept + QString in(R"__(SELECT 1 -- hello )__"); + QString expected(R"__("SELECT 1" // hello)__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, multiLine) +{ + QString in( +R"__(SELECT kol +FROM table)__"); + QString expected( +R"__("SELECT kol\n" +"FROM table")__"); + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, multiLineWithComment) +{ + QString in( +R"__(SELECT kol -- eerste +FROM table -- tweede)__"); + QString expected( +R"__("SELECT kol\n" // eerste +"FROM table" // tweede)__"); + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +// Test case for a discovered bug +TEST(ConvertToMultiLineCString, multiLineWithCommentNoErronousRepeat) +{ + QString in( +R"__(SELECT kol -- eerste +FROM table)__"); + QString expected( +R"__("SELECT kol\n" // eerste +"FROM table")__"); + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, trimExtraEmptyLines) +{ + QString in(R"__( +SELECT 1 +)__"); + QString expected(R"__("SELECT 1")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + +TEST(ConvertToMultiLineCString, trimExtraEmptyLines2) +{ + QString in(R"__( +SELECT 1 + +FROM tab +)__"); + QString expected(R"__("SELECT 1\n" +"\n" +"FROM tab")__"); + + auto output = ConvertToMultiLineCString(in); + ASSERT_EQ(output, expected); +} + diff --git a/tests/pglabtests/tst_SqlLexer.cpp b/tests/pglabtests/tst_SqlLexer.cpp index 72a1474..bcdb03e 100644 --- a/tests/pglabtests/tst_SqlLexer.cpp +++ b/tests/pglabtests/tst_SqlLexer.cpp @@ -35,6 +35,27 @@ TEST(SqlLexer, lexer) ASSERT_THAT( out, Eq(QString("SELECT")) ); } +TEST(SqlLexer, lexerWithWhiteSpace) +{ + QString input = " SELECT "; + SqlLexer lexer(input, LexerState::Null, true); + + int startpos, length; + BasicTokenType tokentype; + QString out; + lexer.nextBasicToken(startpos, length, tokentype, out); + ASSERT_THAT(startpos, Eq(0)); + ASSERT_THAT(length, Eq(1)); + ASSERT_THAT(tokentype, Eq(BasicTokenType::WhiteSpace)); + ASSERT_THAT(out, Eq(QString(" ")) ); + + lexer.nextBasicToken(startpos, length, tokentype, out); + ASSERT_THAT(startpos, Eq(1)); + ASSERT_THAT(length, Eq(6)); + ASSERT_THAT(tokentype, Eq(BasicTokenType::Symbol)); + ASSERT_THAT(out, Eq(QString("SELECT")) ); +} + TEST(SqlLexer, lexer_quote_in_string) { QString input = " 'abc''def' "; @@ -48,6 +69,7 @@ TEST(SqlLexer, lexer_quote_in_string) ASSERT_THAT(startpos, Eq(1)); ASSERT_THAT(length, Eq(10)); ASSERT_THAT(tokentype, Eq(BasicTokenType::QuotedString)); + ASSERT_THAT(out, Eq(QString("'abc''def'")) ); } TEST(SqlLexer, lexer_comma_handling)