From ec930218cd68cb18796d4a64fd523d5d073cf45a Mon Sep 17 00:00:00 2001
From: eelke
Date: Sat, 16 Dec 2017 10:31:51 +0100
Subject: [PATCH] Implemented ArrayParser and unit tests to verify its working.

---
 pgsql/ArrayParser.cpp                | 108 +++++++++++++++++++++++++++
 pgsql/ArrayParser.h                  |  57 ++++++++++++++
 pgsql/pgsql.pro                      |  10 ++-
 tests/PgsqlTests/tst_ArrayParser.cpp |  99 +++++++++++++++++++++++++
 4 files changed, 271 insertions(+), 3 deletions(-)
 create mode 100644 pgsql/ArrayParser.cpp
 create mode 100644 pgsql/ArrayParser.h
 create mode 100644 tests/PgsqlTests/tst_ArrayParser.cpp

diff --git a/pgsql/ArrayParser.cpp b/pgsql/ArrayParser.cpp
new file mode 100644
index 0000000..e629a4e
--- /dev/null
+++ b/pgsql/ArrayParser.cpp
@@ -0,0 +1,108 @@
+#include "ArrayParser.h"
+#include <cstring>
+
+using namespace Pgsql;
+
+namespace {
+
+    constexpr char ArrayStart = '{';
+    constexpr char ArrayEnd = '}';
+    constexpr char Quote = '"';
+    constexpr char Separator = ',';
+
+}
+
+ArrayParser::ArrayParser(const char *array_string)
+    : data(array_string)
+    , end(array_string + std::strlen(array_string))
+    , pos(array_string)
+{
+    initializeParse();
+}
+
+
+ArrayParser::NextElemResult ArrayParser::GetNextElem()
+{
+    // We should be at the start of an element or at the end of the array
+    NextElemResult result = { false, std::nullopt };
+    if (pos < end && *pos != ArrayEnd) {
+        if (*pos == Quote) {
+            // parse quoted value, slow path removing escapes
+            parseQuotedValue();
+            result.value = std::string_view(temp);
+        }
+        else {
+            // parse unquoted value, fast path no escapes
+            const char *start = pos;
+            while (pos < end && *pos != Separator && *pos != ArrayEnd) ++pos;
+
+            // an unquoted NULL is the SQL NULL, the optional stays unset
+            if ((pos - start) != 4 || std::strncmp(start, "NULL", 4) != 0)
+                result.value = std::string_view(start, pos - start);
+        }
+        // An element must be followed by a separator or the closing brace,
+        // running out of data here means the array was truncated.
+        if (pos == end || (*pos != Separator && *pos != ArrayEnd))
+            throw std::runtime_error("Invalid input");
+
+        result.ok = true;
+        // move to start of next element
+        ++pos; // skip separator or closing brace
+        skipWhitespace();
+    }
+    return result;
+}
+
+
+void ArrayParser::parseQuotedValue()
+{
+    std::string s;
+    // internal function thus we can safely assume the caller already verified
+    // the opening quote
+    ++pos;
+
+    bool closed = false;
+    while (pos < end && !closed) {
+        if (*pos == Quote) {
+            closed = true;
+        }
+        else {
+            if (*pos == '\\') {
+                // the character behind a backslash is taken literally
+                ++pos;
+                if (pos == end)
+                    throw std::runtime_error("Invalid input");
+            }
+            s += *pos;
+        }
+        ++pos;
+    }
+    // no closing quote before the end of the data, input was truncated
+    if (!closed)
+        throw std::runtime_error("Invalid input");
+
+    temp = std::move(s);
+}
+
+
+void ArrayParser::initializeParse()
+{
+    // Test if non empty string (empty string is an empty array)
+    //
+    skipWhitespace();
+    if (pos < end) {
+        // first character should be opening brace
+        if (*pos != ArrayStart) {
+            throw std::runtime_error("Unexpected input");
+        }
+        ++pos;
+        skipWhitespace();
+        // Position is now first element or end of array when the array is empty
+        // GetNextElem can take it from here
+    }
+}
+
+void ArrayParser::skipWhitespace()
+{
+    while (pos < end && (*pos == ' ' || *pos == '\t')) ++pos;
+}
diff --git a/pgsql/ArrayParser.h b/pgsql/ArrayParser.h
new file mode 100644
index 0000000..f1ca1e0
--- /dev/null
+++ b/pgsql/ArrayParser.h
@@ -0,0 +1,57 @@
+#ifndef ARRAYPARSER_H
+#define ARRAYPARSER_H
+
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <string_view>
+
+namespace Pgsql {
+    /** Class for parsing array values coming from postgresql
+     *
+     * exceptions are used to report serious errors
+     * in production these kind of errors should rarely happen as
+     * they are either a bug in this parser or postgres changed its format.
+     */
+    class ArrayParser {
+    public:
+        /**
+         * \param array_string The zero terminated string that needs parsing (warning just the pointer is stored, the string is not copied)
+         * \throws std::runtime_error when the first non whitespace character is not an opening brace
+         */
+        explicit ArrayParser(const char *array_string);
+
+        class NextElemResult {
+        public:
+            bool ok;
+            std::optional<std::string_view> value;
+        };
+        /**
+         *
+         * Usage:
+         *    auto [ok, val] = parser.GetNextElem();
+         *
+         * \return the bool signals if there was a next element when it is true or the end of the array in which
+         * case it is false. The optional is not set when the next element IS NULL. Otherwise it refers to the
+         * string value of the element. If the element was quoted it has been stripped of quotes and escapes.
+         * The value of a quoted element refers to a buffer inside the parser that is overwritten when the next
+         * quoted element is parsed, copy it when it has to live longer.
+         * \throws std::runtime_error when a value is not followed by a separator or a closing brace
+         */
+        NextElemResult GetNextElem();
+    private:
+        const char *data;
+        const char *end;
+        std::string temp; // internal buffer for when a value needs unescaping
+        const char *pos;
+
+        void parseQuotedValue();
+        void initializeParse();
+        /** Moves pos forward to the first non whitespace character.
+         */
+        void skipWhitespace();
+    };
+
+} // end namespace Pgsql
+
+#endif // ARRAYPARSER_H
diff --git a/pgsql/pgsql.pro b/pgsql/pgsql.pro
index ea9f394..a8d563c 100644
--- a/pgsql/pgsql.pro
+++ b/pgsql/pgsql.pro
@@ -4,7 +4,7 @@
 #
 #-------------------------------------------------
 
-CONFIG += staticlib c++14
+CONFIG += staticlib c++17
 
 QT += core
 greaterThan(QT_MAJOR_VERSION, 4): QT += widgets sql
@@ -16,6 +16,8 @@ INCLUDEPATH += C:\prog\include \
     C:\Prog\include\pgsql \
     C:\VSproj\boost32\include\boost-1_65_1
 
+QMAKE_CXXFLAGS += /std:c++17
+
 DEFINES += WIN32_LEAN_AND_MEAN NOMINMAX
 #LIBS += -LC:/prog/boost/lib -Lc:/prog/lib libpq.lib fmt.lib User32.lib ws2_32.lib
 LIBS += -LC:/PROG/LIB -lws2_32 -llibpq
@@ -35,7 +37,8 @@ SOURCES += Pgsql_Connection.cpp \
     Pgsql_Result.cpp \
     Pgsql_Row.cpp \
     Pgsql_Value.cpp \
-    Pgsql_Col.cpp
+    Pgsql_Col.cpp \
+    ArrayParser.cpp
 
 HEADERS += Pgsql_Connection.h \
     Pgsql_Params.h \
@@ -43,7 +46,8 @@ HEADERS += Pgsql_Connection.h \
     Pgsql_Row.h \
     Pgsql_Value.h \
     Pgsql_declare.h \
-    Pgsql_Col.h
+    Pgsql_Col.h \
+    ArrayParser.h
 
 #FORMS +=
 
diff --git a/tests/PgsqlTests/tst_ArrayParser.cpp b/tests/PgsqlTests/tst_ArrayParser.cpp
new file mode 100644
index 0000000..3827c04
--- /dev/null
+++ b/tests/PgsqlTests/tst_ArrayParser.cpp
@@ -0,0 +1,99 @@
+#include <gtest/gtest.h>
+#include <gmock/gmock-matchers.h>
+#include "ArrayParser.h"
+#include "PrintTo_Qt.h"
+
+using namespace testing;
+using namespace Pgsql;
+
+TEST(ArrayParser, emptyInput)
+{
+    const char * input = "";
+    ArrayParser parser(input);
+    auto res = parser.GetNextElem();
+    ASSERT_FALSE(res.ok);
+}
+
+TEST(ArrayParser, emptyArray)
+{
+    const char * input = "{}";
+    ArrayParser parser(input);
+    auto res = parser.GetNextElem();
+    ASSERT_FALSE(res.ok);
+}
+
+TEST(ArrayParser, oneInt)
+{
+    const char * input = "{1}";
+    ArrayParser parser(input);
+    auto res = parser.GetNextElem();
+    ASSERT_TRUE(res.ok);
+    ASSERT_EQ(res.value, "1");
+
+    res = parser.GetNextElem();
+    ASSERT_FALSE(res.ok);
+}
+
+TEST(ArrayParser, twoElems)
+{
+    const char * input = "{1,2.3}";
+    ArrayParser parser(input);
+
+    auto res = parser.GetNextElem();
+    ASSERT_TRUE(res.ok);
+    ASSERT_EQ(res.value, "1");
+
+    res = parser.GetNextElem();
+    ASSERT_TRUE(res.ok);
+    ASSERT_EQ(res.value, "2.3");
+
+    res = parser.GetNextElem();
+    ASSERT_FALSE(res.ok);
+}
+
+TEST(ArrayParser, nullElem)
+{
+    const char * input = "{NULL}";
+    ArrayParser parser(input);
+    auto res = parser.GetNextElem();
+    ASSERT_TRUE(res.ok);
+    ASSERT_EQ(res.value, std::nullopt);
+
+    res = parser.GetNextElem();
+    ASSERT_FALSE(res.ok);
+}
+
+
+
+// ARRAY['ab c', NULL, 'def', 'd"e', 'g''h ', 'g,j']
+// {"ab c",NULL,def,"d\"e","g'h ","g,j"}
+
+TEST(ArrayParser, quotedValues)
+{
+    const char * input = R"_({"ab c","de\"f"})_";
+    ArrayParser parser(input);
+    auto res = parser.GetNextElem();
+    ASSERT_TRUE(res.ok);
+    ASSERT_EQ(res.value, "ab c");
+
+    res = parser.GetNextElem();
+    ASSERT_TRUE(res.ok);
+    ASSERT_EQ(res.value, "de\"f");
+
+    res = parser.GetNextElem();
+    ASSERT_FALSE(res.ok);
+}
+
+TEST(ArrayParser, unterminatedQuotedValue)
+{
+    const char * input = R"_({"ab c)_";
+    ArrayParser parser(input);
+    ASSERT_THROW(parser.GetNextElem(), std::runtime_error);
+}
+
+
+// ARRAY['2017-12-11'::date, NULL]
+// {2017-12-11,NULL}
+
+// NULL::int2
+// null