#include "ArrayParser.h"

#include <cstring>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>

using namespace Pgsql;

namespace {

    // Structural characters of the array text representation handled here.
    constexpr char ArrayStart = '{';
    constexpr char ArrayEnd = '}';
    constexpr char Quote = '"';
    constexpr char Seperator = ',';

}

/// Prepares parsing of a NUL-terminated array literal such as `{1,"a b",NULL}`.
///
/// Only the pointer is stored, the text is not copied, so the buffer passed in
/// has to stay valid for as long as elements are fetched from this parser.
///
/// @param array_string NUL-terminated text of the array; must not be null.
/// @throws std::runtime_error when the text is non-empty but, after optional
///         leading blanks, does not start with '{'.
ArrayParser::ArrayParser(const char *array_string)
    : data(array_string)
    , end(array_string + std::strlen(array_string))
    , pos(array_string)
{
    initializeParse();
}

/// Fetches the next element of the array.
///
/// @return `ok == false` when there are no more elements. Otherwise `ok` is
///         true and `value` holds the element text, or is empty (nullopt) when
///         the element is the unquoted word NULL. For a quoted element the view
///         refers to an internal buffer that is overwritten by the next quoted
///         element; for an unquoted element it refers to the input text.
/// @throws std::runtime_error("Invalid input") when the data ends in the middle
///         of an element, or when a quoted element is not followed by a
///         separator or the closing brace.
ArrayParser::NextElemResult ArrayParser::GetNextElem()
{
    // We should be at the start of an element or at the end of the array
    NextElemResult result = { false, std::nullopt };
    if (pos >= end || *pos == ArrayEnd)
        return result;

    if (*pos == Quote) {
        // Quoted value, slow path: escapes are removed into `temp`.
        parseQuotedValue();
        result.ok = true;
        result.value = std::string_view(temp);

        // Blanks are tolerated between the closing quote and the delimiter.
        // Without this the blank would be consumed as if it were the separator
        // and the real separator would be read as an extra, empty element.
        skipWhitespace();
        if (pos == end || (*pos != Seperator && *pos != ArrayEnd))
            throw std::runtime_error("Invalid input");
    }
    else {
        // Unquoted value, fast path: no escapes, so a view on the input is enough.
        const char *start = pos;
        while (pos < end && *pos != Seperator && *pos != ArrayEnd)
            ++pos;
        if (pos == end) // ran out of data before a separator or closing brace
            throw std::runtime_error("Invalid input");

        result.ok = true;
        const std::string_view token(start, static_cast<std::size_t>(pos - start));
        // The bare word NULL denotes a null element: ok stays true, value stays empty.
        if (token != "NULL")
            result.value = token;
    }

    // pos is on the separator or the closing brace; step over it and move to
    // the start of the next element.
    ++pos;
    skipWhitespace();
    return result;
}

/// Reads a quoted element starting at the opening quote and stores its
/// unescaped content in `temp`. A backslash makes the following character
/// literal. On return pos is just behind the closing quote.
///
/// Internal function: the caller has already verified that *pos is the
/// opening quote.
///
/// @throws std::runtime_error("Invalid input") when the data ends before the
///         closing quote or directly after a backslash.
void ArrayParser::parseQuotedValue()
{
    std::string unescaped;

    ++pos; // step over the opening quote
    bool closed = false;
    while (pos < end) {
        if (*pos == Quote) {
            ++pos;
            closed = true;
            break;
        }
        if (*pos == '\\') {
            ++pos;
            if (pos == end)
                throw std::runtime_error("Invalid input");
        }
        unescaped += *pos;
        ++pos;
    }

    // Reaching the end of the data without a closing quote means the input was
    // truncated; report it the same way the unquoted path does instead of
    // handing back a partial value.
    if (!closed)
        throw std::runtime_error("Invalid input");

    temp = std::move(unescaped);
}

/// Positions the parser on the first element.
///
/// An empty string (or one containing only blanks) is treated as an empty
/// array. Anything else has to start with the opening brace.
///
/// @throws std::runtime_error("Unexpected input") when the first non-blank
///         character is not '{'.
void ArrayParser::initializeParse()
{
    skipWhitespace();
    if (pos < end) {
        // first character should be opening brace
        if (*pos != ArrayStart) {
            throw std::runtime_error("Unexpected input");
        }
        ++pos;
        skipWhitespace();
        // Position is now the first element, or the end of the array when the
        // array is empty. GetNextElem can take it from here.
    }
}

/// Advances pos past any spaces and tabs, never beyond end.
void ArrayParser::skipWhitespace()
{
    while (pos < end && (*pos == ' ' || *pos == '\t'))
        ++pos;
}