Added paste lang option for pasting programming code.

Expects you to paste only string literals with possible concatenation operators like . or +
This commit is contained in:
eelke 2019-08-19 11:38:04 +02:00
parent 35d1e75d35
commit fbd630489e
8 changed files with 302 additions and 2 deletions

View file

@ -146,3 +146,211 @@ QString ConvertToMultiLineRawCppString(const QString &in)
out.append("\n)" + delim + "\"");
return out;
}
/// Extracts the text held inside pasted source-code string literals.
///
/// The input is expected to consist of string literals, optionally joined by
/// concatenation operators, as found in C++, PHP or Java source code, e.g.
///
///     "SELECT * \n"
///     "FROM t" + ' WHERE a = 1'
///
/// Handling, as implemented below:
///  - Literals may be delimited by single or double quotes.
///  - The C++ prefixes L, u, u8 and U are accepted in front of a double quote.
///  - Whitespace and the operators '+' and '.' between literals are skipped.
///  - Escapes \a \b \f \n \t are translated; \r and \v are dropped.
///  - Octal (1-3 digits, value below 0x80), \x (1-2 hex digits),
///    \uXXXX and \UXXXXXXXX escapes are decoded.
///  - Any other escaped character is copied as is (covers \\ \" \').
///  - A literal that is still open at a line break is considered finished there.
///
/// No attempt is made to validate the syntax of the surrounding language. When
/// something is encountered that cannot be interpreted, parsing stops and the
/// text collected up to that point is returned.
///
/// @param in  Pasted source text containing string literals.
/// @return    The concatenated, unescaped contents of the literals.
QString ConvertLangToSqlString(const QString &in)
{
    // Scoped enumerators so the names cannot collide with macros such as the
    // ERROR define found in the Windows headers.
    enum class State {
        Whitespace, // between literals: skipping blanks and concatenation operators
        Prefix,     // at a character that has to be a C++ literal prefix
        InString,   // inside a literal
        End,        // input exhausted
        Error       // met something we cannot interpret
    };

    const int length = static_cast<int>(in.length());
    int index = 0;

    // Value of a hexadecimal digit, or -1 when c is not a hexadecimal digit.
    auto hexValue = [](QChar c) -> int {
        const ushort u = c.unicode();
        if (u >= '0' && u <= '9') return u - '0';
        if (u >= 'a' && u <= 'f') return u - 'a' + 10;
        if (u >= 'A' && u <= 'F') return u - 'A' + 10;
        return -1;
    };

    // Consumes up to maxDigits hexadecimal digits that follow in[index] and
    // leaves index on the last digit consumed. Returns false when fewer than
    // minDigits digits were present.
    auto readHex = [&](int minDigits, int maxDigits, uint &value) -> bool {
        value = 0;
        int digits = 0;
        while (digits < maxDigits && index + 1 < length) {
            const int d = hexValue(in[index + 1]);
            if (d < 0)
                break;
            value = value * 16 + static_cast<uint>(d);
            ++index;
            ++digits;
        }
        return digits >= minDigits;
    };

    QString output;
    QChar quote; // closing quote expected for the literal being parsed
    State state = State::Whitespace;

    while (state != State::Error && state != State::End && index < length) {
        if (state == State::Whitespace) {
            // Skip whitespace and concatenation operators. This deliberately
            // accepts many incorrect constructs; the only goal is to strip
            // everything that is not part of the SQL text.
            while (index < length
                   && (in[index].isSpace()
                       || in[index] == QLatin1Char('+')
                       || in[index] == QLatin1Char('.'))) {
                ++index;
            }
            if (index == length) {
                state = State::End;
            }
            else if (in[index] == QLatin1Char('\'') || in[index] == QLatin1Char('"')) {
                // Quote style may differ from one literal to the next.
                quote = in[index];
                ++index;
                state = State::InString;
            }
            else {
                state = State::Prefix;
            }
        }
        else if (state == State::Prefix) {
            // Only the C++ prefixes L, U, u and u8 are recognised and they have
            // to be followed directly by a double quote.
            const QChar c = in[index];
            int prefixLength = 0;
            if (c == QLatin1Char('L') || c == QLatin1Char('U') || c == QLatin1Char('u'))
                prefixLength = 1;
            if (c == QLatin1Char('u') && index + 1 < length && in[index + 1] == QLatin1Char('8'))
                prefixLength = 2;

            if (prefixLength > 0
                    && index + prefixLength < length
                    && in[index + prefixLength] == QLatin1Char('"')) {
                quote = QLatin1Char('"');
                index += prefixLength + 1;
                state = State::InString;
            }
            else {
                state = State::Error;
            }
        }
        else {
            // State::InString: copy the contents of the literal and process
            // any escapes encountered.
            bool escape = false;
            while (state == State::InString && index < length) {
                const QChar c = in[index];
                if (escape) {
                    escape = false;
                    const ushort u = c.unicode();
                    switch (u) {
                    case 'a': output += QChar(static_cast<ushort>(0x07)); break;
                    case 'b': output += QChar(static_cast<ushort>(0x08)); break;
                    case 'f': output += QChar(static_cast<ushort>(0x0c)); break;
                    case 'n': output += QLatin1Char('\n'); break;
                    case 't': output += QLatin1Char('\t'); break;
                    case 'r': // carriage returns are stripped
                    case 'v': // vertical tabs are stripped
                        break;
                    case '0': case '1': case '2': case '3':
                    case '4': case '5': case '6': case '7': {
                        // Octal escape of one to three digits; stop at the
                        // first character that is not an octal digit so the
                        // closing quote is never swallowed.
                        uint value = static_cast<uint>(u - '0');
                        for (int digits = 1; digits < 3 && index + 1 < length; ++digits) {
                            const ushort next = in[index + 1].unicode();
                            if (next < '0' || next > '7')
                                break;
                            value = value * 8 + static_cast<uint>(next - '0');
                            ++index;
                        }
                        // Only ASCII values are accepted for octal escapes.
                        if (value < 0x80)
                            output += QChar(static_cast<ushort>(value));
                        else
                            state = State::Error;
                        break;
                    }
                    case 'x': {
                        // Hex byte: one or two hexadecimal digits.
                        uint value = 0;
                        if (readHex(1, 2, value))
                            output += QChar(static_cast<ushort>(value));
                        else
                            state = State::Error;
                        break;
                    }
                    case 'u': {
                        // UTF-16 code unit given as exactly four hex digits.
                        uint value = 0;
                        if (readHex(4, 4, value))
                            output += QChar(static_cast<ushort>(value));
                        else
                            state = State::Error;
                        break;
                    }
                    case 'U': {
                        // Unicode code point given as exactly eight hex digits.
                        uint value = 0;
                        if (!readHex(8, 8, value) || value > 0x10FFFFu) {
                            state = State::Error;
                        }
                        else if (QChar::requiresSurrogates(value)) {
                            output += QChar(QChar::highSurrogate(value));
                            output += QChar(QChar::lowSurrogate(value));
                        }
                        else {
                            output += QChar(static_cast<ushort>(value));
                        }
                        break;
                    }
                    default:
                        // Escaped backslash, quote or anything unknown: keep
                        // the character itself.
                        output += c;
                        break;
                    }
                }
                else if (c == quote || c == QLatin1Char('\n')) {
                    // Closing quote reached. A line break also ends the
                    // literal: multi-line literals are usually raw strings,
                    // which can be pasted without conversion anyway.
                    state = State::Whitespace;
                }
                else if (c == QLatin1Char('\\')) {
                    escape = true;
                }
                else {
                    output += c;
                }
                ++index;
            }
        }
    }
    return output;
}