pgLab/SqlSyntaxHighlighter.cpp

#include "SqlSyntaxHighlighter.h"

#include "pgtypecontainer.h"

namespace {


	/// Coarse token categories reported by Lexer::nextBasicToken() through its
	/// tokentype out-parameter.
	enum class BasicTokenType {
		None, // not produced by the lexer code in this file
		End, // End of input (the lexer in this file signals end by returning false instead)
		Symbol, // can be many things, keyword, object name, operator, ..
		Comment, // "--" comment running to the end of the line/block
		QuotedString, // single-quoted SQL text literal
		DollarQuotedString, // not produced by the lexer code in this file
		QuotedIdentifier // double-quoted identifier
	};

	/// State handed to a Lexer when it is constructed.
	/// NOTE(review): the lexer in this file stores the state but never reads it;
	/// presumably intended for constructs spanning several blocks - confirm.
	enum class LexerState {
		Null, // default state; the only value passed by highlightBlock()
		InDollarQuotedString // not used by any code visible in this file
	};


	class Lexer {
	private:
		QString m_block;
		int m_pos = -1;
		LexerState m_state;
	public:
		Lexer(const QString &block, LexerState currentstate)
			: m_block(block)
			, m_state(currentstate)
		{}

		QChar nextChar()
		{
			QChar result = QChar::Null;
			if (m_pos+1 < m_block.size()) {
				result = m_block.at(++m_pos);
			}
			else {
				++m_pos;
			}
			return result;
		}
		QChar peekChar()
		{
			QChar result = QChar::Null;
			if (m_pos+1 < m_block.size()) {
				result = m_block.at(m_pos+1);
			}
			return result;
		}
		/**
		 * @brief NextBasicToken
		 * @param in
		 * @param ofs
		 * @param start
		 * @param length
		 * @return false when input seems invalid, it will return what it did recognize but something wasn't right, parser should try to recover
		 */
		bool nextBasicToken(int &startpos, int &length, BasicTokenType &tokentype, QString &out)
		{
			// Basically chops based on white space
			// it does also recognize comments and quoted strings/identifiers
			bool result = false;
			while (true) {
				QChar c = nextChar();
				if (c.isSpace()) {
					// Just skip whitespace
				}
				else if (c == '-' && peekChar() == '-') { // two dashes, start of comment
					startpos = m_pos;
					// Loop till end of line or end of block
					c = nextChar();
					while (c != QChar::Null && c != '\n') {
						c = nextChar();
					}
					length = m_pos - startpos;
					tokentype = BasicTokenType::Comment;
					return true;
				}
				else if (c == '\'') {
					startpos = m_pos;
					// Single quoted string so it's an SQL text literal
					while (true) {
						c = nextChar();
						if (c == QChar::Null || c == '\n') {
							// unexpected end, pretend nothings wrong
							length = m_pos - startpos;
							tokentype = BasicTokenType::QuotedString;
							return true;
						}
						else if (c == '\'') {
							// maybe end of string literal
							if (peekChar() == '\'') {
								// Nope, just double quote to escape quote
								nextChar(); // eat it
							}
							else {
								length = m_pos - startpos;
								tokentype = BasicTokenType::QuotedString;
								return true;
							}
						}
					}
				}
				else if (c == '"') {
					startpos = m_pos;
					// Double quoted identifier
					while (true) {
						c = nextChar();
						if (c == QChar::Null || c == '\n') {
							// unexpected end, pretend nothings wrong
							length = m_pos - startpos;
							tokentype = BasicTokenType::QuotedIdentifier;
							return true;
						}
						else if (c == '"') {
							// maybe end of string literal
							if (peekChar() == '"') {
								// Nope, just double quote to escape quote
								nextChar(); // eat it
							}
							else {
								length = m_pos - startpos;
								tokentype = BasicTokenType::QuotedIdentifier;
								return true;
							}
						}
					}
				}
				else if (c == QChar::Null) {
					break;
				}
				else {
					startpos = m_pos;
					// Undetermined symbol
					while (!c.isSpace() && c != QChar::Null) {
						c = nextChar();
					}
					length = m_pos - startpos;
					tokentype = BasicTokenType::Symbol;
					QStringRef sr(&m_block, startpos, length);
					out = sr.toString();
					return true;
				}
			}
			return false;
		}

	};


	/// Lower-case SQL keywords that highlightBlock() renders in the keyword
	/// format. Lookups lower-case the token first, so entries must be lower case.
	/// The table is not exhaustive.
	const t_SymbolSet g_Keywords = {
		"a", "abort", "abs", "absent", "absolute", "access", "according", "action", "ada", "add",
		"admin", "after", "aggregate", "all", "allocate", "also", "alter", "analyse", "analyze", "and",
		"any", "are", "array", "array_agg", "array_max_cardinality", "as", "asc", "asensitive",
		"assertion", "assignment", "asymmetric", "at", "atomic", "attribute", "attributes", "authorization", "avg",
		"backward", "base64", "before", "begin", "begin_frame", "begin_partition", "bernoulli", "between", "binary",
		"bit", "bit_length", "blob", "blocked", "bom", "boolean", "both", "breadth", "buffer", "by",
		"c", "cache", "call", "called", "cardinality", "cascade", "cascaded", "case", "cast",
		"catalog", "catalog_name", "ceil", "ceiling", "chain", "char", "character", "characteristics",
		"characters", "character_length", "character_set_catalog", "character_set_name", "character_set_schema",
		"char_length", "check", "checkpoint", "class", "class_origin", "clob", "close", "cluster",
		"coalesce", "cobol", "collate", "collation", "collation_catalog", "collation_name", "collation_schema",
		"collect", "column", "columns", "column_name", "command_function", "command_function_code",
		"comment", "comments", "commit", "committed", "concurrently", "condition", "condition_number",
		"configuration", "conflict", "connect", "connection", "connection_name", "constraint", "constraints",
		"constraint_catalog", "constraint_name", "constraint_schema", "constructor", "contains", "content",
		"continue", "control", "conversion", "convert", "copy", "corr", "corresponding", "cost", "count",
		"covar_pop", "covar_samp", "create", "cross", "csv", "cube", "cume_dist", "current", "current_catalog",
		"current_date", "current_default_transform_group", "current_path", "current_role", "current_row",
		"current_schema", "current_time", "current_timestamp", "current_transform_group_for_type",
		"current_user", "cursor", "cursor_name", "cycle",
		"data", "database", "datalink", "date", "datetime_interval_code", "datetime_interval_precision",
		"day", "db", "deallocate", "dec", "decimal", "declare", "default", "defaults", "deferrable", "deferred",
		"defined", "definer", "degree", "delete", "delimiter", "delimiters", "dense_rank", "depends", "depth",
		"deref", "derived", "desc", "describe", "descriptor", "deterministic", "diagnostics", "dictionary",
		"disable", "discard", "disconnect", "dispatch", "distinct", "dlnewcopy", "dlpreviouscopy", "dlurlcomplete",
		"dlurlcompleteonly", "dlurlcompletewrite", "dlurlpath", "dlurlpathonly", "dlurlpathwrite", "dlurlscheme",
		"dlurlserver", "do", "domain", "drop",
		"elif", "end", "event", "exclude", "execute", "exists", "extension",
		"fetch", "first", "foreign", "from", "function", "full",
		"global", "grant", "group",
		"having",
		"if", "ilike", "immediate", "in", "index", "inherits", "initially", "inner", "insert", "into", "is",
		"join",
		"key",
		"language", "last", "left", "like", "limit", "listen", "local", "lock",
		"match",
		"natural", "not", "null", "nulls",
		"offset", "oids", "on", "or", "order", "outer", "over",
		"partial", "partition", "prepare", "preserve", "primary", "privileges", "public",
		"references", "refresh", "reindex", "release", "replace", "reset", "restrict", "revoke", "right", "role", "rollback", "row", "rows", "rule",
		"savepoint", "schema", "select", "sequence", "server", "set", "show", "simple", "statement",
		"table", "tablespace", "temp", "temporary", "trigger", "truncate",
		"unique", "unlisten", "unlogged", "update", "user", "using",
		"vacuum", "values", "view", "volatile",
		"when", "where", "with", "wrapper"
	};

//"bigint",

}


/**
 * @brief Creates the highlighter and sets up the text formats it applies.
 * @param parent document handed on to the QSyntaxHighlighter base class
 */
SqlSyntaxHighlighter::SqlSyntaxHighlighter(QTextDocument *parent)
	: QSyntaxHighlighter(parent)
{
	const QColor keywordColor(32, 32, 192);
	const QColor commentColor(64, 64, 64);
	const QColor stringColor(192, 32, 192);
	const QColor typeColor(32, 192, 32);

	// Plain coloured formats
	m_commentFormat.setForeground(commentColor);
	m_quotedStringFormat.setForeground(stringColor);

	// Keywords and type names are coloured and additionally shown in bold
	m_keywordFormat.setFontWeight(QFont::Bold);
	m_keywordFormat.setForeground(keywordColor);
	m_typeFormat.setFontWeight(QFont::Bold);
	m_typeFormat.setForeground(typeColor);
}

/// Nothing to release explicitly; members clean up after themselves.
SqlSyntaxHighlighter::~SqlSyntaxHighlighter() = default;

void SqlSyntaxHighlighter::setTypes(const PgTypeContainer *types)
{
	m_typeNames.clear();
	for (auto &e : *types) {
		m_typeNames.insert(e.typname);
	}
}

void SqlSyntaxHighlighter::highlightBlock(const QString &text)
{
	Lexer lexer(text, LexerState::Null);
	int startpos, length;
	BasicTokenType tokentype;
	QString s;
	while (lexer.nextBasicToken(startpos, length, tokentype, s)) {
		switch (tokentype) {
		case BasicTokenType::None:
		case BasicTokenType::End: // End of input
		case BasicTokenType::DollarQuotedString:
			break;
		case BasicTokenType::Symbol: // can be many things, keyword, object name, operator, ..
			if (g_Keywords.count(s.toLower()) > 0) {
				setFormat(startpos, length, m_keywordFormat);
			}
			else if (m_typeNames.count(s.toLower()) > 0) {
				setFormat(startpos, length, m_typeFormat);
			}
			break;
		case BasicTokenType::Comment:
			setFormat(startpos, length, m_commentFormat);
			break;
		case BasicTokenType::QuotedString:
			setFormat(startpos, length, m_quotedStringFormat);
			break;
		case BasicTokenType::QuotedIdentifier:
			break;
		}
	}
}