aboutsummaryrefslogtreecommitdiffstats
path: root/lib/libtmi8/src/kv1_lexer.cpp
diff options
context:
space:
mode:
authorLibravatar Rutger Broekhoff2024-05-02 20:27:40 +0200
committerLibravatar Rutger Broekhoff2024-05-02 20:27:40 +0200
commit17a3ea880402338420699e03bcb24181e4ff3924 (patch)
treeda666ef91e0b60d20aa0b01529644c136fd1f4ab /lib/libtmi8/src/kv1_lexer.cpp
downloadoeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz
oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip
Initial commit
Based on dc4ba6a
Diffstat (limited to 'lib/libtmi8/src/kv1_lexer.cpp')
-rw-r--r--lib/libtmi8/src/kv1_lexer.cpp152
1 files changed, 152 insertions, 0 deletions
diff --git a/lib/libtmi8/src/kv1_lexer.cpp b/lib/libtmi8/src/kv1_lexer.cpp
new file mode 100644
index 0000000..028127b
--- /dev/null
+++ b/lib/libtmi8/src/kv1_lexer.cpp
@@ -0,0 +1,152 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <tmi8/kv1_lexer.hpp>
4
5Kv1Lexer::Kv1Lexer(std::string_view input)
6 : input(input), slice(input)
7{}
8
9// Does not eat newline character.
10void Kv1Lexer::eatRestOfLine() {
11 size_t end = slice.size();
12 for (size_t i = 0; i < slice.size(); i++) {
13 if (slice[i] == '\r' || slice[i] == '\n') {
14 end = i;
15 break;
16 }
17 }
18 slice = slice.substr(end);
19}
20
21void Kv1Lexer::lexOptionalHeader() {
22 if (slice.starts_with('[')) eatRestOfLine();
23}
24
25void Kv1Lexer::lexOptionalComment() {
26 if (slice.starts_with(';')) eatRestOfLine();
27}
28
29inline bool Kv1Lexer::isWhitespace(int c) {
30 return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
31}
32
33void Kv1Lexer::readQuotedColumn() {
34 Kv1Token token{ .type = KV1_TOKEN_CELL };
35
36 if (slice.size() == 0 || slice[0] != '"') {
37 errors.push_back("(internal error) readQuotedColumn: slice[0] != '\"'");
38 return;
39 }
40 slice = slice.substr(1);
41 while (true) {
42 size_t quote = slice.find('"');
43 if (quote == std::string_view::npos) {
44 errors.push_back("readQuotedColumn: no matching closing quote found");
45 return;
46 }
47 if (quote+1 == slice.size() || slice[quote + 1] != '"') {
48 token.data.append(slice.substr(0, quote));
49 break;
50 }
51 token.data.append(slice.substr(0, quote + 1));
52 slice = slice.substr(quote + 2);
53 }
54
55 size_t end = slice.size();
56 for (size_t i = 0; i < slice.size(); i++) {
57 if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') {
58 end = i;
59 break;
60 }
61 if (!isWhitespace(slice[i])) {
62 errors.push_back("readQuotedColumn: encountered non-whitespace character after closing quote");
63 return;
64 }
65 }
66 if (end != std::string_view::npos) slice = slice.substr(end);
67 else slice = slice.substr(slice.size());
68
69 tokens.push_back(std::move(token));
70}
71
72void Kv1Lexer::readUnquotedColumn() {
73 size_t end = slice.size();
74 size_t content_end = 0;
75 for (size_t i = 0; i < slice.size(); i++) {
76 if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') {
77 end = i;
78 break;
79 } else if (!isWhitespace(slice[i])) {
80 content_end = i + 1;
81 }
82 }
83 tokens.emplace_back(KV1_TOKEN_CELL, std::string(slice.substr(0, content_end)));
84 if (end != std::string_view::npos) slice = slice.substr(end);
85 else slice = slice.substr(slice.size());
86}
87
88void Kv1Lexer::lexRow() {
89 size_t cols = 0;
90 while (slice.size() > 0 && slice[0] != '\r' && slice[0] != '\n') {
91 if (slice[0] == '"') readQuotedColumn();
92 else readUnquotedColumn();
93 if (!errors.empty()) return;
94 cols++;
95 if (slice.size() != 0) {
96 if (slice[0] == '|') {
97 slice = slice.substr(1);
98 // A newline/eof right after pipe? That means an empty field at the end
99 // of the record, we also want to emit that as a token.
100 if (slice.size() == 0 || slice[0] == '\r' || slice[0] == '\n') {
101 tokens.push_back({ .type = KV1_TOKEN_CELL });
102 }
103 } else if (slice[0] == '\r') {
104 if (slice.size() > 1 && slice[1] == '\n') slice = slice.substr(2);
105 else slice = slice.substr(1);
106 break;
107 } else if (slice[0] == '\n') {
108 slice = slice.substr(1);
109 break;
110 } else {
111 errors.push_back("lexRow: expected CR, LF or |");
112 return;
113 }
114 }
115 }
116 tokens.push_back({ .type = KV1_TOKEN_ROW_END });
117}
118
119// Returns true when a line ending was consumed.
120bool Kv1Lexer::eatWhitespace() {
121 for (size_t i = 0; i < slice.size(); i++) {
122 if (slice[i] == '\r') {
123 slice = slice.substr(i + 1);
124 if (slice.size() > 1 && slice[i + 1] == '\n')
125 slice = slice.substr(i + 2);
126 return true;
127 }
128 if (slice[i] == '\n') {
129 slice = slice.substr(i + 1);
130 return true;
131 }
132
133 if (slice[i] != ' ' && slice[i] != '\f' && slice[i] != '\t' && slice[i] != '\v') {
134 slice = slice.substr(i);
135 return false;
136 }
137 }
138 return false;
139}
140
141void Kv1Lexer::lex() {
142 lexOptionalHeader();
143 eatWhitespace();
144
145 while (errors.empty() && !slice.empty()) {
146 lexOptionalComment();
147 bool newline = eatWhitespace();
148 if (newline) continue;
149 // We are now either (1) at the end of the file or (2) at the start of some column data
150 if (errors.empty()) lexRow();
151 }
152}