aboutsummaryrefslogtreecommitdiffstats
path: root/src/querykv1/grammar.ebnf
diff options
context:
space:
mode:
Diffstat (limited to 'src/querykv1/grammar.ebnf')
-rw-r--r--src/querykv1/grammar.ebnf47
1 files changed, 47 insertions, 0 deletions
diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf
new file mode 100644
index 0000000..94f8cde
--- /dev/null
+++ b/src/querykv1/grammar.ebnf
@@ -0,0 +1,47 @@
1/* A document is an optional header line followed by comment, record or empty
2 * lines separated by NEWLINE. Fields may contain stray LFs, i.e. LFs that do
3 * not directly follow a CR. Loosely inspired by the similar IETF RFC 4180.
4 */
5document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header
6
7header ::= OPENBRACK NOTCR* /* "[" followed by any run of non-CR codepoints */
8comment ::= SEMICOLON NOTCR* /* ";" followed by any run of non-CR codepoints */
9
10empty-line ::= WHITESPACE* /* may also match the empty string */
11
12record ::= field (PIPE field)* /* one or more fields separated by "|" */
13field ::= WHITESPACE* field-data WHITESPACE* /* optional whitespace on either side of the data */
14field-data ::= DQUOTE escaped DQUOTE | unescaped /* either a quoted or a bare field */
15
16/* Unescaped fields may also contain double quotes (anywhere except as their
17 * first character); there they are not interpreted in any special way.
18 */
19escaped ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)*
20unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)?
21
22HTAB ::= #x09 /* <horizontal tab, "\t"> */
23LF ::= #x0A /* <line feed, "\n"> */
24VTAB ::= #x0B /* <vertical tab, "\v"> */
25FF ::= #x0C /* <form feed, "\f"> */
26CR ::= #x0D /* <carriage return, "\r"> */
27SPACE ::= #x20 /* <space, " "> */
28DQUOTE ::= #x22 /* <double quote, "\""> */
29SEMICOLON ::= #x3B /* <semicolon, ";"> */
30OPENBRACK ::= #x5B /* <left square bracket, "["> */
31PIPE ::= #x7C /* <vertical bar, "|"> */
32
33/* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and
34 * OPENBRACK (#x5B is skipped by the ranges below; NOTE(review): confirm that
35 * is intended). Semicolon is included, as comments are only defined as 'lines
36 * starting with a semicolon'; inside a field it does not start a comment.
37 */
38TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF]
39
40/* LF is included here because TMI8/KV1 does not consider it a newline:
41 * newlines are defined as 'CR optionally followed by LF'.
42 */
43WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB
44
45/* All codepoints excluding CR; LF (#x0A) is matched, as it is not a newline here */
46NOTCR ::= [#x00-#x0C#x0E-#x10FFFF]
47NEWLINE ::= CR LF?