diff options
Diffstat (limited to 'src/querykv1/grammar.ebnf')
-rw-r--r-- | src/querykv1/grammar.ebnf | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf new file mode 100644 index 0000000..94f8cde --- /dev/null +++ b/src/querykv1/grammar.ebnf | |||
@@ -0,0 +1,47 @@ | |||
1 | /* This grammar allows fields to contain stray LFs, i.e. LFs that are not | ||
2 | * part of a CR LF newline. I took the liberty of taking some inspiration from | ||
3 | * the somewhat similar IETF RFC 4180. | ||
4 | */ | ||
5 | document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header /* an optional header line, then one or more NEWLINE-separated lines and an optional trailing NEWLINE; or a header alone */ | ||
6 | |||
7 | header ::= OPENBRACK NOTCR* /* '[' followed by any run of non-CR codepoints */ | ||
8 | comment ::= SEMICOLON NOTCR* /* ';' followed by any run of non-CR codepoints */ | ||
9 | |||
10 | empty-line ::= WHITESPACE* /* zero or more whitespace characters */ | ||
11 | |||
12 | record ::= field (PIPE field)* /* one or more fields separated by PIPE */ | ||
13 | field ::= WHITESPACE* field-data WHITESPACE* /* field-data with optional surrounding whitespace */ | ||
14 | field-data ::= DQUOTE escaped DQUOTE | unescaped /* either a double-quoted (escaped) value or a bare (unescaped) one */ | ||
15 | |||
16 | /* Unescaped fields may also contain double quotes, just not as their first | ||
17 | * character; such quotes are not interpreted in any special way. Inside an | ||
18 | * escaped field a double quote is written as two consecutive double quotes. | ||
18 | */ | ||
19 | escaped ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* | ||
20 | unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)? | ||
21 | |||
22 | HTAB ::= #x09 /* <horizontal tab, "\t"> */ | ||
23 | LF ::= #x0A /* <line feed, "\n"> */ | ||
24 | VTAB ::= #x0B /* <vertical tab, "\v"> */ | ||
25 | FF ::= #x0C /* <form feed, "\f"> */ | ||
26 | CR ::= #x0D /* <carriage return, "\r"> */ | ||
27 | SPACE ::= #x20 /* <space, " "> */ | ||
28 | DQUOTE ::= #x22 /* <double quote, '"'> */ | ||
29 | SEMICOLON ::= #x3B /* <semicolon, ";"> */ | ||
30 | OPENBRACK ::= #x5B /* <opening square bracket, "["> */ | ||
31 | PIPE ::= #x7C /* <vertical bar, "|"> */ | ||
32 | |||
33 | /* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and | ||
34 | * OPENBRACK (#x5B is left out of the ranges below; NOTE(review): confirm that | ||
35 | * is intended). Semicolon is included, as comments are only defined as 'lines | ||
36 | * starting with a semicolon', so a semicolon inside a field starts no comment. | ||
37 | */ | ||
38 | TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF] | ||
39 | |||
40 | /* LF is included here because TMI8/KV1 does not consider a lone LF a newline; | ||
41 | * newlines are defined as 'CR optionally followed by LF'. | ||
42 | */ | ||
43 | WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB | ||
44 | |||
45 | /* All codepoints excluding CR; LF (#x0A) lies within the first range and is therefore allowed */ | ||
46 | NOTCR ::= [#x00-#x0C#x0E-#x10FFFF] | ||
47 | NEWLINE ::= CR LF? /* CR, optionally followed by LF */ | ||