diff options
| author | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
|---|---|---|
| committer | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
| commit | 17a3ea880402338420699e03bcb24181e4ff3924 (patch) | |
| tree | da666ef91e0b60d20aa0b01529644c136fd1f4ab /src/querykv1/grammar.ebnf | |
| download | oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip | |
Initial commit
Based on dc4ba6a
Diffstat (limited to 'src/querykv1/grammar.ebnf')
| -rw-r--r-- | src/querykv1/grammar.ebnf | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf new file mode 100644 index 0000000..94f8cde --- /dev/null +++ b/src/querykv1/grammar.ebnf | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | /* This grammar allows fields to contain stray LFs, i.e. LFs that do not | ||
| 2 | * directly follow a CR. I took the liberty to take some inspiration from the | ||
| 3 | * somewhat similar IETF RFC 4180. | ||
| 4 | */ | ||
| 5 | document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header | ||
| 6 | |||
| 7 | header ::= OPENBRACK NOTCR* | ||
| 8 | comment ::= SEMICOLON NOTCR* | ||
| 9 | |||
| 10 | empty-line ::= WHITESPACE* | ||
| 11 | |||
| 12 | record ::= field (PIPE field)* | ||
| 13 | field ::= WHITESPACE* field-data WHITESPACE* | ||
| 14 | field-data ::= DQUOTE escaped DQUOTE | unescaped | ||
| 15 | |||
| 16 | /* Unescaped fields may also contain double quotes (anywhere except as the | ||
| 17 | * first character); they are just not interpreted in any special way. | ||
| 18 | */ | ||
| 19 | escaped ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* | ||
| 20 | unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)? | ||
| 21 | |||
| 22 | HTAB ::= #x09 /* <horizontal tab, "\t"> */ | ||
| 23 | LF ::= #x0A /* <line feed, "\n"> */ | ||
| 24 | VTAB ::= #x0B /* <vertical tab, "\v"> */ | ||
| 25 | FF ::= #x0C /* <form feed, "\f"> */ | ||
| 26 | CR ::= #x0D /* <carriage return, "\r"> */ | ||
| 27 | SPACE ::= #x20 /* <space, " "> */ | ||
| 28 | DQUOTE ::= #x22 /* " */ | ||
| 29 | SEMICOLON ::= #x3B /* ; */ | ||
| 30 | OPENBRACK ::= #x5B /* [ */ | ||
| 31 | PIPE ::= #x7C /* | */ | ||
| 32 | |||
| 33 | /* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and | ||
| 34 | * OPENBRACK (the range below also omits #x5B). Semicolon is included, as | ||
| 35 | * comments are only defined as 'lines starting with a semicolon', so a | ||
| 36 | * semicolon inside a field does not turn the rest of the line into a comment. | ||
| 37 | */ | ||
| 38 | TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF] | ||
| 39 | |||
| 40 | /* Including LF here as TMI8/KV1 does not consider it a newline, | ||
| 41 | * as newlines are defined as 'CR optionally followed by LF' | ||
| 42 | */ | ||
| 43 | WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB | ||
| 44 | |||
| 45 | /* All codepoints excluding CR (LF is allowed, as it is not a newline here) */ | ||
| 46 | NOTCR ::= [#x00-#x0C#x0E-#x10FFFF] | ||
| 47 | NEWLINE ::= CR LF? | ||