diff options
| author | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
|---|---|---|
| committer | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
| commit | 17a3ea880402338420699e03bcb24181e4ff3924 (patch) | |
| tree | da666ef91e0b60d20aa0b01529644c136fd1f4ab /src/querykv1/grammar.abnf | |
| download | oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip | |
Initial commit
Based on dc4ba6a
Diffstat (limited to 'src/querykv1/grammar.abnf')
| -rw-r--r-- | src/querykv1/grammar.abnf | 44 |
1 files changed, 44 insertions, 0 deletions
diff --git a/src/querykv1/grammar.abnf b/src/querykv1/grammar.abnf new file mode 100644 index 0000000..1c93760 --- /dev/null +++ b/src/querykv1/grammar.abnf | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | ; This grammar does *not* allow fields to contain newlines (CR, optionally followed by LF), unless the entire content | ||
| 2 | ; of the field is quoted. A bare LF (one not preceded by CR) is never allowed. The file is simply rejected otherwise. | ||
| 3 | ; I took the liberty to take some inspiration from the somewhat similar IETF RFC 4180. | ||
| 4 | |||
| 5 | document = [header NEWLINE] (comment / record / empty-line) *(NEWLINE (comment / record / empty-line)) [NEWLINE] / header ; optional header line, then NEWLINE-separated lines; or just a header | ||
| 6 | |||
| 7 | header = OPENBRACK *NOTCRLF ; "[" followed by the rest of the line | ||
| 8 | comment = SEMICOLON *NOTCRLF ; ";" followed by the rest of the line | ||
| 9 | |||
| 10 | empty-line = *WHITESPACE ; zero or more WHITESPACE characters, so it also matches a completely empty line | ||
| 11 | |||
| 12 | record = field *(PIPE field) ; one or more fields, separated by PIPE | ||
| 13 | field = *WHITESPACE field-data *WHITESPACE ; WHITESPACE padding is allowed on both sides of the data | ||
| 14 | field-data = escaped / unescaped ; either a double-quoted value or a bare value | ||
| 15 | |||
| 16 | ; Unescaped fields are also allowed to contain double quotes, | ||
| 17 | ; they are just not interpreted in any special way. | ||
| 18 | escaped = DQUOTE *(TEXTDATA / WHITESPACE / NEWLINE / PIPE / 2DQUOTE) DQUOTE ; 2DQUOTE = two consecutive DQUOTEs (presumably an escaped literal quote, as in RFC 4180) | ||
| 19 | unescaped = [TEXTDATA *(*WHITESPACE (TEXTDATA / DQUOTE))] ; may be empty; otherwise starts with TEXTDATA and never ends in WHITESPACE | ||
| 20 | |||
| 21 | HTAB = %x09 ; <horizontal tab, "\t"> | ||
| 22 | LF = %x0A ; <line feed, "\n"> | ||
| 23 | VTAB = %x0B ; <vertical tab, "\v"> | ||
| 24 | FF = %x0C ; <form feed, "\f"> | ||
| 25 | CR = %x0D ; <carriage return, "\r"> | ||
| 26 | SPACE = %x20 ; <space, " "> | ||
| 27 | DQUOTE = %x22 ; <double quote, '"'> | ||
| 28 | SEMICOLON = %x3B ; <semicolon, ";"> | ||
| 29 | OPENBRACK = %x5B ; <opening square bracket, "["> | ||
| 30 | PIPE = %x7C ; <pipe, "|"> | ||
| 31 | |||
| 32 | ; All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE | ||
| 33 | ; Semicolon is included, as comments are only defined as 'lines starting with a semicolon'. | ||
| 34 | ; So it should be fine if a semicolon is part of a field, the rest of the line would not | ||
| 35 | ; be interpreted as a comment in that case. Likewise, OPENBRACK (%x5B) is included: it only starts a header at the very beginning of the document. | ||
| 36 | TEXTDATA = %x00-08 / %x0E-1F / %x21 / %x23-7B / %x7D-10FFFF | ||
| 37 | |||
| 38 | ; LF is deliberately not included here, even though TMI8/KV1 does not officially consider | ||
| 39 | ; a lone LF to be a newline (newlines are defined as 'CR optionally followed by LF'). | ||
| 40 | WHITESPACE = SPACE / FF / HTAB / VTAB | ||
| 41 | |||
| 42 | ; All codepoints excluding CR and LF | ||
| 43 | NOTCRLF = %x00-09 / %x0B-0C / %x0E-10FFFF | ||
| 44 | NEWLINE = CR [LF] ; CR, optionally followed by LF; a lone LF is not a NEWLINE | ||