aboutsummaryrefslogtreecommitdiffstats
path: root/src/querykv1/grammar.abnf
diff options
context:
space:
mode:
authorLibravatar Rutger Broekhoff2024-05-02 20:27:40 +0200
committerLibravatar Rutger Broekhoff2024-05-02 20:27:40 +0200
commit17a3ea880402338420699e03bcb24181e4ff3924 (patch)
treeda666ef91e0b60d20aa0b01529644c136fd1f4ab /src/querykv1/grammar.abnf
downloadoeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz
oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip
Initial commit
Based on dc4ba6a
Diffstat (limited to 'src/querykv1/grammar.abnf')
-rw-r--r--src/querykv1/grammar.abnf44
1 files changed, 44 insertions, 0 deletions
diff --git a/src/querykv1/grammar.abnf b/src/querykv1/grammar.abnf
new file mode 100644
index 0000000..1c93760
--- /dev/null
+++ b/src/querykv1/grammar.abnf
@@ -0,0 +1,44 @@
1; This grammar does *not* allow fields to contain LF, unless the entire content
2; of the field is quoted. The file is simply rejected otherwise.
3; I took the liberty to take some inspiration from the somewhat similar IETF RFC 4180.
4
5document = (header / comment / record / empty-line) *(NEWLINE (comment / record / empty-line)) ; a header is only accepted as the very first line; a trailing NEWLINE is covered by a final empty-line
6
7header = OPENBRACK *NOTCRLF ; "[" followed by any characters other than CR and LF
8comment = SEMICOLON *NOTCRLF ; ";" followed by any characters other than CR and LF
9
10empty-line = *WHITESPACE ; zero or more whitespace characters, so this also matches the empty string
11
12record = field *(PIPE field) ; one or more fields separated by "|"
13field = *WHITESPACE field-data *WHITESPACE ; whitespace is allowed on either side of the field data
14field-data = escaped / unescaped ; either a double-quoted field or a bare one
15
16; Unescaped fields may also contain double quotes (anywhere except as the first
17; character); they are just not interpreted in any special way there.
18escaped = DQUOTE *(TEXTDATA / WHITESPACE / CR / LF / PIPE / 2DQUOTE) DQUOTE ; quoted content may contain CR, LF or CR LF; a literal double quote is written as two DQUOTEs
19unescaped = [TEXTDATA *(*WHITESPACE (TEXTDATA / DQUOTE))] ; may be empty; never starts with DQUOTE and never ends in whitespace
20
21HTAB = %x09 ; <horizontal tab, "\t">
22LF = %x0A ; <line feed, "\n">
23VTAB = %x0B ; <vertical tab, "\v">
24FF = %x0C ; <form feed, "\f">
25CR = %x0D ; <carriage return, "\r">
26SPACE = %x20 ; <space, " ">
27DQUOTE = %x22 ; <double quote, '"'>
28SEMICOLON = %x3B ; <semicolon, ";">
29OPENBRACK = %x5B ; <opening square bracket, "[">
30PIPE = %x7C ; <vertical bar, "|">
31
32; All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE.
33; NOTE(review): the ranges below also skip %x5B (OPENBRACK), which this list omits;
34; confirm whether "[" is really meant to be disallowed inside fields.
35; Semicolon is included: comments are only defined as 'lines starting with a semicolon', so a semicolon inside a field does not turn the rest of the line into a comment.
36TEXTDATA = %x00-08 / %x0E-1F / %x21 / %x23-5A / %x5C-7B / %x7D-10FFFF
37
38; LF is deliberately left out of the whitespace set, even though TMI8/KV1 does not
39; officially treat a lone LF as a newline (newlines are defined as 'CR optionally followed by LF').
40WHITESPACE = HTAB / VTAB / FF / SPACE
41
42; All codepoints excluding CR and LF
43NOTCRLF = %x00-09 / %x0B-0C / %x0E-10FFFF ; the gaps are %x0A (LF) and %x0D (CR)
44NEWLINE = CR [LF] ; a bare CR or a CR LF pair; a lone LF does not match