Initial commit

Based on dc4ba6a
author: Rutger Broekhoff 2024-05-02 20:27:40 +0200
committer: Rutger Broekhoff 2024-05-02 20:27:40 +0200
commit: 17a3ea880402338420699e03bcb24181e4ff3924 (patch)
tree: da666ef91e0b60d20aa0b01529644c136fd1f4ab /src/querykv1/grammar.ebnf
download: oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz
oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip
1 files changed, 47 insertions, 0 deletions
diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf
new file mode 100644
index 0000000..94f8cde
--- /dev/null
+++ b/src/querykv1/grammar.ebnf
@@ -0,0 +1,47 @@
+/* This grammar allows fields to contain stray LFs, i.e. LFs that do not
+ * directly follow a CR. I took the liberty of drawing some inspiration from
+ * the somewhat similar IETF RFC 4180.
+ */
+document   ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header
+header     ::= OPENBRACK NOTCR*
+comment    ::= SEMICOLON NOTCR*
+empty-line ::= WHITESPACE*
+record     ::= field (PIPE field)*
+field      ::= WHITESPACE* field-data WHITESPACE*
+field-data ::= DQUOTE escaped DQUOTE | unescaped
+/* Unescaped fields may also contain double quotes (anywhere but as their
+ * first character); there they are not interpreted in any special way.
+ */
+escaped    ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)*
+unescaped  ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)?
+HTAB       ::= #x09  /* <horizontal tab,  "\t"> */
+LF         ::= #x0A  /* <line feed,       "\n"> */
+VTAB       ::= #x0B  /* <vertical tab,    "\v"> */
+FF         ::= #x0C  /* <form feed,       "\f"> */
+CR         ::= #x0D  /* <carriage return, "\r"> */
+SPACE      ::= #x20  /* <space, " "> */
+DQUOTE     ::= #x22  /* " */
+SEMICOLON  ::= #x3B  /* ; */
+OPENBRACK  ::= #x5B  /* [ */
+PIPE       ::= #x7C  /* | */
+/* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and
+ * OPENBRACK (NOTE(review): confirm excluding '[' is intended). Semicolon is
+ * included: comments are only defined as 'lines starting with a semicolon',
+ * so a semicolon inside a field does not start a comment.
+ */
+TEXTDATA   ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF]
+/* Including LF here as TMI8/KV1 does not consider it a newline,
+ * as newlines are defined as 'CR optionally followed by LF'
+ */
+WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB
+/* All codepoints excluding CR; LF is allowed, as it is not a newline here */
+NOTCR      ::= [#x00-#x0C#x0E-#x10FFFF]
+NEWLINE    ::= CR LF?
author	Rutger Broekhoff	2024-05-02 20:27:40 +0200
committer	Rutger Broekhoff	2024-05-02 20:27:40 +0200
commit	17a3ea880402338420699e03bcb24181e4ff3924 (patch)
tree	da666ef91e0b60d20aa0b01529644c136fd1f4ab /src/querykv1/grammar.ebnf
download	oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip

diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf new file mode 100644 index 0000000..94f8cde --- /dev/null +++ b/src/querykv1/grammar.ebnf
@@ -0,0 +1,47 @@
	1	/* This grammar allows fields to contain stray LFs, i.e. LFs that do not
	2	* directly follow a CR. I took the liberty of drawing some inspiration from
	3	* the somewhat similar IETF RFC 4180.
	4	*/
	5	document ::= (header NEWLINE)? (comment \| record \| empty-line) (NEWLINE (comment \| record \| empty-line))* NEWLINE? \| header
	6
	7	header ::= OPENBRACK NOTCR*
	8	comment ::= SEMICOLON NOTCR*
	9
	10	empty-line ::= WHITESPACE*
	11
	12	record ::= field (PIPE field)*
	13	field ::= WHITESPACE* field-data WHITESPACE*
	14	field-data ::= DQUOTE escaped DQUOTE \| unescaped
	15
	16	/* Unescaped fields may also contain double quotes (anywhere but as their
	17	* first character); there they are not interpreted in any special way.
	18	*/
	19	escaped ::= (TEXTDATA \| WHITESPACE \| NEWLINE \| PIPE \| DQUOTE DQUOTE)*
	20	unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA \| DQUOTE))*)?
	21
	22	HTAB ::= #x09 /* <horizontal tab, "\t"> */
	23	LF ::= #x0A /* <line feed, "\n"> */
	24	VTAB ::= #x0B /* <vertical tab, "\v"> */
	25	FF ::= #x0C /* <form feed, "\f"> */
	26	CR ::= #x0D /* <carriage return, "\r"> */
	27	SPACE ::= #x20 /* <space, " "> */
	28	DQUOTE ::= #x22 /* " */
	29	SEMICOLON ::= #x3B /* ; */
	30	OPENBRACK ::= #x5B /* [ */
	31	PIPE ::= #x7C /* \| */
	32
	33	/* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and
	34	* OPENBRACK (NOTE(review): confirm excluding '[' is intended). Semicolon is
	35	* included: comments are only defined as 'lines starting with a semicolon',
	36	* so a semicolon inside a field does not start a comment.
	37	*/
	38	TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF]
	39
	40	/* Including LF here as TMI8/KV1 does not consider it a newline,
	41	* as newlines are defined as 'CR optionally followed by LF'
	42	*/
	43	WHITESPACE ::= SPACE \| LF \| FF \| HTAB \| VTAB
	44
	45	/* All codepoints excluding CR; LF is allowed, as it is not a newline here */
	46	NOTCR ::= [#x00-#x0C#x0E-#x10FFFF]
	47	NEWLINE ::= CR LF?