aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Rutger Broekhoff2024-05-02 20:27:40 +0200
committerLibravatar Rutger Broekhoff2024-05-02 20:27:40 +0200
commit17a3ea880402338420699e03bcb24181e4ff3924 (patch)
treeda666ef91e0b60d20aa0b01529644c136fd1f4ab
downloadoeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz
oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip
Initial commit
Based on dc4ba6a
-rw-r--r--.envrc1
-rw-r--r--.gitattributes1
-rw-r--r--.gitignore7
-rw-r--r--LICENSE287
-rw-r--r--README.txt13
-rwxr-xr-xall-deterministic.sh3
-rwxr-xr-xbuild-all.sh3
-rw-r--r--flake.lock79
-rw-r--r--flake.nix137
-rw-r--r--lib/libtmi8/.envrc3
-rw-r--r--lib/libtmi8/.gitignore3
-rw-r--r--lib/libtmi8/Makefile41
-rw-r--r--lib/libtmi8/flake.lock58
-rw-r--r--lib/libtmi8/flake.nix42
-rw-r--r--lib/libtmi8/include/tmi8/kv1_index.hpp135
-rw-r--r--lib/libtmi8/include/tmi8/kv1_lexer.hpp46
-rw-r--r--lib/libtmi8/include/tmi8/kv1_parser.hpp87
-rw-r--r--lib/libtmi8/include/tmi8/kv1_types.hpp1528
-rw-r--r--lib/libtmi8/include/tmi8/kv6_parquet.hpp46
-rw-r--r--lib/libtmi8/src/kv1_index.cpp461
-rw-r--r--lib/libtmi8/src/kv1_lexer.cpp152
-rw-r--r--lib/libtmi8/src/kv1_parser.cpp1258
-rw-r--r--lib/libtmi8/src/kv1_types.cpp773
-rw-r--r--lib/libtmi8/src/kv6_parquet.cpp102
-rw-r--r--module/default.nix118
-rw-r--r--script/archiver/default.nix15
-rwxr-xr-xscript/archiver/oeuf-archiver.sh31
-rw-r--r--script/synckv6/default.nix15
-rwxr-xr-xscript/synckv6/oeuf-synckv6.sh43
-rw-r--r--src/augmentkv6/.envrc2
-rw-r--r--src/augmentkv6/Makefile21
-rw-r--r--src/augmentkv6/main.cpp510
-rw-r--r--src/bundleparquet/.envrc2
-rw-r--r--src/bundleparquet/Makefile21
-rw-r--r--src/bundleparquet/main.cpp213
-rw-r--r--src/bundleparquet/spliturl.cpp203
-rw-r--r--src/bundleparquet/spliturl.hpp11
-rw-r--r--src/filterkv6/.envrc2
-rw-r--r--src/filterkv6/Makefile21
-rw-r--r--src/filterkv6/main.cpp106
-rw-r--r--src/querykv1/.envrc2
-rw-r--r--src/querykv1/.gitignore1
-rw-r--r--src/querykv1/Makefile28
-rw-r--r--src/querykv1/cliopts.cpp456
-rw-r--r--src/querykv1/cliopts.hpp35
-rw-r--r--src/querykv1/daterange.cpp91
-rw-r--r--src/querykv1/daterange.hpp118
-rw-r--r--src/querykv1/grammar.abnf44
-rw-r--r--src/querykv1/grammar.ebnf47
-rw-r--r--src/querykv1/grammar.ebnf.bak23
-rw-r--r--src/querykv1/joparoute.cpp102
-rw-r--r--src/querykv1/joparoute.hpp13
-rw-r--r--src/querykv1/journeyinfo.cpp64
-rw-r--r--src/querykv1/journeyinfo.hpp13
-rw-r--r--src/querykv1/journeyroute.cpp96
-rw-r--r--src/querykv1/journeyroute.hpp13
-rw-r--r--src/querykv1/journeys.cpp95
-rw-r--r--src/querykv1/journeys.hpp13
-rw-r--r--src/querykv1/main.cpp198
-rw-r--r--src/querykv1/schedule.cpp63
-rw-r--r--src/querykv1/schedule.hpp13
-rw-r--r--src/recvkv6/.envrc2
-rw-r--r--src/recvkv6/Makefile21
-rw-r--r--src/recvkv6/main.cpp1300
64 files changed, 9451 insertions, 0 deletions
diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..3550a30
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
use flake
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..d92417a
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
*.tif filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9d7718b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,7 @@
1.direnv/
2src/augmentkv6/augmentkv6
3src/bundleparquet/bundleparquet
4src/filterkv6/filterkv6
5src/querykv1/querykv1
6src/recvkv6/recvkv6
7result*
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..4153cd3
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,287 @@
1 EUROPEAN UNION PUBLIC LICENCE v. 1.2
2 EUPL © the European Union 2007, 2016
3
4This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined
5below) which is provided under the terms of this Licence. Any use of the Work,
6other than as authorised under this Licence is prohibited (to the extent such
7use is covered by a right of the copyright holder of the Work).
8
9The Work is provided under the terms of this Licence when the Licensor (as
10defined below) has placed the following notice immediately following the
11copyright notice for the Work:
12
13 Licensed under the EUPL
14
15or has expressed by any other means his willingness to license under the EUPL.
16
171. Definitions
18
19In this Licence, the following terms have the following meaning:
20
21- ‘The Licence’: this Licence.
22
23- ‘The Original Work’: the work or software distributed or communicated by the
24 Licensor under this Licence, available as Source Code and also as Executable
25 Code as the case may be.
26
27- ‘Derivative Works’: the works or software that could be created by the
28 Licensee, based upon the Original Work or modifications thereof. This Licence
29 does not define the extent of modification or dependence on the Original Work
30 required in order to classify a work as a Derivative Work; this extent is
31 determined by copyright law applicable in the country mentioned in Article 15.
32
33- ‘The Work’: the Original Work or its Derivative Works.
34
35- ‘The Source Code’: the human-readable form of the Work which is the most
36 convenient for people to study and modify.
37
38- ‘The Executable Code’: any code which has generally been compiled and which is
39 meant to be interpreted by a computer as a program.
40
41- ‘The Licensor’: the natural or legal person that distributes or communicates
42 the Work under the Licence.
43
44- ‘Contributor(s)’: any natural or legal person who modifies the Work under the
45 Licence, or otherwise contributes to the creation of a Derivative Work.
46
47- ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of
48 the Work under the terms of the Licence.
49
50- ‘Distribution’ or ‘Communication’: any act of selling, giving, lending,
51 renting, distributing, communicating, transmitting, or otherwise making
52 available, online or offline, copies of the Work or providing access to its
53 essential functionalities at the disposal of any other natural or legal
54 person.
55
562. Scope of the rights granted by the Licence
57
58The Licensor hereby grants You a worldwide, royalty-free, non-exclusive,
59sublicensable licence to do the following, for the duration of copyright vested
60in the Original Work:
61
62- use the Work in any circumstance and for all usage,
63- reproduce the Work,
64- modify the Work, and make Derivative Works based upon the Work,
65- communicate to the public, including the right to make available or display
66 the Work or copies thereof to the public and perform publicly, as the case may
67 be, the Work,
68- distribute the Work or copies thereof,
69- lend and rent the Work or copies thereof,
70- sublicense rights in the Work or copies thereof.
71
72Those rights can be exercised on any media, supports and formats, whether now
73known or later invented, as far as the applicable law permits so.
74
75In the countries where moral rights apply, the Licensor waives his right to
76exercise his moral right to the extent allowed by law in order to make effective
77the licence of the economic rights here above listed.
78
79The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to
80any patents held by the Licensor, to the extent necessary to make use of the
81rights granted on the Work under this Licence.
82
833. Communication of the Source Code
84
85The Licensor may provide the Work either in its Source Code form, or as
86Executable Code. If the Work is provided as Executable Code, the Licensor
87provides in addition a machine-readable copy of the Source Code of the Work
88along with each copy of the Work that the Licensor distributes or indicates, in
89a notice following the copyright notice attached to the Work, a repository where
90the Source Code is easily and freely accessible for as long as the Licensor
91continues to distribute or communicate the Work.
92
934. Limitations on copyright
94
95Nothing in this Licence is intended to deprive the Licensee of the benefits from
96any exception or limitation to the exclusive rights of the rights owners in the
97Work, of the exhaustion of those rights or of other applicable limitations
98thereto.
99
1005. Obligations of the Licensee
101
102The grant of the rights mentioned above is subject to some restrictions and
103obligations imposed on the Licensee. Those obligations are the following:
104
105Attribution right: The Licensee shall keep intact all copyright, patent or
106trademarks notices and all notices that refer to the Licence and to the
107disclaimer of warranties. The Licensee must include a copy of such notices and a
108copy of the Licence with every copy of the Work he/she distributes or
109communicates. The Licensee must cause any Derivative Work to carry prominent
110notices stating that the Work has been modified and the date of modification.
111
112Copyleft clause: If the Licensee distributes or communicates copies of the
113Original Works or Derivative Works, this Distribution or Communication will be
114done under the terms of this Licence or of a later version of this Licence
115unless the Original Work is expressly distributed only under this version of the
116Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee
117(becoming Licensor) cannot offer or impose any additional terms or conditions on
118the Work or Derivative Work that alter or restrict the terms of the Licence.
119
120Compatibility clause: If the Licensee Distributes or Communicates Derivative
121Works or copies thereof based upon both the Work and another work licensed under
122a Compatible Licence, this Distribution or Communication can be done under the
123terms of this Compatible Licence. For the sake of this clause, ‘Compatible
124Licence’ refers to the licences listed in the appendix attached to this Licence.
125Should the Licensee's obligations under the Compatible Licence conflict with
126his/her obligations under this Licence, the obligations of the Compatible
127Licence shall prevail.
128
129Provision of Source Code: When distributing or communicating copies of the Work,
130the Licensee will provide a machine-readable copy of the Source Code or indicate
131a repository where this Source will be easily and freely available for as long
132as the Licensee continues to distribute or communicate the Work.
133
134Legal Protection: This Licence does not grant permission to use the trade names,
135trademarks, service marks, or names of the Licensor, except as required for
136reasonable and customary use in describing the origin of the Work and
137reproducing the content of the copyright notice.
138
1396. Chain of Authorship
140
141The original Licensor warrants that the copyright in the Original Work granted
142hereunder is owned by him/her or licensed to him/her and that he/she has the
143power and authority to grant the Licence.
144
145Each Contributor warrants that the copyright in the modifications he/she brings
146to the Work are owned by him/her or licensed to him/her and that he/she has the
147power and authority to grant the Licence.
148
149Each time You accept the Licence, the original Licensor and subsequent
150Contributors grant You a licence to their contributions to the Work, under the
151terms of this Licence.
152
1537. Disclaimer of Warranty
154
155The Work is a work in progress, which is continuously improved by numerous
156Contributors. It is not a finished work and may therefore contain defects or
157‘bugs’ inherent to this type of development.
158
159For the above reason, the Work is provided under the Licence on an ‘as is’ basis
160and without warranties of any kind concerning the Work, including without
161limitation merchantability, fitness for a particular purpose, absence of defects
162or errors, accuracy, non-infringement of intellectual property rights other than
163copyright as stated in Article 6 of this Licence.
164
165This disclaimer of warranty is an essential part of the Licence and a condition
166for the grant of any rights to the Work.
167
1688. Disclaimer of Liability
169
170Except in the cases of wilful misconduct or damages directly caused to natural
171persons, the Licensor will in no event be liable for any direct or indirect,
172material or moral, damages of any kind, arising out of the Licence or of the use
173of the Work, including without limitation, damages for loss of goodwill, work
174stoppage, computer failure or malfunction, loss of data or any commercial
175damage, even if the Licensor has been advised of the possibility of such damage.
176However, the Licensor will be liable under statutory product liability laws as
177far such laws apply to the Work.
178
1799. Additional agreements
180
181While distributing the Work, You may choose to conclude an additional agreement,
182defining obligations or services consistent with this Licence. However, if
183accepting obligations, You may act only on your own behalf and on your sole
184responsibility, not on behalf of the original Licensor or any other Contributor,
185and only if You agree to indemnify, defend, and hold each Contributor harmless
186for any liability incurred by, or claims asserted against such Contributor by
187the fact You have accepted any warranty or additional liability.
188
18910. Acceptance of the Licence
190
191The provisions of this Licence can be accepted by clicking on an icon ‘I agree’
192placed under the bottom of a window displaying the text of this Licence or by
193affirming consent in any other similar way, in accordance with the rules of
194applicable law. Clicking on that icon indicates your clear and irrevocable
195acceptance of this Licence and all of its terms and conditions.
196
197Similarly, you irrevocably accept this Licence and all of its terms and
198conditions by exercising any rights granted to You by Article 2 of this Licence,
199such as the use of the Work, the creation by You of a Derivative Work or the
200Distribution or Communication by You of the Work or copies thereof.
201
20211. Information to the public
203
204In case of any Distribution or Communication of the Work by means of electronic
205communication by You (for example, by offering to download the Work from a
206remote location) the distribution channel or media (for example, a website) must
207at least provide to the public the information requested by the applicable law
208regarding the Licensor, the Licence and the way it may be accessible, concluded,
209stored and reproduced by the Licensee.
210
21112. Termination of the Licence
212
213The Licence and the rights granted hereunder will terminate automatically upon
214any breach by the Licensee of the terms of the Licence.
215
216Such a termination will not terminate the licences of any person who has
217received the Work from the Licensee under the Licence, provided such persons
218remain in full compliance with the Licence.
219
22013. Miscellaneous
221
222Without prejudice of Article 9 above, the Licence represents the complete
223agreement between the Parties as to the Work.
224
225If any provision of the Licence is invalid or unenforceable under applicable
226law, this will not affect the validity or enforceability of the Licence as a
227whole. Such provision will be construed or reformed so as necessary to make it
228valid and enforceable.
229
230The European Commission may publish other linguistic versions or new versions of
231this Licence or updated versions of the Appendix, so far this is required and
232reasonable, without reducing the scope of the rights granted by the Licence. New
233versions of the Licence will be published with a unique version number.
234
235All linguistic versions of this Licence, approved by the European Commission,
236have identical value. Parties can take advantage of the linguistic version of
237their choice.
238
23914. Jurisdiction
240
241Without prejudice to specific agreement between parties,
242
243- any litigation resulting from the interpretation of this License, arising
244 between the European Union institutions, bodies, offices or agencies, as a
245 Licensor, and any Licensee, will be subject to the jurisdiction of the Court
246 of Justice of the European Union, as laid down in article 272 of the Treaty on
247 the Functioning of the European Union,
248
249- any litigation arising between other parties and resulting from the
250 interpretation of this License, will be subject to the exclusive jurisdiction
251 of the competent court where the Licensor resides or conducts its primary
252 business.
253
25415. Applicable Law
255
256Without prejudice to specific agreement between parties,
257
258- this Licence shall be governed by the law of the European Union Member State
259 where the Licensor has his seat, resides or has his registered office,
260
261- this licence shall be governed by Belgian law if the Licensor has no seat,
262 residence or registered office inside a European Union Member State.
263
264Appendix
265
266‘Compatible Licences’ according to Article 5 EUPL are:
267
268- GNU General Public License (GPL) v. 2, v. 3
269- GNU Affero General Public License (AGPL) v. 3
270- Open Software License (OSL) v. 2.1, v. 3.0
271- Eclipse Public License (EPL) v. 1.0
272- CeCILL v. 2.0, v. 2.1
273- Mozilla Public Licence (MPL) v. 2
274- GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3
275- Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for
276 works other than software
277- European Union Public Licence (EUPL) v. 1.1, v. 1.2
278- Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong
279 Reciprocity (LiLiQ-R+).
280
281The European Commission may update this Appendix to later versions of the above
282licences without producing a new version of the EUPL, as long as they provide
283the rights granted in Article 2 of this Licence and protect the covered Source
284Code from exclusive appropriation.
285
286All other changes or additions to this Appendix require the production of a new
287EUPL version.
diff --git a/README.txt b/README.txt
new file mode 100644
index 0000000..469f5b4
--- /dev/null
+++ b/README.txt
@@ -0,0 +1,13 @@
1## Copyright Notice and License
2
3Copyright 2024 Rutger Broekhoff.
4Licensed under the EUPL.
5
6An English copy of version 1.2 of the EUPL license can be found in the LICENSE
7file. If you wish to read the license in another one of the (currently) 23
8official languages of the European Union, you can! You may find your version at
9 https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12
10
11For now, the code is licensed under the EUPL-1.2 license. If the time comes
12that I seriously need to think about what license to use, this may change.
13Nevertheless, I expect that this project will retain a copyleft license. \ No newline at end of file
diff --git a/all-deterministic.sh b/all-deterministic.sh
new file mode 100755
index 0000000..5a857ef
--- /dev/null
+++ b/all-deterministic.sh
@@ -0,0 +1,3 @@
#!/usr/bin/env bash

# Determinism check for every package exported by this flake.
#
#   1. `nix flake show --json` describes the flake outputs as JSON.
#   2. jq walks packages -> <system> -> <package>, takes each `name`, drops
#      nulls (`values`) and prints it as a `.#<name>` installable.
#   3. xargs hands all installables to a single `nix build --rebuild`, which
#      rebuilds already-built packages and compares the results.
nix flake show --json \
  | jq -r '.packages.[].[].name | values | ".#\(.)"' \
  | xargs nix build --rebuild
diff --git a/build-all.sh b/build-all.sh
new file mode 100755
index 0000000..ea45126
--- /dev/null
+++ b/build-all.sh
@@ -0,0 +1,3 @@
#!/usr/bin/env bash

# Build every package exported by this flake.
#
#   1. `nix flake show --json` describes the flake outputs as JSON.
#   2. jq walks packages -> <system> -> <package>, takes each `name`, drops
#      nulls (`values`) and prints it as a `.#<name>` installable.
#   3. xargs hands all installables to a single `nix build` invocation.
nix flake show --json \
  | jq -r '.packages.[].[].name | values | ".#\(.)"' \
  | xargs nix build
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..3b8b6fe
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,79 @@
1{
2 "nodes": {
3 "flake-utils": {
4 "inputs": {
5 "systems": "systems"
6 },
7 "locked": {
8 "lastModified": 1701680307,
9 "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=",
10 "rev": "4022d587cbbfd70fe950c1e2083a02621806a725",
11 "revCount": 88,
12 "type": "tarball",
13 "url": "https://api.flakehub.com/f/pinned/numtide/flake-utils/0.1.88%2Brev-4022d587cbbfd70fe950c1e2083a02621806a725/018c340d-3287-7c66-818b-f2f646a808e3/source.tar.gz"
14 },
15 "original": {
16 "type": "tarball",
17 "url": "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz"
18 }
19 },
20 "libtmi8": {
21 "inputs": {
22 "flake-utils": [
23 "flake-utils"
24 ],
25 "nixpkgs": [
26 "nixpkgs"
27 ]
28 },
29 "locked": {
30 "lastModified": 1,
31 "narHash": "sha256-ST9E5LANnA7IV5cY0fbk+MSukaOczxnkXV1/IH7ps4U=",
32 "path": "./lib/libtmi8",
33 "type": "path"
34 },
35 "original": {
36 "path": "./lib/libtmi8",
37 "type": "path"
38 }
39 },
40 "nixpkgs": {
41 "locked": {
42 "lastModified": 1702346276,
43 "narHash": "sha256-eAQgwIWApFQ40ipeOjVSoK4TEHVd6nbSd9fApiHIw5A=",
44 "rev": "cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7",
45 "revCount": 553141,
46 "type": "tarball",
47 "url": "https://api.flakehub.com/f/pinned/NixOs/nixpkgs/0.2311.553141%2Brev-cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7/018c652c-2ff2-777b-bade-dae9c2abe1e1/source.tar.gz"
48 },
49 "original": {
50 "type": "tarball",
51 "url": "https://flakehub.com/f/NixOs/nixpkgs/%2A.tar.gz"
52 }
53 },
54 "root": {
55 "inputs": {
56 "flake-utils": "flake-utils",
57 "libtmi8": "libtmi8",
58 "nixpkgs": "nixpkgs"
59 }
60 },
61 "systems": {
62 "locked": {
63 "lastModified": 1681028828,
64 "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
65 "owner": "nix-systems",
66 "repo": "default",
67 "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
68 "type": "github"
69 },
70 "original": {
71 "owner": "nix-systems",
72 "repo": "default",
73 "type": "github"
74 }
75 }
76 },
77 "root": "root",
78 "version": 7
79}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..df5fffb
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,137 @@
{
  inputs = {
    nixpkgs.url = "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz";
    flake-utils.url = "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz";
    # In-tree library flake; it follows this flake's nixpkgs and flake-utils.
    libtmi8 = {
      url = "path:./lib/libtmi8";
      inputs.nixpkgs.follows = "nixpkgs";
      inputs.flake-utils.follows = "flake-utils";
    };
  };

  outputs = { self, nixpkgs, flake-utils, libtmi8, ... }@inputs:
    {
      # System-independent outputs.
      nixosModules = rec {
        oeuf = import ./module self;
        default = oeuf;
      };
    } // flake-utils.lib.eachDefaultSystem
      (system:
        let
          # Exposes the libtmi8 flake's package as pkgs.oeuf-libtmi8.
          libtmi8Overlay = final: prev: { oeuf-libtmi8 = libtmi8.packages.${system}.oeuf-libtmi8; };

          pkgs = import nixpkgs {
            inherit system;
            overlays = [ libtmi8Overlay ];
          };
          boostPkg = pkgs.boost182;

          inherit (pkgs.gcc13) stdenv;

          # Builds one of the C++ tools under src/<tool> with its own Makefile
          # and installs the resulting binary as $out/bin/oeuf-<tool>.
          # All tools share the same source tree and build/install recipe;
          # only the dependency lists differ.
          mkTool = { tool, buildInputs, nativeBuildInputs ? [ pkgs.gcc13 ] }:
            stdenv.mkDerivation {
              name = "oeuf-${tool}";
              src = ./.;

              inherit nativeBuildInputs buildInputs;

              buildPhase = ''
                cd src/${tool}
                make ${tool}
              '';

              installPhase = ''
                mkdir -p $out/bin
                cp ${tool} $out/bin/oeuf-${tool}
              '';
            };

          oeuf-augmentkv6 = mkTool {
            tool = "augmentkv6";
            nativeBuildInputs = with pkgs; [ gcc13 boostPkg ];
            buildInputs = with pkgs; [ arrow-cpp oeuf-libtmi8 ];
          };

          oeuf-filterkv6 = mkTool {
            tool = "filterkv6";
            buildInputs = with pkgs; [ arrow-cpp oeuf-libtmi8 ];
          };

          oeuf-bundleparquet = mkTool {
            tool = "bundleparquet";
            buildInputs = with pkgs; [ arrow-cpp curl nlohmann_json prometheus-cpp zlib oeuf-libtmi8 ];
          };

          oeuf-querykv1 = mkTool {
            tool = "querykv1";
            buildInputs = with pkgs; [ oeuf-libtmi8 boostPkg ];
          };

          oeuf-recvkv6 = mkTool {
            tool = "recvkv6";
            buildInputs = with pkgs; [ zeromq zlib arrow-cpp nlohmann_json prometheus-cpp rapidxml oeuf-libtmi8 ];
          };

          # The archiver script is given a package set that also contains
          # oeuf-bundleparquet.
          oeuf-archiver = import ./script/archiver {
            pkgs = pkgs // { inherit oeuf-bundleparquet; };
          };

          oeuf-synckv6 = import ./script/synckv6 { inherit pkgs; };
        in
        {
          packages = {
            inherit
              oeuf-archiver
              oeuf-augmentkv6
              oeuf-synckv6
              oeuf-filterkv6
              oeuf-bundleparquet
              oeuf-querykv1
              oeuf-recvkv6;
          };

          devShells.default = pkgs.mkShell {
            inputsFrom = [ oeuf-bundleparquet oeuf-querykv1 oeuf-recvkv6 ];
          };

          formatter = pkgs.nixpkgs-fmt;
        });
}
diff --git a/lib/libtmi8/.envrc b/lib/libtmi8/.envrc
new file mode 100644
index 0000000..4e0d702
--- /dev/null
+++ b/lib/libtmi8/.envrc
@@ -0,0 +1,3 @@
1use flake
2
3export DEVMODE=1
diff --git a/lib/libtmi8/.gitignore b/lib/libtmi8/.gitignore
new file mode 100644
index 0000000..f6b8cf6
--- /dev/null
+++ b/lib/libtmi8/.gitignore
@@ -0,0 +1,3 @@
1src/*.o
2libtmi8.a
3libtmi8.so
diff --git a/lib/libtmi8/Makefile b/lib/libtmi8/Makefile
new file mode 100644
index 0000000..52a9807
--- /dev/null
+++ b/lib/libtmi8/Makefile
@@ -0,0 +1,41 @@
# Build rules for libtmi8 (static archive and shared object).
#
# Variables a caller may override:
#   DEVMODE  - when non-empty, warnings become errors (-Werror).
#   DESTDIR  - installation root; headers go to $(DESTDIR)/include/tmi8 and
#              libraries to $(DESTDIR)/lib.
#
# Compiler/linker hardening flags taken from:
#   Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
#   for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
#   2023. [Online]. Available:
#   https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer -Iinclude $(if $(DEVMODE),-Werror,)\
  -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
  -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
  -D_GLIBCXX_ASSERTIONS \
  -fstrict-flex-arrays=3 \
  -fstack-clash-protection -fstack-protector-strong
LDFLAGS=-larrow -lparquet -Wl,-z,defs \
  -Wl,-z,nodlopen -Wl,-z,noexecstack \
  -Wl,-z,relro -Wl,-z,now
DESTDIR=/usr/local

# Every public header must be listed here: the object rule below depends on
# this list, so a header that is missing from it will not trigger a rebuild.
LIBHDRS=include/tmi8/kv1_index.hpp include/tmi8/kv1_lexer.hpp include/tmi8/kv1_parser.hpp include/tmi8/kv1_types.hpp include/tmi8/kv6_parquet.hpp
LIBSRCS=src/kv1_index.cpp src/kv1_lexer.cpp src/kv1_parser.cpp src/kv1_types.cpp src/kv6_parquet.cpp
LIBOBJS=$(patsubst %.cpp,%.o,$(LIBSRCS))

.PHONY: all install libtmi8 clean
all: libtmi8

libtmi8: libtmi8.a libtmi8.so

# -f: do not fail when some (or all) artefacts have not been built yet.
clean:
	rm -f libtmi8.a libtmi8.so $(LIBOBJS)

# Both libraries are copied below, so both must be prerequisites.
install: libtmi8.a libtmi8.so $(LIBHDRS)
	install -D -m644 include/tmi8/* -t $(DESTDIR)/include/tmi8
	install -D -m644 libtmi8.a -t $(DESTDIR)/lib
	install -D -m644 libtmi8.so -t $(DESTDIR)/lib

# The same objects end up in libtmi8.so, so they have to be compiled as
# position-independent code. -fPIC is passed here rather than via CXXFLAGS so
# that it survives a CXXFLAGS override on the command line.
src/%.o: src/%.cpp $(LIBHDRS)
	$(CXX) -c -fPIC -o $@ $< $(CXXFLAGS)

libtmi8.a: $(LIBOBJS)
	$(AR) rcs $@ $^

libtmi8.so: $(LIBOBJS)
	$(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
diff --git a/lib/libtmi8/flake.lock b/lib/libtmi8/flake.lock
new file mode 100644
index 0000000..5ff7d5d
--- /dev/null
+++ b/lib/libtmi8/flake.lock
@@ -0,0 +1,58 @@
1{
2 "nodes": {
3 "flake-utils": {
4 "inputs": {
5 "systems": "systems"
6 },
7 "locked": {
8 "lastModified": 1701680307,
9 "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=",
10 "rev": "4022d587cbbfd70fe950c1e2083a02621806a725",
11 "revCount": 88,
12 "type": "tarball",
13 "url": "https://api.flakehub.com/f/pinned/numtide/flake-utils/0.1.88+rev-4022d587cbbfd70fe950c1e2083a02621806a725/018c340d-3287-7c66-818b-f2f646a808e3/source.tar.gz"
14 },
15 "original": {
16 "type": "tarball",
17 "url": "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz"
18 }
19 },
20 "nixpkgs": {
21 "locked": {
22 "lastModified": 1701539137,
23 "narHash": "sha256-nVO/5QYpf1GwjvtpXhyxx5M3U/WN0MwBro4Lsk+9mL0=",
24 "rev": "933d7dc155096e7575d207be6fb7792bc9f34f6d",
25 "revCount": 552571,
26 "type": "tarball",
27 "url": "https://api.flakehub.com/f/pinned/NixOs/nixpkgs/0.2311.552571+rev-933d7dc155096e7575d207be6fb7792bc9f34f6d/018c3242-a93c-7779-8d13-ddba0a38d24a/source.tar.gz"
28 },
29 "original": {
30 "type": "tarball",
31 "url": "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz"
32 }
33 },
34 "root": {
35 "inputs": {
36 "flake-utils": "flake-utils",
37 "nixpkgs": "nixpkgs"
38 }
39 },
40 "systems": {
41 "locked": {
42 "lastModified": 1681028828,
43 "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
44 "owner": "nix-systems",
45 "repo": "default",
46 "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
47 "type": "github"
48 },
49 "original": {
50 "owner": "nix-systems",
51 "repo": "default",
52 "type": "github"
53 }
54 }
55 },
56 "root": "root",
57 "version": 7
58}
diff --git a/lib/libtmi8/flake.nix b/lib/libtmi8/flake.nix
new file mode 100644
index 0000000..2ae7fc9
--- /dev/null
+++ b/lib/libtmi8/flake.nix
@@ -0,0 +1,42 @@
{
  inputs = {
    nixpkgs.url = "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz";
    flake-utils.url = "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz";
  };

  outputs = { self, nixpkgs, flake-utils, ... }@inputs:
    let
      # Outputs for a single system: the library package, a dev shell that
      # inherits the package's build inputs, and the Nix formatter.
      perSystem = system:
        let
          pkgs = import nixpkgs {
            inherit system;
            overlays = [ ];
          };

          inherit (pkgs.gcc13) stdenv;

          # Built and installed entirely through the Makefile in this
          # directory; `make install` is pointed at the store path via DESTDIR.
          oeuf-libtmi8 = stdenv.mkDerivation {
            name = "oeuf-libtmi8";
            src = pkgs.lib.cleanSource ./.;

            nativeBuildInputs = [ pkgs.gcc13 ];
            buildInputs = [ pkgs.arrow-cpp pkgs.boost182 ];

            buildPhase = ''
              make libtmi8
            '';

            installPhase = ''
              make install DESTDIR="$out"
            '';
          };
        in
        {
          packages = { inherit oeuf-libtmi8; };

          devShells.default = pkgs.mkShell {
            inputsFrom = [ oeuf-libtmi8 ];
          };

          formatter = pkgs.nixpkgs-fmt;
        };
    in
    flake-utils.lib.eachDefaultSystem perSystem;
}
diff --git a/lib/libtmi8/include/tmi8/kv1_index.hpp b/lib/libtmi8/include/tmi8/kv1_index.hpp
new file mode 100644
index 0000000..621acf6
--- /dev/null
+++ b/lib/libtmi8/include/tmi8/kv1_index.hpp
@@ -0,0 +1,135 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_LIBTMI8_KV1_INDEX_HPP
4#define OEUF_LIBTMI8_KV1_INDEX_HPP
5
6#include <unordered_map>
7
8#include <boost/container_hash/hash.hpp>
9
10#include <tmi8/kv1_types.hpp>
11
12struct Kv1Index {
13 Kv1Records *records;
14
15 explicit Kv1Index(Kv1Records *records);
16
17 std::unordered_map<
18 Kv1OrganizationalUnit::Key,
19 Kv1OrganizationalUnit *,
20 boost::hash<Kv1OrganizationalUnit::Key>> organizational_units;
21 std::unordered_map<
22 Kv1HigherOrganizationalUnit::Key,
23 Kv1HigherOrganizationalUnit *,
24 boost::hash<Kv1HigherOrganizationalUnit::Key>> higher_organizational_units;
25 std::unordered_map<
26 Kv1UserStopPoint::Key,
27 Kv1UserStopPoint *,
28 boost::hash<Kv1UserStopPoint::Key>> user_stop_points;
29 std::unordered_map<
30 Kv1UserStopArea::Key,
31 Kv1UserStopArea *,
32 boost::hash<Kv1UserStopArea::Key>> user_stop_areas;
33 std::unordered_map<
34 Kv1TimingLink::Key,
35 Kv1TimingLink *,
36 boost::hash<Kv1TimingLink::Key>> timing_links;
37 std::unordered_map<
38 Kv1Link::Key,
39 Kv1Link *,
40 boost::hash<Kv1Link::Key>> links;
41 std::unordered_map<
42 Kv1Line::Key,
43 Kv1Line *,
44 boost::hash<Kv1Line::Key>> lines;
45 std::unordered_map<
46 Kv1Destination::Key,
47 Kv1Destination *,
48 boost::hash<Kv1Destination::Key>> destinations;
49 std::unordered_map<
50 Kv1JourneyPattern::Key,
51 Kv1JourneyPattern *,
52 boost::hash<Kv1JourneyPattern::Key>> journey_patterns;
53 std::unordered_map<
54 Kv1ConcessionFinancerRelation::Key,
55 Kv1ConcessionFinancerRelation *,
56 boost::hash<Kv1ConcessionFinancerRelation::Key>> concession_financer_relations;
57 std::unordered_map<
58 Kv1ConcessionArea::Key,
59 Kv1ConcessionArea *,
60 boost::hash<Kv1ConcessionArea::Key>> concession_areas;
61 std::unordered_map<
62 Kv1Financer::Key,
63 Kv1Financer *,
64 boost::hash<Kv1Financer::Key>> financers;
65 std::unordered_map<
66 Kv1JourneyPatternTimingLink::Key,
67 Kv1JourneyPatternTimingLink *,
68 boost::hash<Kv1JourneyPatternTimingLink::Key>> journey_pattern_timing_links;
69 std::unordered_map<
70 Kv1Point::Key,
71 Kv1Point *,
72 boost::hash<Kv1Point::Key>> points;
73 std::unordered_map<
74 Kv1PointOnLink::Key,
75 Kv1PointOnLink *,
76 boost::hash<Kv1PointOnLink::Key>> point_on_links;
77 std::unordered_map<
78 Kv1Icon::Key,
79 Kv1Icon *,
80 boost::hash<Kv1Icon::Key>> icons;
81 std::unordered_map<
82 Kv1Notice::Key,
83 Kv1Notice *,
84 boost::hash<Kv1Notice::Key>> notices;
85 std::unordered_map<
86 Kv1TimeDemandGroup::Key,
87 Kv1TimeDemandGroup *,
88 boost::hash<Kv1TimeDemandGroup::Key>> time_demand_groups;
89 std::unordered_map<
90 Kv1TimeDemandGroupRunTime::Key,
91 Kv1TimeDemandGroupRunTime *,
92 boost::hash<Kv1TimeDemandGroupRunTime::Key>> time_demand_group_run_times;
93 std::unordered_map<
94 Kv1PeriodGroup::Key,
95 Kv1PeriodGroup *,
96 boost::hash<Kv1PeriodGroup::Key>> period_groups;
97 std::unordered_map<
98 Kv1SpecificDay::Key,
99 Kv1SpecificDay *,
100 boost::hash<Kv1SpecificDay::Key>> specific_days;
101 std::unordered_map<
102 Kv1TimetableVersion::Key,
103 Kv1TimetableVersion *,
104 boost::hash<Kv1TimetableVersion::Key>> timetable_versions;
105 std::unordered_map<
106 Kv1PublicJourney::Key,
107 Kv1PublicJourney *,
108 boost::hash<Kv1PublicJourney::Key>> public_journeys;
109 std::unordered_map<
110 Kv1PeriodGroupValidity::Key,
111 Kv1PeriodGroupValidity *,
112 boost::hash<Kv1PeriodGroupValidity::Key>> period_group_validities;
113 std::unordered_map<
114 Kv1ExceptionalOperatingDay::Key,
115 Kv1ExceptionalOperatingDay *,
116 boost::hash<Kv1ExceptionalOperatingDay::Key>> exceptional_operating_days;
117 std::unordered_map<
118 Kv1ScheduleVersion::Key,
119 Kv1ScheduleVersion *,
120 boost::hash<Kv1ScheduleVersion::Key>> schedule_versions;
121 std::unordered_map<
122 Kv1PublicJourneyPassingTimes::Key,
123 Kv1PublicJourneyPassingTimes *,
124 boost::hash<Kv1PublicJourneyPassingTimes::Key>> public_journey_passing_times;
125 std::unordered_map<
126 Kv1OperatingDay::Key,
127 Kv1OperatingDay *,
128 boost::hash<Kv1OperatingDay::Key>> operating_days;
129
130 size_t size() const;
131};
132
133void kv1LinkRecords(Kv1Index &index);
134
135#endif // OEUF_LIBTMI8_KV1_INDEX_HPP
diff --git a/lib/libtmi8/include/tmi8/kv1_lexer.hpp b/lib/libtmi8/include/tmi8/kv1_lexer.hpp
new file mode 100644
index 0000000..df6a57c
--- /dev/null
+++ b/lib/libtmi8/include/tmi8/kv1_lexer.hpp
@@ -0,0 +1,46 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_LIBTMI8_KV1_LEXER_HPP
4#define OEUF_LIBTMI8_KV1_LEXER_HPP
5
6#include <cstdint>
7#include <cstring>
8#include <iostream>
9#include <string>
10#include <vector>
11#include <variant>
12
// Kinds of token that appear in Kv1Lexer::tokens.
enum Kv1TokenType {
  KV1_TOKEN_CELL = 0,     // a cell of a row
  KV1_TOKEN_ROW_END = 1,  // end-of-row marker
};

// A single token: its kind together with a string payload.
struct Kv1Token {
  Kv1TokenType type;
  std::string data;
};
18
19struct Kv1Lexer {
20 std::vector<std::string> errors;
21 std::vector<Kv1Token> tokens;
22
23 explicit Kv1Lexer(std::string_view input);
24
25 void lex();
26
27 private:
28 // Does not eat newline character.
29 void eatRestOfLine();
30 void lexOptionalHeader();
31 void lexOptionalComment();
32
33 static bool isWhitespace(int c);
34
35 void readQuotedColumn();
36 void readUnquotedColumn();
37 void lexRow();
38 // Returns true when a line ending was consumed.
39 bool eatWhitespace();
40
41 std::string_view input;
42 std::string_view slice;
43 std::string colbuf;
44};
45
46#endif // OEUF_LIBTMI8_KV1_LEXER_HPP
diff --git a/lib/libtmi8/include/tmi8/kv1_parser.hpp b/lib/libtmi8/include/tmi8/kv1_parser.hpp
new file mode 100644
index 0000000..ccd8ec6
--- /dev/null
+++ b/lib/libtmi8/include/tmi8/kv1_parser.hpp
@@ -0,0 +1,87 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_LIBTMI8_KV1_PARSER_HPP
4#define OEUF_LIBTMI8_KV1_PARSER_HPP
5
6#include <optional>
7#include <string>
8#include <string_view>
9#include <unordered_map>
10#include <vector>
11
12#include <tmi8/kv1_lexer.hpp>
13#include <tmi8/kv1_types.hpp>
14
15struct Kv1Parser {
16 explicit Kv1Parser(std::vector<Kv1Token> tokens, Kv1Records &parse_into);
17
18 void parse();
19
20 private:
21 // Method pointer to a method of Kv1Parser (i.e. a function that takes
22 // 'this'; is not static) that takes no arguments and also does not return
23 // anything.
24 using ParseFunc = void (Kv1Parser::*)();
25 static const std::unordered_map<std::string_view, ParseFunc> type_parsers;
26
27 bool atEnd() const;
28 void eatRowEnds();
29 const Kv1Token *cur() const;
30 const std::string *eatCell(std::string_view parsing_what);
31 std::string parseHeader();
32 void eatRestOfRow();
33
34 void requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value);
35 std::optional<bool> requireBoolean(std::string_view field, bool mandatory, std::string_view value);
36 std::optional<double> requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value);
37 std::optional<RgbColor> requireRgbColor(std::string_view field, bool mandatory, std::string_view value);
38 std::optional<double> requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value);
39
40 std::string eatString(std::string_view field, bool mandatory, size_t max_length);
41 std::optional<bool> eatBoolean(std::string_view field, bool mandatory);
42 std::optional<double> eatNumber(std::string_view field, bool mandatory, size_t max_digits);
43 std::optional<RgbColor> eatRgbColor(std::string_view field, bool mandatory);
44 std::optional<double> eatRdCoord(std::string_view field, bool mandatory, size_t min_digits);
45
46 void parseOrganizationalUnit();
47 void parseHigherOrganizationalUnit();
48 void parseUserStopPoint();
49 void parseUserStopArea();
50 void parseTimingLink();
51 void parseLink();
52 void parseLine();
53 void parseDestination();
54 void parseJourneyPattern();
55 void parseConcessionFinancerRelation();
56 void parseConcessionArea();
57 void parseFinancer();
58 void parseJourneyPatternTimingLink();
59 void parsePoint();
60 void parsePointOnLink();
61 void parseIcon();
62 void parseNotice();
63 void parseNoticeAssignment();
64 void parseTimeDemandGroup();
65 void parseTimeDemandGroupRunTime();
66 void parsePeriodGroup();
67 void parseSpecificDay();
68 void parseTimetableVersion();
69 void parsePublicJourney();
70 void parsePeriodGroupValidity();
71 void parseExceptionalOperatingDay();
72 void parseScheduleVersion();
73 void parsePublicJourneyPassingTimes();
74 void parseOperatingDay();
75
76 size_t pos = 0;
77 std::vector<Kv1Token> tokens;
78 const std::chrono::time_zone *amsterdam = std::chrono::locate_zone("Europe/Amsterdam");
79
80 public:
81 std::vector<std::string> warns;
82 std::vector<std::string> global_errors;
83 std::vector<std::string> record_errors;
84 Kv1Records &records;
85};
86
87#endif // OEUF_LIBTMI8_KV1_PARSER_HPP
diff --git a/lib/libtmi8/include/tmi8/kv1_types.hpp b/lib/libtmi8/include/tmi8/kv1_types.hpp
new file mode 100644
index 0000000..d4a0760
--- /dev/null
+++ b/lib/libtmi8/include/tmi8/kv1_types.hpp
@@ -0,0 +1,1528 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_LIBTMI8_KV1_TYPES_HPP
4#define OEUF_LIBTMI8_KV1_TYPES_HPP
5
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <variant>
#include <vector>
11
// Forward declarations of all KV1 record types. They allow Kv1Records to be
// declared ahead of the record definitions that follow in this header.
struct Kv1OrganizationalUnit;
struct Kv1HigherOrganizationalUnit;
struct Kv1UserStopPoint;
struct Kv1UserStopArea;
struct Kv1TimingLink;
struct Kv1Link;
struct Kv1Line;
struct Kv1Destination;
struct Kv1JourneyPattern;
struct Kv1ConcessionFinancerRelation;
struct Kv1ConcessionArea;
struct Kv1Financer;
struct Kv1JourneyPatternTimingLink;
struct Kv1Point;
struct Kv1PointOnLink;
struct Kv1Icon;
struct Kv1Notice;
struct Kv1NoticeAssignment;
struct Kv1TimeDemandGroup;
struct Kv1TimeDemandGroupRunTime;
struct Kv1PeriodGroup;
struct Kv1SpecificDay;
struct Kv1TimetableVersion;
struct Kv1PublicJourney;
struct Kv1PeriodGroupValidity;
struct Kv1ExceptionalOperatingDay;
struct Kv1ScheduleVersion;
struct Kv1PublicJourneyPassingTimes;
struct Kv1OperatingDay;

// One vector per KV1 record type.
//
// The element types are still incomplete here. std::vector permits that
// (since C++17) as long as each type is complete before any member of the
// corresponding vector is used.
struct Kv1Records {
  std::vector<Kv1OrganizationalUnit> organizational_units;
  std::vector<Kv1HigherOrganizationalUnit> higher_organizational_units;
  std::vector<Kv1UserStopPoint> user_stop_points;
  std::vector<Kv1UserStopArea> user_stop_areas;
  std::vector<Kv1TimingLink> timing_links;
  std::vector<Kv1Link> links;
  std::vector<Kv1Line> lines;
  std::vector<Kv1Destination> destinations;
  std::vector<Kv1JourneyPattern> journey_patterns;
  std::vector<Kv1ConcessionFinancerRelation> concession_financer_relations;
  std::vector<Kv1ConcessionArea> concession_areas;
  std::vector<Kv1Financer> financers;
  std::vector<Kv1JourneyPatternTimingLink> journey_pattern_timing_links;
  std::vector<Kv1Point> points;
  std::vector<Kv1PointOnLink> point_on_links;
  std::vector<Kv1Icon> icons;
  std::vector<Kv1Notice> notices;
  std::vector<Kv1NoticeAssignment> notice_assignments;
  std::vector<Kv1TimeDemandGroup> time_demand_groups;
  std::vector<Kv1TimeDemandGroupRunTime> time_demand_group_run_times;
  std::vector<Kv1PeriodGroup> period_groups;
  std::vector<Kv1SpecificDay> specific_days;
  std::vector<Kv1TimetableVersion> timetable_versions;
  std::vector<Kv1PublicJourney> public_journeys;
  std::vector<Kv1PeriodGroupValidity> period_group_validities;
  std::vector<Kv1ExceptionalOperatingDay> exceptional_operating_days;
  std::vector<Kv1ScheduleVersion> schedule_versions;
  std::vector<Kv1PublicJourneyPassingTimes> public_journey_passing_times;
  std::vector<Kv1OperatingDay> operating_days;

  // Defined outside this header; presumably the total number of records over
  // all vectors (TODO confirm against the definition).
  std::size_t size() const;
};
75
76// These definitions implement TMI8, KV1 Dienstregeling (Timetable) version
77// 8.3.0.2 (release), published by BISON on January 8, 2020.
78// (Filename: tmi8 dienstregeling (kv 1) v8.3.0.2, release.docx)
79//
80// This specification and other BISON specifications, as well as other
81// supplementary information, can be found on BISON's website:
82// https://bison.dova.nu/
83//
84// The specification that was used to create these definitions was downloaded
85// from the following address:
86// https://bison.dova.nu/sites/default/files/bestanden/tmi8_dienstregeling_kv_1_v8.3.0.2_release.pdf
87//
88// The KV1 table structure and the corresponding documentation describing the
89// relevant tables and fields, as presented here, is derived from the original
90// specification. Most documentation is a manually translated version of the
91// documentation as present in the specification. The specification is licensed
92// under CC BY-ND 3.0. The exact text of this license can be found on
93// https://creativecommons.org/licenses/by-nd/3.0/nl/.
94
// KV1 Table 1: Organizational Unit [ORUN] (MANDATORY)
//
// Groups trips that share the same validity features. An organizational unit
// may itself be part of a 'higher' unit.
//
// An organizational unit is defined as the unit for which the planning of
// trips is compiled. When defining organizational units, it is important that
// all trips within the package have a homogeneous validity (school holidays,
// shopping Sundays, foreign bank holidays).
//
// Belongs to the core data tables, which all KV1 variants have in common.
struct Kv1OrganizationalUnit {
  struct Key {
    // Key field, mandatory, max. 10 characters. Transport operator (taken
    // from the list defined in BISON enumeration E1).
    std::string data_owner_code;
    // Key field, mandatory, max. 10 characters.
    std::string organizational_unit_code;

    explicit Key(std::string data_owner_code, std::string organizational_unit_code);
  };

  Key key;
  // Mandatory, max. 50 characters.
  std::string name;
  // Mandatory, max. 10 characters.
  std::string organizational_unit_type;
  // Optional, max. 255 characters.
  std::string description;
};
127
128// KV1 Table 2: Higher Organizational Unit [ORUNORUN] (OPTIONAL)
129//
130// An in the hierarchy higher-ordered organizational unit for the purpose of
131// (among others) recording of (deviating) validities on the high level.
132//
133// This table is part of the core data tables, which are common for all KV1
134// variants.
135struct Kv1HigherOrganizationalUnit {
136 struct Key {
137 // Mandatory (key), at most 10 characters. Transport operator (from list as
138 // defined in BISON enumeration E1).
139 std::string data_owner_code;
140 // Mandatory (key), at most 10 characters. Parent, higher organizational unit
141 // that is referred to.
142 std::string organizational_unit_code_parent;
143 // Mandatory (key), at most 10 characters. Child, lower organizational unit.
144 std::string organizational_unit_code_child;
145 // Mandatory (key), at most 10 characters. [YYYY-MM-DD] Starting date of the
146 // hierarchical relation (can be a fixed value, e.g. 2006-12-31).
147 std::chrono::year_month_day valid_from;
148
149 explicit Key(std::string data_owner_code,
150 std::string organizational_unit_code_parent,
151 std::string organizational_unit_code_child,
152 std::chrono::year_month_day valid_from);
153 };
154
155 Key key;
156
157 Kv1OrganizationalUnit *p_organizational_unit_parent = nullptr;
158 Kv1OrganizationalUnit *p_organizational_unit_child = nullptr;
159};
160
161// KV1 Table 3: User Stop Point [USRSTOP]
162//
163// Stop or other point (e.g. Bridge, functioning as info for the bridge keeper)
164// for which times are recorded in the planning system of the transit operator.
165//
166// Coordinates of a UserStopPoint are recorded as Point. When defining
167// UserStopPoints, it is important that the coordinates can be unambiguously
168// and verifiably recorded. For a stop, the coordinates of the stop sign are
169// recorded. If there is no stop sign, the end of the bus stop (where the bus
170// normally halts) is recorded as the coordinate of the stop.
171//
172// This table is part of the core data tables, which are common for all KV1
173// variants.
174struct Kv1UserStopPoint {
175 struct Key {
176 // Mandatory (key), at most 10 characters. Transport operator (from list as
177 // defined in BISON enumeration E1).
178 std::string data_owner_code;
179 // Mandatory (key), at most 10 characters. Stop number in domain of operator.
180 std::string user_stop_code;
181
182 explicit Key(std::string data_owner_code,
183 std::string user_stop_code);
184 };
185
186 Key key;
187 // Optional, at most 10 characters. Stop number in domain of integrator,
188 // (initially) equal to UserStopCode.
189 std::string timing_point_code;
190 // Mandatory, at most 5 characters. Boolean indicator whether USRSTOP is used
191 // as boarding stop, true by default. False for e.g. dummy stop for bridge
192 // keeper.
193 bool get_in = true;
194 // Mandatory, at most 5 characters. Boolean indicator whether USRSTOP is used
195 // as alighting stop.
196 bool get_out = false;
197 // Mandatory, at most 50 characters. Stop name.
198 std::string name;
199 // Mandatory, at most 50 characters. Town name.
200 std::string town;
201 // Optional, at most 10 characters. Reference to StopArea of which the
202 // UserStop is part.
203 std::string user_stop_area_code;
204 // Mandatory, at most 10 characters. Platform indication/letter. The '-'
205 // value is used to indication that this is not applicable.
206 std::string stop_side_code;
207 // Mandatory, at most 5 digits. Minimal stop duration for boarding and
208 // alighting, zero by default. In seconds.
209 double minimal_stop_time_s = 0;
210 // Optional, at most 3 digits. Length of stop platform.
211 std::optional<double> stop_side_length;
212 // Optional, at most 255 characters.
213 std::string description;
214 // Mandatory, at most 10 characters. USRSTOPTYPE. Indicates the stop kind.
215 std::string user_stop_type;
216 // Optional, at most 30 characters. Nationally unique stop number.
217 std::string quay_code;
218
219 Kv1UserStopArea *p_user_stop_area = nullptr;
220 Kv1Point *p_point = nullptr;
221};
222
// KV1 Table 4: User Stop Area [USRSTAR]
//
// A StopArea is a collection of stops that carry the same name for passengers
// and logically belong together (e.g. a bus station or transfer point). Stops
// lying opposite each other can also form a StopArea.
//
// Used to display all stops of a stop area on an overview display and to
// announce stop names (stops on both sides of the street share the same
// name).
//
// Belongs to the core data tables, which all KV1 variants have in common.
struct Kv1UserStopArea {
  struct Key {
    // Key field, mandatory, max. 10 characters. Transport operator (taken
    // from the list defined in BISON enumeration E1).
    std::string data_owner_code;
    // Key field, mandatory, max. 10 characters. Code of the StopArea
    // following the coding of the operator, e.g. PlaceCode.
    std::string user_stop_area_code;

    explicit Key(std::string data_owner_code, std::string user_stop_area_code);
  };

  Key key;
  // Mandatory, max. 50 characters.
  std::string name;
  // Mandatory, max. 50 characters.
  std::string town;
  // Mandatory, max. 255 characters.
  std::string description;
};
256
257// KV1 Table 5: Timing Link [TILI]
258//
259// Link between two points which have the feature 'stop' or 'timing point'. A
260// Timing Link is set between all stops and other timing points (e.g. for the
261// bridge) which make part of a journey pattern.
262//
263// This table is part of the core data tables, which are common for all KV1
264// variants.
265struct Kv1TimingLink {
266 struct Key {
267 // Mandatory (key), at most 10 characters. Transport operator (from list as
268 // defined in BISON enumeration E1).
269 std::string data_owner_code;
270 // Mandatory (key), at most 10 characters. Stop number in the domain of
271 // DataOwner (here: the operator).
272 std::string user_stop_code_begin;
273 // Mandatory (key), at most 10 characters. Stop number in the domain of
274 // DataOwner (here: the operator).
275 std::string user_stop_code_end;
276
277 explicit Key(std::string data_owner_code,
278 std::string user_stop_code_begin,
279 std::string user_stop_code_end);
280 };
281
282 Key key;
283 // Optional, at most 5 digits. Minimal trip time (in seconds).
284 std::optional<double> minimal_drive_time_s;
285 // Optional, at most 255 characters.
286 std::string description;
287
288 Kv1UserStopPoint *p_user_stop_begin = nullptr;
289 Kv1UserStopPoint *p_user_stop_end = nullptr;
290};
291
292// KV1 Table 6: Link [LINK]
293//
294// A route link describes the connection between to points on the physical path
295// of a route.
296//
297// This table is part of the core data tables, which are common for all KV1
298// variants.
299struct Kv1Link {
300 struct Key {
301 // Mandatory (key), at most 10 characters. Transport operator (from list as
302 // defined in BISON enumeration E1).
303 std::string data_owner_code;
304 // Mandatory (key), at most 10 characters. Stop code in the domain of
305 // DataOwner (here: the operator).
306 std::string user_stop_code_begin;
307 // Mandatory (key), at most 10 characters. Stop code in the domain of
308 // DataOwner (here: the operator).
309 std::string user_stop_code_end;
310 // Mandatory (key), at most 5 characters. Modality for which the distance
311 // applies, see BISON enumeration E9.
312 // TODO: Check if BISON enumeration E9 can be put into an enum.
313 std::string transport_type;
314
315 explicit Key(std::string data_owner_code,
316 std::string user_stop_code_begin,
317 std::string user_stop_code_end,
318 std::string transport_type);
319 };
320
321 Key key;
322 // Mandatory, at most 6 digits. Length of the link (in meters).
323 double distance = 0;
324 // Optional, at most 255 characters.
325 std::string description;
326
327 Kv1UserStopPoint *p_user_stop_begin = nullptr;
328 Kv1UserStopPoint *p_user_stop_end = nullptr;
329};
330
// RGB color with one byte per channel.
//
// Each channel has its own default member initializer. Written as
// `uint8_t r, g, b = 0;`, the initializer would apply to `b` only and leave
// `r` and `g` indeterminate after default-initialization (`RgbColor c;`).
// The struct remains an aggregate, so `RgbColor{r, g, b}` keeps working.
struct RgbColor {
  uint8_t r = 0;
  uint8_t g = 0;
  uint8_t b = 0;
};
334
335// KV1 Table 7: Line [LINE]
336//
337// A line is a collection of routes/journey patterns which is publically known
338// under a shared number.
339//
340// This table is part of the core data tables, which are common for all KV1
341// variants.
342struct Kv1Line {
343 struct Key {
344 // Mandatory (key), at most 10 characters. Transport operator (from list as
345 // defined in BISON enumeration E1).
346 std::string data_owner_code;
347 // Mandatory (key), at most 10 characters. Unique system line number in the
348 // domain of DataOwner.
349 std::string line_planning_number;
350
351 explicit Key(std::string data_owner_code,
352 std::string line_planning_number);
353 };
354
355 Key key;
356 // Mandatory, at most 4 characters. Line number for the public, incl. S/N
357 // indications.
358 std::string line_public_number;
359 // Mandatory, at most 50 characters.
360 std::string line_name;
361 // Mandatory, at most three digits. Should be in the range [0, 400).
362 // Only processing Connexxion's KV1 export, however, shows us that this range
363 // constrained is not honored in practice. That is why we also don't care.
364 short line_ve_tag_number = 0;
365 // Optional, at most 255 characters.
366 std::string description;
367 // Mandatory, at most 5 characters. Modality, see BISON enumeration E9.
368 // TODO: Check if BISON enumeration E9 can be put into an enum.
369 std::string transport_type;
370 // Optional, at most 4 digits. Symbol / image for the line. Reference to ICON
371 // table.
372 std::optional<short> line_icon;
373 // Optional, at most four characters. Background color for the line.
374 // Hexadecimal representation following RGB coding. Always six characters
375 // (RRGGBB), only numbers and/or capital letters.
376 std::optional<RgbColor> line_color;
377 // Optional, at most four characters. Foreground color for the line.
378 // Hexadecimal representation following RGB coding. Always six characters
379 // (RRGGBB), only numbers and/or capital letters.
380 std::optional<RgbColor> line_text_color;
381
382 Kv1Icon *p_line_icon = nullptr;
383};
384
385// KV1 Table 8: Destination [DEST]
386//
387// A destination shows the place/district/description of the route for the
388// passenger. Intermediate and detail destinations of a journey pattern are
389// shown under a single desination code, together with the primary destination.
390//
391// This table is part of the core data tables, which are common for all KV1
392// variants.
393struct Kv1Destination {
394 struct Key {
395 // Mandatory (key), at most 10 characters. Transport operator (from list as
396 // defined in BISON enumeration E1).
397 std::string data_owner_code;
398 // Mandatory (key), at most 10 characters.
399 std::string dest_code;
400
401 explicit Key(std::string data_owner_code,
402 std::string dest_code);
403 };
404
405 Key key;
406 // Mandatory, at most 50 characters. Full destination (e.g. compiled from
407 // primary, detail or intermediate destination).
408 std::string dest_name_full;
409 // Mandatory, at most 24 characters. Primary / intermediate destination in
410 // enumeration / final destination if 1 line is used.
411 std::string dest_name_main;
412 // Optional, at most 24 characters. Detail/secondary or intermediate
413 // destination for primary desination, final destination (for intermediate
414 // destination on line 1).
415 std::string dest_name_detail;
416 // Mandatory, at most 5 characters. Boolean which indcates whether
417 // DestNameDetail must always be shown (e.g. because this contains an
418 // important intermediate destination.)
419 bool relevant_dest_name_detail = false;
420 // Mandatory, at most 21 characters. Primary destination in 21 characters.
421 std::string dest_name_main_21;
422 // Optional, at most 21 characters. Detail/secondary/intermediate destination
423 // in 21 characters.
424 std::string dest_name_detail_21;
425 // Mandatory, at most 19 characters. Primary destination in 19 characters.
426 std::string dest_name_main_19;
427 // Optional, at most 19 characters. Detail/secondary/intermediate destination
428 // in 19 characters.
429 std::string dest_name_detail_19;
430 // Mandatory, at most 16 characters. Primary destination in 16 characters.
431 std::string dest_name_main_16;
432 // Optional, at most 16 characters. Detail/secondary/intermediate destination
433 // in 16 characters.
434 std::string dest_name_detail_16;
435 // Optional, at most 4 digits. Symbol/image for the destination. Reference to
436 // the ICON table.
437 std::optional<short> dest_icon;
438 // Optional, at most 6 characters. Background color for the destination.
439 // Hexadecimal representation following RGB coding. Always six characters
440 // (RRGGBB), only six digits and/or capital letters.
441 std::optional<RgbColor> dest_color;
442 // Optional, at most 30 characters (WTF?). Foreground color for the
443 // destination. Hexadecimal representation following RGB coding. Always six
444 // characters (RRGGBB), only six digits and/or capital letters.
445 std::optional<RgbColor> dest_text_color;
446};
447
448// KV1 Table 9: Journey Pattern [JOPA]
449//
450// The journey pattern describes the route from start to end point as a ordered
451// list of stops and links between stops/timing points.
452//
453// This table is part of the core data tables, which are common for all KV1
454// variants.
455struct Kv1JourneyPattern {
456 struct Key {
457 // Mandatory (key), at most 10 characters. Transport operator (from list as
458 // defined in BISON enumeration E1).
459 std::string data_owner_code;
460 // Mandatory (key), at most 10 characters.
461 std::string line_planning_number;
462 // Mandatory (key), at most 10 characters.
463 std::string journey_pattern_code;
464
465 explicit Key(std::string data_owner_code,
466 std::string line_planning_number,
467 std::string journey_pattern_code);
468 };
469
470 Key key;
471 // Mandatory, at most 10 characters. Refers to a journey pattern type
472 // (JOPATYPE).
473 std::string journey_pattern_type;
474 // Mandatory, at most 1 character. One of [1, 2, A, B].
475 char direction = 0;
476 // Optional, at most 255 characters.
477 std::string description;
478
479 Kv1Line *p_line = nullptr;
480};
481
482// KV1 Table 10: Concession Financer Relation [CONFINREL]
483//
484// Concession financer relation (mainly parcel). Smallest unit for which data
485// about a concession can be captured in relation to a financer and/or
486// concession.
487//
488// This table is part of the core data tables, which are common for all KV1
489// variants.
490struct Kv1ConcessionFinancerRelation {
491 struct Key {
492 // Mandatory (key), at most 10 characters. Transport operator (from list as
493 // defined in BISON enumeration E1).
494 std::string data_owner_code;
495 // Mandatory (key), at most 10 characters. Parcel code.
496 std::string con_fin_rel_code;
497
498 explicit Key(std::string data_owner_code,
499 std::string con_fin_rel_code);
500 };
501
502 Key key;
503 // Mandatory, at most 10 characters. Concession code.
504 std::string concession_area_code;
505 // Optional, at most 10 characters. Code of financer/client of the parcel.
506 std::string financer_code;
507
508 Kv1ConcessionArea *p_concession_area = nullptr;
509 Kv1Financer *p_financer = nullptr;
510};
511
// KV1 Table 11: Concession Area [CONAREA]
//
// Concession (area).
//
// Belongs to the core data tables, which all KV1 variants have in common.
struct Kv1ConcessionArea {
  struct Key {
    // Key field, mandatory, max. 10 characters. Transport operator (taken
    // from the list defined in BISON enumeration E1).
    std::string data_owner_code;
    // Key field, mandatory, max. 10 characters. Code of the concession.
    std::string concession_area_code;

    explicit Key(std::string data_owner_code, std::string concession_area_code);
  };

  Key key;
  // Mandatory, max. 255 characters.
  std::string description;
};
534
// KV1 Table 12: Financer [FINANCER] (OPTIONAL)
//
// Financer of a parcel.
//
// Belongs to the core data tables, which all KV1 variants have in common.
struct Kv1Financer {
  struct Key {
    // Key field, mandatory, max. 10 characters. Transport operator (taken
    // from the list defined in BISON enumeration E1).
    std::string data_owner_code;
    // Key field, mandatory, max. 10 characters.
    std::string financer_code;

    explicit Key(std::string data_owner_code, std::string financer_code);
  };

  Key key;
  // Mandatory, max. 255 characters.
  std::string description;
};
557
558// KV1 Table 13: Journey Pattern Timing Link [JOPATILI]
559//
560// Compilation of journey pattern from logical links (between pairs of
561// stops/timing points). Features such as the destination code, the public line
562// number, the concession financer relation (parcel) and product formula are
563// set per connection. Moreover, a color and/or image linked to the line
564// destination and the use of the (first) stop as boarding/alighting stop can
565// be set per link.
566//
567// Timing Link: A timing link is a stop, set by the transit operator, where a
568// bus / public transit vehicle may never depart earlier than set in the
569// timetable.
570//
571// A logical link may never occur more than once in a journey pattern.
572// Therefore, the combination of LinePlanningNumber, JourneyPatternCode,
573// UserStopCodeBegin and UserStopCodeEnd must be unique in JOPATILI.
574//
575// The value of GetIn and GetOut are normally copied from the corresponding
576// stop in the USRSTOP table, but can be overruled per journey pattern if so
577// desired.
578//
579// A Icon or (Text)Color set here overrules the general value of the
580// corresponding line (Line) or destination (Destination).
581//
582// A value of ShowFlexibleTrip or ProductFormulaType in PUJO or PUJOPASS
583// overrules the value in JOPATILI.
584//
585// This table is part of the core data tables, which are common for all KV1
586// variants.
587struct Kv1JourneyPatternTimingLink {
588 struct Key {
589 // Mandatory (key), at most 10 characters. Transport operator (from list as
590 // defined in BISON enumeration E1).
591 std::string data_owner_code;
592 // Mandatory (key), at most 10 characters.
593 std::string line_planning_number;
594 // Mandatory (key), at most 10 characters.
595 std::string journey_pattern_code;
596 // Mandatory (key), at most 3 digits.
597 short timing_link_order = 0;
598
599 explicit Key(std::string data_owner_code,
600 std::string line_planning_number,
601 std::string journey_pattern_code,
602 short timing_link_order);
603 };
604
605 Key key;
606 // Mandatory, at most 10 characters. Stop number in the domain of the
607 // DataOwner (here: the transit operator).
608 std::string user_stop_code_begin;
609 // Mandatory, at most 10 characters. Stop number in the domain of the
610 // DataOwner (here: the transit operator).
611 std::string user_stop_code_end;
612 // Mandatory, at most 10 characters. Concession financer relation / parcel
613 // (smallest unit).
614 std::string con_fin_rel_code;
615 // Mandatory, at most 10 characters. The destination (incl. intermediat
616 // destinations) as these are shown at the first stop of the journey pattern
617 // link.
618 std::string dest_code;
619 // Mandatory, at most 5 characters. Boolean which indicates whether the first
620 // stop of the connection is a timing stop. Indicator is at least "true" at
621 // first stop of a line and at waiting stops.
622 bool is_timing_stop = false;
623 // Optional, at most 4 characters. Public line number which must be shown on
624 // displays from the first stop of the journey pattern link (e.g. Line number
625 // + S). This is important when a deviating public line number applies from a
626 // certain point on forward. Normally, the public line number of the
627 // corresponding line is shown.
628 std::string display_public_line;
629 // Optional, at most 4 digits. Enumeration E10 (see section 2.5). A public
630 // transit service which distinguishes itself by a set of unique features,
631 // that is offered to the passenger as distinct (a marketing aspect).
632 // TODO: Check if we can turn BISON enumeration E10 into an enum
633 std::optional<short> product_formula_type;
634 // Mandatory, at most 5 characters. Boolean indicator whether UserStopBegin
635 // is used as a boarding stop in this journey pattern. Usually equal to the
636 // value of the corresponding USRSTOP.
637 bool get_in = false;
638 // Mandatory, at most 5 characters. Boolean indicator whether UserStopBegin
639 // is used as an alighting stop in this journey pattern. Usually equal to the
640 // value of the corresponding USRSTOP.
641 bool get_out = false;
642 // Optional, at most 8 characters. Indicates whether the transit operator
643 // wants a not explicitly planned trip (i.e. a trip that only operates after
644 // reservation such as a 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to
645 // be shown on displays. Values according enumeration E21: TRUE (always),
646 // FALSE (never), REALTIME (only when tracking trip).
647 // TODO: Check if we can turn BISON enumeration E21 into an enum
648 std::string show_flexible_trip;
649 // Optional, at most 4 digits. Symbol / image for display of the line
650 // destination at the journey stop passing. Reference to the ICON table.
651 std::optional<short> line_dest_icon;
652 // Optional, at most 6 characters. Background color for display of the line
653 // destination at a journey stop passing. Hexadecimal representation
654 // following RGB coding. Always six characters (RRGGBB), only numbers and/or
655 // capital letters.
656 std::optional<RgbColor> line_dest_color;
657 // Optional, at most 6 characters. Foreground color for display of the line
658 // destination at a journey stop passing. Hexadecimal representation
659 // following RGB coding. Always six characters (RRGGBB), only numbers and/or
660 // capital letters.
661 std::optional<RgbColor> line_dest_text_color;
662
663 Kv1Line *p_line = nullptr;
664 Kv1JourneyPattern *p_journey_pattern = nullptr;
665 Kv1UserStopPoint *p_user_stop_begin = nullptr;
666 Kv1UserStopPoint *p_user_stop_end = nullptr;
667 Kv1ConcessionFinancerRelation *p_con_fin_rel = nullptr;
668 Kv1Destination *p_dest = nullptr;
669 Kv1Icon *p_line_dest_icon = nullptr;
670};
671
672// KV1 Table 14: Point [POINT]
673//
674// A point is the smallest location which can be reffered to within the public
675// transit network. Every stop (USRSTOP) is a point.
676//
677// This table is part of the core data tables, which are common for all KV1
678// variants.
struct Kv1Point {
  // The fields that together identify one POINT record.
  struct Key {
    // Key field, mandatory; up to 10 characters. The transport operator, taken
    // from the list defined in BISON enumeration E1.
    std::string data_owner_code;
    // Key field, mandatory; up to 10 characters.
    std::string point_code;

    explicit Key(std::string data_owner_code, std::string point_code);
  };

  Key key;
  // Mandatory; up to 10 characters. Reference into the POINTTYPE table.
  std::string point_type;
  // Mandatory; up to 10 characters. Reference into the GEOSYSTYPE table. The
  // only permitted value is "RD" (rijksdriehoekstelsel, the national Dutch
  // coordinate system).
  std::string coordinate_system_type;
  // Mandatory; up to 15 characters. X position within the RD coordinate
  // system, expressed in meters (at least 6 digits).
  double location_x_ew{0.0};
  // Mandatory; up to 15 characters. Y position within the RD coordinate
  // system, expressed in meters (at least 6 digits).
  double location_y_ns{0.0};
  // Optional; up to 15 characters.
  // NOTE: the standard (presumably by mistake) marks this field as having
  // alphanumeric contents; it is stored as a number here.
  std::optional<double> location_z;
  // Optional; up to 255 characters.
  std::string description;
};
711
712// KV1 Table 15: Point on Link [POOL]
713//
714// A point that is used to geographically describe the trajectory between two
715// stops.
716//
717// This table is part of the core data tables, which are common for all KV1
718// variants.
719struct Kv1PointOnLink {
720 struct Key {
721 // Mandatory (key), at most 10 characters. Transport operator (from list as
722 // defined in BISON enumeration E1).
723 std::string data_owner_code;
724 // Mandatory (key), at most 10 characters. Stop number in the domain of the
725 // DataOwner (here: transit operator).
726 std::string user_stop_code_begin;
727 // Mandatory (key), at most 10 characters. Stop number in the domain of the
728 // DataOwner (here: transit operator).
729 std::string user_stop_code_end;
730 // Mandatory (key), at most 10 characters. Code from the road manager for KAR
731 // points. For curve points of the DataOwner (often the transit operator).
732 std::string point_data_owner_code;
733 // Mandatory (key), at most 10 charcters.
734 std::string point_code;
735 // Mandatory (key), at most 5 characters. Modality for which the distance
736 // applies, see BISON enumeration E9.
737 std::string transport_type;
738
739 explicit Key(std::string data_owner_code,
740 std::string user_stop_code_begin,
741 std::string user_stop_code_end,
742 std::string point_data_owner_code,
743 std::string point_code,
744 std::string transport_type);
745 };
746
747 Key key;
748 // Mandatory, at most 5 digits. Distance in meters relative to the start of
749 // the link.
750 double distance_since_start_of_link = 0;
751 // Optional, at most 4 digits. Crossing speed for a public transit vehicle
752 // from the previous point (on a link) in m/s.
753 std::optional<double> segment_speed_mps = 0;
754 // Optional, at most 4 digits. Comfort speed for a public transit vehicle on
755 // the curve point.
756 std::optional<double> local_point_speed_mps = 0;
757 // Optional, at most 255 characters.
758 std::string description;
759
760 Kv1UserStopPoint *p_user_stop_begin = nullptr;
761 Kv1UserStopPoint *p_user_stop_end = nullptr;
762 Kv1Point *p_point = nullptr;
763};
764
765// KV1 Table 16: Icon [ICON]
766//
767// Table with images which can be referred to from DEST.DestIcon, LINE.LineIcon
768// and JOPATILI.LineDestIcon to load the correct image.
769//
770// This table is part of the core data tables, which are common for all KV1
771// variants.
struct Kv1Icon {
  // The fields that together identify one ICON record.
  struct Key {
    // Key field, mandatory; up to 10 characters. The transport operator, taken
    // from the list defined in BISON enumeration E1.
    std::string data_owner_code;
    // Key field, mandatory; up to 4 digits. The number by which other tables
    // refer to the requested image.
    short icon_number{0};

    explicit Key(std::string data_owner_code, short icon_number);
  };

  Key key;
  // Mandatory; up to 1024 characters. Absolute URI of a publicly available
  // location from which the image can be loaded. The file extension
  // indicates the image type.
  //   Supported file types: GIF (.gif), JPEG (.jpg, .jpeg),
  //                         PNG (.png), SVG (.svg)
  //   Supported protocols:  HTTP, HTTPS, FTP
  // Capital letters should preferably be avoided. Examples:
  //   - http://bison.dova.nu/images/logo.png
  //   - https://bison.dova.nu/images/logo.png
  //   - ftp://ftp.dova.nu/images/logo.png
  std::string icon_uri;
};
798
799// KV1 Table 17: Notice [NOTICE] (OPTIONAL)
800//
801// A (reusable) text with supplementary information about exceptions /
802// clarifications for a line, journey pattern etc.
803//
804// Usage is optional; when there are no clarifying texts, the NOTICE table does
805// not need to be provided in a KV1 set.
806//
807// This table is part of the core data tables, which are common for all KV1
808// variants.
struct Kv1Notice {
  // The fields that together identify one NOTICE record.
  struct Key {
    // Key field, mandatory; up to 10 characters. The transport operator, taken
    // from the list defined in BISON enumeration E1.
    std::string data_owner_code;
    // Key field, mandatory; up to 20 characters. Identifies the Notice
    // (remark, clarifying text).
    std::string notice_code;

    explicit Key(std::string data_owner_code, std::string notice_code);
  };

  Key key;
  // Mandatory; up to 1024 characters. The text content itself. Holds contact
  // information such as telephone number, web address and reservation time
  // for 'call buses' (belbussen) and other demand-based transit.
  std::string notice_content;
};
828
829// KV1 Table 18: Notice Assignment [NTCASSGNM] (OPTIONAL)
830//
831// Linking table in which Notice (remark, clarifying text) is assigned to a
832// line, journey pattern, stops within a journey pattern, journey etc. Notice
833// Assignment contains all logical key elements of the corresponding objects to
834// which a Notice can be assigned.
835//
836// Different attributes are required for the Notice Assignment, depending on
837// the type of object to which the Notice is assigned. In the following table
838// structure, this is indicated as 'Only relevant for ...'. This means that
839// fields for other object types in the Notice Assignment can be ignored.
840//
841// Moreover, it can also occur that not all key fields of the linked table are
842// of interest (content-wise) for recording the Notice.
843//
844// Both matters are summarised in this overview:
845//
846// --------------------------------------------------------
847// AssignedObject PUJO PUJOPASS LINE JOPATILI
848// --------------------------------------------------------
849// DataOwnerCode........... x ...... x ...... x ..... x ...
850// TimetableVersionCode ... o .............................
851// OrganizationalUnitCode . o ...... o ....................
852// ScheduleCode .................... o ....................
853// ScheduleTypeCode ................ o ....................
854// PeriodGroupCode ........ o .............................
855// SpecificDayCode ........ o .............................
856// DayType ................ o .............................
857// LinePlanningNumber ..... x ...... x ...... x ..... x ...
858// JourneyNumber .......... x ...... x ....................
859// StopOrder ....................... o .............. o ...
860// JourneyPatternCode ............................... x ...
861// TimingLinkOrder .................................. o ...
862// UserStopCode .................... o .............. o ...
863// --------------------------------------------------------
864//
865// Legend:
866// x - Mandatory. The Notice for this object type is always dependent on the
867// value of the attribute.
868// o - Optional. The Notice can be independent of the value of this
869// attribute for this object type.
870// <empty> - Attribute is no key field for this object type and can be
871// ignored when processed.
872//
873// Usage of Notice Assignment is optional in KV1. If there are no clarifying
874// texts, then the Notice Assignment table is not required to be present in the
875// provided KV1 set.
876//
877// This table is part of the core data tables, which are common for all KV1
878// variants.
879struct Kv1NoticeAssignment {
880 // Mandatory, at most 10 characters. Transport operator (from list as
881 // defined in BISON enumeration E1).
882 std::string data_owner_code;
883 // Mandatory, at most 20 characters. Notice that is assigned.
884 std::string notice_code;
885 // Mandatory, at most 8 characters. Object type to which Notice is assigned.
886 std::string assigned_object;
887 // Optional, at most 10 characters. Only relevant for PUJO.
888 std::string timetable_version_code;
889 // Optional, at most 10 characters. Only relevant for PUJO and PUJOPASS.
890 std::string organizational_unit_code;
891 // Optional, at most 10 characters. Only relevant for PUJOPASS.
892 std::string schedule_code;
893 // Optional, at most 10 characters. Only relevant for PUJOPASS.
894 std::string schedule_type_code;
895 // Optional, at most 10 characters. Only relevant for PUJO.
896 std::string period_group_code;
897 // Optional, at most 10 characters. Only relevant for PUJO.
898 std::string specific_day_code;
899 // Optional, at most 10 characters. Only relevant for PUJO.
900 // [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, Thu, Fri, Sat, Sun.
901 // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun.
902 std::string day_type;
903 // Mandatory, at most 10 characters. Mandatory for all object types.
904 std::string line_planning_number;
905 // Optional (for all object types except PUJO and PUJOPASS), at most 6
906 // digits. Only relevant for PUJO and PUJOPASS. Must be in the range
907 // [0-1000000).
908 std::optional<int> journey_number;
909 // Optional, at most 4 digits. Only relevant for PUJOPASS and JOPATILI.
910 std::optional<int> stop_order;
911 // Optional (for all object types except JOPATILI), at most 4 digits. Only
912 // relevant for JOPATILI.
913 std::string journey_pattern_code;
914 // Optional (at most 3 digits). Only relevant for JOPATILI.
915 std::optional<short> timing_link_order;
916 // Optional (at most 10 characters). Only relevant for PUJOPASS and JOPATILI.
917 // For JOPATILI, this correspond to the first stop of the link.
918 std::string user_stop_code;
919
920 Kv1Notice *p_notice = nullptr;
921};
922
923// KV1 Table 19: Time Demand Group [TIMDEMGRP]
924//
925// A time demand group is a grouping of the run time distribution from stop to
926// stop, for a journey pattern (from start to end point).
927//
928// This table is part of the KV1 variant "validities and time demand groups".
929struct Kv1TimeDemandGroup {
930 struct Key {
931 // Mandatory (key), at most 10 characters. Transport operator (from list as
932 // defined in BISON enumeration E1).
933 std::string data_owner_code;
934 // Mandatory (key), at most 10 characters.
935 std::string line_planning_number;
936 // Mandatory (key), at most 10 characters. Refers to the JOPATILI table.
937 std::string journey_pattern_code;
938 // Mandatory (key), at most 10 characters. Defines the code for the time
939 // demand group. (NOTE: this is not entirely made clear by the specification.
940 // This claim must be verified.)
941 std::string time_demand_group_code;
942
943 explicit Key(std::string data_owner_code,
944 std::string line_planning_number,
945 std::string journey_pattern_code,
946 std::string time_demand_group_code);
947 };
948
949 Key key;
950
951 Kv1Line *p_line = nullptr;
952 Kv1JourneyPattern *p_journey_pattern = nullptr;
953};
954
955// KV1 Table 20: Time Demand Group Run Time [TIMDEMRNT]
956//
957// The run time structure/distribution for all timing links of a journey
958// pattern or a time demand group.
959//
960// Optional run time elements are, when these are present, used to more
961// accurately calculate expected departure times based on punctuality
962// deviations.
963//
964// This table is part of the KV1 variant "validities and time demand groups".
965struct Kv1TimeDemandGroupRunTime {
966 struct Key {
967 // Mandatory (key), at most 10 characters. Transport operator (from list as
968 // defined in BISON enumeration E1).
969 std::string data_owner_code;
970 // Mandatory (key), at most 10 characters.
971 std::string line_planning_number;
972 // Mandatory (key), at most 10 characters. Refers to the JOPATILI table.
973 std::string journey_pattern_code;
974 // Mandatory (key), at most 10 characters. Refers to the TIMDEMGRP table.
975 std::string time_demand_group_code;
976 // Mandatory (key), at most 3 digits. Reference number of a link within the
977 // journey pattern (a link can occur more than once within a journey
978 // pattern).
979 short timing_link_order = 0;
980
981 explicit Key(std::string data_owner_code,
982 std::string line_planning_number,
983 std::string journey_pattern_code,
984 std::string time_demand_group_code,
985 short timing_link_order);
986 };
987
988 Key key;
989 // Mandatory, at most 10 characters. Refers to the first stop of the link.
990 std::string user_stop_code_begin;
991 // Mandatory, at most 10 characters. Refers to the last stop of the link.
992 std::string user_stop_code_end;
993 // Mandatory, at most 5 digits. Planned total run time on link for time
994 // demand group: (Departure time end stop - departure time begin stop)
995 // corresponding to the time demand group. In seconds.
996 double total_drive_time_s = 0;
997 // Mandatory, at most 5 digits. Planned minimal run time on link for time
998 // demand group. Often calculated as: (Arrival time end stop - arrival time
999 // begin stop) corresponding to the time demand group. In seconds.
1000 double drive_time_s = 0;
1001 // Optional, at most 5 digits. Expected/planned delay/congestion on link for
1002 // time demand group. In seconds.
1003 std::optional<double> expected_delay_s;
1004 // Optional, at most 5 digits. Layover/catch-up time. Gives play in the
1005 // timetable. In seconds.
1006 // LayOverTime = TotDriveTime - DriveTime + ExpectedDelay - StopWaitTime.
1007 std::optional<double> layover_time;
1008 // Mandatory, at most 5 digits. Planned stop waiting time at the final stop
1009 // of the link for the time demand group. Determined based on the difference
1010 // between the departure time and arrival time at this stop. Is zero when no
1011 // waiting time is planned for this stop. In seconds.
1012 double stop_wait_time = 0;
1013 // Optional, at most 5 digits. Planned minimal stop time for
1014 // boarding/alighting of passengers at the final stop of the link for the
1015 // time demand group. Application: at hub stops with a planned waiting time,
1016 // the difference between the planned waiting time and the minimum stop time
1017 // is the layover/catch-up time. In seconds.
1018 std::optional<double> minimum_stop_time;
1019
1020 Kv1Line *p_line = nullptr;
1021 Kv1UserStopPoint *p_user_stop_begin = nullptr;
1022 Kv1UserStopPoint *p_user_stop_end = nullptr;
1023 Kv1JourneyPattern *p_journey_pattern = nullptr;
1024 Kv1TimeDemandGroup *p_time_demand_group = nullptr;
1025 Kv1JourneyPatternTimingLink *p_journey_pattern_timing_link = nullptr;
1026};
1027
1028// KV1 Table 21: Period Group [PEGR]
1029//
1030// Period group is an indication of a 'homogeneous period' during the year,
1031// i.e. a period in which the schedule has the same composition w.r.t.
1032// frequencies and run times.
1033//
1034// This table is part of the KV1 variant "validities and time demand groups".
struct Kv1PeriodGroup {
  // The fields that together identify one PEGR record.
  struct Key {
    // Key field, mandatory; up to 10 characters. The transport operator, taken
    // from the list defined in BISON enumeration E1.
    std::string data_owner_code;
    // Key field, mandatory; up to 10 characters.
    std::string period_group_code;

    explicit Key(std::string data_owner_code, std::string period_group_code);
  };

  Key key;
  // Optional; up to 255 characters.
  std::string description;
};
1051
1052// KV1 Table 22: Specific Day [SPECDAY]
1053//
1054// A specific day is a feature of a day for which a deviating service level is
1055// provided, respective to a normal day of the week.
1056//
1057// E.g. shopping Sundays (koopzondagen, if not every Sunday), New Year's Eve
1058// (oudejaarsdag), foreign bank holidays (as applicable).
1059//
1060// This table is part of the KV1 variant "validities and time demand groups".
struct Kv1SpecificDay {
  // The fields that together identify one SPECDAY record.
  struct Key {
    // Key field, mandatory; up to 10 characters. The transport operator, taken
    // from the list defined in BISON enumeration E1.
    std::string data_owner_code;
    // Key field, mandatory; up to 10 characters. Default: "NORMAL".
    std::string specific_day_code;

    explicit Key(std::string data_owner_code, std::string specific_day_code);
  };

  Key key;
  // Mandatory; up to 50 characters.
  std::string name;
  // Optional; up to 255 characters.
  std::string description;
};
1079
1080// KV1 Table 23: Timetable Version [TIVE]
1081//
1082// A timetable version bundles all planned activities for an organizational
1083// unit. For the public schedule, these are trips, routes, run times etc.
1084//
1085// When processing a new Timetable Version, it is checked if another TIVE with
1086// the same key has already been processed. If this is the case, ValidFrom must
1087// be equal to the starting date of the previously provided set. The new set
1088// replaces the older one. A package with a new starting date is only processed
1089// if another TimetableVersionCode is used.
1090//
1091// This table is part of the KV1 variant "validities and time demand groups".
1092struct Kv1TimetableVersion {
1093 struct Key {
1094 // Mandatory (key), at most 10 characters. Transport operator (from list as
1095 // defined in BISON enumeration E1).
1096 std::string data_owner_code;
1097 // Mandatory (key), at most 10 characters.
1098 std::string organizational_unit_code;
1099 // Mandatory (key), at most 10 characters.
1100 std::string timetable_version_code;
1101 // Mandatory (key), at most 10 charactes.
1102 std::string period_group_code;
1103 // Mandatory (key), at most 10 characters. Default: "NORMAL".
1104 std::string specific_day_code;
1105
1106 explicit Key(std::string data_owner_code,
1107 std::string organizational_unit_code,
1108 std::string timetable_version_code,
1109 std::string period_group_code,
1110 std::string specific_day_code);
1111 };
1112
1113 Key key;
1114 // Mandatory, at most 10 characters. Datum on which the timetable goes into
1115 // effect, following the YYYY-MM-DD format.
1116 std::chrono::year_month_day valid_from;
1117 // Mandatory, at most 10 characters. Value: "PUBT".
1118 std::string timetable_version_type;
1119 // Optional, at most 10 characters. Datum on which the timetable goes out of
1120 // effect, following the YYYY-MM-DD format.
1121 std::optional<std::chrono::year_month_day> valid_thru;
1122 // Optional, at most 255 characters. Should be null/empty.
1123 std::string description;
1124
1125 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1126 Kv1PeriodGroup *p_period_group = nullptr;
1127 Kv1SpecificDay *p_specific_day = nullptr;
1128};
1129
1130// KV1 Table 24: Public Journey [PUJO]
1131//
1132// Public journeys are journeys that are operated by a public transit
1133// organization and are accessible to the passenger.
1134//
1135// Business rules:
1136// - If ShowFlexibleTrip or ProductFormulaType is set in a record of this
1137// table, this takes precedence over the value as in the corresponding
1138// JOPATILI entry.
1139//
1140// This table is part of the KV1 variant "validities and time demand groups".
1141struct Kv1PublicJourney {
1142 struct Key {
1143 // Mandatory (key), at most 10 characters. Transport operator (from list as
1144 // defined in BISON enumeration E1).
1145 std::string data_owner_code;
1146 // Mandatory (key), at most 10 characters.
1147 std::string timetable_version_code;
1148 // Mandatory (key), at most 10 characters.
1149 std::string organizational_unit_code;
1150 // Mandatory (key), at most 10 characters.
1151 std::string period_group_code;
1152 // Mandatory (key), at most 10 characters.
1153 std::string specific_day_code;
1154 // Mandatory (key), at most 7 characters.
1155 // [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, Thu, Fri, Sat, Sun.
1156 // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun.
1157 // TODO: See if we can make this into a more concrete type
1158 std::string day_type;
1159 // Mandatory (key), at most 10 characters.
1160 std::string line_planning_number;
1161 // Mandatory (key), at most 6 digits. Must be in the range [0-1000000).
1162 int journey_number = 0;
1163
1164 explicit Key(std::string data_owner_code,
1165 std::string timetable_version_code,
1166 std::string organizational_unit_code,
1167 std::string period_group_code,
1168 std::string specific_day_code,
1169 std::string day_type,
1170 std::string line_planning_number,
1171 int journey_number);
1172 };
1173
1174 Key key;
1175 // Mandatory, at most 10 characters.
1176 std::string time_demand_group_code;
1177 // Mandatory, at most 10 characters.
1178 std::string journey_pattern_code;
1179 // Mandatory, at most 8 characters. Format: "HH:MM:SS".
1180 std::chrono::hh_mm_ss<std::chrono::seconds> departure_time;
1181 // Mandatory, at most 13 characters. Values as in BISON enumeration E3.
1182 // Allowed are: "ACCESSIBLE", "NOTACCESSIBLE" and "UNKNOWN".
1183 // TODO: See if we can fit BISON enumeration E3 into an enum
1184 std::string wheelchair_accessible;
1185 // Mandatory, at most 5 characters. Boolean. Value "true": journey is
1186 // operator by DataOwner. Value "false": journey is operator by a different
1187 // DataOwner. Indicator is meant for a line that is operated jointly by
1188 // multiple transit operators. The indicator is used to be able to match the
1189 // journey operation (KV6, KV19 etc.); only journeys for which the indicator
1190 // is "true" can be expected to have corresponding current/real-time
1191 // information, although "true" doesn't necessarily mean that this
1192 // current/real-time information will (always) become available.
1193 bool data_owner_is_operator = false;
1194 // Mandatory, at most 5 characters. Boolean. Indicates whether
1195 // current/real-time journey information may be expected for the
1196 // corresponding journey ("true" or "false").
1197 bool planned_monitored = false;
1198 // Optional, at most 4 digits. BISON enumeration E10. Intended to allow
1199 // capturing transit mode features at the journey level.
1200 // TODO: See if we can make BISON enumeration E10 into an enum
1201 std::optional<short> product_formula_type;
1202 // Optional, at most 8 characters. Indicates whether the transit operator
1203 // wants that a not-explicitly planned trip (i.e. a journey that only runs on
1204 // reservation, e.g. 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to be
1205 // shown on displays. Values following BISON enumeration E21: TRUE (always),
1206 // FALSE (never), REALTIME (only when journey is tracked).
1207 // TODO: See if we can make BISON enumeration E21 into an enum
1208 std::string show_flexible_trip;
1209
1210 Kv1TimetableVersion *p_timetable_version = nullptr;
1211 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1212 Kv1PeriodGroup *p_period_group = nullptr;
1213 Kv1SpecificDay *p_specific_day = nullptr;
1214 Kv1Line *p_line = nullptr;
1215 Kv1TimeDemandGroup *p_time_demand_group = nullptr;
1216 Kv1JourneyPattern *p_journey_pattern = nullptr;
1217};
1218
1219// KV1 Table 25: Period Group Validity [PEGRVAL]
1220//
1221// Validities (multiple from-thru data) of a period group.
1222//
1223// This table is part of the KV1 variant "validities and time demand groups".
1224struct Kv1PeriodGroupValidity {
1225 struct Key {
1226 // Mandatory (key), at most 10 characters. Transport operator (from list as
1227 // defined in BISON enumeration E1).
1228 std::string data_owner_code;
1229 // Mandatory (key), at most 10 characters.
1230 std::string organizational_unit_code;
1231 // Mandatory (key), at most 10 characters.
1232 std::string period_group_code;
1233 // Mandatory (key), at most 10 characters. Date of the start of the validity
1234 // period. Format: "YYYY-MM-DD".
1235 std::chrono::year_month_day valid_from;
1236
1237 explicit Key(std::string data_owner_code,
1238 std::string organizational_unit_code,
1239 std::string period_group_code,
1240 std::chrono::year_month_day valid_from);
1241 };
1242
1243 Key key;
1244 // Mandatory, at most 10 characters. Date of the end of the validity period.
1245 // Format: "YYYY-MM-DD".
1246 std::chrono::year_month_day valid_thru;
1247
1248 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1249 Kv1PeriodGroup *p_period_group = nullptr;
1250};
1251
1252// KV1 Table 26: Exceptional Operating Day [EXCOPDAY]
1253//
1254// Contains exceptional validity dates, for which the service runs following a
1255// different day type (such as another day of the week or a different period).
1256//
1257// This table is part of the KV1 variant "validities and time demand groups".
1258struct Kv1ExceptionalOperatingDay {
1259 struct Key {
1260 // Mandatory (key), at most 10 characters. Transport operator (from list as
1261 // defined in BISON enumeration E1).
1262 std::string data_owner_code;
1263 // Mandatory (key), at most 10 characters. Organization unit for which an
1264 // exceptional day validity applies.
1265 std::string organizational_unit_code;
1266 // Mandatory (key), at most 23 characters. Date (+ time) for which the
1267 // exceptional validity applies. Format: "YYYYMMDDThh:mm:ssTZD".
1268 std::chrono::sys_seconds valid_date;
1269
1270 explicit Key(std::string data_owner_code,
1271 std::string organizational_unit_code,
1272 std::chrono::sys_seconds valid_date);
1273 };
1274
1275 Key key;
1276 // Mandatory, at most 7 characters. The exceptional day type that applies on
1277 // a calendar day: [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed,
1278 // Thu, Fri, Sat.
1279 // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun.
1280 // TODO: See if we can make this into a more concrete type
1281 std::string day_type_as_on;
1282 // Mandatory, at most 10 characters. Specific day service level to which the
1283 // exceptional day validity refers.
1284 std::string specific_day_code;
1285 // Optional, at most 10 characters. An exceptional day validity can be
1286 // related to the service level of another period (e.g. the school holiday
1287 // schedule). This exceptional period reference is set here.
1288 //
1289 // E.g. on Good Friday or the day after Ascension day, transit runs according
1290 // to the holiday season schedule, while transit runs following the winter
1291 // package in the surrounding days.
1292 std::string period_group_code;
1293 // Optional, at most 255 characters.
1294 std::string description;
1295
1296 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1297 Kv1SpecificDay *p_specific_day = nullptr;
1298 Kv1PeriodGroup *p_period_group = nullptr;
1299};
1300
1301// KV1 Table 27: Schedule Version [SCHEDVERS]
1302//
1303// A schedule version bundles the planned activities for an organisation unit
1304// per day type. The journeys with passing times and corresponding routes are
1305// for the public timetable.
1306//
1307// When processing a new Schedule Version, it is checked if another SCHEDVERS
1308// with the same key has already been processed. If this is the case, ValidFrom
1309// must be equal to the starting date of the previously provided set. The new
1310// set replaces the older one. A package with a new starting date is only
1311// processed if another Schedule Code is used.
1312//
1313// This table is part of the KV1 variant "schedules and passing times".
1314struct Kv1ScheduleVersion {
1315 struct Key {
1316 // Mandatory (key), at most 10 characters. Transport operator (from list as
1317 // defined in BISON enumeration E1).
1318 std::string data_owner_code;
1319 // Mandatory (key), at most 10 characters.
1320 std::string organizational_unit_code;
1321 // Mandatory (key), at most 10 characters. A unique code in combination with
1322 // the ScheduleTypeCode of the package within the ORUN.
1323 std::string schedule_code;
1324 // Mandatory (key), at most 10 characters. Code for the Schedule Type (Day Type).
1325 std::string schedule_type_code;
1326
1327 explicit Key(std::string data_owner_code,
1328 std::string organizational_unit_code,
1329 std::string schedule_code,
1330 std::string schedule_type_code);
1331 };
1332
1333 Key key;
1334 // Mandatory, at most 10 characters. Date on which the schedule goes into
1335 // effect. Format: "YYYY-MM-DD".
1336 std::chrono::year_month_day valid_from;
1337 // Optional, at most 10 characters. Date on which the schedule goes out of
1338 // effect. Format: "YYYY-MM-DD".
1339 std::optional<std::chrono::year_month_day> valid_thru;
1340 // Optional, at most 255 characters. Should be empty/null.
1341 std::string description;
1342
1343 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1344};
1345
1346// KV1 Table 28: Public Journey Passing Times [PUJOPASS]
1347//
1348// Public journey with arrival and departure times at all stops (and other
1349// timing points).
1350//
1351// Business rules:
1352// - If ShowFlexibleTrip or ProductFormulaType is set here, then this takes
1353// precedence over the value in the corresponding JOPATILI record.
1354// - All stop passings of a public journey refer to the same journey pattern
1355// (JOPA)!
1356//
1357// This table is part of the KV1 variant "schedules and passing times".
1358struct Kv1PublicJourneyPassingTimes {
1359 struct Key {
1360 // Mandatory (key), at most 10 characters. Transport operator (from list as
1361 // defined in BISON enumeration E1).
1362 std::string data_owner_code;
1363 // Mandatory (key), at most 10 characters.
1364 std::string organizational_unit_code;
1365 // Mandatory (key), at most 10 characters. A unique code in combination with
1366 // the ScheduleTypeCode of the package within the ORUN.
1367 std::string schedule_code;
1368 // Mandatory (key), at most 10 characters. Code for the Schedule Type (e.g.
1369 // Day Type).
1370 std::string schedule_type_code;
1371 // Mandatory (key), at most 10 characters.
1372 std::string line_planning_number;
1373 // Mandatory (key), at most 6 digits. Must be in the range [0-1000000).
1374 int journey_number = 0;
1375 // Mandatory (key), at most 4 digits.
1376 short stop_order = 0;
1377
1378 explicit Key(std::string data_owner_code,
1379 std::string organizational_unit_code,
1380 std::string schedule_code,
1381 std::string schedule_type_code,
1382 std::string line_planning_number,
1383 int journey_number,
1384 short stop_order);
1385 };
1386
1387 Key key;
1388 // Mandatory, at most 10 characters.
1389 std::string journey_pattern_code;
1390 // Mandatory, at most 10 characters.
1391 std::string user_stop_code;
1392 // Mandatory (except for the first stop of a journey), at most 8 digits. Not
1393 // compulsory for the first stop of a journey. Format: "HH:MM:SS".
1394 std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_arrival_time;
1395 // Mandatory (expect for the last stop of a journey), at most 8 digits. Not
1396 // compulsory for the last stop of a journey. Format: "HH:MM:SS".
1397 std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_departure_time;
1398 // Mandatory, at most 13 characters. Values as in BISON enumeration E3.
1399 // Allowed are: "ACCESSIBLE", "NOTACCESSIBLE" and "UNKNOWN".
1400 // TODO: See if we can fit BISON enumeration E3 into an enum
1401 std::string wheelchair_accessible;
1402 // Mandatory, at most 5 characters. Boolean. Value "true": journey is
1403 // operator by DataOwner. Value "false": journey is operator by a different
1404 // DataOwner. Indicator is meant for a line that is operated jointly by
1405 // multiple transit operators. The indicator is used to be able to match the
1406 // journey operation (KV6, KV19 etc.); only journeys for which the indicator
1407 // is "true" can be expected to have corresponding current/real-time
1408 // information, although "true" doesn't necessarily mean that this
1409 // current/real-time information will (always) become available.
1410 bool data_owner_is_operator = false;
1411 // Mandatory, at most 5 characters. Boolean. Indicates whether
1412 // current/real-time journey information may be expected for the
1413 // corresponding journey ("true" or "false").
1414 bool planned_monitored = false;
1415 // Optional, at most 4 digits. BISON enumeration E10. Intended to allow
1416 // capturing transit mode features at the journey level.
1417 // TODO: See if we can make BISON enumeration E10 into an enum
1418 std::optional<short> product_formula_type;
1419 // Optional, at most 8 characters. Indicates whether the transit operator
1420 // wants that a not-explicitly planned trip (i.e. a journey that only runs on
1421 // reservation, e.g. 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to be
1422 // shown on displays. Values following BISON enumeration E21: TRUE (always),
1423 // FALSE (never), REALTIME (only when journey is tracked).
1424 // TODO: See if we can make BISON enumeration E21 into an enum
1425 std::string show_flexible_trip;
1426
1427 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1428 Kv1ScheduleVersion *p_schedule_version = nullptr;
1429 Kv1Line *p_line = nullptr;
1430 Kv1JourneyPattern *p_journey_pattern = nullptr;
1431 Kv1UserStopPoint *p_user_stop = nullptr;
1432};
1433
1434// KV1 Table 29: Operating Day [OPERDAY]
1435//
1436// Contains the operational calendar. Which package (schedule version) applies
1437// is specified per day, per organisation unit.
1438//
1439// This table is part of the KV1 variant "schedules and passing times".
1440struct Kv1OperatingDay {
1441 struct Key {
1442 // Mandatory (key), at most 10 characters. Transport operator (from list as
1443 // defined in BISON enumeration E1).
1444 std::string data_owner_code;
1445 // Mandatory (key), at most 10 characters.
1446 std::string organizational_unit_code;
1447 // Mandatory (key), at most 10 characters.
1448 std::string schedule_code;
1449 // Mandatory (key), at most 10 characters.
1450 std::string schedule_type_code;
1451 // Mandatory (key), at most 10 characters. Date on which the package
1452 // (schedule version) applies. Format: "YYYY-MM-DD".
1453 std::chrono::year_month_day valid_date;
1454
1455 explicit Key(std::string data_owner_code,
1456 std::string organizational_unit_code,
1457 std::string schedule_code,
1458 std::string schedule_type_code,
1459 std::chrono::year_month_day valid_date);
1460 };
1461
1462 Key key;
1463 // Optional, at most 255 characters.
1464 std::string description;
1465
1466 Kv1OrganizationalUnit *p_organizational_unit = nullptr;
1467 Kv1ScheduleVersion *p_schedule_version = nullptr;
1468};
1469
1470bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b);
1471bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b);
1472bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b);
1473bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b);
1474bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b);
1475bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b);
1476bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b);
1477bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b);
1478bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b);
1479bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b);
1480bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b);
1481bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b);
1482bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b);
1483bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b);
1484bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b);
1485bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b);
1486bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b);
1487bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b);
1488bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const Kv1TimeDemandGroupRunTime::Key &b);
1489bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b);
1490bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b);
1491bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b);
1492bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b);
1493bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b);
1494bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b);
1495bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b);
1496bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b);
1497bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b);
1498
1499size_t hash_value(const Kv1OrganizationalUnit::Key &k);
1500size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k);
1501size_t hash_value(const Kv1UserStopPoint::Key &k);
1502size_t hash_value(const Kv1UserStopArea::Key &k);
1503size_t hash_value(const Kv1TimingLink::Key &k);
1504size_t hash_value(const Kv1Link::Key &k);
1505size_t hash_value(const Kv1Line::Key &k);
1506size_t hash_value(const Kv1Destination::Key &k);
1507size_t hash_value(const Kv1JourneyPattern::Key &k);
1508size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k);
1509size_t hash_value(const Kv1ConcessionArea::Key &k);
1510size_t hash_value(const Kv1Financer::Key &k);
1511size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k);
1512size_t hash_value(const Kv1Point::Key &k);
1513size_t hash_value(const Kv1PointOnLink::Key &k);
1514size_t hash_value(const Kv1Icon::Key &k);
1515size_t hash_value(const Kv1Notice::Key &k);
1516size_t hash_value(const Kv1TimeDemandGroup::Key &k);
1517size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k);
1518size_t hash_value(const Kv1PeriodGroup::Key &k);
1519size_t hash_value(const Kv1SpecificDay::Key &k);
1520size_t hash_value(const Kv1TimetableVersion::Key &k);
1521size_t hash_value(const Kv1PublicJourney::Key &k);
1522size_t hash_value(const Kv1PeriodGroupValidity::Key &k);
1523size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k);
1524size_t hash_value(const Kv1ScheduleVersion::Key &k);
1525size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k);
1526size_t hash_value(const Kv1OperatingDay::Key &k);
1527
1528#endif // OEUF_LIBTMI8_KV1_TYPES_HPP
diff --git a/lib/libtmi8/include/tmi8/kv6_parquet.hpp b/lib/libtmi8/include/tmi8/kv6_parquet.hpp
new file mode 100644
index 0000000..33b57ca
--- /dev/null
+++ b/lib/libtmi8/include/tmi8/kv6_parquet.hpp
@@ -0,0 +1,46 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_LIBTMI8_KV6_PARQUET_HPP
4#define OEUF_LIBTMI8_KV6_PARQUET_HPP
5
6#include <filesystem>
7
8#include <arrow/api.h>
9#include <arrow/io/api.h>
10#include <parquet/arrow/writer.h>
11
// Chunk size limit used for Parquet output.
// NOTE(review): presumably the maximum number of rows per chunk/row group --
// confirm against the users of this constant.
// `inline constexpr` gives the header a single definition shared by every
// translation unit instead of one internal-linkage copy per includer.
inline constexpr size_t MAX_PARQUET_CHUNK = 10000;
13
14struct ParquetBuilder {
15 ParquetBuilder();
16 arrow::Result<std::shared_ptr<arrow::Table>> getTable();
17
18 std::shared_ptr<arrow::Schema> schema;
19
20 arrow::StringBuilder types;
21 arrow::StringBuilder data_owner_codes;
22 arrow::StringBuilder line_planning_numbers;
23 arrow::Date32Builder operating_days;
24 arrow::UInt32Builder journey_numbers;
25 arrow::UInt8Builder reinforcement_numbers;
26 arrow::TimestampBuilder timestamps{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::default_memory_pool()};
27 arrow::StringBuilder sources;
28 arrow::Int16Builder punctualities;
29 arrow::StringBuilder user_stop_codes;
30 arrow::UInt16Builder passage_sequence_numbers;
31 arrow::UInt32Builder vehicle_numbers;
32 arrow::UInt32Builder block_codes;
33 arrow::StringBuilder wheelchair_accessibles;
34 arrow::UInt8Builder number_of_coaches;
35 arrow::Int32Builder rd_ys;
36 arrow::Int32Builder rd_xs;
37 arrow::UInt32Builder distance_since_last_user_stops;
38};
39
40[[nodiscard]]
41arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename);
42
43[[nodiscard]]
44arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename);
45
46#endif // OEUF_LIBTMI8_KV6_PARQUET_HPP
diff --git a/lib/libtmi8/src/kv1_index.cpp b/lib/libtmi8/src/kv1_index.cpp
new file mode 100644
index 0000000..23e9596
--- /dev/null
+++ b/lib/libtmi8/src/kv1_index.cpp
@@ -0,0 +1,461 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <tmi8/kv1_index.hpp>
4
5Kv1Index::Kv1Index(Kv1Records *records) : records(records) {
6 organizational_units.reserve(records->organizational_units.size());
7 for (size_t i = 0; i < records->organizational_units.size(); i++) {
8 auto *it = &records->organizational_units[i];
9 organizational_units[it->key] = it;
10 }
11 higher_organizational_units.reserve(records->higher_organizational_units.size());
12 for (size_t i = 0; i < records->higher_organizational_units.size(); i++) {
13 auto *it = &records->higher_organizational_units[i];
14 higher_organizational_units[it->key] = it;
15 }
16 user_stop_points.reserve(records->user_stop_points.size());
17 for (size_t i = 0; i < records->user_stop_points.size(); i++) {
18 auto *it = &records->user_stop_points[i];
19 user_stop_points[it->key] = it;
20 }
21 user_stop_areas.reserve(records->user_stop_areas.size());
22 for (size_t i = 0; i < records->user_stop_areas.size(); i++) {
23 auto *it = &records->user_stop_areas[i];
24 user_stop_areas[it->key] = it;
25 }
26 timing_links.reserve(records->timing_links.size());
27 for (size_t i = 0; i < records->timing_links.size(); i++) {
28 auto *it = &records->timing_links[i];
29 timing_links[it->key] = it;
30 }
31 links.reserve(records->links.size());
32 for (size_t i = 0; i < records->links.size(); i++) {
33 auto *it = &records->links[i];
34 links[it->key] = it;
35 }
36 lines.reserve(records->lines.size());
37 for (size_t i = 0; i < records->lines.size(); i++) {
38 auto *it = &records->lines[i];
39 lines[it->key] = it;
40 }
41 destinations.reserve(records->destinations.size());
42 for (size_t i = 0; i < records->destinations.size(); i++) {
43 auto *it = &records->destinations[i];
44 destinations[it->key] = it;
45 }
46 journey_patterns.reserve(records->journey_patterns.size());
47 for (size_t i = 0; i < records->journey_patterns.size(); i++) {
48 auto *it = &records->journey_patterns[i];
49 journey_patterns[it->key] = it;
50 }
51 concession_financer_relations.reserve(records->concession_financer_relations.size());
52 for (size_t i = 0; i < records->concession_financer_relations.size(); i++) {
53 auto *it = &records->concession_financer_relations[i];
54 concession_financer_relations[it->key] = it;
55 }
56 concession_areas.reserve(records->concession_areas.size());
57 for (size_t i = 0; i < records->concession_areas.size(); i++) {
58 auto *it = &records->concession_areas[i];
59 concession_areas[it->key] = it;
60 }
61 financers.reserve(records->financers.size());
62 for (size_t i = 0; i < records->financers.size(); i++) {
63 auto *it = &records->financers[i];
64 financers[it->key] = it;
65 }
66 journey_pattern_timing_links.reserve(records->journey_pattern_timing_links.size());
67 for (size_t i = 0; i < records->journey_pattern_timing_links.size(); i++) {
68 auto *it = &records->journey_pattern_timing_links[i];
69 journey_pattern_timing_links[it->key] = it;
70 }
71 points.reserve(records->points.size());
72 for (size_t i = 0; i < records->points.size(); i++) {
73 auto *it = &records->points[i];
74 points[it->key] = it;
75 }
76 point_on_links.reserve(records->point_on_links.size());
77 for (size_t i = 0; i < records->point_on_links.size(); i++) {
78 auto *it = &records->point_on_links[i];
79 point_on_links[it->key] = it;
80 }
81 icons.reserve(records->icons.size());
82 for (size_t i = 0; i < records->icons.size(); i++) {
83 auto *it = &records->icons[i];
84 icons[it->key] = it;
85 }
86 notices.reserve(records->notices.size());
87 for (size_t i = 0; i < records->notices.size(); i++) {
88 auto *it = &records->notices[i];
89 notices[it->key] = it;
90 }
91 time_demand_groups.reserve(records->time_demand_groups.size());
92 for (size_t i = 0; i < records->time_demand_groups.size(); i++) {
93 auto *it = &records->time_demand_groups[i];
94 time_demand_groups[it->key] = it;
95 }
96 time_demand_group_run_times.reserve(records->time_demand_group_run_times.size());
97 for (size_t i = 0; i < records->time_demand_group_run_times.size(); i++) {
98 auto *it = &records->time_demand_group_run_times[i];
99 time_demand_group_run_times[it->key] = it;
100 }
101 period_groups.reserve(records->period_groups.size());
102 for (size_t i = 0; i < records->period_groups.size(); i++) {
103 auto *it = &records->period_groups[i];
104 period_groups[it->key] = it;
105 }
106 specific_days.reserve(records->specific_days.size());
107 for (size_t i = 0; i < records->specific_days.size(); i++) {
108 auto *it = &records->specific_days[i];
109 specific_days[it->key] = it;
110 }
111 timetable_versions.reserve(records->timetable_versions.size());
112 for (size_t i = 0; i < records->timetable_versions.size(); i++) {
113 auto *it = &records->timetable_versions[i];
114 timetable_versions[it->key] = it;
115 }
116 public_journeys.reserve(records->public_journeys.size());
117 for (size_t i = 0; i < records->public_journeys.size(); i++) {
118 auto *it = &records->public_journeys[i];
119 public_journeys[it->key] = it;
120 }
121 period_group_validities.reserve(records->period_group_validities.size());
122 for (size_t i = 0; i < records->period_group_validities.size(); i++) {
123 auto *it = &records->period_group_validities[i];
124 period_group_validities[it->key] = it;
125 }
126 exceptional_operating_days.reserve(records->exceptional_operating_days.size());
127 for (size_t i = 0; i < records->exceptional_operating_days.size(); i++) {
128 auto *it = &records->exceptional_operating_days[i];
129 exceptional_operating_days[it->key] = it;
130 }
131 schedule_versions.reserve(records->schedule_versions.size());
132 for (size_t i = 0; i < records->schedule_versions.size(); i++) {
133 auto *it = &records->schedule_versions[i];
134 schedule_versions[it->key] = it;
135 }
136 public_journey_passing_times.reserve(records->public_journey_passing_times.size());
137 for (size_t i = 0; i < records->public_journey_passing_times.size(); i++) {
138 auto *it = &records->public_journey_passing_times[i];
139 public_journey_passing_times[it->key] = it;
140 }
141 operating_days.reserve(records->operating_days.size());
142 for (size_t i = 0; i < records->operating_days.size(); i++) {
143 auto *it = &records->operating_days[i];
144 operating_days[it->key] = it;
145 }
146}
147
148size_t Kv1Index::size() const {
149 return organizational_units.size()
150 + higher_organizational_units.size()
151 + user_stop_points.size()
152 + user_stop_areas.size()
153 + timing_links.size()
154 + links.size()
155 + lines.size()
156 + destinations.size()
157 + journey_patterns.size()
158 + concession_financer_relations.size()
159 + concession_areas.size()
160 + financers.size()
161 + journey_pattern_timing_links.size()
162 + points.size()
163 + point_on_links.size()
164 + icons.size()
165 + notices.size()
166 + time_demand_groups.size()
167 + time_demand_group_run_times.size()
168 + period_groups.size()
169 + specific_days.size()
170 + timetable_versions.size()
171 + public_journeys.size()
172 + period_group_validities.size()
173 + exceptional_operating_days.size()
174 + schedule_versions.size()
175 + public_journey_passing_times.size()
176 + operating_days.size();
177}
178
179void kv1LinkRecords(Kv1Index &index) {
180 for (auto &orunorun : index.records->higher_organizational_units) {
181 Kv1OrganizationalUnit::Key orun_parent_key(
182 orunorun.key.data_owner_code,
183 orunorun.key.organizational_unit_code_parent);
184 Kv1OrganizationalUnit::Key orun_child_key(
185 orunorun.key.data_owner_code,
186 orunorun.key.organizational_unit_code_child);
187 orunorun.p_organizational_unit_parent = index.organizational_units[orun_parent_key];
188 orunorun.p_organizational_unit_child = index.organizational_units[orun_child_key];
189 }
190 for (auto &usrstop : index.records->user_stop_points) {
191 Kv1Point::Key point_key(
192 usrstop.key.data_owner_code,
193 usrstop.key.user_stop_code);
194 usrstop.p_point = index.points[point_key];
195 if (!usrstop.user_stop_area_code.empty()) {
196 Kv1UserStopArea::Key usrstar_key(
197 usrstop.key.data_owner_code,
198 usrstop.user_stop_area_code);
199 usrstop.p_user_stop_area = index.user_stop_areas[usrstar_key];
200 }
201 }
202 for (auto &tili : index.records->timing_links) {
203 Kv1UserStopPoint::Key usrstop_begin_key(
204 tili.key.data_owner_code,
205 tili.key.user_stop_code_begin);
206 Kv1UserStopPoint::Key usrstop_end_key(
207 tili.key.data_owner_code,
208 tili.key.user_stop_code_end);
209 tili.p_user_stop_begin = index.user_stop_points[usrstop_begin_key];
210 tili.p_user_stop_end = index.user_stop_points[usrstop_end_key];
211 }
212 for (auto &link : index.records->links) {
213 Kv1UserStopPoint::Key usrstop_begin_key(
214 link.key.data_owner_code,
215 link.key.user_stop_code_begin);
216 Kv1UserStopPoint::Key usrstop_end_key(
217 link.key.data_owner_code,
218 link.key.user_stop_code_end);
219 link.p_user_stop_begin = index.user_stop_points[usrstop_begin_key];
220 link.p_user_stop_end = index.user_stop_points[usrstop_end_key];
221 }
222 for (auto &line : index.records->lines) {
223 if (!line.line_icon)
224 continue;
225 Kv1Icon::Key icon_key(
226 line.key.data_owner_code,
227 *line.line_icon);
228 line.p_line_icon = index.icons[icon_key];
229 }
230 for (auto &jopa : index.records->journey_patterns) {
231 Kv1Line::Key line_key(
232 jopa.key.data_owner_code,
233 jopa.key.line_planning_number);
234 jopa.p_line = index.lines[line_key];
235 }
236 for (auto &confinrel : index.records->concession_financer_relations) {
237 Kv1ConcessionArea::Key conarea_key(
238 confinrel.key.data_owner_code,
239 confinrel.concession_area_code);
240 confinrel.p_concession_area = index.concession_areas[conarea_key];
241 if (!confinrel.financer_code.empty()) {
242 Kv1Financer::Key financer_key(
243 confinrel.key.data_owner_code,
244 confinrel.financer_code);
245 confinrel.p_financer = index.financers[financer_key];
246 }
247 }
248 for (auto &jopatili : index.records->journey_pattern_timing_links) {
249 Kv1Line::Key line_key(
250 jopatili.key.data_owner_code,
251 jopatili.key.line_planning_number);
252 Kv1JourneyPattern::Key jopa_key(
253 jopatili.key.data_owner_code,
254 jopatili.key.line_planning_number,
255 jopatili.key.journey_pattern_code);
256 Kv1UserStopPoint::Key usrstop_begin_key(
257 jopatili.key.data_owner_code,
258 jopatili.user_stop_code_begin);
259 Kv1UserStopPoint::Key usrstop_end_key(
260 jopatili.key.data_owner_code,
261 jopatili.user_stop_code_end);
262 Kv1ConcessionFinancerRelation::Key confinrel_key(
263 jopatili.key.data_owner_code,
264 jopatili.con_fin_rel_code);
265 Kv1Destination::Key dest_key(
266 jopatili.key.data_owner_code,
267 jopatili.dest_code);
268 jopatili.p_line = index.lines[line_key];
269 jopatili.p_journey_pattern = index.journey_patterns[jopa_key];
270 jopatili.p_user_stop_begin = index.user_stop_points[usrstop_begin_key];
271 jopatili.p_user_stop_end = index.user_stop_points[usrstop_end_key];
272 jopatili.p_con_fin_rel = index.concession_financer_relations[confinrel_key];
273 jopatili.p_dest = index.destinations[dest_key];
274 if (jopatili.line_dest_icon) {
275 Kv1Icon::Key icon_key{
276 jopatili.key.data_owner_code,
277 *jopatili.line_dest_icon,
278 };
279 jopatili.p_line_dest_icon = index.icons[icon_key];
280 }
281 }
282 for (auto &pool : index.records->point_on_links) {
283 Kv1UserStopPoint::Key usrstop_begin_key(
284 pool.key.data_owner_code,
285 pool.key.user_stop_code_begin);
286 Kv1UserStopPoint::Key usrstop_end_key(
287 pool.key.data_owner_code,
288 pool.key.user_stop_code_end);
289 Kv1Point::Key point_key(
290 pool.key.point_data_owner_code,
291 pool.key.point_code);
292 pool.p_user_stop_begin = index.user_stop_points[usrstop_begin_key];
293 pool.p_user_stop_end = index.user_stop_points[usrstop_end_key];
294 pool.p_point = index.points[point_key];
295 }
296 for (auto &ntcassgnm : index.records->notice_assignments) {
297 Kv1Notice::Key notice_key(
298 ntcassgnm.data_owner_code,
299 ntcassgnm.notice_code);
300 ntcassgnm.p_notice = index.notices[notice_key];
301 }
302 for (auto &timdemgrp : index.records->time_demand_groups) {
303 Kv1Line::Key line_key(
304 timdemgrp.key.data_owner_code,
305 timdemgrp.key.line_planning_number);
306 Kv1JourneyPattern::Key jopa_key(
307 timdemgrp.key.data_owner_code,
308 timdemgrp.key.line_planning_number,
309 timdemgrp.key.journey_pattern_code);
310 timdemgrp.p_line = index.lines[line_key];
311 timdemgrp.p_journey_pattern = index.journey_patterns[jopa_key];
312 }
313 for (auto &timdemrnt : index.records->time_demand_group_run_times) {
314 Kv1Line::Key line_key(
315 timdemrnt.key.data_owner_code,
316 timdemrnt.key.line_planning_number);
317 Kv1JourneyPattern::Key jopa_key(
318 timdemrnt.key.data_owner_code,
319 timdemrnt.key.line_planning_number,
320 timdemrnt.key.journey_pattern_code);
321 Kv1TimeDemandGroup::Key timdemgrp_key(
322 timdemrnt.key.data_owner_code,
323 timdemrnt.key.line_planning_number,
324 timdemrnt.key.journey_pattern_code,
325 timdemrnt.key.time_demand_group_code);
326 Kv1UserStopPoint::Key usrstop_begin_key(
327 timdemrnt.key.data_owner_code,
328 timdemrnt.user_stop_code_begin);
329 Kv1UserStopPoint::Key usrstop_end_key(
330 timdemrnt.key.data_owner_code,
331 timdemrnt.user_stop_code_end);
332 Kv1JourneyPatternTimingLink::Key jopatili_key(
333 timdemrnt.key.data_owner_code,
334 timdemrnt.key.line_planning_number,
335 timdemrnt.key.journey_pattern_code,
336 timdemrnt.key.timing_link_order);
337 timdemrnt.p_line = index.lines[line_key];
338 timdemrnt.p_user_stop_end = index.user_stop_points[usrstop_end_key];
339 timdemrnt.p_user_stop_begin = index.user_stop_points[usrstop_begin_key];
340 timdemrnt.p_journey_pattern = index.journey_patterns[jopa_key];
341 timdemrnt.p_time_demand_group = index.time_demand_groups[timdemgrp_key];
342 timdemrnt.p_journey_pattern_timing_link = index.journey_pattern_timing_links[jopatili_key];
343 }
344 for (auto &tive : index.records->timetable_versions) {
345 Kv1OrganizationalUnit::Key orun_key(
346 tive.key.data_owner_code,
347 tive.key.organizational_unit_code);
348 Kv1PeriodGroup::Key pegr_key(
349 tive.key.data_owner_code,
350 tive.key.period_group_code);
351 Kv1SpecificDay::Key specday_key(
352 tive.key.data_owner_code,
353 tive.key.specific_day_code);
354 tive.p_organizational_unit = index.organizational_units[orun_key];
355 tive.p_period_group = index.period_groups[pegr_key];
356 tive.p_specific_day = index.specific_days[specday_key];
357 }
358 for (auto &pujo : index.records->public_journeys) {
359 Kv1TimetableVersion::Key tive_key(
360 pujo.key.data_owner_code,
361 pujo.key.organizational_unit_code,
362 pujo.key.timetable_version_code,
363 pujo.key.period_group_code,
364 pujo.key.specific_day_code);
365 Kv1OrganizationalUnit::Key orun_key(
366 pujo.key.data_owner_code,
367 pujo.key.organizational_unit_code);
368 Kv1PeriodGroup::Key pegr_key(
369 pujo.key.data_owner_code,
370 pujo.key.period_group_code);
371 Kv1SpecificDay::Key specday_key(
372 pujo.key.data_owner_code,
373 pujo.key.specific_day_code);
374 Kv1Line::Key line_key(
375 pujo.key.data_owner_code,
376 pujo.key.line_planning_number);
377 Kv1TimeDemandGroup::Key timdemgrp_key(
378 pujo.key.data_owner_code,
379 pujo.key.line_planning_number,
380 pujo.journey_pattern_code,
381 pujo.time_demand_group_code);
382 Kv1JourneyPattern::Key jopa_key(
383 pujo.key.data_owner_code,
384 pujo.key.line_planning_number,
385 pujo.journey_pattern_code);
386 pujo.p_timetable_version = index.timetable_versions[tive_key];
387 pujo.p_organizational_unit = index.organizational_units[orun_key];
388 pujo.p_period_group = index.period_groups[pegr_key];
389 pujo.p_specific_day = index.specific_days[specday_key];
390 pujo.p_line = index.lines[line_key];
391 pujo.p_time_demand_group = index.time_demand_groups[timdemgrp_key];
392 pujo.p_journey_pattern = index.journey_patterns[jopa_key];
393 }
394 for (auto &pegrval : index.records->period_group_validities) {
395 Kv1OrganizationalUnit::Key orun_key(
396 pegrval.key.data_owner_code,
397 pegrval.key.organizational_unit_code);
398 Kv1PeriodGroup::Key pegr_key(
399 pegrval.key.data_owner_code,
400 pegrval.key.period_group_code);
401 pegrval.p_organizational_unit = index.organizational_units[orun_key];
402 pegrval.p_period_group = index.period_groups[pegr_key];
403 }
404 for (auto &excopday : index.records->exceptional_operating_days) {
405 Kv1OrganizationalUnit::Key orun_key(
406 excopday.key.data_owner_code,
407 excopday.key.organizational_unit_code);
408 Kv1SpecificDay::Key specday_key(
409 excopday.key.data_owner_code,
410 excopday.specific_day_code);
411 Kv1PeriodGroup::Key pegr_key(
412 excopday.key.data_owner_code,
413 excopday.period_group_code);
414 excopday.p_organizational_unit = index.organizational_units[orun_key];
415 excopday.p_specific_day = index.specific_days[specday_key];
416 excopday.p_period_group = index.period_groups[pegr_key];
417 }
418 for (auto &schedvers : index.records->schedule_versions) {
419 Kv1OrganizationalUnit::Key orun_key(
420 schedvers.key.data_owner_code,
421 schedvers.key.organizational_unit_code);
422 schedvers.p_organizational_unit = index.organizational_units[orun_key];
423 }
424 for (auto &pujopass : index.records->public_journey_passing_times) {
425 Kv1OrganizationalUnit::Key orun_key(
426 pujopass.key.data_owner_code,
427 pujopass.key.organizational_unit_code);
428 Kv1ScheduleVersion::Key schedvers_key(
429 pujopass.key.data_owner_code,
430 pujopass.key.organizational_unit_code,
431 pujopass.key.schedule_code,
432 pujopass.key.schedule_type_code);
433 Kv1Line::Key line_key(
434 pujopass.key.data_owner_code,
435 pujopass.key.line_planning_number);
436 Kv1JourneyPattern::Key jopa_key(
437 pujopass.key.data_owner_code,
438 pujopass.key.line_planning_number,
439 pujopass.journey_pattern_code);
440 Kv1UserStopPoint::Key usrstop_key(
441 pujopass.key.data_owner_code,
442 pujopass.user_stop_code);
443 pujopass.p_organizational_unit = index.organizational_units[orun_key];
444 pujopass.p_schedule_version = index.schedule_versions[schedvers_key];
445 pujopass.p_line = index.lines[line_key];
446 pujopass.p_journey_pattern = index.journey_patterns[jopa_key];
447 pujopass.p_user_stop = index.user_stop_points[usrstop_key];
448 }
449 for (auto &operday : index.records->operating_days) {
450 Kv1OrganizationalUnit::Key orun_key(
451 operday.key.data_owner_code,
452 operday.key.organizational_unit_code);
453 Kv1ScheduleVersion::Key schedvers_key(
454 operday.key.data_owner_code,
455 operday.key.organizational_unit_code,
456 operday.key.schedule_code,
457 operday.key.schedule_type_code);
458 operday.p_organizational_unit = index.organizational_units[orun_key];
459 operday.p_schedule_version = index.schedule_versions[schedvers_key];
460 }
461}
diff --git a/lib/libtmi8/src/kv1_lexer.cpp b/lib/libtmi8/src/kv1_lexer.cpp
new file mode 100644
index 0000000..028127b
--- /dev/null
+++ b/lib/libtmi8/src/kv1_lexer.cpp
@@ -0,0 +1,152 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <tmi8/kv1_lexer.hpp>
4
5Kv1Lexer::Kv1Lexer(std::string_view input)
6 : input(input), slice(input)
7{}
8
9// Does not eat newline character.
10void Kv1Lexer::eatRestOfLine() {
11 size_t end = slice.size();
12 for (size_t i = 0; i < slice.size(); i++) {
13 if (slice[i] == '\r' || slice[i] == '\n') {
14 end = i;
15 break;
16 }
17 }
18 slice = slice.substr(end);
19}
20
21void Kv1Lexer::lexOptionalHeader() {
22 if (slice.starts_with('[')) eatRestOfLine();
23}
24
25void Kv1Lexer::lexOptionalComment() {
26 if (slice.starts_with(';')) eatRestOfLine();
27}
28
29inline bool Kv1Lexer::isWhitespace(int c) {
30 return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
31}
32
33void Kv1Lexer::readQuotedColumn() {
34 Kv1Token token{ .type = KV1_TOKEN_CELL };
35
36 if (slice.size() == 0 || slice[0] != '"') {
37 errors.push_back("(internal error) readQuotedColumn: slice[0] != '\"'");
38 return;
39 }
40 slice = slice.substr(1);
41 while (true) {
42 size_t quote = slice.find('"');
43 if (quote == std::string_view::npos) {
44 errors.push_back("readQuotedColumn: no matching closing quote found");
45 return;
46 }
47 if (quote+1 == slice.size() || slice[quote + 1] != '"') {
48 token.data.append(slice.substr(0, quote));
49 break;
50 }
51 token.data.append(slice.substr(0, quote + 1));
52 slice = slice.substr(quote + 2);
53 }
54
55 size_t end = slice.size();
56 for (size_t i = 0; i < slice.size(); i++) {
57 if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') {
58 end = i;
59 break;
60 }
61 if (!isWhitespace(slice[i])) {
62 errors.push_back("readQuotedColumn: encountered non-whitespace character after closing quote");
63 return;
64 }
65 }
66 if (end != std::string_view::npos) slice = slice.substr(end);
67 else slice = slice.substr(slice.size());
68
69 tokens.push_back(std::move(token));
70}
71
72void Kv1Lexer::readUnquotedColumn() {
73 size_t end = slice.size();
74 size_t content_end = 0;
75 for (size_t i = 0; i < slice.size(); i++) {
76 if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') {
77 end = i;
78 break;
79 } else if (!isWhitespace(slice[i])) {
80 content_end = i + 1;
81 }
82 }
83 tokens.emplace_back(KV1_TOKEN_CELL, std::string(slice.substr(0, content_end)));
84 if (end != std::string_view::npos) slice = slice.substr(end);
85 else slice = slice.substr(slice.size());
86}
87
88void Kv1Lexer::lexRow() {
89 size_t cols = 0;
90 while (slice.size() > 0 && slice[0] != '\r' && slice[0] != '\n') {
91 if (slice[0] == '"') readQuotedColumn();
92 else readUnquotedColumn();
93 if (!errors.empty()) return;
94 cols++;
95 if (slice.size() != 0) {
96 if (slice[0] == '|') {
97 slice = slice.substr(1);
98 // A newline/eof right after pipe? That means an empty field at the end
99 // of the record, we also want to emit that as a token.
100 if (slice.size() == 0 || slice[0] == '\r' || slice[0] == '\n') {
101 tokens.push_back({ .type = KV1_TOKEN_CELL });
102 }
103 } else if (slice[0] == '\r') {
104 if (slice.size() > 1 && slice[1] == '\n') slice = slice.substr(2);
105 else slice = slice.substr(1);
106 break;
107 } else if (slice[0] == '\n') {
108 slice = slice.substr(1);
109 break;
110 } else {
111 errors.push_back("lexRow: expected CR, LF or |");
112 return;
113 }
114 }
115 }
116 tokens.push_back({ .type = KV1_TOKEN_ROW_END });
117}
118
119// Returns true when a line ending was consumed.
120bool Kv1Lexer::eatWhitespace() {
121 for (size_t i = 0; i < slice.size(); i++) {
122 if (slice[i] == '\r') {
123 slice = slice.substr(i + 1);
124 if (slice.size() > 1 && slice[i + 1] == '\n')
125 slice = slice.substr(i + 2);
126 return true;
127 }
128 if (slice[i] == '\n') {
129 slice = slice.substr(i + 1);
130 return true;
131 }
132
133 if (slice[i] != ' ' && slice[i] != '\f' && slice[i] != '\t' && slice[i] != '\v') {
134 slice = slice.substr(i);
135 return false;
136 }
137 }
138 return false;
139}
140
141void Kv1Lexer::lex() {
142 lexOptionalHeader();
143 eatWhitespace();
144
145 while (errors.empty() && !slice.empty()) {
146 lexOptionalComment();
147 bool newline = eatWhitespace();
148 if (newline) continue;
149 // We are now either (1) at the end of the file or (2) at the start of some column data
150 if (errors.empty()) lexRow();
151 }
152}
diff --git a/lib/libtmi8/src/kv1_parser.cpp b/lib/libtmi8/src/kv1_parser.cpp
new file mode 100644
index 0000000..ac0c6bf
--- /dev/null
+++ b/lib/libtmi8/src/kv1_parser.cpp
@@ -0,0 +1,1258 @@
1// vim:set sw=2 ts=2 sts et:
2
#include <tmi8/kv1_parser.hpp>

#include <cmath>
#include <limits>
4
using rune = uint32_t;

// Decodes the UTF-8 sequence at the start of s.
//
// Returns the number of bytes consumed: 0 when s is empty, otherwise the
// sequence length announced by the lead byte (1-4), capped at s.size() so that
// callers can always safely skip that many bytes.
//
// When dest is non-null and s is non-empty, *dest receives the decoded code
// point, or U+FFFD when the lead byte is not a valid lead byte or when the
// expected continuation bytes are missing or malformed.
static size_t decodeUtf8Cp(std::string_view s, rune *dest = nullptr) {
  if (s.empty())
    return 0;

  const uint8_t *b = reinterpret_cast<const uint8_t *>(s.data());
  // True when byte i exists and has the continuation form 10xxxxxx.
  const auto isContinuation = [&](size_t i) {
    return i < s.size() && (b[i] & 0xC0) == 0x80;
  };
  // The six payload bits of continuation byte i.
  const auto payload = [&](size_t i) {
    return static_cast<rune>(b[i] & 0x3F);
  };

  rune res = 0xFFFD;
  size_t length = 1;
  if (!(b[0] & 0x80)) {
    res = static_cast<rune>(b[0]);
  } else if ((b[0] & 0xE0) == 0xC0) {
    length = 2;
    if (isContinuation(1))
      res = static_cast<rune>(b[0] & 0x1F) << 6 | payload(1);
  } else if ((b[0] & 0xF0) == 0xE0) {
    length = 3;
    if (isContinuation(1) && isContinuation(2))
      res = static_cast<rune>(b[0] & 0x0F) << 12 | payload(1) << 6 | payload(2);
  } else if ((b[0] & 0xF8) == 0xF0) {
    // Every lead byte of the form 11110xxx starts a four-byte sequence, not
    // just 0xF0 itself.
    length = 4;
    if (isContinuation(1) && isContinuation(2) && isContinuation(3))
      res = static_cast<rune>(b[0] & 0x07) << 18 | payload(1) << 12 | payload(2) << 6 | payload(3);
  }

  // A truncated sequence at the end of the input must not report more bytes
  // than are actually available.
  if (length > s.size())
    length = s.size();

  if (dest)
    *dest = res;
  return length;
}
43
44// Counts the number of codepoints in a valid UTF-8 string. Returns SIZE_MAX if
45// the string contains invalid UTF-8 codepoints.
46static size_t stringViewLengthUtf8(std::string_view sv) {
47 size_t codepoints = 0;
48 while (sv.size() > 0) {
49 size_t codepoint_size = decodeUtf8Cp(sv);
50 if (codepoint_size == 0) return SIZE_MAX;
51 codepoints++;
52 sv = sv.substr(codepoint_size);
53 }
54 return codepoints;
55}
56
57Kv1Parser::Kv1Parser(std::vector<Kv1Token> tokens, Kv1Records &parse_into)
58 : tokens(std::move(tokens)),
59 records(parse_into)
60{}
61
62bool Kv1Parser::atEnd() const {
63 return pos >= tokens.size();
64}
65
66void Kv1Parser::eatRowEnds() {
67 while (!atEnd() && tokens[pos].type == KV1_TOKEN_ROW_END) pos++;
68}
69
70const Kv1Token *Kv1Parser::cur() const {
71 if (atEnd()) return nullptr;
72 return &tokens[pos];
73}
74
// Consumes the token at the current position and returns a pointer to its
// data. If there is no token left, or the current token is a row end, nothing
// is consumed, a record error mentioning parsing_what is added and nullptr is
// returned.
const std::string *Kv1Parser::eatCell(std::string_view parsing_what) {
  if (atEnd()) {
    record_errors.push_back(std::format("Expected cell but got end of file when parsing {}", parsing_what));
    return nullptr;
  }

  const Kv1Token &token = tokens[pos];
  if (token.type == KV1_TOKEN_ROW_END) {
    record_errors.push_back(std::format("Expected cell but got end of row when parsing {}", parsing_what));
    return nullptr;
  }

  pos++;
  return &token.data;
}
88
// Validates a string field.
//
// - An empty value for a mandatory field adds a record error.
// - A value for which stringViewLengthUtf8 reports SIZE_MAX adds a global
//   error.
// - A value with more than max_length code points adds a record error.
//
// field is only used to name the field in error messages.
void Kv1Parser::requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value) {
  if (value.empty() && mandatory) {
    record_errors.push_back(std::format("{} has length zero but is required", field));
    return;
  }
  size_t codepoints = stringViewLengthUtf8(value);
  if (codepoints == SIZE_MAX) {
    global_errors.push_back(std::format("{} contains invalid UTF-8 code points", field));
    return;
  }
  if (codepoints > max_length) {
    // The limit is checked in code points, so report the length in code
    // points too; the size in bytes can be larger for non-ASCII text.
    record_errors.push_back(std::format("{} has length ({}) that is greater than maximum length ({})",
                                        field, codepoints, max_length));
  }
}
104
// Maps "1"/"true" to true and "0"/"false" to false (case-sensitive). Any other
// input yields std::nullopt.
static inline std::optional<bool> parseBoolean(std::string_view src) {
  if (src == "1" || src == "true") return true;
  if (src == "0" || src == "false") return false;
  return std::nullopt;
}
112
// Validates and converts a boolean field. An empty value yields std::nullopt
// and, for mandatory fields, a record error. A non-empty value that
// parseBoolean does not accept adds a record error and yields std::nullopt as
// well.
std::optional<bool> Kv1Parser::requireBoolean(std::string_view field, bool mandatory, std::string_view value) {
  if (!value.empty()) {
    std::optional<bool> result = parseBoolean(value);
    if (!result)
      record_errors.push_back(std::format("{} should have value \"1\", \"0\", \"true\" or \"false\"", field));
    return result;
  }

  if (mandatory)
    record_errors.push_back(std::format("{} is required, but has no value", field));
  return std::nullopt;
}
124
// Counts the decimal digits of x, ignoring its sign. Note that zero is
// reported as having no digits at all.
static inline size_t countDigits(long x) {
  size_t digits = 0;
  for (; x != 0; x /= 10)
    digits++;
  return digits;
}
130
// Validates and converts a numeric field written in fixed (non-exponent)
// notation.
//
// Returns the parsed number, or std::nullopt when the value is empty or
// invalid. Record errors are added when
// - the value is empty but the field is mandatory,
// - the value cannot be parsed, or is only partially a number,
// - the value is not finite or too large to have its digits counted,
// - the integral part has more than max_digits digits.
std::optional<double> Kv1Parser::requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} has no value but is required", field));
    return std::nullopt;
  }

  double parsed;
  auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed);
  if (ec != std::errc()) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }
  if (ptr != value.data() + value.size()) {
    record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field));
    return std::nullopt;
  }

  // Converting a double to long is undefined behaviour when the truncated
  // value does not fit (infinity, NaN, very long digit strings), so rule
  // those out before counting digits.
  if (!std::isfinite(parsed) || std::fabs(parsed) >= static_cast<double>(std::numeric_limits<long>::max())) {
    record_errors.push_back(std::format("{} has a value that is not finite or is too large to be processed", field));
    return std::nullopt;
  }

  size_t digits = countDigits(static_cast<long>(parsed));
  if (digits > max_digits) {
    record_errors.push_back(std::format("{} contains more digits (in the integral part) ({}) than allowed ({})",
                                        field, digits, max_digits));
    return std::nullopt;
  }

  return parsed;
}
158
// True for '0'-'9' and upper-case 'A'-'F'. Lower-case letters are rejected.
static inline bool isHexDigit(char c) {
  const bool decimal = '0' <= c && c <= '9';
  const bool letter = 'A' <= c && c <= 'F';
  return decimal || letter;
}
162
// Returns the value (0-15) of an upper-case hexadecimal digit. Any other
// character yields 0.
static inline uint8_t fromHex(char c) {
  if ('0' <= c && c <= '9')
    return static_cast<uint8_t>(c - '0');
  if ('A' <= c && c <= 'F')
    return static_cast<uint8_t>(c - 'A' + 10);
  return 0;
}
168
169static std::optional<RgbColor> parseRgbColor(std::string_view src) {
170 bool valid = src.size() == 6
171 && isHexDigit(src[0]) && isHexDigit(src[1])
172 && isHexDigit(src[2]) && isHexDigit(src[3])
173 && isHexDigit(src[4]) && isHexDigit(src[5]);
174 if (!valid) return std::nullopt;
175 uint8_t r = static_cast<uint8_t>(fromHex(src[0]) << 4) + fromHex(src[1]);
176 uint8_t g = static_cast<uint8_t>(fromHex(src[2]) << 4) + fromHex(src[3]);
177 uint8_t b = static_cast<uint8_t>(fromHex(src[4]) << 4) + fromHex(src[5]);
178 return RgbColor{ r, g, b };
179}
180
// Validates and converts a colour field. An empty value yields std::nullopt
// and, for mandatory fields, a record error. A non-empty value that
// parseRgbColor does not accept adds a record error and yields std::nullopt as
// well.
std::optional<RgbColor> Kv1Parser::requireRgbColor(std::string_view field, bool mandatory, std::string_view value) {
  if (!value.empty()) {
    std::optional<RgbColor> color = parseRgbColor(value);
    if (!color)
      record_errors.push_back(std::format("{} should be an RGB color, i.e. a sequence of six hexadecimally represented nibbles", field));
    return color;
  }

  if (mandatory)
    record_errors.push_back(std::format("{} is required, but has no value", field));
  return std::nullopt;
}
192
// Validates and converts a coordinate field written in fixed (non-exponent)
// notation.
//
// Returns the parsed coordinate, or std::nullopt when the value is empty or
// invalid. Record errors are added when
// - the value is empty but the field is mandatory,
// - the value is longer than 15 characters,
// - the value cannot be parsed, or is only partially a number,
// - the value is not finite or too large to have its digits counted,
// - the integral part has fewer than min_digits digits.
std::optional<double> Kv1Parser::requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} is required, but has no value", field));
    return std::nullopt;
  }
  if (value.size() > 15) {
    record_errors.push_back(std::format("{} may not have more than 15 characters", field));
    return std::nullopt;
  }

  double parsed;
  auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed);
  if (ec != std::errc()) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }
  if (ptr != value.data() + value.size()) {
    record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field));
    return std::nullopt;
  }

  // Converting a double to long is undefined behaviour when the truncated
  // value does not fit (infinity, NaN, or a long digit string where long is
  // narrow), so rule those out before counting digits.
  if (!std::isfinite(parsed) || std::fabs(parsed) >= static_cast<double>(std::numeric_limits<long>::max())) {
    record_errors.push_back(std::format("{} has a value that is not finite or is too large to be processed", field));
    return std::nullopt;
  }

  size_t digits = countDigits(static_cast<long>(parsed));
  if (digits < min_digits) {
    record_errors.push_back(std::format("{} contains less digits (in the integral part) ({}) than required ({}) [value: {}]",
                                        field, digits, min_digits, value));
    return std::nullopt;
  }

  return parsed;
}
224
225std::string Kv1Parser::eatString(std::string_view field, bool mandatory, size_t max_length) {
226 auto value = eatCell(field);
227 if (!record_errors.empty()) return {};
228 requireString(field, mandatory, max_length, *value);
229 return std::move(*value);
230}
231
232std::optional<bool> Kv1Parser::eatBoolean(std::string_view field, bool mandatory) {
233 auto value = eatCell(field);
234 if (!record_errors.empty()) return {};
235 return requireBoolean(field, mandatory, *value);
236}
237
238std::optional<double> Kv1Parser::eatNumber(std::string_view field, bool mandatory, size_t max_digits) {
239 auto value = eatCell(field);
240 if (!record_errors.empty()) return {};
241 return requireNumber(field, mandatory, max_digits, *value);
242}
243
244std::optional<RgbColor> Kv1Parser::eatRgbColor(std::string_view field, bool mandatory) {
245 auto value = eatCell(field);
246 if (!record_errors.empty()) return {};
247 return requireRgbColor(field, mandatory, *value);
248}
249
250std::optional<double> Kv1Parser::eatRdCoord(std::string_view field, bool mandatory, size_t min_digits) {
251 auto value = eatCell(field);
252 if (!record_errors.empty()) return {};
253 return requireRdCoord(field, mandatory, min_digits, *value);
254}
255
256std::string Kv1Parser::parseHeader() {
257 auto record_type = eatString("<header>.Recordtype", true, 10);
258 auto version_number = eatString("<header>.VersionNumber", true, 2);
259 auto implicit_explicit = eatString("<header>.Implicit/Explicit", true, 1);
260 if (!record_errors.empty()) return {};
261
262 if (version_number != "1") {
263 record_errors.push_back("<header>.VersionNumber should be 1");
264 return "";
265 }
266 if (implicit_explicit != "I") {
267 record_errors.push_back("<header>.Implicit/Explicit should be 'I'");
268 return "";
269 }
270
271 return record_type;
272}
273
274void Kv1Parser::eatRestOfRow() {
275 while (!atEnd() && cur()->type != KV1_TOKEN_ROW_END) pos++;
276}
277
// Parses all tokens, one record per row.
//
// For every row, the header is parsed first. A header error stops parsing
// altogether and leaves the error in record_errors. Rows with a record type
// that has no entry in type_parsers are skipped with a warning. Otherwise the
// matching parser function is invoked; any record errors it leaves behind are
// moved to global_errors before the next row is handled.
void Kv1Parser::parse() {
  for (;;) {
    eatRowEnds();
    if (atEnd()) return;

    std::string record_type = parseHeader();
    if (!record_errors.empty()) return;

    if (!type_parsers.contains(record_type)) {
      warns.push_back(std::format("Recordtype ({}) is bad or names a record type that this program cannot process",
                                  record_type));
      eatRestOfRow();
      continue;
    }

    ParseFunc parseType = Kv1Parser::type_parsers.at(record_type);
    (this->*parseType)();

    // Anything left before the row end was not consumed by the parser
    // function; report it and resynchronise on the next row.
    const Kv1Token *leftover = cur();
    if (leftover && leftover->type != KV1_TOKEN_ROW_END) {
      record_errors.push_back(std::format("Parser function for Recordtype ({}) did not eat all record fields",
                                          record_type));
      eatRestOfRow();
    }

    if (record_errors.empty()) continue;
    global_errors.insert(global_errors.end(), record_errors.begin(), record_errors.end());
    record_errors.clear();
  }
}
305
306void Kv1Parser::parseOrganizationalUnit() {
307 auto data_owner_code = eatString("ORUN.DataOwnerCode", true, 10);
308 auto organizational_unit_code = eatString("ORUN.OrganizationalUnitCode", true, 10);
309 auto name = eatString("ORUN.Name", true, 50);
310 auto organizational_unit_type = eatString("ORUN.OrganizationalUnitType", true, 10);
311 auto description = eatString("ORUN.Description", false, 255);
312 if (!record_errors.empty()) return;
313
314 records.organizational_units.emplace_back(
315 Kv1OrganizationalUnit::Key(
316 data_owner_code,
317 organizational_unit_code),
318 name,
319 organizational_unit_type,
320 description);
321}
322
// True for the ASCII decimal digits '0'-'9'.
static inline bool isDigit(char c) {
  return !(c < '0' || c > '9');
}
326
327// Parse a string of the format YYYY-MM-DD.
328static std::optional<std::chrono::year_month_day> parseYyyymmdd(std::string_view src) {
329 bool valid = src.size() == 10
330 && isDigit(src[0]) && isDigit(src[1])
331 && isDigit(src[2]) && isDigit(src[3]) && src[4] == '-'
332 && isDigit(src[5]) && isDigit(src[6]) && src[7] == '-'
333 && isDigit(src[8]) && isDigit(src[9]);
334 if (!valid) return std::nullopt;
335 int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0';
336 int month = (src[5] - '0') * 10 + src[6] - '0';
337 int day = (src[8] - '0') * 10 + src[9] - '0';
338 return std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day);
339}
340
341// Parse a string of the format HH:MM:SS.
342static std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> parseHhmmss(std::string_view src) {
343 bool valid = src.size() == 8
344 && isDigit(src[0]) && isDigit(src[1]) && src[2] == ':'
345 && isDigit(src[3]) && isDigit(src[4]) && src[5] == ':'
346 && isDigit(src[6]) && isDigit(src[7]);
347 if (!valid) return std::nullopt;
348 int hh = (src[0] - '0') * 10 + src[1] - '0';
349 int mm = (src[3] - '0') * 10 + src[4] - '0';
350 int ss = (src[6] - '0') * 10 + src[7] - '0';
351 // The check for the hour not being greater than 32 comes from the fact the
352 // specification explicitly allows hours greater than 23, noting that the
353 // period 24:00-32:00 is equivalent to 00:00-08:00 in the next day, for
354 // exploitation of two days.
355 if (hh > 32 || mm > 59 || ss > 59) return std::nullopt;
356 return std::chrono::hh_mm_ss(std::chrono::hours(hh) + std::chrono::minutes(mm) + std::chrono::seconds(ss));
357}
358
359static std::optional<std::chrono::sys_seconds> parseDateTime(std::string_view src, const std::chrono::time_zone *amsterdam, std::string_view *error = nullptr) {
360#define ERROR(err) do { if (error) *error = err; return std::nullopt; } while (0)
361 if (src.size() > 23) ERROR("timestamp string is too big");
362 if (src.size() < 17) ERROR("timestamp string is too small");
363
364 bool valid_year = isDigit(src[0]) && isDigit(src[1]) && isDigit(src[2]) && isDigit(src[3]);
365 if (!valid_year) ERROR("year has bad format");
366
367 size_t month_off = src[4] == '-' ? 5 : 4;
368 size_t day_off = src[month_off + 2] == '-' ? month_off + 3 : month_off + 2;
369 size_t time_off = day_off + 2;
370 if (src[time_off] != 'T' && src[time_off] != ' ')
371 ERROR("missing date/time separator");
372 size_t tzd_off = time_off + 9;
373 // For clarity, TZD stands for Time Zone Designator. It often takes the form
374 // of Z (Zulu, UTC+00:00) or as an offset from UTC in hours and minutes,
375 // formatted as +|-HH:MM (e.g. +01:00, -12:00).
376
377 if (time_off + 8 >= src.size()) ERROR("bad format, not enough space for hh:mm:ss");
378
379 int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0';
380 int month = (src[month_off] - '0') * 10 + src[month_off + 1] - '0';
381 int day = (src[day_off] - '0') * 10 + src[day_off + 1] - '0';
382 int hour = (src[time_off + 1] - '0') * 10 + src[time_off + 2] - '0';
383 int minute = (src[time_off + 4] - '0') * 10 + src[time_off + 5] - '0';
384 int second = (src[time_off + 7] - '0') * 10 + src[time_off + 8] - '0';
385
386 auto date = std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day);
387 auto time = std::chrono::hours(hour) + std::chrono::minutes(minute) + std::chrono::seconds(second);
388
389 std::chrono::sys_seconds unix_start_of_day;
390 if (tzd_off < src.size()) {
391 unix_start_of_day = std::chrono::sys_days(date);
392 } else {
393 auto local_days = std::chrono::local_days(date);
394 std::chrono::zoned_seconds zoned_start_of_day = std::chrono::zoned_time(amsterdam, local_days);
395 unix_start_of_day = std::chrono::sys_seconds(zoned_start_of_day);
396 }
397
398 std::chrono::minutes offset(0);
399 if (tzd_off + 1 == src.size() && src[tzd_off] != 'Z') {
400 ERROR("bad TZD (missing Zulu indicator)");
401 } else if (tzd_off + 6 == src.size()) {
402 bool valid_tzd = (src[tzd_off] == '+' || src[tzd_off] == '-')
403 && isDigit(src[tzd_off + 1]) && isDigit(src[tzd_off + 2]) && src[tzd_off + 3] == ':'
404 && isDigit(src[tzd_off + 4]) && isDigit(src[tzd_off + 5]);
405 if (!valid_tzd) ERROR("bad offset TZD format (expected +|-hh:mm)");
406 int sign = src[tzd_off] == '-' ? -1 : 1;
407 int tzd_hh = (src[tzd_off + 1] - '0') * 10 + src[tzd_off + 2] - '0';
408 int tzd_mm = (src[tzd_off + 3] - '0') * 10 + src[tzd_off + 4] - '0';
409 offset = sign * std::chrono::minutes(tzd_hh * 60 + tzd_mm);
410 } else if (tzd_off < src.size()) {
411 // There is a TZD but we literally have no clue how to parse it :/
412 ERROR("cannot parse TZD of unexpected length");
413 }
414
415 return unix_start_of_day + time - offset;
416#undef ERROR
417}
418
419void Kv1Parser::parseHigherOrganizationalUnit() {
420 auto data_owner_code = eatString("ORUNORUN.DataOwnerCode", true, 10);
421 auto organizational_unit_code_parent = eatString("ORUNORUN.OrganizationalUnitCodeParent", true, 10);
422 auto organizational_unit_code_child = eatString("ORUNORUN.OrganizationalUnitCodeChild", true, 10);
423 auto valid_from_raw = eatString("ORUNORUN.ValidFrom", true, 10);
424 if (!record_errors.empty()) return;
425
426 auto valid_from = parseYyyymmdd(valid_from_raw);
427 if (!valid_from) {
428 record_errors.push_back("ORUNORUN.ValidFrom has invalid format, should be YYYY-MM-DD");
429 return;
430 }
431
432 records.higher_organizational_units.emplace_back(
433 Kv1HigherOrganizationalUnit::Key(
434 data_owner_code,
435 organizational_unit_code_parent,
436 organizational_unit_code_child,
437 *valid_from));
438}
439
440void Kv1Parser::parseUserStopPoint() {
441 auto data_owner_code = eatString ("USRSTOP.DataOwnerCode", true, 10);
442 auto user_stop_code = eatString ("USRSTOP.UserStopCode", true, 10);
443 auto timing_point_code = eatString ("USRSTOP.TimingPointCode", false, 10);
444 auto get_in = eatBoolean("USRSTOP.GetIn", true );
445 auto get_out = eatBoolean("USRSTOP.GetOut", true );
446 eatCell ("USRSTOP.<deprecated field #1>" );
447 auto name = eatString ("USRSTOP.Name", true, 50);
448 auto town = eatString ("USRSTOP.Town", true, 50);
449 auto user_stop_area_code = eatString ("USRSTOP.UserStopAreaCode", false, 10);
450 auto stop_side_code = eatString ("USRSTOP.StopSideCode", true, 10);
451 eatCell ("USRSTOP.<deprecated field #2>" );
452 eatCell ("USRSTOP.<deprecated field #3>" );
453 auto minimal_stop_time = eatNumber ("USRSTOP.MinimalStopTime", true, 5);
454 auto stop_side_length = eatNumber ("USRSTOP.StopSideLength", false, 3);
455 auto description = eatString ("USRSTOP.Description", false, 255);
456 auto user_stop_type = eatString ("USRSTOP.UserStopType", true, 10);
457 auto quay_code = eatString ("USRSTOP.QuayCode", false, 30);
458 if (!record_errors.empty()) return;
459
460 records.user_stop_points.emplace_back(
461 Kv1UserStopPoint::Key(
462 data_owner_code,
463 user_stop_code),
464 timing_point_code,
465 *get_in,
466 *get_out,
467 name,
468 town,
469 user_stop_area_code,
470 stop_side_code,
471 *minimal_stop_time,
472 stop_side_length,
473 description,
474 user_stop_type,
475 quay_code);
476}
477
478void Kv1Parser::parseUserStopArea() {
479 auto data_owner_code = eatString("USRSTAR.DataOwnerCode", true, 10);
480 auto user_stop_area_code = eatString("USRSTAR.UserStopAreaCode", true, 10);
481 auto name = eatString("USRSTAR.Name", true, 50);
482 auto town = eatString("USRSTAR.Town", true, 50);
483 eatCell ("USRSTAR.<deprecated field #1>" );
484 eatCell ("USRSTAR.<deprecated field #2>" );
485 auto description = eatString("USRSTAR.Description", false, 255);
486 if (!record_errors.empty()) return;
487
488 records.user_stop_areas.emplace_back(
489 Kv1UserStopArea::Key(
490 data_owner_code,
491 user_stop_area_code),
492 name,
493 town,
494 description);
495}
496
497void Kv1Parser::parseTimingLink() {
498 auto data_owner_code = eatString("TILI.DataOwnerCode", true, 10);
499 auto user_stop_code_begin = eatString("TILI.UserStopCodeBegin", true, 10);
500 auto user_stop_code_end = eatString("TILI.UserStopCodeEnd", true, 10);
501 auto minimal_drive_time = eatNumber("TILI.MinimalDriveTime", false, 5);
502 auto description = eatString("TILI.Description", false, 255);
503 if (!record_errors.empty()) return;
504
505 records.timing_links.emplace_back(
506 Kv1TimingLink::Key(
507 data_owner_code,
508 user_stop_code_begin,
509 user_stop_code_end),
510 minimal_drive_time,
511 description);
512}
513
514void Kv1Parser::parseLink() {
515 auto data_owner_code = eatString("LINK.DataOwnerCode", true, 10);
516 auto user_stop_code_begin = eatString("LINK.UserStopCodeBegin", true, 10);
517 auto user_stop_code_end = eatString("LINK.UserStopCodeEnd", true, 10);
518 eatCell("LINK.<deprecated field #1>" );
519 auto distance = eatNumber("LINK.Distance", true, 6);
520 auto description = eatString("LINK.Description", false, 255);
521 auto transport_type = eatString("LINK.TransportType", true, 5);
522 if (!record_errors.empty()) return;
523
524 records.links.emplace_back(
525 Kv1Link::Key(
526 data_owner_code,
527 user_stop_code_begin,
528 user_stop_code_end,
529 transport_type),
530 *distance,
531 description);
532}
533
534void Kv1Parser::parseLine() {
535 auto data_owner_code = eatString ("LINE.DataOwnerCode", true, 10);
536 auto line_planning_number = eatString ("LINE.LinePlanningNumber", true, 10);
537 auto line_public_number = eatString ("LINE.LinePublicNumber", true, 4);
538 auto line_name = eatString ("LINE.LineName", true, 50);
539 auto line_ve_tag_number = eatNumber ("LINE.LineVeTagNumber", true, 3);
540 auto description = eatString ("LINE.Description", false, 255);
541 auto transport_type = eatString ("LINE.TransportType", true, 5);
542 auto line_icon = eatNumber ("LINE.LineIcon", false, 4);
543 auto line_color = eatRgbColor("LINE.LineColor", false );
544 auto line_text_color = eatRgbColor("LINE.LineTextColor", false );
545 if (!record_errors.empty()) return;
546
547 // NOTE: This check, although it should be performed to comply with the
548 // specification, is not actually honored by transit operators (such as
549 // Connexxion) :/ That's enough reason to keep it disabled here for now.
550 // if (*line_ve_tag_number < 0 || *line_ve_tag_number > 399) {
551 // record_errors.push_back(std::format("LINE.LineVeTagNumber is out of range [0-399] with value {}", *line_ve_tag_number));
552 // return;
553 // }
554 if (*line_ve_tag_number != static_cast<short>(*line_ve_tag_number))
555 record_errors.push_back("LINE.LineVeTagNumber should be an integer");
556 if (line_icon && *line_icon != static_cast<short>(*line_icon))
557 record_errors.push_back("LINE.LineIcon should be an integer");
558 if (!record_errors.empty()) return;
559
560 records.lines.emplace_back(
561 Kv1Line::Key(
562 data_owner_code,
563 line_planning_number),
564 line_public_number,
565 line_name,
566 static_cast<short>(*line_ve_tag_number),
567 description,
568 transport_type,
569 static_cast<std::optional<short>>(line_icon),
570 line_color,
571 line_text_color);
572}
573
574void Kv1Parser::parseDestination() {
575 auto data_owner_code = eatString ("DEST.DataOwnerCode", true, 10);
576 auto dest_code = eatString ("DEST.DestCode", true, 10);
577 auto dest_name_full = eatString ("DEST.DestNameFull", true, 50);
578 auto dest_name_main = eatString ("DEST.DestNameMain", true, 24);
579 auto dest_name_detail = eatString ("DEST.DestNameDetail", false, 24);
580 auto relevant_dest_name_detail = eatBoolean ("DEST.RelevantDestNameDetail", true );
581 auto dest_name_main_21 = eatString ("DEST.DestNameMain21", true, 21);
582 auto dest_name_detail_21 = eatString ("DEST.DestNameDetail21", false, 21);
583 auto dest_name_main_19 = eatString ("DEST.DestNameMain19", true, 19);
584 auto dest_name_detail_19 = eatString ("DEST.DestNameDetail19", false, 19);
585 auto dest_name_main_16 = eatString ("DEST.DestNameMain16", true, 16);
586 auto dest_name_detail_16 = eatString ("DEST.DestNameDetail16", false, 16);
587 auto dest_icon = eatNumber ("DEST.DestIcon", false, 4);
588 auto dest_color = eatRgbColor("DEST.DestColor", false );
589 // NOTE: Deviating from the offical KV1 specification here. It specifies that
590 // the maximum length for this field should be 30, but then proceeds to
591 // specify that it should contain a RGB value comprising of three
592 // hexadecimally encoded octets, i.e. six characters. We assume that the
593 // latter is correct and the intended interpretation.
594 auto dest_text_color = eatRgbColor("DEST.DestTextColor", false );
595 if (!record_errors.empty()) return;
596
597 if (dest_icon && *dest_icon != static_cast<short>(*dest_icon)) {
598 record_errors.push_back("DEST.DestIcon should be an integer");
599 return;
600 }
601
602 records.destinations.emplace_back(
603 Kv1Destination::Key(
604 data_owner_code,
605 dest_code),
606 dest_name_full,
607 dest_name_main,
608 dest_name_detail,
609 *relevant_dest_name_detail,
610 dest_name_main_21,
611 dest_name_detail_21,
612 dest_name_main_19,
613 dest_name_detail_19,
614 dest_name_main_16,
615 dest_name_detail_16,
616 dest_icon,
617 dest_color,
618 dest_text_color);
619}
620
621void Kv1Parser::parseJourneyPattern() {
622 auto data_owner_code = eatString("JOPA.DataOwnerCode", true, 10);
623 auto line_planning_number = eatString("JOPA.LinePlanningNumber", true, 10);
624 auto journey_pattern_code = eatString("JOPA.JourneyPatternCode", true, 10);
625 auto journey_pattern_type = eatString("JOPA.JourneyPatternType", true, 10);
626 auto direction = eatString("JOPA.Direction", true, 1);
627 auto description = eatString("JOPA.Description", false, 255);
628 if (!record_errors.empty()) return;
629
630 if (direction != "1" && direction != "2" && direction != "A" && direction != "B") {
631 record_errors.push_back("JOPA.Direction should be in [1, 2, A, B]");
632 return;
633 }
634
635 records.journey_patterns.emplace_back(
636 Kv1JourneyPattern::Key(
637 data_owner_code,
638 line_planning_number,
639 journey_pattern_code),
640 journey_pattern_type,
641 direction[0],
642 description);
643}
644
645void Kv1Parser::parseConcessionFinancerRelation() {
646 auto data_owner_code = eatString("CONFINREL.DataOwnerCode", true, 10);
647 auto con_fin_rel_code = eatString("CONFINREL.ConFinRelCode", true, 10);
648 auto concession_area_code = eatString("CONFINREL.ConcessionAreaCode", true, 10);
649 auto financer_code = eatString("CONFINREL.FinancerCode", false, 10);
650 if (!record_errors.empty()) return;
651
652 records.concession_financer_relations.emplace_back(
653 Kv1ConcessionFinancerRelation::Key(
654 data_owner_code,
655 con_fin_rel_code),
656 concession_area_code,
657 financer_code);
658}
659
660void Kv1Parser::parseConcessionArea() {
661 auto data_owner_code = eatString("CONAREA.DataOwnerCode", true, 10);
662 auto concession_area_code = eatString("CONAREA.ConcessionAreaCode", true, 10);
663 auto description = eatString("CONAREA.Description", true, 255);
664 if (!record_errors.empty()) return;
665
666 records.concession_areas.emplace_back(
667 Kv1ConcessionArea::Key(
668 data_owner_code,
669 concession_area_code),
670 description);
671}
672
673void Kv1Parser::parseFinancer() {
674 auto data_owner_code = eatString("FINANCER.DataOwnerCode", true, 10);
675 auto financer_code = eatString("FINANCER.FinancerCode", true, 10);
676 auto description = eatString("FINANCER.Description", true, 255);
677 if (!record_errors.empty()) return;
678
679 records.financers.emplace_back(
680 Kv1Financer::Key(
681 data_owner_code,
682 financer_code),
683 description);
684}
685
686void Kv1Parser::parseJourneyPatternTimingLink() {
687 auto data_owner_code = eatString ("JOPATILI.DataOwnerCode", true, 10);
688 auto line_planning_number = eatString ("JOPATILI.LinePlanningNumber", true, 10);
689 auto journey_pattern_code = eatString ("JOPATILI.JourneyPatternCode", true, 10);
690 auto timing_link_order = eatNumber ("JOPATILI.TimingLinkOrder", true, 3);
691 auto user_stop_code_begin = eatString ("JOPATILI.UserStopCodeBegin", true, 10);
692 auto user_stop_code_end = eatString ("JOPATILI.UserStopCodeEnd", true, 10);
693 auto con_fin_rel_code = eatString ("JOPATILI.ConFinRelCode", true, 10);
694 auto dest_code = eatString ("JOPATILI.DestCode", true, 10);
695 eatCell ("JOPATILI.<deprecated field #1>" );
696 auto is_timing_stop = eatBoolean ("JOPATILI.IsTimingStop", true );
697 auto display_public_line = eatString ("JOPATILI.DisplayPublicLine", false, 4);
698 auto product_formula_type = eatNumber ("JOPATILI.ProductFormulaType", false, 4);
699 auto get_in = eatBoolean ("JOPATILI.GetIn", true );
700 auto get_out = eatBoolean ("JOPATILI.GetOut", true );
701 auto show_flexible_trip = eatString ("JOPATILI.ShowFlexibleTrip", false, 8);
702 auto line_dest_icon = eatNumber ("JOPATILI.LineDestIcon", false, 4);
703 auto line_dest_color = eatRgbColor("JOPATILI.LineDestColor", false );
704 auto line_dest_text_color = eatRgbColor("JOPATILI.LineDestTextColor", false );
705 if (!record_errors.empty()) return;
706
707 if (line_dest_icon && *line_dest_icon != static_cast<short>(*line_dest_icon))
708 record_errors.push_back("JOPATILI.LineDestIcon should be an integer");
709 if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" &&
710 show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME")
711 record_errors.push_back("JOPATILI.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]");
712 if (!record_errors.empty()) return;
713
714 records.journey_pattern_timing_links.emplace_back(
715 Kv1JourneyPatternTimingLink::Key(
716 data_owner_code,
717 line_planning_number,
718 journey_pattern_code,
719 static_cast<short>(*timing_link_order)),
720 user_stop_code_begin,
721 user_stop_code_end,
722 con_fin_rel_code,
723 dest_code,
724 *is_timing_stop,
725 display_public_line,
726 product_formula_type,
727 *get_in,
728 *get_out,
729 show_flexible_trip,
730 line_dest_icon,
731 line_dest_color,
732 line_dest_text_color);
733}
734
735void Kv1Parser::parsePoint() {
736 auto data_owner_code = eatString("POINT.DataOwnerCode", true, 10);
737 auto point_code = eatString("POINT.PointCode", true, 10);
738 eatCell ("POINT.<deprecated field #1>" );
739 auto point_type = eatString("POINT.PointType", true, 10);
740 auto coordinate_system_type = eatString("POINT.CoordinateSystemType", true, 10);
741 // NOTE: We deviate from the specification here once again. The specification
742 // notes that LocationX_EW should contain 'at least 6 positions'. Assuming
743 // that this is referring to the amount of digits, we have to lower this to
744 // 4. Otherwise, some positions in the Netherlands and Belgium are
745 // unrepresentable.
746 auto location_x_ew = eatRdCoord("POINT.LocationX_EW", true, 4);
747 auto location_y_ew = eatRdCoord("POINT.LocationX_EW", true, 6);
748 auto location_z = eatRdCoord("POINT.LocationZ", false, 0);
749 auto description = eatString ("POINT.Description", false, 255);
750 if (!record_errors.empty()) return;
751
752 records.points.emplace_back(
753 Kv1Point::Key(
754 std::move(data_owner_code),
755 std::move(point_code)),
756 std::move(point_type),
757 std::move(coordinate_system_type),
758 *location_x_ew,
759 *location_y_ew,
760 location_z,
761 std::move(description));
762}
763
764void Kv1Parser::parsePointOnLink() {
765 auto data_owner_code = eatString("POOL.DataOwnerCode", true, 10);
766 auto user_stop_code_begin = eatString("POOL.UserStopCodeBegin", true, 10);
767 auto user_stop_code_end = eatString("POOL.UserStopCodeEnd", true, 10);
768 eatCell ("POOL.<deprecated field #1>" );
769 auto point_data_owner_code = eatString("POOL.PointDataOwnerCode", true, 10);
770 auto point_code = eatString("POOL.PointCode", true, 10);
771 auto distance_since_start_of_link = eatNumber("POOL.DistanceSinceStartOfLink", true, 5);
772 auto segment_speed = eatNumber("POOL.SegmentSpeed", false, 4);
773 auto local_point_speed = eatNumber("POOL.LocalPointSpeed", false, 4);
774 auto description = eatString("POOL.Description", false, 255);
775 auto transport_type = eatString("POOL.TransportType", true, 5);
776 if (!record_errors.empty()) return;
777
778 records.point_on_links.emplace_back(
779 Kv1PointOnLink::Key(
780 data_owner_code,
781 user_stop_code_begin,
782 user_stop_code_end,
783 point_data_owner_code,
784 point_code,
785 transport_type),
786 *distance_since_start_of_link,
787 segment_speed,
788 local_point_speed,
789 std::move(description));
790}
791
792void Kv1Parser::parseIcon() {
793 auto data_owner_code = eatString("ICON.DataOwnerCode", true, 10);
794 auto icon_number = eatNumber("ICON.IconNumber", true, 4);
795 auto icon_uri = eatString("ICON.IconURI", true, 1024);
796 if (!record_errors.empty()) return;
797
798 if (*icon_number != static_cast<short>(*icon_number)) {
799 record_errors.push_back("ICON.IconNumber should be an integer");
800 return;
801 }
802
803 records.icons.emplace_back(
804 Kv1Icon::Key(
805 data_owner_code,
806 static_cast<short>(*icon_number)),
807 icon_uri);
808}
809
810void Kv1Parser::parseNotice() {
811 auto data_owner_code = eatString("NOTICE.DataOwnerCode", true, 10);
812 auto notice_code = eatString("NOTICE.NoticeCode", true, 20);
813 auto notice_content = eatString("NOTICE.NoticeContent", true, 1024);
814 if (!record_errors.empty()) return;
815
816 records.notices.emplace_back(
817 Kv1Notice::Key(
818 data_owner_code,
819 notice_code),
820 notice_content);
821}
822
823void Kv1Parser::parseNoticeAssignment() {
824 auto data_owner_code = eatString("NTCASSGNM.DataOwnerCode", true, 10);
825 auto notice_code = eatString("NTCASSGNM.NoticeCode", true, 20);
826 auto assigned_object = eatString("NTCASSGNM.AssignedObject", true, 8);
827 auto timetable_version_code = eatString("NTCASSGNM.TimetableVersionCode", false, 10);
828 auto organizational_unit_code = eatString("NTCASSGNM.OrganizationalUnitCode", false, 10);
829 auto schedule_code = eatString("NTCASSGNM.ScheduleCode", false, 10);
830 auto schedule_type_code = eatString("NTCASSGNM.ScheduleTypeCode", false, 10);
831 auto period_group_code = eatString("NTCASSGNM.PeriodGroupCode", false, 10);
832 auto specific_day_code = eatString("NTCASSGNM.SpecificDayCode", false, 10);
833 auto day_type = eatString("NTCASSGNM.DayType", false, 7);
834 auto line_planning_number = eatString("NTCASSGNM.LinePlanningNumber", true, 10);
835 auto journey_number = eatNumber("NTCASSGNM.JourneyNumber", false, 6);
836 auto stop_order = eatNumber("NTCASSGNM.StopOrder", false, 4);
837 auto journey_pattern_code = eatString("NTCASSGNM.JourneyPatternCode", false, 10);
838 auto timing_link_order = eatNumber("NTCASSGNM.TimingLinkOrder", false, 3);
839 auto user_stop_code = eatString("NTCASSGNM.UserStopCode", false, 10);
840 if (!record_errors.empty()) return;
841
842 if (journey_number && *journey_number != static_cast<short>(*journey_number))
843 record_errors.push_back("NTCASSGNM.JourneyNumber should be an integer");
844 if (journey_number && (*journey_number < 0 || *journey_number > 999'999))
845 record_errors.push_back("NTCASSGNM.JourneyNumber should be within the range [0-999999]");
846 if (stop_order && *stop_order != static_cast<short>(*stop_order))
847 record_errors.push_back("NTCASSGNM.StopOrder should be an integer");
848 if (!journey_number && (assigned_object == "PUJO" || assigned_object == "PUJOPASS"))
849 record_errors.push_back("NTCASSGNM.JourneyNumber is required for AssignedObject PUJO/PUJOPASS");
850 if (journey_pattern_code.empty() && assigned_object == "JOPATILI")
851 record_errors.push_back("NTCASSGNM.JourneyPatternCode is required for AssignedObject JOPATILI");
852 if (!record_errors.empty()) return;
853
854 records.notice_assignments.emplace_back(
855 data_owner_code,
856 notice_code,
857 assigned_object,
858 timetable_version_code,
859 organizational_unit_code,
860 schedule_code,
861 schedule_type_code,
862 period_group_code,
863 specific_day_code,
864 day_type,
865 line_planning_number,
866 static_cast<std::optional<int>>(journey_number),
867 static_cast<std::optional<short>>(stop_order),
868 journey_pattern_code,
869 timing_link_order,
870 user_stop_code);
871}
872
873void Kv1Parser::parseTimeDemandGroup() {
874 auto data_owner_code = eatString("TIMDEMGRP.DataOwnerCode", true, 10);
875 auto line_planning_number = eatString("TIMDEMGRP.LinePlanningNumber", true, 10);
876 auto journey_pattern_code = eatString("TIMDEMGRP.JourneyPatternCode", true, 10);
877 auto time_demand_group_code = eatString("TIMDEMGRP.TimeDemandGroupCode", true, 10);
878 if (!record_errors.empty()) return;
879
880 records.time_demand_groups.emplace_back(
881 Kv1TimeDemandGroup::Key(
882 data_owner_code,
883 line_planning_number,
884 journey_pattern_code,
885 time_demand_group_code));
886}
887
888void Kv1Parser::parseTimeDemandGroupRunTime() {
889 auto data_owner_code = eatString("TIMDEMRNT.DataOwnerCode", true, 10);
890 auto line_planning_number = eatString("TIMDEMRNT.LinePlanningNumber", true, 10);
891 auto journey_pattern_code = eatString("TIMDEMRNT.JourneyPatternCode", true, 10);
892 auto time_demand_group_code = eatString("TIMDEMRNT.TimeDemandGroupCode", true, 10);
893 auto timing_link_order = eatNumber("TIMDEMRNT.TimingLinkOrder", true, 3);
894 auto user_stop_code_begin = eatString("TIMDEMRNT.UserStopCodeBegin", true, 10);
895 auto user_stop_code_end = eatString("TIMDEMRNT.UserStopCodeEnd", true, 10);
896 auto total_drive_time = eatNumber("TIMDEMRNT.TotalDriveTime", true, 5);
897 auto drive_time = eatNumber("TIMDEMRNT.DriveTime", true, 5);
898 auto expected_delay = eatNumber("TIMDEMRNT.ExpectedDelay", false, 5);
899 auto layover_time = eatNumber("TIMDEMRNT.LayOverTime", false, 5);
900 auto stop_wait_time = eatNumber("TIMDEMRNT.StopWaitTime", true, 5);
901 auto minimum_stop_time = eatNumber("TIMDEMRNT.MinimumStopTime", false, 5);
902 if (!record_errors.empty()) return;
903
904 if (timing_link_order && *timing_link_order != static_cast<short>(*timing_link_order)) {
905 record_errors.push_back("TIMDEMRNT.TimingLinkOrder should be an integer");
906 return;
907 }
908
909 records.time_demand_group_run_times.emplace_back(
910 Kv1TimeDemandGroupRunTime::Key(
911 data_owner_code,
912 line_planning_number,
913 journey_pattern_code,
914 time_demand_group_code,
915 static_cast<short>(*timing_link_order)),
916 user_stop_code_begin,
917 user_stop_code_end,
918 *total_drive_time,
919 *drive_time,
920 expected_delay,
921 layover_time,
922 *stop_wait_time,
923 minimum_stop_time);
924}
925
926void Kv1Parser::parsePeriodGroup() {
927 auto data_owner_code = eatString("PEGR.DataOwnerCode", true, 10);
928 auto period_group_code = eatString("PEGR.PeriodGroupCode", true, 10);
929 auto description = eatString("PEGR.Description", false, 255);
930 if (!record_errors.empty()) return;
931
932 records.period_groups.emplace_back(
933 Kv1PeriodGroup::Key(
934 data_owner_code,
935 period_group_code),
936 description);
937}
938
939void Kv1Parser::parseSpecificDay() {
940 auto data_owner_code = eatString("SPECDAY.DataOwnerCode", true, 10);
941 auto specific_day_code = eatString("SPECDAY.SpecificDayCode", true, 10);
942 auto name = eatString("SPECDAY.Name", true, 50);
943 auto description = eatString("SPECDAY.Description", false, 255);
944 if (!record_errors.empty()) return;
945
946 records.specific_days.emplace_back(
947 Kv1SpecificDay::Key(
948 data_owner_code,
949 specific_day_code),
950 name,
951 description);
952}
953
954void Kv1Parser::parseTimetableVersion() {
955 auto data_owner_code = eatString("TIVE.DataOwnerCode", true, 10);
956 auto organizational_unit_code = eatString("TIVE.OrganizationalUnitCode", true, 10);
957 auto timetable_version_code = eatString("TIVE.TimetableVersionCode", true, 10);
958 auto period_group_code = eatString("TIVE.PeriodGroupCode", true, 10);
959 auto specific_day_code = eatString("TIVE.SpecificDayCode", true, 10);
960 auto valid_from_raw = eatString("TIVE.ValidFrom", true, 10);
961 auto timetable_version_type = eatString("TIVE.TimetableVersionType", true, 10);
962 auto valid_thru_raw = eatString("TIVE.ValidThru", false, 10);
963 auto description = eatString("TIVE.Description", false, 255);
964 if (!record_errors.empty()) return;
965
966 auto valid_from = parseYyyymmdd(valid_from_raw);
967 if (!valid_from)
968 record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD");
969 std::optional<std::chrono::year_month_day> valid_thru;
970 if (!valid_thru_raw.empty()) {
971 valid_thru = parseYyyymmdd(valid_thru_raw);
972 if (!valid_thru) {
973 record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD");
974 }
975 }
976 if (!description.empty())
977 record_errors.push_back("TIVE.Description should be empty");
978 if (!record_errors.empty()) return;
979
980 records.timetable_versions.emplace_back(
981 Kv1TimetableVersion::Key(
982 data_owner_code,
983 organizational_unit_code,
984 timetable_version_code,
985 period_group_code,
986 specific_day_code),
987 *valid_from,
988 timetable_version_type,
989 valid_thru,
990 description);
991}
992
993void Kv1Parser::parsePublicJourney() {
994 auto data_owner_code = eatString ("PUJO.DataOwnerCode", true, 10);
995 auto timetable_version_code = eatString ("PUJO.TimetableVersionCode", true, 10);
996 auto organizational_unit_code = eatString ("PUJO.OrganizationalUnitCode", true, 10);
997 auto period_group_code = eatString ("PUJO.PeriodGroupCode", true, 10);
998 auto specific_day_code = eatString ("PUJO.SpecificDayCode", true, 10);
999 auto day_type = eatString ("PUJO.DayType", true, 7);
1000 auto line_planning_number = eatString ("PUJO.LinePlanningNumber", true, 10);
1001 auto journey_number = eatNumber ("PUJO.JourneyNumber", true, 6);
1002 auto time_demand_group_code = eatString ("PUJO.TimeDemandGroupCode", true, 10);
1003 auto journey_pattern_code = eatString ("PUJO.JourneyPatternCode", true, 10);
1004 auto departure_time_raw = eatString ("PUJO.DepartureTime", true, 8);
1005 auto wheelchair_accessible = eatString ("PUJO.WheelChairAccessible", true, 13);
1006 auto data_owner_is_operator = eatBoolean("PUJO.DataOwnerIsOperator", true );
1007 auto planned_monitored = eatBoolean("PUJO.PlannedMonitored", true );
1008 auto product_formula_type = eatNumber ("PUJO.ProductFormulaType", false, 4);
1009 auto show_flexible_trip = eatString ("PUJO.ShowFlexibleTrip", false, 8);
1010 if (!record_errors.empty()) return;
1011
1012 auto departure_time = parseHhmmss(departure_time_raw);
1013 if (!departure_time)
1014 record_errors.push_back("PUJO.DepartureTime has a bad format");
1015 if (*journey_number < 0 || *journey_number > 999'999)
1016 record_errors.push_back("PUJO.JourneyNumber should be within the range [0-999999]");
1017 if (*journey_number != static_cast<int>(*journey_number))
1018 record_errors.push_back("PUJO.JourneyNumber should be an integer");
1019 if (product_formula_type && *product_formula_type != static_cast<short>(*product_formula_type))
1020 record_errors.push_back("PUJO.ProductFormulaType should be an integer");
1021 if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN")
1022 record_errors.push_back("PUJO.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]");
1023 if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" &&
1024 show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME")
1025 record_errors.push_back("PUJO.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]");
1026 if (!record_errors.empty()) return;
1027
1028 records.public_journeys.emplace_back(
1029 Kv1PublicJourney::Key(
1030 data_owner_code,
1031 timetable_version_code,
1032 organizational_unit_code,
1033 period_group_code,
1034 specific_day_code,
1035 day_type,
1036 line_planning_number,
1037 static_cast<int>(*journey_number)),
1038 time_demand_group_code,
1039 journey_pattern_code,
1040 *departure_time,
1041 wheelchair_accessible,
1042 *data_owner_is_operator,
1043 *planned_monitored,
1044 product_formula_type,
1045 show_flexible_trip);
1046}
1047
1048void Kv1Parser::parsePeriodGroupValidity() {
1049 auto data_owner_code = eatString("PEGRVAL.DataOwnerCode", true, 10);
1050 auto organizational_unit_code = eatString("PEGRVAL.OrganizationalUnitCode", true, 10);
1051 auto period_group_code = eatString("PEGRVAL.PeriodGroupCode", true, 10);
1052 auto valid_from_raw = eatString("PEGRVAL.ValidFrom", true, 10);
1053 auto valid_thru_raw = eatString("PEGRVAL.ValidThru", true, 10);
1054 if (!record_errors.empty()) return;
1055
1056 auto valid_from = parseYyyymmdd(valid_from_raw);
1057 auto valid_thru = parseYyyymmdd(valid_thru_raw);
1058 if (!valid_from)
1059 record_errors.push_back("PEGRVAL.ValidFrom has invalid format, should be YYYY-MM-DD");
1060 if (!valid_thru)
1061 record_errors.push_back("PEGRVAL.ValidThru has invalid format, should be YYYY-MM-DD");
1062 if (!record_errors.empty()) return;
1063
1064 records.period_group_validities.emplace_back(
1065 Kv1PeriodGroupValidity::Key(
1066 data_owner_code,
1067 organizational_unit_code,
1068 period_group_code,
1069 *valid_from),
1070 *valid_thru);
1071}
1072
// Parses one EXCOPDAY (exceptional operating day) record and appends it to
// records.exceptional_operating_days. ValidDate is handed to parseDateTime
// together with `amsterdam`; when that fails, the raw value and the error
// text reported by parseDateTime are put into the recorded message.
void Kv1Parser::parseExceptionalOperatingDay() {
  auto owner        = eatString("EXCOPDAY.DataOwnerCode", true, 10);
  auto org_unit     = eatString("EXCOPDAY.OrganizationalUnitCode", true, 10);
  auto date_raw     = eatString("EXCOPDAY.ValidDate", true, 23);
  auto day_type     = eatString("EXCOPDAY.DayTypeAsOn", true, 7);
  auto specific_day = eatString("EXCOPDAY.SpecificDayCode", true, 10);
  auto period_group = eatString("EXCOPDAY.PeriodGroupCode", false, 10);
  auto text         = eatString("EXCOPDAY.Description", false, 255);
  if (!record_errors.empty()) {
    return;
  }

  std::string_view reason;
  auto date = parseDateTime(date_raw, amsterdam, &reason);
  if (!date) {
    auto message = std::format("EXCOPDAY.ValidDate has an bad format (value: {}): {}", date_raw, reason);
    record_errors.push_back(std::move(message));
    return;
  }

  Kv1ExceptionalOperatingDay::Key key(owner, org_unit, *date);
  records.exceptional_operating_days.emplace_back(
    std::move(key),
    day_type,
    specific_day,
    period_group,
    text);
}
1100
1101void Kv1Parser::parseScheduleVersion() {
1102 auto data_owner_code = eatString("SCHEDVERS.DataOwnerCode", true, 10);
1103 auto organizational_unit_code = eatString("SCHEDVERS.OrganizationalUnitCode", true, 10);
1104 auto schedule_code = eatString("SCHEDVERS.ScheduleCode", true, 10);
1105 auto schedule_type_code = eatString("SCHEDVERS.ScheduleTypeCode", true, 10);
1106 auto valid_from_raw = eatString("SCHEDVERS.ValidFrom", true, 10);
1107 auto valid_thru_raw = eatString("SCHEDVERS.ValidThru", false, 10);
1108 auto description = eatString("SCHEDVERS.Description", false, 255);
1109 if (!record_errors.empty()) return;
1110
1111 auto valid_from = parseYyyymmdd(valid_from_raw);
1112 if (!valid_from)
1113 record_errors.push_back("SCHEDVERS.ValidFrom has invalid format, should be YYYY-MM-DD");
1114 std::optional<std::chrono::year_month_day> valid_thru;
1115 if (!valid_thru_raw.empty()) {
1116 valid_thru = parseYyyymmdd(valid_thru_raw);
1117 if (!valid_thru) {
1118 record_errors.push_back("SCHEDVERS.ValidFrom has invalid format, should be YYYY-MM-DD");
1119 }
1120 }
1121 if (!description.empty())
1122 record_errors.push_back("SCHEDVERS.Description should be empty");
1123 if (!record_errors.empty()) return;
1124
1125 records.schedule_versions.emplace_back(
1126 Kv1ScheduleVersion::Key(
1127 data_owner_code,
1128 organizational_unit_code,
1129 schedule_code,
1130 schedule_type_code),
1131 *valid_from,
1132 valid_thru,
1133 description);
1134}
1135
1136void Kv1Parser::parsePublicJourneyPassingTimes() {
1137 auto data_owner_code = eatString ("PUJOPASS.DataOwnerCode", true, 10);
1138 auto organizational_unit_code = eatString ("PUJOPASS.OrganizationalUnitCode", true, 10);
1139 auto schedule_code = eatString ("PUJOPASS.ScheduleCode", true, 10);
1140 auto schedule_type_code = eatString ("PUJOPASS.ScheduleTypeCode", true, 10);
1141 auto line_planning_number = eatString ("PUJOPASS.LinePlanningNumber", true, 10);
1142 auto journey_number = eatNumber ("PUJOPASS.JourneyNumber", true, 6);
1143 auto stop_order = eatNumber ("PUJOPASS.StopOrder", true, 4);
1144 auto journey_pattern_code = eatString ("PUJOPASS.JourneyPatternCode", true, 10);
1145 auto user_stop_code = eatString ("PUJOPASS.UserStopCode", true, 10);
1146 auto target_arrival_time_raw = eatString ("PUJOPASS.TargetArrivalTime", false, 8);
1147 auto target_departure_time_raw = eatString ("PUJOPASS.TargetDepartureTime", false, 8);
1148 auto wheelchair_accessible = eatString ("PUJOPASS.WheelChairAccessible", true, 13);
1149 auto data_owner_is_operator = eatBoolean("PUJOPASS.DataOwnerIsOperator", true );
1150 auto planned_monitored = eatBoolean("PUJOPASS.PlannedMonitored", true );
1151 auto product_formula_type = eatNumber ("PUJOPASS.ProductFormulaType", false, 4);
1152 auto show_flexible_trip = eatString ("PUJOPASS.ShowFlexibleTrip", false, 8);
1153 if (!record_errors.empty()) return;
1154
1155 if (*journey_number < 0 || *journey_number > 999'999)
1156 record_errors.push_back("PUJOPASS.JourneyNumber should be within the range [0-999999]");
1157 if (*journey_number != static_cast<int>(*journey_number))
1158 record_errors.push_back("PUJOPASS.JourneyNumber should be an integer");
1159 if (*stop_order != static_cast<short>(*stop_order))
1160 record_errors.push_back("PUJOPASS.StopOrder should be an integer");
1161 if (product_formula_type && *product_formula_type != static_cast<short>(*product_formula_type))
1162 record_errors.push_back("PUJOPASS.ProductFormulaType should be an integer");
1163 if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN")
1164 record_errors.push_back("PUJOPASS.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]");
1165 if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" &&
1166 show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME")
1167 record_errors.push_back("PUJOPASS.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]");
1168 std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_arrival_time;
1169 if (!target_arrival_time_raw.empty()) {
1170 target_arrival_time = parseHhmmss(target_arrival_time_raw);
1171 if (!target_arrival_time) {
1172 record_errors.push_back("PUJOPASS.TargetArrivalTime has invalid format, should be HH:MM:SS");
1173 }
1174 }
1175 std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_departure_time;
1176 if (!target_departure_time_raw.empty()) {
1177 target_departure_time = parseHhmmss(target_departure_time_raw);
1178 if (!target_departure_time) {
1179 record_errors.push_back("PUJOPASS.TargetDepartureTime has invalid format, should be HH:MM:SS");
1180 }
1181 }
1182 if (!record_errors.empty()) return;
1183
1184 records.public_journey_passing_times.emplace_back(
1185 Kv1PublicJourneyPassingTimes::Key(
1186 data_owner_code,
1187 organizational_unit_code,
1188 schedule_code,
1189 schedule_type_code,
1190 line_planning_number,
1191 static_cast<int>(*journey_number),
1192 static_cast<short>(*stop_order)),
1193 journey_pattern_code,
1194 user_stop_code,
1195 target_arrival_time,
1196 target_departure_time,
1197 wheelchair_accessible,
1198 *data_owner_is_operator,
1199 *planned_monitored,
1200 product_formula_type,
1201 show_flexible_trip);
1202}
1203
1204void Kv1Parser::parseOperatingDay() {
1205 auto data_owner_code = eatString("OPERDAY.DataOwnerCode", true, 10);
1206 auto organizational_unit_code = eatString("OPERDAY.OrganizationalUnitCode", true, 10);
1207 auto schedule_code = eatString("OPERDAY.ScheduleCode", true, 10);
1208 auto schedule_type_code = eatString("OPERDAY.ScheduleTypeCode", true, 10);
1209 auto valid_date_raw = eatString("OPERDAY.ValidDate", true, 10);
1210 auto description = eatString("OPERDAY.Description", false, 255);
1211 if (!record_errors.empty()) return;
1212
1213 auto valid_date = parseYyyymmdd(valid_date_raw);
1214 if (!valid_date)
1215 record_errors.push_back("OPERDAY.ValidDate has invalid format, should be YYYY-MM-DD");
1216 if (!record_errors.empty()) return;
1217
1218 records.operating_days.emplace_back(
1219 Kv1OperatingDay::Key(
1220 data_owner_code,
1221 organizational_unit_code,
1222 schedule_code,
1223 schedule_type_code,
1224 *valid_date),
1225 description);
1226}
1227
1228const std::unordered_map<std::string_view, Kv1Parser::ParseFunc> Kv1Parser::type_parsers{
1229 { "ORUN", &Kv1Parser::parseOrganizationalUnit },
1230 { "ORUNORUN", &Kv1Parser::parseHigherOrganizationalUnit },
1231 { "USRSTOP", &Kv1Parser::parseUserStopPoint },
1232 { "USRSTAR", &Kv1Parser::parseUserStopArea },
1233 { "TILI", &Kv1Parser::parseTimingLink },
1234 { "LINK", &Kv1Parser::parseLink },
1235 { "LINE", &Kv1Parser::parseLine },
1236 { "DEST", &Kv1Parser::parseDestination },
1237 { "JOPA", &Kv1Parser::parseJourneyPattern },
1238 { "CONFINREL", &Kv1Parser::parseConcessionFinancerRelation },
1239 { "CONAREA", &Kv1Parser::parseConcessionArea },
1240 { "FINANCER", &Kv1Parser::parseFinancer },
1241 { "JOPATILI", &Kv1Parser::parseJourneyPatternTimingLink },
1242 { "POINT", &Kv1Parser::parsePoint },
1243 { "POOL", &Kv1Parser::parsePointOnLink },
1244 { "ICON", &Kv1Parser::parseIcon },
1245 { "NOTICE", &Kv1Parser::parseNotice },
1246 { "NTCASSGNM", &Kv1Parser::parseNoticeAssignment },
1247 { "TIMDEMGRP", &Kv1Parser::parseTimeDemandGroup },
1248 { "TIMDEMRNT", &Kv1Parser::parseTimeDemandGroupRunTime },
1249 { "PEGR", &Kv1Parser::parsePeriodGroup },
1250 { "SPECDAY", &Kv1Parser::parseSpecificDay },
1251 { "TIVE", &Kv1Parser::parseTimetableVersion },
1252 { "PUJO", &Kv1Parser::parsePublicJourney },
1253 { "PEGRVAL", &Kv1Parser::parsePeriodGroupValidity },
1254 { "EXCOPDAY", &Kv1Parser::parseExceptionalOperatingDay },
1255 { "SCHEDVERS", &Kv1Parser::parseScheduleVersion },
1256 { "PUJOPASS", &Kv1Parser::parsePublicJourneyPassingTimes },
1257 { "OPERDAY", &Kv1Parser::parseOperatingDay },
1258};
diff --git a/lib/libtmi8/src/kv1_types.cpp b/lib/libtmi8/src/kv1_types.cpp
new file mode 100644
index 0000000..49e306e
--- /dev/null
+++ b/lib/libtmi8/src/kv1_types.cpp
@@ -0,0 +1,773 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <boost/container_hash/hash.hpp>
4
5#include <tmi8/kv1_types.hpp>
6
7size_t Kv1Records::size() const {
8 return organizational_units.size()
9 + higher_organizational_units.size()
10 + user_stop_points.size()
11 + user_stop_areas.size()
12 + timing_links.size()
13 + links.size()
14 + lines.size()
15 + destinations.size()
16 + journey_patterns.size()
17 + concession_financer_relations.size()
18 + concession_areas.size()
19 + financers.size()
20 + journey_pattern_timing_links.size()
21 + points.size()
22 + point_on_links.size()
23 + icons.size()
24 + notices.size()
25 + notice_assignments.size()
26 + time_demand_groups.size()
27 + time_demand_group_run_times.size()
28 + period_groups.size()
29 + specific_days.size()
30 + timetable_versions.size()
31 + public_journeys.size()
32 + period_group_validities.size()
33 + exceptional_operating_days.size()
34 + schedule_versions.size()
35 + public_journey_passing_times.size()
36 + operating_days.size();
37}
38
39Kv1OrganizationalUnit::Key::Key(
40 std::string data_owner_code,
41 std::string organizational_unit_code)
42 : data_owner_code(std::move(data_owner_code)),
43 organizational_unit_code(std::move(organizational_unit_code))
44{}
45
46Kv1HigherOrganizationalUnit::Key::Key(
47 std::string data_owner_code,
48 std::string organizational_unit_code_parent,
49 std::string organizational_unit_code_child,
50 std::chrono::year_month_day valid_from)
51 : data_owner_code(std::move(data_owner_code)),
52 organizational_unit_code_parent(std::move(organizational_unit_code_parent)),
53 organizational_unit_code_child(std::move(organizational_unit_code_child)),
54 valid_from(valid_from)
55{}
56
57Kv1UserStopPoint::Key::Key(
58 std::string data_owner_code,
59 std::string user_stop_code)
60 : data_owner_code(std::move(data_owner_code)),
61 user_stop_code(std::move(user_stop_code))
62{}
63
64Kv1UserStopArea::Key::Key(
65 std::string data_owner_code,
66 std::string user_stop_area_code)
67 : data_owner_code(std::move(data_owner_code)),
68 user_stop_area_code(std::move(user_stop_area_code))
69{}
70
71Kv1TimingLink::Key::Key(
72 std::string data_owner_code,
73 std::string user_stop_code_begin,
74 std::string user_stop_code_end)
75 : data_owner_code(std::move(data_owner_code)),
76 user_stop_code_begin(std::move(user_stop_code_begin)),
77 user_stop_code_end(std::move(user_stop_code_end))
78{}
79
80Kv1Link::Key::Key(std::string data_owner_code,
81 std::string user_stop_code_begin,
82 std::string user_stop_code_end,
83 std::string transport_type)
84 : data_owner_code(std::move(data_owner_code)),
85 user_stop_code_begin(std::move(user_stop_code_begin)),
86 user_stop_code_end(std::move(user_stop_code_end)),
87 transport_type(std::move(transport_type))
88{}
89
90Kv1Line::Key::Key(std::string data_owner_code,
91 std::string line_planning_number)
92 : data_owner_code(std::move(data_owner_code)),
93 line_planning_number(std::move(line_planning_number))
94{}
95
96Kv1Destination::Key::Key(std::string data_owner_code,
97 std::string dest_code)
98 : data_owner_code(std::move(data_owner_code)),
99 dest_code(std::move(dest_code))
100{}
101
102Kv1JourneyPattern::Key::Key(std::string data_owner_code,
103 std::string line_planning_number,
104 std::string journey_pattern_code)
105 : data_owner_code(std::move(data_owner_code)),
106 line_planning_number(std::move(line_planning_number)),
107 journey_pattern_code(std::move(journey_pattern_code))
108{}
109
110Kv1ConcessionFinancerRelation::Key::Key(std::string data_owner_code,
111 std::string con_fin_rel_code)
112 : data_owner_code(std::move(data_owner_code)),
113 con_fin_rel_code(std::move(con_fin_rel_code))
114{}
115
116Kv1ConcessionArea::Key::Key(std::string data_owner_code,
117 std::string concession_area_code)
118 : data_owner_code(std::move(data_owner_code)),
119 concession_area_code(std::move(concession_area_code))
120{}
121
122Kv1Financer::Key::Key(std::string data_owner_code,
123 std::string financer_code)
124 : data_owner_code(std::move(data_owner_code)),
125 financer_code(std::move(financer_code))
126{}
127
128Kv1JourneyPatternTimingLink::Key::Key(std::string data_owner_code,
129 std::string line_planning_number,
130 std::string journey_pattern_code,
131 short timing_link_order)
132 : data_owner_code(std::move(data_owner_code)),
133 line_planning_number(std::move(line_planning_number)),
134 journey_pattern_code(journey_pattern_code),
135 timing_link_order(timing_link_order)
136{}
137
138Kv1Point::Key::Key(std::string data_owner_code,
139 std::string point_code)
140 : data_owner_code(std::move(data_owner_code)),
141 point_code(std::move(point_code))
142{}
143
144Kv1PointOnLink::Key::Key(std::string data_owner_code,
145 std::string user_stop_code_begin,
146 std::string user_stop_code_end,
147 std::string point_data_owner_code,
148 std::string point_code,
149 std::string transport_type)
150 : data_owner_code(std::move(data_owner_code)),
151 user_stop_code_begin(std::move(user_stop_code_begin)),
152 user_stop_code_end(std::move(user_stop_code_end)),
153 point_data_owner_code(std::move(point_data_owner_code)),
154 point_code(std::move(point_code)),
155 transport_type(std::move(transport_type))
156{}
157
158Kv1Icon::Key::Key(std::string data_owner_code,
159 short icon_number)
160 : data_owner_code(std::move(data_owner_code)),
161 icon_number(icon_number)
162{}
163
164Kv1Notice::Key::Key(std::string data_owner_code,
165 std::string notice_code)
166 : data_owner_code(std::move(data_owner_code)),
167 notice_code(std::move(notice_code))
168{}
169
170Kv1TimeDemandGroup::Key::Key(std::string data_owner_code,
171 std::string line_planning_number,
172 std::string journey_pattern_code,
173 std::string time_demand_group_code)
174 : data_owner_code(std::move(data_owner_code)),
175 line_planning_number(std::move(line_planning_number)),
176 journey_pattern_code(std::move(journey_pattern_code)),
177 time_demand_group_code(std::move(time_demand_group_code))
178{}
179
180Kv1TimeDemandGroupRunTime::Key::Key(std::string data_owner_code,
181 std::string line_planning_number,
182 std::string journey_pattern_code,
183 std::string time_demand_group_code,
184 short timing_link_order)
185 : data_owner_code(std::move(data_owner_code)),
186 line_planning_number(std::move(line_planning_number)),
187 journey_pattern_code(std::move(journey_pattern_code)),
188 time_demand_group_code(std::move(time_demand_group_code)),
189 timing_link_order(std::move(timing_link_order))
190{}
191
192Kv1PeriodGroup::Key::Key(std::string data_owner_code,
193 std::string period_group_code)
194 : data_owner_code(std::move(data_owner_code)),
195 period_group_code(std::move(period_group_code))
196{}
197
198Kv1SpecificDay::Key::Key(std::string data_owner_code,
199 std::string specific_day_code)
200 : data_owner_code(std::move(data_owner_code)),
201 specific_day_code(std::move(specific_day_code))
202{}
203
204Kv1TimetableVersion::Key::Key(std::string data_owner_code,
205 std::string organizational_unit_code,
206 std::string timetable_version_code,
207 std::string period_group_code,
208 std::string specific_day_code)
209 : data_owner_code(std::move(data_owner_code)),
210 organizational_unit_code(std::move(organizational_unit_code)),
211 timetable_version_code(std::move(timetable_version_code)),
212 period_group_code(std::move(period_group_code)),
213 specific_day_code(std::move(specific_day_code))
214{}
215
216Kv1PublicJourney::Key::Key(std::string data_owner_code,
217 std::string timetable_version_code,
218 std::string organizational_unit_code,
219 std::string period_group_code,
220 std::string specific_day_code,
221 std::string day_type,
222 std::string line_planning_number,
223 int journey_number)
224 : data_owner_code(std::move(data_owner_code)),
225 timetable_version_code(std::move(timetable_version_code)),
226 organizational_unit_code(std::move(organizational_unit_code)),
227 period_group_code(std::move(period_group_code)),
228 specific_day_code(std::move(specific_day_code)),
229 day_type(std::move(day_type)),
230 line_planning_number(std::move(line_planning_number)),
231 journey_number(journey_number)
232{}
233
234Kv1PeriodGroupValidity::Key::Key(std::string data_owner_code,
235 std::string organizational_unit_code,
236 std::string period_group_code,
237 std::chrono::year_month_day valid_from)
238 : data_owner_code(std::move(data_owner_code)),
239 organizational_unit_code(std::move(organizational_unit_code)),
240 period_group_code(std::move(period_group_code)),
241 valid_from(valid_from)
242{}
243
244Kv1ExceptionalOperatingDay::Key::Key(std::string data_owner_code,
245 std::string organizational_unit_code,
246 std::chrono::sys_seconds valid_date)
247 : data_owner_code(std::move(data_owner_code)),
248 organizational_unit_code(std::move(organizational_unit_code)),
249 valid_date(valid_date)
250{}
251
252Kv1ScheduleVersion::Key::Key(std::string data_owner_code,
253 std::string organizational_unit_code,
254 std::string schedule_code,
255 std::string schedule_type_code)
256 : data_owner_code(std::move(data_owner_code)),
257 organizational_unit_code(std::move(organizational_unit_code)),
258 schedule_code(std::move(schedule_code)),
259 schedule_type_code(std::move(schedule_type_code))
260{}
261
262Kv1PublicJourneyPassingTimes::Key::Key(std::string data_owner_code,
263 std::string organizational_unit_code,
264 std::string schedule_code,
265 std::string schedule_type_code,
266 std::string line_planning_number,
267 int journey_number,
268 short stop_order)
269 : data_owner_code(std::move(data_owner_code)),
270 organizational_unit_code(std::move(organizational_unit_code)),
271 schedule_code(std::move(schedule_code)),
272 schedule_type_code(std::move(schedule_type_code)),
273 line_planning_number(std::move(line_planning_number)),
274 journey_number(journey_number),
275 stop_order(stop_order)
276{}
277
278Kv1OperatingDay::Key::Key(std::string data_owner_code,
279 std::string organizational_unit_code,
280 std::string schedule_code,
281 std::string schedule_type_code,
282 std::chrono::year_month_day valid_date)
283 : data_owner_code(std::move(data_owner_code)),
284 organizational_unit_code(std::move(organizational_unit_code)),
285 schedule_code(std::move(schedule_code)),
286 schedule_type_code(std::move(schedule_type_code)),
287 valid_date(valid_date)
288{}
289
290bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b) {
291 return a.data_owner_code == b.data_owner_code
292 && a.organizational_unit_code == b.organizational_unit_code;
293}
294
295bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b) {
296 return a.data_owner_code == b.data_owner_code
297 && a.organizational_unit_code_parent == b.organizational_unit_code_parent
298 && a.organizational_unit_code_child == b.organizational_unit_code_child;
299}
300
301bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b) {
302 return a.data_owner_code == b.data_owner_code
303 && a.user_stop_code == b.user_stop_code;
304}
305
306bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b) {
307 return a.data_owner_code == b.data_owner_code
308 && a.user_stop_area_code == b.user_stop_area_code;
309}
310
311bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b) {
312 return a.data_owner_code == b.data_owner_code
313 && a.user_stop_code_begin == b.user_stop_code_begin
314 && a.user_stop_code_end == b.user_stop_code_end;
315}
316
317bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b) {
318 return a.data_owner_code == b.data_owner_code
319 && a.user_stop_code_begin == b.user_stop_code_begin
320 && a.user_stop_code_end == b.user_stop_code_end
321 && a.transport_type == b.transport_type;
322}
323
324bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b) {
325 return a.data_owner_code == b.data_owner_code
326 && a.line_planning_number == b.line_planning_number;
327}
328
329bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b) {
330 return a.data_owner_code == b.data_owner_code
331 && a.dest_code == b.dest_code;
332}
333
334bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b) {
335 return a.data_owner_code == b.data_owner_code
336 && a.line_planning_number == b.line_planning_number
337 && a.journey_pattern_code == b.journey_pattern_code;
338}
339
340bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b) {
341 return a.data_owner_code == b.data_owner_code
342 && a.con_fin_rel_code == b.con_fin_rel_code;
343}
344
345bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b) {
346 return a.data_owner_code == b.data_owner_code
347 && a.concession_area_code == b.concession_area_code;
348}
349
350bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b) {
351 return a.data_owner_code == b.data_owner_code
352 && a.financer_code == b.financer_code;
353}
354
355bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b) {
356 return a.data_owner_code == b.data_owner_code
357 && a.line_planning_number == b.line_planning_number
358 && a.journey_pattern_code == b.journey_pattern_code
359 && a.timing_link_order == b.timing_link_order;
360}
361
362bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b) {
363 return a.data_owner_code == b.data_owner_code
364 && a.point_code == b.point_code;
365}
366
367bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b) {
368 return a.data_owner_code == b.data_owner_code
369 && a.user_stop_code_begin == b.user_stop_code_begin
370 && a.user_stop_code_end == b.user_stop_code_end
371 && a.point_data_owner_code == b.point_data_owner_code
372 && a.point_code == b.point_code
373 && a.transport_type == b.transport_type;
374}
375
376bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b) {
377 return a.data_owner_code == b.data_owner_code
378 && a.icon_number == b.icon_number;
379}
380
381bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b) {
382 return a.data_owner_code == b.data_owner_code
383 && a.notice_code == b.notice_code;
384}
385
386bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b) {
387 return a.data_owner_code == b.data_owner_code
388 && a.line_planning_number == b.line_planning_number
389 && a.journey_pattern_code == b.journey_pattern_code
390 && a.time_demand_group_code == b.time_demand_group_code;
391}
392
393bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const Kv1TimeDemandGroupRunTime::Key &b) {
394 return a.data_owner_code == b.data_owner_code
395 && a.line_planning_number == b.line_planning_number
396 && a.journey_pattern_code == b.journey_pattern_code
397 && a.time_demand_group_code == b.time_demand_group_code
398 && a.timing_link_order == b.timing_link_order;
399}
400
401bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b) {
402 return a.data_owner_code == b.data_owner_code
403 && a.period_group_code == b.period_group_code;
404}
405
406bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b) {
407 return a.data_owner_code == b.data_owner_code
408 && a.specific_day_code == b.specific_day_code;
409}
410
411bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b) {
412 return a.data_owner_code == b.data_owner_code
413 && a.organizational_unit_code == b.organizational_unit_code
414 && a.timetable_version_code == b.timetable_version_code
415 && a.period_group_code == b.period_group_code
416 && a.specific_day_code == b.specific_day_code;
417}
418
419bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b) {
420 return a.data_owner_code == b.data_owner_code
421 && a.timetable_version_code == b.timetable_version_code
422 && a.organizational_unit_code == b.organizational_unit_code
423 && a.period_group_code == b.period_group_code
424 && a.specific_day_code == b.specific_day_code
425 && a.day_type == b.day_type
426 && a.line_planning_number == b.line_planning_number
427 && a.journey_number == b.journey_number;
428}
429
430bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b) {
431 return a.data_owner_code == b.data_owner_code
432 && a.organizational_unit_code == b.organizational_unit_code
433 && a.period_group_code == b.period_group_code
434 && a.valid_from == b.valid_from;
435}
436
437bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b) {
438 return a.data_owner_code == b.data_owner_code
439 && a.organizational_unit_code == b.organizational_unit_code
440 && a.valid_date == b.valid_date;
441}
442
443bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b) {
444 return a.data_owner_code == b.data_owner_code
445 && a.organizational_unit_code == b.organizational_unit_code
446 && a.schedule_code == b.schedule_code
447 && a.schedule_type_code == b.schedule_type_code;
448}
449
450bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b) {
451 return a.data_owner_code == b.data_owner_code
452 && a.organizational_unit_code == b.organizational_unit_code
453 && a.schedule_code == b.schedule_code
454 && a.schedule_type_code == b.schedule_type_code
455 && a.line_planning_number == b.line_planning_number
456 && a.journey_number == b.journey_number
457 && a.stop_order == b.stop_order;
458}
459
460bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b) {
461 return a.data_owner_code == b.data_owner_code
462 && a.organizational_unit_code == b.organizational_unit_code
463 && a.schedule_code == b.schedule_code
464 && a.schedule_type_code == b.schedule_type_code
465 && a.valid_date == b.valid_date;
466}
467
468namespace std::chrono {
469 static size_t hash_value(const year_month_day &ymd) {
470 size_t seed = 0;
471
472 boost::hash_combine(seed, int(ymd.year()));
473 boost::hash_combine(seed, unsigned(ymd.month()));
474 boost::hash_combine(seed, unsigned(ymd.day()));
475
476 return seed;
477 }
478
479 static size_t hash_value(const sys_seconds &s) {
480 return boost::hash<seconds::rep>()(s.time_since_epoch().count());
481 }
482}
483
484size_t hash_value(const Kv1OrganizationalUnit::Key &k) {
485 size_t seed = 0;
486
487 boost::hash_combine(seed, k.data_owner_code);
488 boost::hash_combine(seed, k.organizational_unit_code);
489
490 return seed;
491}
492
493size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k) {
494 size_t seed = 0;
495
496 boost::hash_combine(seed, k.data_owner_code);
497 boost::hash_combine(seed, k.organizational_unit_code_parent);
498 boost::hash_combine(seed, k.organizational_unit_code_child);
499 boost::hash_combine(seed, k.valid_from);
500
501 return seed;
502}
503
504size_t hash_value(const Kv1UserStopPoint::Key &k) {
505 size_t seed = 0;
506
507 boost::hash_combine(seed, k.data_owner_code);
508 boost::hash_combine(seed, k.user_stop_code);
509
510 return seed;
511}
512
513size_t hash_value(const Kv1UserStopArea::Key &k) {
514 size_t seed = 0;
515
516 boost::hash_combine(seed, k.data_owner_code);
517 boost::hash_combine(seed, k.user_stop_area_code);
518
519 return seed;
520}
521
522size_t hash_value(const Kv1TimingLink::Key &k) {
523 size_t seed = 0;
524
525 boost::hash_combine(seed, k.data_owner_code);
526 boost::hash_combine(seed, k.user_stop_code_begin);
527 boost::hash_combine(seed, k.user_stop_code_end);
528
529 return seed;
530}
531
532size_t hash_value(const Kv1Link::Key &k) {
533 size_t seed = 0;
534
535 boost::hash_combine(seed, k.data_owner_code);
536 boost::hash_combine(seed, k.user_stop_code_begin);
537 boost::hash_combine(seed, k.user_stop_code_end);
538 boost::hash_combine(seed, k.transport_type);
539
540 return seed;
541}
542
543size_t hash_value(const Kv1Line::Key &k) {
544 size_t seed = 0;
545
546 boost::hash_combine(seed, k.data_owner_code);
547 boost::hash_combine(seed, k.line_planning_number);
548
549 return seed;
550}
551
552size_t hash_value(const Kv1Destination::Key &k) {
553 size_t seed = 0;
554
555 boost::hash_combine(seed, k.data_owner_code);
556 boost::hash_combine(seed, k.dest_code);
557
558 return seed;
559}
560
561size_t hash_value(const Kv1JourneyPattern::Key &k) {
562 size_t seed = 0;
563
564 boost::hash_combine(seed, k.data_owner_code);
565 boost::hash_combine(seed, k.line_planning_number);
566 boost::hash_combine(seed, k.journey_pattern_code);
567
568 return seed;
569}
570
571size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k) {
572 size_t seed = 0;
573
574 boost::hash_combine(seed, k.data_owner_code);
575 boost::hash_combine(seed, k.con_fin_rel_code);
576
577 return seed;
578}
579
580size_t hash_value(const Kv1ConcessionArea::Key &k) {
581 size_t seed = 0;
582
583 boost::hash_combine(seed, k.data_owner_code);
584 boost::hash_combine(seed, k.concession_area_code);
585
586 return seed;
587}
588
589size_t hash_value(const Kv1Financer::Key &k) {
590 size_t seed = 0;
591
592 boost::hash_combine(seed, k.data_owner_code);
593 boost::hash_combine(seed, k.financer_code);
594
595 return seed;
596}
597
598size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k) {
599 size_t seed = 0;
600
601 boost::hash_combine(seed, k.data_owner_code);
602 boost::hash_combine(seed, k.line_planning_number);
603 boost::hash_combine(seed, k.journey_pattern_code);
604 boost::hash_combine(seed, k.timing_link_order);
605
606 return seed;
607}
608
609size_t hash_value(const Kv1Point::Key &k) {
610 size_t seed = 0;
611
612 boost::hash_combine(seed, k.data_owner_code);
613 boost::hash_combine(seed, k.point_code);
614
615 return seed;
616}
617
618size_t hash_value(const Kv1PointOnLink::Key &k) {
619 size_t seed = 0;
620
621 boost::hash_combine(seed, k.data_owner_code);
622 boost::hash_combine(seed, k.user_stop_code_begin);
623 boost::hash_combine(seed, k.user_stop_code_end);
624 boost::hash_combine(seed, k.point_data_owner_code);
625 boost::hash_combine(seed, k.point_code);
626 boost::hash_combine(seed, k.transport_type);
627
628 return seed;
629}
630
631size_t hash_value(const Kv1Icon::Key &k) {
632 size_t seed = 0;
633
634 boost::hash_combine(seed, k.data_owner_code);
635 boost::hash_combine(seed, k.icon_number);
636
637 return seed;
638}
639
640size_t hash_value(const Kv1Notice::Key &k) {
641 size_t seed = 0;
642
643 boost::hash_combine(seed, k.data_owner_code);
644 boost::hash_combine(seed, k.notice_code);
645
646 return seed;
647}
648
649size_t hash_value(const Kv1TimeDemandGroup::Key &k) {
650 size_t seed = 0;
651
652 boost::hash_combine(seed, k.data_owner_code);
653 boost::hash_combine(seed, k.line_planning_number);
654 boost::hash_combine(seed, k.journey_pattern_code);
655 boost::hash_combine(seed, k.time_demand_group_code);
656
657 return seed;
658}
659
660size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k) {
661 size_t seed = 0;
662
663 boost::hash_combine(seed, k.data_owner_code);
664 boost::hash_combine(seed, k.line_planning_number);
665 boost::hash_combine(seed, k.journey_pattern_code);
666 boost::hash_combine(seed, k.time_demand_group_code);
667 boost::hash_combine(seed, k.timing_link_order);
668
669 return seed;
670}
671
672size_t hash_value(const Kv1PeriodGroup::Key &k) {
673 size_t seed = 0;
674
675 boost::hash_combine(seed, k.data_owner_code);
676 boost::hash_combine(seed, k.period_group_code);
677
678 return seed;
679}
680
681size_t hash_value(const Kv1SpecificDay::Key &k) {
682 size_t seed = 0;
683
684 boost::hash_combine(seed, k.data_owner_code);
685 boost::hash_combine(seed, k.specific_day_code);
686
687 return seed;
688}
689
690size_t hash_value(const Kv1TimetableVersion::Key &k) {
691 size_t seed = 0;
692
693 boost::hash_combine(seed, k.data_owner_code);
694 boost::hash_combine(seed, k.organizational_unit_code);
695 boost::hash_combine(seed, k.timetable_version_code);
696 boost::hash_combine(seed, k.period_group_code);
697 boost::hash_combine(seed, k.specific_day_code);
698
699 return seed;
700}
701
702size_t hash_value(const Kv1PublicJourney::Key &k) {
703 size_t seed = 0;
704
705 boost::hash_combine(seed, k.data_owner_code);
706 boost::hash_combine(seed, k.timetable_version_code);
707 boost::hash_combine(seed, k.organizational_unit_code);
708 boost::hash_combine(seed, k.period_group_code);
709 boost::hash_combine(seed, k.specific_day_code);
710 boost::hash_combine(seed, k.day_type);
711 boost::hash_combine(seed, k.line_planning_number);
712 boost::hash_combine(seed, k.journey_number);
713
714 return seed;
715}
716
717size_t hash_value(const Kv1PeriodGroupValidity::Key &k) {
718 size_t seed = 0;
719
720 boost::hash_combine(seed, k.data_owner_code);
721 boost::hash_combine(seed, k.organizational_unit_code);
722 boost::hash_combine(seed, k.period_group_code);
723 boost::hash_combine(seed, k.valid_from);
724
725 return seed;
726}
727
728size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k) {
729 size_t seed = 0;
730
731 boost::hash_combine(seed, k.data_owner_code);
732 boost::hash_combine(seed, k.organizational_unit_code);
733 boost::hash_combine(seed, k.valid_date);
734
735 return seed;
736}
737
738size_t hash_value(const Kv1ScheduleVersion::Key &k) {
739 size_t seed = 0;
740
741 boost::hash_combine(seed, k.data_owner_code);
742 boost::hash_combine(seed, k.organizational_unit_code);
743 boost::hash_combine(seed, k.schedule_code);
744 boost::hash_combine(seed, k.schedule_type_code);
745
746 return seed;
747}
748
749size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k) {
750 size_t seed = 0;
751
752 boost::hash_combine(seed, k.data_owner_code);
753 boost::hash_combine(seed, k.organizational_unit_code);
754 boost::hash_combine(seed, k.schedule_code);
755 boost::hash_combine(seed, k.schedule_type_code);
756 boost::hash_combine(seed, k.line_planning_number);
757 boost::hash_combine(seed, k.journey_number);
758 boost::hash_combine(seed, k.stop_order);
759
760 return seed;
761}
762
763size_t hash_value(const Kv1OperatingDay::Key &k) {
764 size_t seed = 0;
765
766 boost::hash_combine(seed, k.data_owner_code);
767 boost::hash_combine(seed, k.organizational_unit_code);
768 boost::hash_combine(seed, k.schedule_code);
769 boost::hash_combine(seed, k.schedule_type_code);
770 boost::hash_combine(seed, k.valid_date);
771
772 return seed;
773}
diff --git a/lib/libtmi8/src/kv6_parquet.cpp b/lib/libtmi8/src/kv6_parquet.cpp
new file mode 100644
index 0000000..ca70b7f
--- /dev/null
+++ b/lib/libtmi8/src/kv6_parquet.cpp
@@ -0,0 +1,102 @@
1// vim:set sw=2 ts=2 sts et:
2
#include <tmi8/kv6_parquet.hpp>

#include <system_error>
4
5ParquetBuilder::ParquetBuilder() {
6 std::shared_ptr<arrow::Field> field_type, field_data_owner_code, field_line_planning_number, field_operating_day,
7 field_journey_number, field_reinforcement_number, field_timestamp, field_source,
8 field_punctuality, field_user_stop_code, field_passage_sequence_number,
9 field_vehicle_number, field_block_code, field_wheelchair_accessible,
10 field_number_of_coaches, field_rd_y, field_rd_x, field_distance_since_last_user_stop;
11 field_type = arrow::field("type", arrow::utf8());
12 field_data_owner_code = arrow::field("data_owner_code", arrow::utf8());
13 field_line_planning_number = arrow::field("line_planning_number", arrow::utf8());
14 field_operating_day = arrow::field("operating_day", arrow::date32());
15 field_journey_number = arrow::field("journey_number", arrow::uint32());
16 field_reinforcement_number = arrow::field("reinforcement_number", arrow::uint8());
17 field_timestamp = arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::SECOND));
18 field_source = arrow::field("source", arrow::utf8());
19 field_punctuality = arrow::field("punctuality", arrow::int16());
20 field_user_stop_code = arrow::field("user_stop_code", arrow::utf8());
21 field_passage_sequence_number = arrow::field("passage_sequence_number", arrow::uint16());
22 field_vehicle_number = arrow::field("vehicle_number", arrow::uint32());
23 field_block_code = arrow::field("block_code", arrow::uint32());
24 field_wheelchair_accessible = arrow::field("wheelchair_accessible", arrow::utf8());
25 field_number_of_coaches = arrow::field("number_of_coaches", arrow::uint8());
26 field_rd_y = arrow::field("rd_y", arrow::int32());
27 field_rd_x = arrow::field("rd_x", arrow::int32());
28 field_distance_since_last_user_stop = arrow::field("distance_since_last_user_stop", arrow::uint32());
29
30 schema = arrow::schema({ field_type, field_data_owner_code, field_line_planning_number,
31 field_operating_day, field_journey_number,
32 field_reinforcement_number, field_timestamp, field_source,
33 field_punctuality, field_user_stop_code,
34 field_passage_sequence_number, field_vehicle_number,
35 field_block_code, field_wheelchair_accessible,
36 field_number_of_coaches, field_rd_y, field_rd_x,
37 field_distance_since_last_user_stop });
38}
39
40arrow::Result<std::shared_ptr<arrow::Table>> ParquetBuilder::getTable() {
41 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> types, types.Finish());
42 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> data_owner_codes, data_owner_codes.Finish());
43 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> line_planning_numbers, line_planning_numbers.Finish());
44 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> operating_days, operating_days.Finish());
45 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> journey_numbers, journey_numbers.Finish());
46 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> reinforcement_numbers, reinforcement_numbers.Finish());
47 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> timestamps, timestamps.Finish());
48 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> sources, sources.Finish());
49 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> punctualities, punctualities.Finish());
50 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> user_stop_codes, user_stop_codes.Finish());
51 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> passage_sequence_numbers, passage_sequence_numbers.Finish());
52 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> vehicle_numbers, vehicle_numbers.Finish());
53 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> block_codes, block_codes.Finish());
54 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> wheelchair_accessibles, wheelchair_accessibles.Finish());
55 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> number_of_coaches, number_of_coaches.Finish());
56 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> rd_ys, rd_ys.Finish());
57 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> rd_xs, rd_xs.Finish());
58 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> distance_since_last_user_stops, distance_since_last_user_stops.Finish());
59
60 std::vector<std::shared_ptr<arrow::Array>> columns = { types, data_owner_codes, line_planning_numbers, operating_days,
61 journey_numbers, reinforcement_numbers, timestamps, sources,
62 punctualities, user_stop_codes, passage_sequence_numbers,
63 vehicle_numbers, block_codes, wheelchair_accessibles,
64 number_of_coaches, rd_ys, rd_xs,
65 distance_since_last_user_stops };
66 return arrow::Result(arrow::Table::Make(schema, columns));
67}
68
69arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename) {
70 std::shared_ptr<parquet::WriterProperties> props = parquet::WriterProperties::Builder()
71 .compression(arrow::Compression::ZSTD)
72 ->created_by("oeuf-libtmi8")
73 ->version(parquet::ParquetVersion::PARQUET_2_6)
74 ->data_page_version(parquet::ParquetDataPageVersion::V2)
75 ->max_row_group_length(MAX_PARQUET_CHUNK)
76 ->build();
77
78 std::shared_ptr<parquet::ArrowWriterProperties> arrow_props = parquet::ArrowWriterProperties::Builder()
79 .store_schema()->build();
80
81 std::shared_ptr<arrow::io::FileOutputStream> out_file;
82 std::string filename_str = filename;
83 ARROW_ASSIGN_OR_RAISE(out_file, arrow::io::FileOutputStream::Open(filename_str + ".part"));
84
85 ARROW_ASSIGN_OR_RAISE(auto writer,
86 parquet::arrow::FileWriter::Open(*rbr.schema(), arrow::default_memory_pool(), out_file, props, arrow_props));
87 for (const auto &batchr : rbr) {
88 ARROW_ASSIGN_OR_RAISE(auto batch, batchr);
89 ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch));
90 }
91 ARROW_RETURN_NOT_OK(writer->Close());
92 ARROW_RETURN_NOT_OK(out_file->Close());
93
94 std::filesystem::rename(filename_str + ".part", filename);
95
96 return arrow::Status::OK();
97}
98
99arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename) {
100 auto tbr = arrow::TableBatchReader(table);
101 return writeArrowRecordsAsParquetFile(tbr, filename);
102}
diff --git a/module/default.nix b/module/default.nix
new file mode 100644
index 0000000..c891ceb
--- /dev/null
+++ b/module/default.nix
@@ -0,0 +1,118 @@
# NixOS module for two oeuf services:
#  - oeuf-recvkv6: a long-running service started from the oeuf-recvkv6
#    package;
#  - oeuf-archiver: a oneshot service, triggered by a timer, that runs the
#    oeuf-archiver package with S3 credentials read from files.
# Both run as the `oeuf` system user, which is created when either service is
# enabled.
flake: { lib, config, pkgs, ... }:
with lib;
let
  inherit (flake.packages.${pkgs.stdenv.hostPlatform.system}) oeuf-recvkv6;
  inherit (flake.packages.${pkgs.stdenv.hostPlatform.system}) oeuf-archiver;

  cfg = config.services.oeuf-recvkv6;
  archiverCfg = config.services.oeuf-archiver;
in
{
  options.services.oeuf-recvkv6 = {
    enable = mkEnableOption "oeuf-recvkv6";
    ndovProduction = mkEnableOption "usage of the NDOV Loket production ZeroMQ server";
    # Passed to the service as the METRICS_ADDR environment variable.
    metricsAddr = mkOption {
      type = types.str;
    };
  };

  options.services.oeuf-archiver = with types; {
    enable = mkEnableOption "oeuf-archiver";
    s3 = mkOption {
      type = submodule {
        options = {
          # Paths of files whose contents are exported as S3_ACCESS_KEY_ID
          # and S3_SECRET_ACCESS_KEY when the archiver starts.
          accessKeyIDFile = mkOption {
            type = str;
          };
          secretAccessKeyFile = mkOption {
            type = str;
          };
          # The remaining values are passed through as S3_PROVIDER,
          # S3_REGION, S3_ENDPOINT and S3_BUCKET.
          provider = mkOption {
            type = str;
          };
          region = mkOption {
            type = str;
          };
          endpoint = mkOption {
            type = str;
          };
          bucket = mkOption {
            type = str;
          };
        };
      };
    };
    # Passed to the service as PROMETHEUS_PUSH_URL.
    prometheusPushURL = mkOption {
      type = str;
    };
    # Extra groups for the archiver service (systemd SupplementaryGroups).
    supplementaryServiceGroups = mkOption {
      type = listOf str;
    };
  };

  config = mkIf (cfg.enable || archiverCfg.enable) (mkMerge [
    {
      users.users.oeuf = {
        description = "oeuf service user";
        isSystemUser = true;
        group = "oeuf";
      };

      users.groups.oeuf = { };
    }
    (mkIf cfg.enable {
      systemd.services.oeuf-recvkv6 = {
        # `after` only orders the unit; `wants` is what actually pulls
        # network-online.target into the transaction.
        after = [ "network-online.target" ];
        wants = [ "network-online.target" ];
        wantedBy = [ "multi-user.target" ];
        environment = {
          METRICS_ADDR = cfg.metricsAddr;
          NDOV_PRODUCTION = lib.boolToString cfg.ndovProduction;
        };
        serviceConfig = {
          User = config.users.users.oeuf.name;
          Group = config.users.users.oeuf.group;
          Restart = "always";
          StateDirectory = "oeuf";
          WorkingDirectory = "/var/lib/oeuf";
          ExecStart = "${lib.getBin oeuf-recvkv6}/bin/oeuf-recvkv6";
        };
      };
    })
    (mkIf archiverCfg.enable {
      # Runs the archiver five minutes after boot and then five minutes after
      # each previous activation.
      systemd.timers.oeuf-archiver = {
        wantedBy = [ "timers.target" ];
        partOf = [ "oeuf-archiver.service" ];
        timerConfig = {
          OnBootSec = "5m";
          OnUnitActiveSec = "5m";
          Unit = "oeuf-archiver.service";
        };
      };

      systemd.services.oeuf-archiver = {
        # See oeuf-recvkv6 above: `after` alone does not start the target.
        after = [ "network-online.target" ];
        wants = [ "network-online.target" ];
        environment = {
          S3_PROVIDER = archiverCfg.s3.provider;
          S3_REGION = archiverCfg.s3.region;
          S3_ENDPOINT = archiverCfg.s3.endpoint;
          S3_BUCKET = archiverCfg.s3.bucket;
          PROMETHEUS_PUSH_URL = archiverCfg.prometheusPushURL;
        };
        # The credentials are read from files at start-up rather than being
        # written into the unit's environment.
        script = ''
          export S3_ACCESS_KEY_ID="$(cat ${archiverCfg.s3.accessKeyIDFile})"
          export S3_SECRET_ACCESS_KEY="$(cat ${archiverCfg.s3.secretAccessKeyFile})"
          ${lib.getBin oeuf-archiver}/bin/oeuf-archiver
        '';
        serviceConfig = {
          Type = "oneshot";
          User = config.users.users.oeuf.name;
          Group = config.users.users.oeuf.group;
          SupplementaryGroups = archiverCfg.supplementaryServiceGroups;
          StateDirectory = "oeuf";
          WorkingDirectory = "/var/lib/oeuf";
          AmbientCapabilities = "CAP_NET_BIND_SERVICE";
        };
      };
    })
  ]);
}
diff --git a/script/archiver/default.nix b/script/archiver/default.nix
new file mode 100644
index 0000000..4a464e0
--- /dev/null
+++ b/script/archiver/default.nix
@@ -0,0 +1,15 @@
# Installs oeuf-archiver.sh as $out/bin/oeuf-archiver and wraps it so that
# bash, rclone and oeuf-bundleparquet are on its PATH.
{ pkgs ? import <nixpkgs> { } }: with pkgs;

let
  runtimeDeps = [ bash rclone oeuf-bundleparquet ];
in
stdenv.mkDerivation {
  name = "oeuf-archiver";
  src = ./.;

  buildInputs = runtimeDeps;
  nativeBuildInputs = [ makeWrapper ];
  installPhase = ''
    mkdir -p $out/bin
    cp oeuf-archiver.sh $out/bin/oeuf-archiver
    wrapProgram $out/bin/oeuf-archiver \
      --prefix PATH : ${lib.makeBinPath runtimeDeps}
  '';
}
diff --git a/script/archiver/oeuf-archiver.sh b/script/archiver/oeuf-archiver.sh
new file mode 100755
index 0000000..478d4d9
--- /dev/null
+++ b/script/archiver/oeuf-archiver.sh
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
#
# Runs oeuf-bundleparquet and then moves every ./merged/oeuf-*.parquet file,
# together with its .meta.json sidecar, to the configured S3 bucket.
#
# Required environment: S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER,
# S3_REGION, S3_ENDPOINT, S3_BUCKET.

set -eux
set -o pipefail

# This option prevents the loop from running
# if it does not match any files
shopt -s nullglob

oeuf-bundleparquet

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
set +x # Don't print the secret access key to the log
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"
set -x

# Options shared by both rclone invocations below.
rclone_opts=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)

for file in ./merged/oeuf-*.parquet; do
  # The sidecar goes first; the Parquet file only follows if that succeeded.
  # All expansions are quoted so that unusual file or bucket names are not
  # split into several arguments.
  rclone move "${rclone_opts[@]}" "$file.meta.json" ":s3:$S3_BUCKET" \
    && rclone move "${rclone_opts[@]}" "$file" ":s3:$S3_BUCKET"
done
diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix
new file mode 100644
index 0000000..95a9331
--- /dev/null
+++ b/script/synckv6/default.nix
@@ -0,0 +1,15 @@
# Installs oeuf-synckv6.sh as $out/bin/oeuf-synckv6 and wraps it so that bash
# and rclone are on its PATH.
{ pkgs ? import <nixpkgs> { } }: with pkgs;

let
  runtimeDeps = [ bash rclone ];
in
stdenv.mkDerivation {
  name = "oeuf-synckv6";
  src = ./.;

  buildInputs = runtimeDeps;
  nativeBuildInputs = [ makeWrapper ];
  installPhase = ''
    mkdir -p $out/bin
    cp oeuf-synckv6.sh $out/bin/oeuf-synckv6
    wrapProgram $out/bin/oeuf-synckv6 \
      --prefix PATH : ${lib.makeBinPath runtimeDeps}
  '';
}
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh
new file mode 100755
index 0000000..6b24347
--- /dev/null
+++ b/script/synckv6/oeuf-synckv6.sh
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
#
# Copies files from the configured S3 bucket into the current directory, but
# only those for which the bucket also contains a matching
# "<name>.meta.json" entry.
#
# Required environment: S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER,
# S3_REGION, S3_ENDPOINT, S3_BUCKET.

set -eu
set -o pipefail

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

set +x

# Options shared by both rclone invocations below.
rclone_opts=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)

all_files=()
declare -A metafiles

# Each line of `rclone ls` is "<size> <path>"; split the listing into
# metadata sidecars and everything else.
while IFS=' ' read -r size filename; do
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    # Quoted so that a name containing whitespace or glob characters stays
    # one array element.
    all_files+=("$filename")
  fi
done < <(rclone ls "${rclone_opts[@]}" ":s3:$S3_BUCKET")

# Keep only the files whose sidecar is present in the listing.
files=()
for filename in "${all_files[@]}"; do
  if [[ -v metafiles["$filename.meta.json"] ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"
echo "Synching this directory with these files"

printf "%s\n" "${files[@]}" | rclone copy \
  "${rclone_opts[@]}" \
  --progress \
  --files-from - \
  ":s3:$S3_BUCKET" ./
diff --git a/src/augmentkv6/.envrc b/src/augmentkv6/.envrc
new file mode 100644
index 0000000..694e74f
--- /dev/null
+++ b/src/augmentkv6/.envrc
@@ -0,0 +1,2 @@
# Load the environment defined two directories up from this one.
1source_env ../../
# Development mode: the Makefile in this directory adds -Werror to CXXFLAGS
# when DEVMODE is set.
2export DEVMODE=1
diff --git a/src/augmentkv6/Makefile b/src/augmentkv6/Makefile
new file mode 100644
index 0000000..cebb291
--- /dev/null
+++ b/src/augmentkv6/Makefile
@@ -0,0 +1,21 @@
# Builds the augmentkv6 binary from main.cpp as a hardened,
# position-independent executable. Set DEVMODE to treat warnings as errors.
#
# Taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
  -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
  -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
  -D_GLIBCXX_ASSERTIONS \
  -fstrict-flex-arrays=3 \
  -fstack-clash-protection -fstack-protector-strong
LDFLAGS=-larrow -larrow_acero -larrow_dataset -lparquet -ltmi8 -Wl,-z,defs \
  -Wl,-z,nodlopen -Wl,-z,noexecstack \
  -Wl,-z,relro -Wl,-z,now

augmentkv6: main.cpp
	$(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

# -f keeps `make clean` from failing when the binary has not been built yet.
.PHONY: clean
clean:
	rm -f augmentkv6
diff --git a/src/augmentkv6/main.cpp b/src/augmentkv6/main.cpp
new file mode 100644
index 0000000..81a54d3
--- /dev/null
+++ b/src/augmentkv6/main.cpp
@@ -0,0 +1,510 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <chrono>
4#include <cstdio>
5#include <deque>
6#include <filesystem>
7#include <format>
8#include <fstream>
9#include <iostream>
10#include <string>
11#include <string_view>
12#include <vector>
13
14#include <arrow/acero/exec_plan.h>
15#include <arrow/api.h>
16#include <arrow/compute/api.h>
17#include <arrow/dataset/api.h>
18#include <arrow/filesystem/api.h>
19#include <arrow/io/api.h>
20#include <parquet/arrow/reader.h>
21
22#include <tmi8/kv1_index.hpp>
23#include <tmi8/kv1_lexer.hpp>
24#include <tmi8/kv1_parser.hpp>
25#include <tmi8/kv1_types.hpp>
26#include <tmi8/kv6_parquet.hpp>
27
28using namespace std::string_view_literals;
29
30namespace ac = arrow::acero;
31namespace ds = arrow::dataset;
32namespace cp = arrow::compute;
33using namespace arrow;
34
// Clock used for the timing measurements in this file: the high resolution
// clock when it is steady, and the steady clock otherwise.
using TimingClock = std::conditional<
  std::chrono::high_resolution_clock::is_steady,
  std::chrono::high_resolution_clock,
  std::chrono::steady_clock>::type;
39
// Reads standard input until end-of-file and returns everything that was read.
// Progress messages go to stderr. If a read error occurs on stdin, a message
// is printed and the process exits with status 1.
std::string readKv1() {
  fputs("Reading KV1 from standard input\n", stderr);

  char buf[4096];
  std::string data;
  while (!feof(stdin) && !ferror(stdin)) {
    // fread may return a short count at end-of-file or on error; whatever was
    // read is still appended before the loop condition is checked again.
    size_t read = fread(buf, sizeof(char), sizeof buf, stdin);
    data.append(buf, read);
  }
  if (ferror(stdin)) {
    fputs("Error when reading from stdin\n", stderr);
    exit(1);
  }
  // %zu is the conversion specifier for size_t.
  fprintf(stderr, "Read %zu bytes\n", data.size());

  return data;
}
57
58std::vector<Kv1Token> lex() {
59 std::string data = readKv1();
60
61 auto start = TimingClock::now();
62 Kv1Lexer lexer(data);
63 lexer.lex();
64 auto end = TimingClock::now();
65
66 std::chrono::duration<double> elapsed{end - start};
67 double bytes = static_cast<double>(data.size()) / 1'000'000;
68 double speed = bytes / elapsed.count();
69
70 if (!lexer.errors.empty()) {
71 fputs("Lexer reported errors:\n", stderr);
72 for (const auto &error : lexer.errors)
73 fprintf(stderr, "- %s\n", error.c_str());
74 exit(1);
75 }
76
77 fprintf(stderr, "Got %lu tokens\n", lexer.tokens.size());
78 fprintf(stderr, "Duration: %f s\n", elapsed.count());
79 fprintf(stderr, "Speed: %f MB/s\n", speed);
80
81 return std::move(lexer.tokens);
82}
83
84bool parse(Kv1Records &into) {
85 std::vector<Kv1Token> tokens = lex();
86
87 Kv1Parser parser(tokens, into);
88 parser.parse();
89
90 bool ok = true;
91 if (!parser.gerrors.empty()) {
92 ok = false;
93 fputs("Parser reported errors:\n", stderr);
94 for (const auto &error : parser.gerrors)
95 fprintf(stderr, "- %s\n", error.c_str());
96 }
97 if (!parser.warns.empty()) {
98 fputs("Parser reported warnings:\n", stderr);
99 for (const auto &warn : parser.warns)
100 fprintf(stderr, "- %s\n", warn.c_str());
101 }
102
103 fprintf(stderr, "Parsed %lu records\n", into.size());
104
105 return ok;
106}
107
108void printParsedRecords(const Kv1Records &records) {
109 fputs("Parsed records:\n", stderr);
110 fprintf(stderr, " organizational_units: %lu\n", records.organizational_units.size());
111 fprintf(stderr, " higher_organizational_units: %lu\n", records.higher_organizational_units.size());
112 fprintf(stderr, " user_stop_points: %lu\n", records.user_stop_points.size());
113 fprintf(stderr, " user_stop_areas: %lu\n", records.user_stop_areas.size());
114 fprintf(stderr, " timing_links: %lu\n", records.timing_links.size());
115 fprintf(stderr, " links: %lu\n", records.links.size());
116 fprintf(stderr, " lines: %lu\n", records.lines.size());
117 fprintf(stderr, " destinations: %lu\n", records.destinations.size());
118 fprintf(stderr, " journey_patterns: %lu\n", records.journey_patterns.size());
119 fprintf(stderr, " concession_financer_relations: %lu\n", records.concession_financer_relations.size());
120 fprintf(stderr, " concession_areas: %lu\n", records.concession_areas.size());
121 fprintf(stderr, " financers: %lu\n", records.financers.size());
122 fprintf(stderr, " journey_pattern_timing_links: %lu\n", records.journey_pattern_timing_links.size());
123 fprintf(stderr, " points: %lu\n", records.points.size());
124 fprintf(stderr, " point_on_links: %lu\n", records.point_on_links.size());
125 fprintf(stderr, " icons: %lu\n", records.icons.size());
126 fprintf(stderr, " notices: %lu\n", records.notices.size());
127 fprintf(stderr, " notice_assignments: %lu\n", records.notice_assignments.size());
128 fprintf(stderr, " time_demand_groups: %lu\n", records.time_demand_groups.size());
129 fprintf(stderr, " time_demand_group_run_times: %lu\n", records.time_demand_group_run_times.size());
130 fprintf(stderr, " period_groups: %lu\n", records.period_groups.size());
131 fprintf(stderr, " specific_days: %lu\n", records.specific_days.size());
132 fprintf(stderr, " timetable_versions: %lu\n", records.timetable_versions.size());
133 fprintf(stderr, " public_journeys: %lu\n", records.public_journeys.size());
134 fprintf(stderr, " period_group_validities: %lu\n", records.period_group_validities.size());
135 fprintf(stderr, " exceptional_operating_days: %lu\n", records.exceptional_operating_days.size());
136 fprintf(stderr, " schedule_versions: %lu\n", records.schedule_versions.size());
137 fprintf(stderr, " public_journey_passing_times: %lu\n", records.public_journey_passing_times.size());
138 fprintf(stderr, " operating_days: %lu\n", records.operating_days.size());
139}
140
141void printIndexSize(const Kv1Index &index) {
142 fputs("Index size:\n", stderr);
143 fprintf(stderr, " organizational_units: %lu\n", index.organizational_units.size());
144 fprintf(stderr, " user_stop_points: %lu\n", index.user_stop_points.size());
145 fprintf(stderr, " user_stop_areas: %lu\n", index.user_stop_areas.size());
146 fprintf(stderr, " timing_links: %lu\n", index.timing_links.size());
147 fprintf(stderr, " links: %lu\n", index.links.size());
148 fprintf(stderr, " lines: %lu\n", index.lines.size());
149 fprintf(stderr, " destinations: %lu\n", index.destinations.size());
150 fprintf(stderr, " journey_patterns: %lu\n", index.journey_patterns.size());
151 fprintf(stderr, " concession_financer_relations: %lu\n", index.concession_financer_relations.size());
152 fprintf(stderr, " concession_areas: %lu\n", index.concession_areas.size());
153 fprintf(stderr, " financers: %lu\n", index.financers.size());
154 fprintf(stderr, " journey_pattern_timing_links: %lu\n", index.journey_pattern_timing_links.size());
155 fprintf(stderr, " points: %lu\n", index.points.size());
156 fprintf(stderr, " point_on_links: %lu\n", index.point_on_links.size());
157 fprintf(stderr, " icons: %lu\n", index.icons.size());
158 fprintf(stderr, " notices: %lu\n", index.notices.size());
159 fprintf(stderr, " time_demand_groups: %lu\n", index.time_demand_groups.size());
160 fprintf(stderr, " time_demand_group_run_times: %lu\n", index.time_demand_group_run_times.size());
161 fprintf(stderr, " period_groups: %lu\n", index.period_groups.size());
162 fprintf(stderr, " specific_days: %lu\n", index.specific_days.size());
163 fprintf(stderr, " timetable_versions: %lu\n", index.timetable_versions.size());
164 fprintf(stderr, " public_journeys: %lu\n", index.public_journeys.size());
165 fprintf(stderr, " period_group_validities: %lu\n", index.period_group_validities.size());
166 fprintf(stderr, " exceptional_operating_days: %lu\n", index.exceptional_operating_days.size());
167 fprintf(stderr, " schedule_versions: %lu\n", index.schedule_versions.size());
168 fprintf(stderr, " public_journey_passing_times: %lu\n", index.public_journey_passing_times.size());
169 fprintf(stderr, " operating_days: %lu\n", index.operating_days.size());
170}
171
172struct BasicJourneyKey {
173 std::string data_owner_code;
174 std::string line_planning_number;
175 int journey_number;
176
177 auto operator<=>(const BasicJourneyKey &) const = default;
178};
179
180size_t hash_value(const BasicJourneyKey &k) {
181 size_t seed = 0;
182
183 boost::hash_combine(seed, k.data_owner_code);
184 boost::hash_combine(seed, k.line_planning_number);
185 boost::hash_combine(seed, k.journey_number);
186
187 return seed;
188}
189
190using BasicJourneyKeySet = std::unordered_set<BasicJourneyKey, boost::hash<BasicJourneyKey>>;
191
192arrow::Result<BasicJourneyKeySet> basicJourneys(std::shared_ptr<arrow::Table> table) {
193 ac::TableSourceNodeOptions table_source_node_options(table);
194 ac::Declaration table_source("table_source", std::move(table_source_node_options));
195 auto aggregate_options = ac::AggregateNodeOptions{
196 /* .aggregates = */ {},
197 /* .keys = */ { "data_owner_code", "line_planning_number", "journey_number" },
198 };
199 ac::Declaration aggregate("aggregate", { std::move(table_source) }, std::move(aggregate_options));
200
201 std::shared_ptr<arrow::Table> result;
202 ARROW_ASSIGN_OR_RAISE(result, ac::DeclarationToTable(std::move(aggregate)));
203
204 std::shared_ptr<arrow::ChunkedArray> data_owner_codes = result->GetColumnByName("data_owner_code");
205 std::shared_ptr<arrow::ChunkedArray> line_planning_numbers = result->GetColumnByName("line_planning_number");
206 std::shared_ptr<arrow::ChunkedArray> journey_numbers = result->GetColumnByName("journey_number");
207
208 int i_data_owner_codes_chunk = 0;
209 int i_journey_numbers_chunk = 0;
210 int i_line_planning_numbers_chunk = 0;
211 int i_in_data_owner_codes_chunk = 0;
212 int i_in_journey_numbers_chunk = 0;
213 int i_in_line_planning_numbers_chunk = 0;
214
215 BasicJourneyKeySet journeys;
216
217 for (int64_t i = 0; i < result->num_rows(); i++) {
218 auto data_owner_codes_chunk = std::static_pointer_cast<arrow::StringArray>(data_owner_codes->chunk(i_data_owner_codes_chunk));
219 auto line_planning_numbers_chunk = std::static_pointer_cast<arrow::StringArray>(line_planning_numbers->chunk(i_line_planning_numbers_chunk));
220 auto journey_numbers_chunk = std::static_pointer_cast<arrow::UInt32Array>(journey_numbers->chunk(i_journey_numbers_chunk));
221
222 std::string_view data_owner_code = data_owner_codes_chunk->Value(i_in_data_owner_codes_chunk);
223 std::string_view line_planning_number = line_planning_numbers_chunk->Value(i_in_line_planning_numbers_chunk);
224 uint32_t journey_number = journey_numbers_chunk->Value(i_in_journey_numbers_chunk);
225
226 journeys.emplace(
227 std::string(data_owner_code),
228 std::string(line_planning_number),
229 journey_number
230 );
231
232 i_in_data_owner_codes_chunk++;
233 i_in_line_planning_numbers_chunk++;
234 i_in_journey_numbers_chunk++;
235 if (i_in_data_owner_codes_chunk >= data_owner_codes_chunk->length()) {
236 i_data_owner_codes_chunk++;
237 i_in_data_owner_codes_chunk = 0;
238 }
239 if (i_in_line_planning_numbers_chunk >= line_planning_numbers_chunk->length()) {
240 i_line_planning_numbers_chunk++;
241 i_in_line_planning_numbers_chunk = 0;
242 }
243 if (i_in_journey_numbers_chunk >= journey_numbers_chunk->length()) {
244 i_journey_numbers_chunk++;
245 i_in_journey_numbers_chunk = 0;
246 }
247 }
248
249 return journeys;
250}
251
252struct DistanceKey {
253 BasicJourneyKey journey;
254 std::string last_passed_user_stop_code;
255
256 auto operator<=>(const DistanceKey &) const = default;
257};
258
259size_t hash_value(const DistanceKey &k) {
260 size_t seed = 0;
261
262 boost::hash_combine(seed, k.journey);
263 boost::hash_combine(seed, k.last_passed_user_stop_code);
264
265 return seed;
266}
267
268struct DistanceTimingLink {
269 const Kv1JourneyPatternTimingLink *jopatili;
270 double distance_since_start_of_journey = 0; // at the start of the link
271};
272
273using DistanceMap = std::unordered_map<DistanceKey, double, boost::hash<DistanceKey>>;
274
275// Returns a map, where
276// DataOwnerCode + LinePlanningNumber + JourneyNumber + UserStopCode ->
277// Distance of Last User Stop
278DistanceMap makeDistanceMap(Kv1Records &records, Kv1Index &index, BasicJourneyKeySet &journeys) {
279 std::unordered_map<
280 Kv1JourneyPattern::Key,
281 std::vector<DistanceTimingLink>,
282 boost::hash<Kv1JourneyPattern::Key>> jopatili_index;
283 std::unordered_map<
284 BasicJourneyKey,
285 const Kv1PublicJourney *,
286 boost::hash<BasicJourneyKey>> journey_index;
287 for (size_t i = 0; i < records.public_journeys.size(); i++) {
288 const Kv1PublicJourney *pujo = &records.public_journeys[i];
289
290 BasicJourneyKey journey_key(
291 pujo->key.data_owner_code,
292 pujo->key.line_planning_number,
293 pujo->key.journey_number);
294
295 if (journeys.contains(journey_key)) {
296 journey_index[journey_key] = pujo;
297
298 Kv1JourneyPattern::Key jopa_key(
299 pujo->key.data_owner_code,
300 pujo->key.line_planning_number,
301 pujo->journey_pattern_code);
302 jopatili_index[jopa_key] = {};
303 }
304 }
305
306 for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) {
307 const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i];
308 Kv1JourneyPattern::Key jopa_key(
309 jopatili->key.data_owner_code,
310 jopatili->key.line_planning_number,
311 jopatili->key.journey_pattern_code);
312 if (jopatili_index.contains(jopa_key)) {
313 jopatili_index[jopa_key].push_back(DistanceTimingLink(jopatili, 0));
314 }
315 }
316
317 for (auto &[jopa_key, timing_links] : jopatili_index) {
318 std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) {
319 return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order;
320 });
321
322 const std::string transport_type = index.journey_patterns[jopa_key]->p_line->transport_type;
323
324 for (size_t i = 1; i < timing_links.size(); i++) {
325 DistanceTimingLink *timing_link = &timing_links[i];
326 DistanceTimingLink *prev_timing_link = &timing_links[i - 1];
327
328 const Kv1Link::Key link_key(
329 prev_timing_link->jopatili->key.data_owner_code,
330 prev_timing_link->jopatili->user_stop_code_begin,
331 prev_timing_link->jopatili->user_stop_code_end,
332 transport_type);
333 double link_distance = index.links[link_key]->distance;
334 timing_link->distance_since_start_of_journey =
335 prev_timing_link->distance_since_start_of_journey + link_distance;
336 }
337 }
338
339 // DataOwnerCode + LinePlanningNumber + JourneyNumber + UserStopCode ->
340 // Distance of Last User Stop
341 DistanceMap distance_map;
342
343 for (const auto &journey : journeys) {
344 const Kv1PublicJourney *pujo = journey_index[journey];
345 if (pujo == nullptr) {
346 std::cerr << "Warning: No PUJO found for [" << journey.data_owner_code << "] "
347 << journey.line_planning_number << "/" << journey.journey_number << std::endl;
348 continue;
349 }
350 Kv1JourneyPattern::Key jopa_key(
351 pujo->key.data_owner_code,
352 pujo->key.line_planning_number,
353 pujo->journey_pattern_code);
354 for (const auto &timing_link : jopatili_index[jopa_key]) {
355 DistanceKey key(journey, timing_link.jopatili->user_stop_code_begin);
356 distance_map[key] = timing_link.distance_since_start_of_journey;
357 }
358 }
359
360 return distance_map;
361}
362
363arrow::Result<std::shared_ptr<arrow::Table>> augment(
364 std::shared_ptr<arrow::Table> table,
365 const DistanceMap &distance_map
366) {
367 for (int i = 0; i < table->num_columns(); i++) {
368 if (table->column(i)->num_chunks() > 1) {
369 std::stringstream ss;
370 ss << "Error: Expected column " << i
371 << " (" << table->ColumnNames()[i] << ") to have 1 chunk, got "
372 << table->column(i)->num_chunks();
373 return arrow::Status::Invalid(ss.str());
374 }
375 }
376
377 auto data_owner_codes = std::static_pointer_cast<arrow::StringArray>(table->GetColumnByName("data_owner_code")->chunk(0));
378 auto line_planning_numbers = std::static_pointer_cast<arrow::StringArray>(table->GetColumnByName("line_planning_number")->chunk(0));
379 auto journey_numbers = std::static_pointer_cast<arrow::UInt32Array>(table->GetColumnByName("journey_number")->chunk(0));
380 auto user_stop_codes = std::static_pointer_cast<arrow::StringArray>(table->GetColumnByName("user_stop_code")->chunk(0));
381 auto distance_since_last_user_stops = std::static_pointer_cast<arrow::UInt32Array>(table->GetColumnByName("distance_since_last_user_stop")->chunk(0));
382 auto timestamps = std::static_pointer_cast<arrow::TimestampArray>(table->GetColumnByName("timestamp")->chunk(0));
383
384 auto timestamps_type = table->schema()->GetFieldByName("timestamp")->type();
385 if (timestamps_type->id() != arrow::Type::TIMESTAMP)
386 return arrow::Status::Invalid("Field 'timestamp' does not have expected type TIMESTAMP");
387 if (std::static_pointer_cast<arrow::TimestampType>(timestamps_type)->unit() != arrow::TimeUnit::MILLI)
388 return arrow::Status::Invalid("Field 'timestamp' does not have unit MILLI");
389 if (!std::static_pointer_cast<arrow::TimestampType>(timestamps_type)->timezone().empty())
390 return arrow::Status::Invalid("Field 'timestamp' should have empty time zone name");
391
392 std::shared_ptr<arrow::Field> field_distance_since_start_of_journey =
393 arrow::field("distance_since_start_of_journey", arrow::uint32());
394 std::shared_ptr<arrow::Field> field_day_of_week =
395 arrow::field("timestamp_iso_day_of_week", arrow::int64());
396 std::shared_ptr<arrow::Field> field_date =
397 arrow::field("timestamp_date", arrow::date32());
398 std::shared_ptr<arrow::Field> field_local_time =
399 arrow::field("timestamp_local_time", arrow::time32(arrow::TimeUnit::SECOND));
400 arrow::UInt32Builder distance_since_start_of_journey_builder;
401 arrow::Int64Builder day_of_week_builder;
402 arrow::Date32Builder date_builder;
403 arrow::Time32Builder local_time_builder(arrow::time32(arrow::TimeUnit::SECOND), arrow::default_memory_pool());
404
405 const std::chrono::time_zone *amsterdam = std::chrono::locate_zone("Europe/Amsterdam");
406
407 for (int64_t i = 0; i < table->num_rows(); i++) {
408 DistanceKey key(
409 BasicJourneyKey(
410 std::string(data_owner_codes->Value(i)),
411 std::string(line_planning_numbers->Value(i)),
412 journey_numbers->Value(i)),
413 std::string(user_stop_codes->Value(i)));
414
415 uint32_t distance_since_last_user_stop = distance_since_last_user_stops->Value(i);
416 if (distance_map.contains(key)) {
417 uint32_t total_distance = distance_since_last_user_stop + static_cast<uint32_t>(distance_map.at(key));
418 ARROW_RETURN_NOT_OK(distance_since_start_of_journey_builder.Append(total_distance));
419 } else {
420 ARROW_RETURN_NOT_OK(distance_since_start_of_journey_builder.AppendNull());
421 }
422
423 // Welp, this has gotten a bit complicated!
424 std::chrono::sys_seconds timestamp(std::chrono::floor<std::chrono::seconds>(std::chrono::milliseconds(timestamps->Value(i))));
425 std::chrono::zoned_seconds zoned_timestamp(amsterdam, timestamp);
426 std::chrono::local_seconds local_timestamp(zoned_timestamp);
427 std::chrono::local_days local_date = std::chrono::floor<std::chrono::days>(local_timestamp);
428 std::chrono::year_month_day date(local_date);
429 std::chrono::weekday day_of_week(local_date);
430 std::chrono::hh_mm_ss<std::chrono::seconds> time(local_timestamp - local_date);
431 std::chrono::sys_days unix_date(date);
432
433 int64_t iso_day_of_week = day_of_week.iso_encoding();
434 int32_t unix_days = static_cast<int32_t>(unix_date.time_since_epoch().count());
435 int32_t secs_since_midnight = static_cast<int32_t>(std::chrono::seconds(time).count());
436
437 ARROW_RETURN_NOT_OK(day_of_week_builder.Append(iso_day_of_week));
438 ARROW_RETURN_NOT_OK(date_builder.Append(unix_days));
439 ARROW_RETURN_NOT_OK(local_time_builder.Append(secs_since_midnight));
440 }
441
442 ARROW_ASSIGN_OR_RAISE(auto distance_since_start_of_journey_col_chunk, distance_since_start_of_journey_builder.Finish());
443 ARROW_ASSIGN_OR_RAISE(auto day_of_week_col_chunk, day_of_week_builder.Finish());
444 ARROW_ASSIGN_OR_RAISE(auto date_col_chunk, date_builder.Finish());
445 ARROW_ASSIGN_OR_RAISE(auto local_time_col_chunk, local_time_builder.Finish());
446 auto distance_since_start_of_journey_col =
447 std::make_shared<arrow::ChunkedArray>(distance_since_start_of_journey_col_chunk);
448 auto day_of_week_col = std::make_shared<arrow::ChunkedArray>(day_of_week_col_chunk);
449 auto date_col = std::make_shared<arrow::ChunkedArray>(date_col_chunk);
450 auto local_time_col = std::make_shared<arrow::ChunkedArray>(local_time_col_chunk);
451
452 ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(
453 table->num_columns(),
454 field_distance_since_start_of_journey,
455 distance_since_start_of_journey_col));
456 ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_day_of_week, day_of_week_col));
457 ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_date, date_col));
458 ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_local_time, local_time_col));
459
460 return table;
461}
462
463arrow::Status processTables(Kv1Records &records, Kv1Index &index) {
464 std::shared_ptr<arrow::io::RandomAccessFile> input;
465 ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open("oeuf-input.parquet"));
466
467 std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
468 ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &arrow_reader));
469
470 std::shared_ptr<arrow::Table> table;
471 ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table));
472
473 std::cerr << "Input KV6 file has " << table->num_rows() << " rows" << std::endl;
474 ARROW_ASSIGN_OR_RAISE(BasicJourneyKeySet journeys, basicJourneys(table));
475 std::cerr << "Found " << journeys.size() << " distinct journeys" << std::endl;
476 DistanceMap distance_map = makeDistanceMap(records, index, journeys);
477 std::cerr << "Distance map has " << distance_map.size() << " keys" << std::endl;
478
479 std::cerr << "Creating augmented table" << std::endl;
480 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Table> augmented, augment(table, distance_map));
481
482 std::cerr << "Writing augmented table" << std::endl;
483 return writeArrowTableAsParquetFile(*augmented, "oeuf-augmented.parquet");
484}
485
486int main(int argc, char *argv[]) {
487 Kv1Records records;
488 if (!parse(records)) {
489 fputs("Error parsing records, exiting\n", stderr);
490 return EXIT_FAILURE;
491 }
492 printParsedRecords(records);
493 fputs("Indexing...\n", stderr);
494 Kv1Index index(&records);
495 fprintf(stderr, "Indexed %lu records\n", index.size());
496 // Only notice assignments are not indexed. If this equality is not valid,
497 // then this means that we had duplicate keys or that something else went
498 // wrong. That would really not be great.
499 assert(index.size() == records.size() - records.notice_assignments.size());
500 printIndexSize(index);
501 fputs("Linking records...\n", stderr);
502 kv1LinkRecords(index);
503 fputs("Done linking\n", stderr);
504
505 arrow::Status st = processTables(records, index);
506 if (!st.ok()) {
507 std::cerr << "Failed to process tables: " << st << std::endl;
508 return EXIT_FAILURE;
509 }
510}
diff --git a/src/bundleparquet/.envrc b/src/bundleparquet/.envrc
new file mode 100644
index 0000000..694e74f
--- /dev/null
+++ b/src/bundleparquet/.envrc
@@ -0,0 +1,2 @@
1source_env ../../
2export DEVMODE=1
diff --git a/src/bundleparquet/Makefile b/src/bundleparquet/Makefile
new file mode 100644
index 0000000..170304d
--- /dev/null
+++ b/src/bundleparquet/Makefile
@@ -0,0 +1,21 @@
1# Taken from:
2# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
3# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
4# 2023. [Online]. Available:
5# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
6CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
7 -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
8 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
9 -D_GLIBCXX_ASSERTIONS \
10 -fstrict-flex-arrays=3 \
11 -fstack-clash-protection -fstack-protector-strong
12LDFLAGS=-larrow -lcurl -lparquet -lprometheus-cpp-push -lprometheus-cpp-core -lz -ltmi8 -Wl,-z,defs \
13 -Wl,-z,nodlopen -Wl,-z,noexecstack \
14 -Wl,-z,relro -Wl,-z,now
15
16bundleparquet: main.cpp spliturl.cpp
17 $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
18
# Remove the built binary. -f keeps 'make clean' from failing when the binary
# has not been built (or was already removed).
.PHONY: clean
clean:
	rm -f bundleparquet
diff --git a/src/bundleparquet/main.cpp b/src/bundleparquet/main.cpp
new file mode 100644
index 0000000..05fd881
--- /dev/null
+++ b/src/bundleparquet/main.cpp
@@ -0,0 +1,213 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <chrono>
4#include <deque>
5#include <filesystem>
6#include <format>
7#include <fstream>
8#include <iostream>
9
10#include <arrow/api.h>
11#include <arrow/io/api.h>
12#include <parquet/arrow/reader.h>
13
14#include <nlohmann/json.hpp>
15
16#include <prometheus/counter.h>
17#include <prometheus/gateway.h>
18#include <prometheus/registry.h>
19
20#include <tmi8/kv6_parquet.hpp>
21
22#include "spliturl.hpp"
23
24static const int MIN_COMBINED_ROWS = 1000000; // one million
25static const int MAX_COMBINED_ROWS = 2000000; // two million
26
// Contents of a '.meta.json' file that accompanies a Parquet file: the
// lowest and highest timestamp and the number of rows written. All fields
// default to zero.
struct FileMetadata {
  int64_t min_timestamp{0};
  int64_t max_timestamp{0};
  int64_t rows_written{0};
};
32
33struct File {
34 FileMetadata metadata;
35 std::filesystem::path filename;
36};
37
38FileMetadata readMetadataOf(std::filesystem::path filename) {
39 std::string meta_filename = std::string(filename) + ".meta.json";
40 std::ifstream meta_file = std::ifstream(meta_filename, std::ifstream::in|std::ifstream::binary);
41 nlohmann::json meta_json;
42 meta_file >> meta_json;
43 FileMetadata meta = {
44 .min_timestamp = meta_json["min_timestamp"],
45 .max_timestamp = meta_json["max_timestamp"],
46 .rows_written = meta_json["rows_written"],
47 };
48 return meta;
49}
50
// Bundles files from the front of `files` into one merged Parquet file in the
// 'merged' directory, and removes the handled entries from `files`.
//
// Files are taken from the front for as long as the combined row count does
// not exceed MAX_COMBINED_ROWS. Then:
//  - if fewer than MIN_COMBINED_ROWS rows were gathered and no further file
//    was held back for capacity reasons, nothing is written and the files
//    stay on disk;
//  - if the very first file on its own exceeds MAX_COMBINED_ROWS, that file
//    and its metadata are moved into 'merged' unchanged;
//  - otherwise the gathered tables are concatenated and written together with
//    a '.meta.json' file, after which the source files are deleted.
// `rows_written` is incremented by the number of rows placed in 'merged'.
//
// Returns an error status if reading or writing fails. Note that the
// std::filesystem calls used here report errors by throwing.
arrow::Status processFirstTables(std::deque<File> &files, prometheus::Counter &rows_written) {
  if (files.size() == 0) {
    std::cerr << "Did not find any files" << std::endl;
    return arrow::Status::OK();
  }

  int64_t rows = 0;

  std::vector<std::shared_ptr<arrow::Table>> tables;
  std::vector<std::filesystem::path> processed;
  int64_t min_timestamp = std::numeric_limits<int64_t>::max();
  int64_t max_timestamp = 0;

  bool over_capacity_risk = false;
  while (!files.empty()) {
    const File &file = files.front();

    // Decide whether the file still fits before touching it, so that a file
    // that is held back is neither opened nor allowed to widen the timestamp
    // range recorded for the merged file.
    if (rows + file.metadata.rows_written > MAX_COMBINED_ROWS) {
      over_capacity_risk = true;
      break;
    }

    std::shared_ptr<arrow::io::RandomAccessFile> input;
    ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file.filename));

    std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
    ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &arrow_reader));

    std::shared_ptr<arrow::Table> table;
    ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table));

    if (file.metadata.min_timestamp < min_timestamp)
      min_timestamp = file.metadata.min_timestamp;
    if (file.metadata.max_timestamp > max_timestamp)
      max_timestamp = file.metadata.max_timestamp;

    tables.push_back(table);
    processed.push_back(file.filename);
    rows += file.metadata.rows_written;
    // `file` refers to the front element and must not be used after this.
    files.pop_front();
  }

  if (rows < MIN_COMBINED_ROWS && !over_capacity_risk) {
    std::cerr << "Found files, but not enough to satisfy the minimum amount of rows for the combined file" << std::endl;
    std::cerr << "(We have " << rows << "/" << MIN_COMBINED_ROWS << " rows at the moment, so "
              << static_cast<float>(rows)/static_cast<float>(MIN_COMBINED_ROWS)*100.f << "%)" << std::endl;
    return arrow::Status::OK();
  } else if (rows == 0 && over_capacity_risk) {
    // The first file on its own is larger than the maximum: pass it on as-is.
    const std::filesystem::path &filename = files.front().filename;
    std::filesystem::rename(filename, "merged" / filename);
    std::filesystem::rename(std::string(filename) + ".meta.json", std::string("merged" / filename) + ".meta.json");
    rows_written.Increment(static_cast<double>(files.front().metadata.rows_written));
    files.pop_front();
    return arrow::Status::OK();
  }

  // Default options specify that the schemas are not unified, which is
  // luckily exactly what we want :)
  std::shared_ptr<arrow::Table> merged_table;
  ARROW_ASSIGN_OR_RAISE(merged_table, arrow::ConcatenateTables(tables));

  const auto merged_name = [](auto time_point) {
    return std::format("merged/oeuf-{:%FT%T%Ez}.parquet", time_point);
  };
  auto timestamp = std::chrono::round<std::chrono::seconds>(std::chrono::system_clock::now());
  std::string filename = merged_name(timestamp);
  // The name has a resolution of one second. Two bundles finished within the
  // same second would otherwise share a name, and the second would overwrite
  // the first after its source files were already deleted.
  while (std::filesystem::exists(filename)) {
    timestamp += std::chrono::seconds(1);
    filename = merged_name(timestamp);
  }
  ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*merged_table, filename));

  std::cerr << "Wrote merged table to " << filename << std::endl;

  // The metadata is written to a '.part' file first and renamed afterwards,
  // so that a '.meta.json' file only ever appears with complete contents.
  std::ofstream metaf(filename + ".meta.json.part", std::ios::binary);
  nlohmann::json meta{
    { "min_timestamp", min_timestamp },
    { "max_timestamp", max_timestamp },
    { "rows_written", rows },
  };
  metaf << meta;
  metaf.close();
  // Do not go on to delete the source files if the metadata was not written.
  if (metaf.fail())
    return arrow::Status::IOError("Failed to write metadata file ", filename, ".meta.json.part");
  std::filesystem::rename(filename + ".meta.json.part", filename + ".meta.json");

  std::cerr << "Wrote merged table metadata" << std::endl;
  rows_written.Increment(static_cast<double>(rows));

  for (const std::filesystem::path &source : processed) {
    std::filesystem::remove(source);
    std::filesystem::remove(std::string(source) + ".meta.json");
  }

  std::cerr << "Successfully wrote merged table, metadata and deleted old files" << std::endl;

  return arrow::Status::OK();
}
141
142arrow::Status processTables(std::deque<File> &files, prometheus::Counter &rows_written) {
143 while (!files.empty())
144 ARROW_RETURN_NOT_OK(processFirstTables(files, rows_written));
145 return arrow::Status::OK();
146}
147
148int main(int argc, char *argv[]) {
149 std::filesystem::path cwd = std::filesystem::current_path();
150 std::filesystem::create_directory(cwd / "merged");
151
152 const char *prom_push_url = getenv("PROMETHEUS_PUSH_URL");
153 if (!prom_push_url || strlen(prom_push_url) == 0) {
154 std::cerr << "Error: no PROMETHEUS_PUSH_URL set!" << std::endl;
155 return EXIT_FAILURE;
156 }
157
158 std::string split_err;
159 auto split_prom_push_url = splitUrl(prom_push_url, &split_err);
160 if (!split_prom_push_url) {
161 std::cerr << "Could not process URL in environment variable PROMETHEUS_PUSH_URL: "
162 << split_err << std::endl;
163 return EXIT_FAILURE;
164 }
165 std::cout << "Prometheus Push URL: " << split_prom_push_url->schemehost << ":"
166 << split_prom_push_url->portpath << std::endl;
167
168 prometheus::Gateway gateway{split_prom_push_url->schemehost,
169 split_prom_push_url->portpath,
170 "oeuf-archiver"};
171
172 auto registry = std::make_shared<prometheus::Registry>();
173 prometheus::Gauge &rows_available = prometheus::BuildGauge()
174 .Name("archiver_rows_available")
175 .Help("Number of rows available to the archiver")
176 .Register(*registry)
177 .Add({});
178 prometheus::Counter &rows_written = prometheus::BuildCounter()
179 .Name("archiver_rows_written")
180 .Help("Number of rows written by the archiver")
181 .Register(*registry)
182 .Add({});
183 gateway.RegisterCollectable(registry);
184
185 std::deque<File> files;
186 for (auto const &dir_entry : std::filesystem::directory_iterator{cwd}) {
187 if (!dir_entry.is_regular_file()) continue;
188 std::filesystem::path filename = dir_entry.path().filename();
189 const std::string &filename_str = filename;
190 if (filename_str.starts_with("oeuf-") && filename_str.ends_with("+00:00.parquet")) {
191 try {
192 FileMetadata meta = readMetadataOf(filename);
193 File file = { .metadata = meta, .filename = filename };
194 files.push_back(file);
195
196 rows_available.Increment(static_cast<double>(meta.rows_written));
197 } catch (const std::exception &e) {
198 std::cerr << "Failed to read metadata of file " << filename << ": " << e.what() << std::endl;
199 return EXIT_FAILURE;
200 }
201 }
202 }
203
204 std::sort(files.begin(), files.end(),
205 [](const File &f1, const File &f2) { return f1.filename < f2.filename; });
206 arrow::Status st = processTables(files, rows_written);
207 if (!st.ok()) {
208 std::cerr << "Failed to process tables: " << st << std::endl;
209 return EXIT_FAILURE;
210 }
211
212 gateway.Push();
213}
diff --git a/src/bundleparquet/spliturl.cpp b/src/bundleparquet/spliturl.cpp
new file mode 100644
index 0000000..90fd821
--- /dev/null
+++ b/src/bundleparquet/spliturl.cpp
@@ -0,0 +1,203 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <cstring>
4#include <iostream>
5#include <optional>
6#include <sstream>
7#include <string>
8
9#include <curl/curl.h>
10
11#include "spliturl.hpp"
12
13// splitUrl takes a URL of the shape '[http[s]://]HOST[:PORT][/PATH]', and
14// splits it into two URLs:
15// - scheme + host -> '[http[s]://]HOST'
16// - port + path -> '[PORT][/PATH]'
17// In case an IPv6 address is provided, the host must enclosed in square
18// brackets. The zone ID may also be indicated. Note that in the resulting
19// parts, the colon preceding the port number is omitted. This is on purpose.
20std::optional<SplitUrl> splitUrl(const std::string &url, std::string *error) {
21 std::stringstream errs;
22 std::optional<SplitUrl> result;
23 char *processed = nullptr;
24 char *scheme = nullptr;
25 char *user = nullptr;
26 char *password = nullptr;
27 char *zoneid = nullptr;
28 char *query = nullptr;
29 char *fragment = nullptr;
30 CURLU *schemehost = nullptr;
31 char *schemehost_url = nullptr;
32 char *portpath_url = nullptr;
33
34 // Parse the URL, allowing the user to omit the scheme. CURL will use 'https'
35 // by default if no scheme is specified.
36
37 CURLU *parsed = curl_url();
38 CURLUcode rc = curl_url_set(parsed, CURLUPART_URL, url.c_str(), CURLU_DEFAULT_SCHEME);
39 if (rc != CURLUE_OK) {
40 errs << "Failed to parse URL: " << curl_url_strerror(rc);
41 goto Exit;
42 }
43
44 // As we parse the URL with the option CURLU_DEFAULT_SCHEME, the CURL API
45 // won't require the user to provide the scheme part of the URL. It will
46 // automatically default the scheme to https. However, we do not usually want
47 // it to default to HTTPS, but HTTP instead (as the use case, connecting to a
48 // PushGateway server, usually is served over a private network via HTTP).
49 //
50 // This is why we check if the scheme was put there by CURL and otherwise set
51 // it to HTTP. We also check for any other schemes that the user may have
52 // provided, and reject anything that is not http/https.
53 if (!url.starts_with("http://") && !url.starts_with("https://")) {
54 rc = curl_url_get(parsed, CURLUPART_SCHEME, &scheme, 0);
55 if (rc != CURLUE_OK) {
56 errs << "Could not get scheme from parsed URL: " << curl_url_strerror(rc);
57 goto Exit;
58 }
59 if (strcmp(scheme, "https")) {
60 errs << "Unexpected scheme" << scheme << "in provided URL (expected http or https)";
61 goto Exit;
62 }
63 rc = curl_url_set(parsed, CURLUPART_SCHEME, "http", 0);
64 if (rc != CURLUE_OK) {
65 errs << "Could not set URL scheme to http: " << curl_url_strerror(rc);
66 goto Exit;
67 }
68 }
69
70 // Turn the parsed URL back into a string.
71 rc = curl_url_get(parsed, CURLUPART_URL, &processed, 0);
72 if (rc != CURLUE_OK) {
73 errs << "Failed to output parsed URL: " << curl_url_strerror(rc);
74 goto Exit;
75 }
76
77 // This part of the code checks if no prohibited parts are present in the URL
78 // (basic auth: (user, password), query, fragment).
79
80 rc = curl_url_get(parsed, CURLUPART_USER, &user, 0);
81 if (rc == CURLUE_OK && strlen(user) != 0) {
82 errs << "Provided URL should not contain a user part";
83 goto Exit;
84 } else if (rc != CURLUE_NO_USER && rc != CURLUE_OK) {
85 errs << "Failed to get check user part existence in provided url: " << curl_url_strerror(rc);
86 goto Exit;
87 }
88
89 rc = curl_url_get(parsed, CURLUPART_PASSWORD, &password, 0);
90 if (rc == CURLUE_OK && strlen(password) != 0) {
91 errs << "Provided URL should not contain a password part";
92 goto Exit;
93 } else if (rc != CURLUE_NO_PASSWORD && rc != CURLUE_OK) {
94 errs << "Failed to get check password part existence in provided url: " << curl_url_strerror(rc);
95 goto Exit;
96 }
97
98 rc = curl_url_get(parsed, CURLUPART_QUERY, &query, 0);
99 if (rc == CURLUE_OK && strlen(query) != 0) {
100 errs << "Provided URL should not contain a query part";
101 goto Exit;
102 } else if (rc != CURLUE_NO_QUERY && rc != CURLUE_OK) {
103 errs << "Failed to get check query part existence in provided url: " << curl_url_strerror(rc);
104 goto Exit;
105 }
106
107 rc = curl_url_get(parsed, CURLUPART_FRAGMENT, &fragment, 0);
108 if (rc == CURLUE_OK && strlen(fragment) != 0) {
109 errs << "Provided URL should not contain a fragment part";
110 goto Exit;
111 } else if (rc != CURLUE_NO_FRAGMENT && rc != CURLUE_OK) {
112 errs << "Failed to get check fragment part existence in provided url: " << curl_url_strerror(rc);
113 goto Exit;
114 }
115
116 // Now that we know that the provided URL makes sense, we can start doing
117 // some arts and crafts. We get started by copying the parsed URL into
118 // schemehost and simply delete all parts which are not scheme + host.
119
120 schemehost = curl_url_dup(parsed);
121
122 // CURL BUG WORKAROUND: CURLUPART_ZONEID is NOT copied by curl_url_dup!
123 // ^ fixed in CURL 8.3.0 after https://curl.se/mail/lib-2023-07/0047.html
124 rc = curl_url_get(parsed, CURLUPART_ZONEID, &zoneid, 0);
125 if (rc == CURLUE_OK) {
126 rc = curl_url_set(schemehost, CURLUPART_ZONEID, zoneid, 0);
127 if (rc != CURLUE_OK) {
128 errs << "Could not copy zone ID to duplicated URL: " << curl_url_strerror(rc);
129 goto Exit;
130 }
131 }
132 rc = curl_url_set(schemehost, CURLUPART_PORT, nullptr, 0);
133 if (rc != CURLUE_OK) {
134 errs << "Could not unset port in duplicated URL: " << curl_url_strerror(rc);
135 goto Exit;
136 }
137 rc = curl_url_set(schemehost, CURLUPART_PATH, nullptr, 0);
138 if (rc != CURLUE_OK) {
139 errs << "Could not unset path in duplicated URL: " << curl_url_strerror(rc);
140 goto Exit;
141 }
142
143 // Okay, now we have the schemehost CURLU all ready to go. Note that a URL
144 // only consisting of a scheme and host is considered valid, so CURL will be
145 // more than happy to actually turn it into a string for us. Which is exactly
146 // what we do here :)
147
148 rc = curl_url_get(schemehost, CURLUPART_URL, &schemehost_url, 0);
149 if (rc != CURLUE_OK) {
150 errs << "Could not get scheme + host URL: " << curl_url_strerror(rc);
151 goto Exit;
152 }
153
154 // Remove any trailing slash after the scheme + host URL that CURL might have
155 // put there -- we still want to get a valid URL if we paste the port + path
156 // part behind it.
157
158 if (strlen(schemehost_url) > 0) {
159 if (schemehost_url[strlen(schemehost_url) - 1] != '/') {
160 errs << "Scheme + host URL does not end with a slash";
161 goto Exit;
162 }
163 schemehost_url[strlen(schemehost_url) - 1] = '\0';
164 }
165
166 // Look, this is really gross. Because the port + path part of the URL is not
167 // a valid URL itself, but the scheme + host should be a prefix of the full
168 // URL containing the port + path, we can simply check if it is indeed a
169 // prefix, and then strip it from the full URL, giving us the port + path
170 // (after deleting the colon preceding the port).
171
172 if (!std::string_view(processed).starts_with(schemehost_url)) {
173 errs << "Scheme + host URL is not a prefix of the processed URL";
174 goto Exit;
175 }
176
177 portpath_url = processed + strlen(schemehost_url);
178 // We should not have the colon before the port, prometheus-cpp inserts it
179 if (strlen(portpath_url) > 0 && portpath_url[0] == ':') portpath_url++;
180 // We do not need a trailing slash
181 if (strlen(portpath_url) > 0 && portpath_url[strlen(portpath_url)-1] == '/')
182 portpath_url[strlen(portpath_url)-1] = '\0';
183
184 // It has been done. BLECH
185 result = std::make_optional<SplitUrl>(schemehost_url, portpath_url);
186
187Exit:
188 curl_free(processed);
189 curl_free(scheme);
190 curl_free(user);
191 curl_free(password);
192 curl_free(query);
193 curl_free(fragment);
194 curl_free(zoneid);
195 curl_free(schemehost_url);
196 curl_url_cleanup(schemehost);
197 curl_url_cleanup(parsed);
198
199 if (!result && error)
200 *error = errs.str();
201
202 return result;
203}
diff --git a/src/bundleparquet/spliturl.hpp b/src/bundleparquet/spliturl.hpp
new file mode 100644
index 0000000..d8150e0
--- /dev/null
+++ b/src/bundleparquet/spliturl.hpp
@@ -0,0 +1,11 @@
// vim:set sw=2 ts=2 sts et:

#ifndef OEUF_BUNDLEPARQUET_SPLITURL_HPP
#define OEUF_BUNDLEPARQUET_SPLITURL_HPP

#include <optional>
#include <string>

// The two halves of a URL as produced by splitUrl.
struct SplitUrl {
  // Scheme and host, e.g. 'http://HOST'.
  std::string schemehost;
  // Port and path, e.g. 'PORT/PATH'.
  std::string portpath;
};

// Splits 'url' into its scheme + host and port + path parts. Returns an empty
// optional on failure; in that case, if 'error' is non-null, a description of
// the failure is stored in '*error'.
std::optional<SplitUrl> splitUrl(const std::string &url, std::string *error = nullptr);

#endif // OEUF_BUNDLEPARQUET_SPLITURL_HPP
diff --git a/src/filterkv6/.envrc b/src/filterkv6/.envrc
new file mode 100644
index 0000000..694e74f
--- /dev/null
+++ b/src/filterkv6/.envrc
@@ -0,0 +1,2 @@
1source_env ../../
2export DEVMODE=1
diff --git a/src/filterkv6/Makefile b/src/filterkv6/Makefile
new file mode 100644
index 0000000..13bb38e
--- /dev/null
+++ b/src/filterkv6/Makefile
@@ -0,0 +1,21 @@
1# Taken from:
2# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
3# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
4# 2023. [Online]. Available:
5# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
6CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
7 -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
8 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
9 -D_GLIBCXX_ASSERTIONS \
10 -fstrict-flex-arrays=3 \
11 -fstack-clash-protection -fstack-protector-strong
12LDFLAGS=-larrow -larrow_dataset -lparquet -ltmi8 -Wl,-z,defs \
13 -Wl,-z,nodlopen -Wl,-z,noexecstack \
14 -Wl,-z,relro -Wl,-z,now
15
16filterkv6: main.cpp
17 $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
18
# Remove the built binary. -f keeps 'make clean' from failing when there is
# nothing to remove (e.g. when it is run twice in a row).
.PHONY: clean
clean:
	rm -f filterkv6
diff --git a/src/filterkv6/main.cpp b/src/filterkv6/main.cpp
new file mode 100644
index 0000000..a32220a
--- /dev/null
+++ b/src/filterkv6/main.cpp
@@ -0,0 +1,106 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <chrono>
4#include <deque>
5#include <filesystem>
6#include <format>
7#include <fstream>
8#include <iostream>
9
10#include <arrow/api.h>
11#include <arrow/compute/api.h>
12#include <arrow/filesystem/api.h>
13#include <arrow/dataset/api.h>
14#include <arrow/io/api.h>
15
16#include <tmi8/kv6_parquet.hpp>
17
18namespace ds = arrow::dataset;
19namespace cp = arrow::compute;
20using namespace arrow;
21
22arrow::Status processTables(std::string lineno) {
23 auto filesystem = std::make_shared<fs::LocalFileSystem>();
24
25 fs::FileSelector selector;
26 selector.base_dir = std::filesystem::current_path();
27 selector.recursive = false;
28
29 auto format = std::static_pointer_cast<ds::FileFormat>(std::make_shared<ds::ParquetFileFormat>());
30
31 ARROW_ASSIGN_OR_RAISE(auto factory,
32 ds::FileSystemDatasetFactory::Make(filesystem, selector, format,
33 ds::FileSystemFactoryOptions()));
34
35 ARROW_ASSIGN_OR_RAISE(auto dataset, factory->Finish());
36
37 printf("Scanning dataset for line %s...\n", lineno.c_str());
38 // Read specified columns with a row filter
39 ARROW_ASSIGN_OR_RAISE(auto scan_builder, dataset->NewScan());
40 ARROW_RETURN_NOT_OK(scan_builder->Filter(cp::and_({
41 cp::equal(cp::field_ref("line_planning_number"), cp::literal(lineno)),
42 cp::is_valid(cp::field_ref("rd_x")),
43 cp::is_valid(cp::field_ref("rd_y")),
44 })));
45
46 ARROW_ASSIGN_OR_RAISE(auto scanner, scan_builder->Finish());
47 ARROW_ASSIGN_OR_RAISE(auto table, scanner->ToTable());
48
49 puts("Finished loading data, computing stable sort indices...");
50
51 arrow::Datum sort_indices;
52 cp::SortOptions sort_options;
53 sort_options.sort_keys = { cp::SortKey("timestamp" /* ascending by default */) };
54 ARROW_ASSIGN_OR_RAISE(sort_indices, cp::CallFunction("sort_indices", { table }, &sort_options));
55 puts("Finished computing stable sort indices, creating sorted table...");
56
57 arrow::Datum sorted;
58 ARROW_ASSIGN_OR_RAISE(sorted, cp::CallFunction("take", { table, sort_indices }));
59
60 puts("Writing sorted table to disk...");
61 ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*sorted.table(), "merged/oeuf-merged.parquet"));
62 puts("Syncing...");
63 sync();
64 puts("Done. Have a nice day.");
65
66 return arrow::Status::OK();
67}
68
// Warning shown both in the usage text and at program start.
#define NOTICE "Notice: This tool will fail if any non-Parquet files are present in the\n" \
               " current working directory. It does not load files which are present in\n" \
               " any possible subdirectories."

// printf-style usage text; the single %s is replaced by the program name.
const char help[] =
  "Usage: %s <LINENO>\n"
  "\n"
  " LINENO The LinePlanningNumber as in the KV1/KV6 data\n\n"
  NOTICE "\n";

// Prints the usage text for 'progname' to stdout and terminates the process
// with exit status 'code'. Never returns.
[[noreturn]] void exitHelp(const char *progname, int code = 1) {
  printf(help, progname);
  exit(code);
}
83
84int main(int argc, char *argv[]) {
85 const char *progname = argv[0];
86 if (argc != 2) {
87 puts("Error: incorrect number of arguments provided\n");
88 exitHelp(progname);
89 }
90 char *lineno = argv[1];
91 puts(NOTICE "\n");
92
93 std::filesystem::path cwd = std::filesystem::current_path();
94 std::filesystem::create_directory(cwd / "merged");
95
96 puts("Running this program may take a while, especially on big datasets. If you're\n"
97 "processing the data of a single bus line over the course of multiple months,\n"
98 "you may see memory usage of up to 10 GiB. Make sure that you have sufficient\n"
99 "RAM available, to avoid overloading and subsequently freezing your system.\n");
100
101 arrow::Status st = processTables(std::string(lineno));
102 if (!st.ok()) {
103 std::cerr << "Failed to process tables: " << st << std::endl;
104 return EXIT_FAILURE;
105 }
106}
diff --git a/src/querykv1/.envrc b/src/querykv1/.envrc
new file mode 100644
index 0000000..694e74f
--- /dev/null
+++ b/src/querykv1/.envrc
@@ -0,0 +1,2 @@
1source_env ../../
2export DEVMODE=1
diff --git a/src/querykv1/.gitignore b/src/querykv1/.gitignore
new file mode 100644
index 0000000..5761abc
--- /dev/null
+++ b/src/querykv1/.gitignore
@@ -0,0 +1 @@
*.o
diff --git a/src/querykv1/Makefile b/src/querykv1/Makefile
new file mode 100644
index 0000000..a8791f5
--- /dev/null
+++ b/src/querykv1/Makefile
@@ -0,0 +1,28 @@
1# Taken from:
2# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
3# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
4# 2023. [Online]. Available:
5# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
6CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
7 -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
8 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
9 -D_GLIBCXX_ASSERTIONS \
10 -fstrict-flex-arrays=3 \
11 -fstack-clash-protection -fstack-protector-strong
12LDFLAGS=-ltmi8 -Wl,-z,defs \
13 -Wl,-z,nodlopen -Wl,-z,noexecstack \
14 -Wl,-z,relro -Wl,-z,now
15
16HDRS=cliopts.hpp daterange.hpp joparoute.hpp journeyinfo.hpp journeyroute.hpp journeys.hpp schedule.hpp
17SRCS=main.cpp cliopts.cpp daterange.cpp joparoute.cpp journeyinfo.cpp journeyroute.cpp journeys.cpp schedule.cpp
18OBJS=$(patsubst %.cpp,%.o,$(SRCS))
19
20%.o: %.cpp $(HDRS)
21 $(CXX) -c -o $@ $< $(CXXFLAGS)
22
23querykv1: $(OBJS)
24 $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)
25
# Remove the binary and the intermediate object files built by the %.o rule.
# -f keeps 'make clean' from failing when there is nothing to remove.
.PHONY: clean
clean:
	rm -f querykv1 $(OBJS)
diff --git a/src/querykv1/cliopts.cpp b/src/querykv1/cliopts.cpp
new file mode 100644
index 0000000..bef7a98
--- /dev/null
+++ b/src/querykv1/cliopts.cpp
@@ -0,0 +1,456 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <cstdlib>
4#include <cstdio>
5#include <string>
6#include <string_view>
7
8#include <getopt.h>
9
10#include "cliopts.hpp"
11
12using namespace std::string_view_literals;
13
14const char *opt_set = "";
15const char *opt_unset = nullptr;
16
17const char help[] = R"(Usage: %1$s [OPTIONS] <COMMAND>
18
19Global Options:
20 --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin
21 -h, --help Print this help
22
23Commands:
24 joparoute Generate CSV for journey pattern route
25 journeyinfo Print some information on a journey
26 journeyroute Generate CSV for journey route
27 journeys List journeys of a specific line going from stop A to B
28 schedule Generate schedule
29)";
30
31const char joparoute_help[] = R"(Usage: %1$s joparoute --line <NUMBER> --jopa <CODE> [OPTIONS]
32
33Options:
34 --line <NUMBER> Line planning number as in schedule
35 --jopa <CODE> Journey pattern code as in KV1 data
36 -o <PATH> Path of file to write to, '-' for stdout
37
38Global Options:
39 --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin
40 -h, --help Print this help
41)";
42
// Usage text for the journeyroute subcommand; %1$s is the program name.
// Both --line and --journey are mandatory (journeyRouteValidateOptions
// rejects the command line when either is missing), so the usage line names
// them both.
const char journeyroute_help[] = R"(Usage: %1$s journeyroute --line <NUMBER> --journey <NUMBER> [OPTIONS]

Options:
 --line <NUMBER> Line planning number as in KV1 data
 --journey <NUMBER> Journey number as in KV1 data
 -o <PATH> Path of file to write to, '-' for stdout

Global Options:
 --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin
 -h, --help Print this help
)";
54
55const char journeys_help[] = R"(Usage: %1$s journeys --line <NUMBER> --begin <STOP> --end <STOP> [OPTIONS]
56
57For the --begin and --end arguments, use the following format:
58 --begin/--end stop:<USRSTOP CODE>
59 --begin/--end star:<USRSTAR CODE>
60
61Options:
62 --begin <STOP> User stop code/area of stop the journey should begin at
63 --end <STOP> User stop code/area of stop the journey should end at
64 --line <NUMBER> Line planning number to filter on
65 -o <PATH> Path of file to write to, '-' for stdout
66
67Global Options:
68 --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin
69 -h, --help Print this help
70)";
71
72const char journeyinfo_help[] = R"(Usage: %1$s journeyinfo --line <NUMBER> --journey <NUMBER> [OPTIONS]
73
74Options:
75 --line <NUMBER> Line planning number to filter on
76 --journey <NUMBER> Journey number as in schedule
77
78Global Options:
79 --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin
80 -h, --help Print this help
81)";
82
83const char schedule_help[] = R"(Usage: %1$s schedule --line <NUMBER> [OPTIONS]
84
85Options:
86 --line <NUMBER> Line planning number to generate schedule for
87 -o <PATH> Path of file to write to, '-' for stdout
88
89Global Options:
90 --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin
91 -h, --help Print this help
92)";
93
94void journeyRouteValidateOptions(const char *progname, Options *options) {
95#define X(name, argument, long_, short_) \
96 if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \
97 && #name != "journey_number"sv && #name != "help"sv && #name != "output_file_path"sv) \
98 if (options->name) { \
99 if (long_) { \
100 if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeyroute subcommand\n\n", progname, static_cast<const char *>(long_), short_); \
101 else fprintf(stderr, "%s: unexpected flag --%s for journeyroute subcommand\n\n", progname, static_cast<const char *>(long_)); \
102 } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeyroute subcommand\n\n", progname, short_); \
103 fprintf(stderr, journeyroute_help, progname); \
104 exit(1); \
105 }
106 LONG_OPTIONS
107 SHORT_OPTIONS
108#undef X
109
110 if (options->positional.size() > 0) {
111 fprintf(stderr, "%s: unexpected positional argument(s) for journeyroute subcommand\n\n", progname);
112 for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos);
113 fprintf(stderr, journeyroute_help, progname);
114 exit(1);
115 }
116
117 if (!options->kv1_file_path)
118 options->kv1_file_path = "-";
119 if (!options->output_file_path)
120 options->output_file_path = "-";
121 if (options->kv1_file_path == ""sv) {
122 fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname);
123 fprintf(stderr, journeyroute_help, progname);
124 exit(1);
125 }
126 if (options->output_file_path == ""sv) {
127 fprintf(stderr, "%s: output file path cannot be empty\n\n", progname);
128 fprintf(stderr, journeyroute_help, progname);
129 exit(1);
130 }
131 if (!options->journey_number || options->journey_number == ""sv) {
132 fprintf(stderr, "%s: journey number must be provided\n\n", progname);
133 fprintf(stderr, journeyroute_help, progname);
134 exit(1);
135 }
136 if (!options->line_planning_number || options->line_planning_number == ""sv) {
137 fprintf(stderr, "%s: line planning number must be provided\n\n", progname);
138 fprintf(stderr, journeyroute_help, progname);
139 exit(1);
140 }
141}
142
143void scheduleValidateOptions(const char *progname, Options *options) {
144#define X(name, argument, long_, short_) \
145 if (#name != "kv1_file_path"sv && #name != "help"sv \
146 && #name != "line_planning_number"sv && #name != "output_file_path"sv) \
147 if (options->name) { \
148 if (long_) { \
149 if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for schedule subcommand\n\n", progname, static_cast<const char *>(long_), short_); \
150 else fprintf(stderr, "%s: unexpected flag --%s for schedule subcommand\n\n", progname, static_cast<const char *>(long_)); \
151 } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for schedule subcommand\n\n", progname, short_); \
152 fprintf(stderr, schedule_help, progname); \
153 exit(1); \
154 }
155 LONG_OPTIONS
156 SHORT_OPTIONS
157#undef X
158
159 if (options->positional.size() > 0) {
160 fprintf(stderr, "%s: unexpected positional argument(s) for schedule subcommand\n\n", progname);
161 for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos);
162 fprintf(stderr, schedule_help, progname);
163 exit(1);
164 }
165
166 if (!options->kv1_file_path)
167 options->kv1_file_path = "-";
168 if (!options->output_file_path)
169 options->output_file_path = "-";
170 if (options->kv1_file_path == ""sv) {
171 fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname);
172 fprintf(stderr, schedule_help, progname);
173 exit(1);
174 }
175 if (options->output_file_path == ""sv) {
176 fprintf(stderr, "%s: output file path cannot be empty\n\n", progname);
177 fprintf(stderr, schedule_help, progname);
178 exit(1);
179 }
180 if (!options->line_planning_number || options->line_planning_number == ""sv) {
181 fprintf(stderr, "%s: line planning number must be provided\n\n", progname);
182 fprintf(stderr, schedule_help, progname);
183 exit(1);
184 }
185}
186
187void journeysValidateOptions(const char *progname, Options *options) {
188#define X(name, argument, long_, short_) \
189 if (#name != "kv1_file_path"sv && #name != "help"sv \
190 && #name != "line_planning_number"sv && #name != "output_file_path"sv \
191 && #name != "begin_stop_code"sv && #name != "end_stop_code"sv) \
192 if (options->name) { \
193 if (long_) { \
194 if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeys subcommand\n\n", progname, static_cast<const char *>(long_), short_); \
195 else fprintf(stderr, "%s: unexpected flag --%s for journeys subcommand\n\n", progname, static_cast<const char *>(long_)); \
196 } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeys subcommand\n\n", progname, short_); \
197 fprintf(stderr, journeys_help, progname); \
198 exit(1); \
199 }
200 LONG_OPTIONS
201 SHORT_OPTIONS
202#undef X
203
204 if (options->positional.size() > 0) {
205 fprintf(stderr, "%s: unexpected positional argument(s) for journeys subcommand\n\n", progname);
206 for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos);
207 fprintf(stderr, journeys_help, progname);
208 exit(1);
209 }
210
211 if (!options->kv1_file_path)
212 options->kv1_file_path = "-";
213 if (!options->output_file_path)
214 options->output_file_path = "-";
215 if (options->kv1_file_path == ""sv) {
216 fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname);
217 fprintf(stderr, journeys_help, progname);
218 exit(1);
219 }
220 if (options->output_file_path == ""sv) {
221 fprintf(stderr, "%s: output file path cannot be empty\n\n", progname);
222 fprintf(stderr, journeys_help, progname);
223 exit(1);
224 }
225 if (!options->line_planning_number || options->line_planning_number == ""sv) {
226 fprintf(stderr, "%s: line planning number must be provided\n\n", progname);
227 fprintf(stderr, journeys_help, progname);
228 exit(1);
229 }
230 if (!options->begin_stop_code || options->begin_stop_code == ""sv) {
231 fprintf(stderr, "%s: start user stop code must be provided\n\n", progname);
232 fprintf(stderr, journeys_help, progname);
233 exit(1);
234 }
235 if (!options->end_stop_code || options->end_stop_code == ""sv) {
236 fprintf(stderr, "%s: end user stop code must be provided\n\n", progname);
237 fprintf(stderr, journeys_help, progname);
238 exit(1);
239 }
240 if (!std::string_view(options->begin_stop_code).starts_with("star:")
241 && !std::string_view(options->begin_stop_code).starts_with("stop:")) {
242 fprintf(stderr, "%s: begin user stop code must be prefixed with star:/stop:\n\n", progname);
243 fprintf(stderr, journeys_help, progname);
244 exit(1);
245 }
246 if (!std::string_view(options->end_stop_code).starts_with("star:")
247 && !std::string_view(options->end_stop_code).starts_with("stop:")) {
248 fprintf(stderr, "%s: end user stop code must be prefixed with star:/stop:\n\n", progname);
249 fprintf(stderr, journeys_help, progname);
250 exit(1);
251 }
252}
253
254void journeyInfoValidateOptions(const char *progname, Options *options) {
255#define X(name, argument, long_, short_) \
256 if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \
257 && #name != "journey_number"sv && #name != "help"sv) \
258 if (options->name) { \
259 if (long_) { \
260 if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeyinfo subcommand\n\n", progname, static_cast<const char *>(long_), short_); \
261 else fprintf(stderr, "%s: unexpected flag --%s for journeyinfo subcommand\n\n", progname, static_cast<const char *>(long_)); \
262 } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeyinfo subcommand\n\n", progname, short_); \
263 fprintf(stderr, journeyinfo_help, progname); \
264 exit(1); \
265 }
266 LONG_OPTIONS
267 SHORT_OPTIONS
268#undef X
269
270 if (options->positional.size() > 0) {
271 fprintf(stderr, "%s: unexpected positional argument(s) for journeyinfo subcommand\n\n", progname);
272 for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos);
273 fprintf(stderr, journeyinfo_help, progname);
274 exit(1);
275 }
276
277 if (!options->kv1_file_path)
278 options->kv1_file_path = "-";
279 if (options->kv1_file_path == ""sv) {
280 fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname);
281 fprintf(stderr, journeyinfo_help, progname);
282 exit(1);
283 }
284 if (!options->journey_number || options->journey_number == ""sv) {
285 fprintf(stderr, "%s: journey number must be provided\n\n", progname);
286 fprintf(stderr, journeyinfo_help, progname);
287 exit(1);
288 }
289 if (!options->line_planning_number || options->line_planning_number == ""sv) {
290 fprintf(stderr, "%s: line planning number must be provided\n\n", progname);
291 fprintf(stderr, journeyinfo_help, progname);
292 exit(1);
293 }
294}
295
296void jopaRouteValidateOptions(const char *progname, Options *options) {
297#define X(name, argument, long_, short_) \
298 if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \
299 && #name != "journey_pattern_code"sv && #name != "help"sv && #name != "output_file_path"sv) \
300 if (options->name) { \
301 if (long_) { \
302 if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for joparoute subcommand\n\n", progname, static_cast<const char *>(long_), short_); \
303 else fprintf(stderr, "%s: unexpected flag --%s for joparoute subcommand\n\n", progname, static_cast<const char *>(long_)); \
304 } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for joparoute subcommand\n\n", progname, short_); \
305 fprintf(stderr, joparoute_help, progname); \
306 exit(1); \
307 }
308 LONG_OPTIONS
309 SHORT_OPTIONS
310#undef X
311
312 if (options->positional.size() > 0) {
313 fprintf(stderr, "%s: unexpected positional argument(s) for joparoute subcommand\n\n", progname);
314 for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos);
315 fprintf(stderr, joparoute_help, progname);
316 exit(1);
317 }
318
319 if (!options->kv1_file_path)
320 options->kv1_file_path = "-";
321 if (!options->output_file_path)
322 options->output_file_path = "-";
323 if (options->kv1_file_path == ""sv) {
324 fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname);
325 fprintf(stderr, joparoute_help, progname);
326 exit(1);
327 }
328 if (options->output_file_path == ""sv) {
329 fprintf(stderr, "%s: output file path cannot be empty\n\n", progname);
330 fprintf(stderr, joparoute_help, progname);
331 exit(1);
332 }
333 if (!options->journey_pattern_code || options->journey_pattern_code == ""sv) {
334 fprintf(stderr, "%s: journey pattern code must be provided\n\n", progname);
335 fprintf(stderr, joparoute_help, progname);
336 exit(1);
337 }
338 if (!options->line_planning_number || options->line_planning_number == ""sv) {
339 fprintf(stderr, "%s: line planning number must be provided\n\n", progname);
340 fprintf(stderr, joparoute_help, progname);
341 exit(1);
342 }
343}
344
345struct ShortFlag {
346 int has_arg;
347 int c;
348};
349
350template<ShortFlag ...flags>
351const std::string mkargarr =
352 (std::string()
353 + ...
354 + (flags.c == 0
355 ? ""
356 : std::string((const char[]){ flags.c, '\0' })
357 + (flags.has_arg == required_argument
358 ? ":"
359 : flags.has_arg == optional_argument
360 ? "::"
361 : "")));
362
363#define X(name, has_arg, long_, short_) ShortFlag(has_arg, short_),
364const std::string argarr = mkargarr<SHORT_OPTIONS LONG_OPTIONS ShortFlag(no_argument, 0)>;
365#undef X
366
367Options parseOptions(int argc, char *argv[]) {
368 const char *progname = argv[0];
369
370 // Struct with options for augmentkv6.
371 Options options;
372
373 static option long_options[] = {
374#define X(name, argument, long_, short_) { long_, argument, nullptr, short_ },
375 LONG_OPTIONS
376#undef X
377 { 0 },
378 };
379
380 int c;
381 int option_index = 0;
382 bool error = false;
383 while ((c = getopt_long(argc, argv, argarr.c_str(), long_options, &option_index)) != -1) {
384 // If a long option was used, c corresponds with val. We have val = 0 for
385 // options which have no short alternative, so checking for c = 0 gives us
386 // whether a long option with no short alternative was used.
387 // Below, we check for c = 'h', which corresponds with the long option
388 // '--help', for which val = 'h'.
389 if (c == 0) {
390 const char *name = long_options[option_index].name;
391#define X(opt_name, opt_has_arg, opt_long, opt_short) \
392 if (name == opt_long ## sv) { options.opt_name = optarg; continue; }
393 LONG_OPTIONS
394#undef X
395 error = true;
396 }
397#define X(opt_name, opt_has_arg, opt_long, opt_short) \
398 if (c == opt_short) { options.opt_name = optarg ? optarg : opt_set; continue; }
399 LONG_OPTIONS
400 SHORT_OPTIONS
401#undef X
402 error = true;
403 }
404
405 if (optind < argc)
406 options.subcommand = argv[optind++];
407 while (optind < argc)
408 options.positional.push_back(argv[optind++]);
409
410 if (options.subcommand
411 && options.subcommand != "schedule"sv
412 && options.subcommand != "joparoute"sv
413 && options.subcommand != "journeyinfo"sv
414 && options.subcommand != "journeyroute"sv
415 && options.subcommand != "journeys"sv) {
416 fprintf(stderr, "%s: unknown subcommand '%s'\n\n", progname, options.subcommand);
417 fprintf(stderr, help, progname);
418 exit(1);
419 }
420 if (options.subcommand && error) {
421 fputc('\n', stderr);
422 if (options.subcommand == "joparoute"sv) fprintf(stderr, joparoute_help, progname);
423 if (options.subcommand == "journeyinfo"sv) fprintf(stderr, journeyinfo_help, progname);
424 if (options.subcommand == "journeyroute"sv) fprintf(stderr, journeyroute_help, progname);
425 if (options.subcommand == "journeys"sv) fprintf(stderr, journeys_help, progname);
426 if (options.subcommand == "schedule"sv) fprintf(stderr, schedule_help, progname);
427 exit(1);
428 }
429 if (error || !options.subcommand) {
430 if (!options.subcommand) fprintf(stderr, "%s: no subcommand provided\n", progname);
431 fputc('\n', stderr);
432 fprintf(stderr, help, progname);
433 exit(1);
434 }
435 if (options.help) {
436 if (options.subcommand == "joparoute"sv) fprintf(stderr, joparoute_help, progname);
437 if (options.subcommand == "journeyinfo"sv) fprintf(stderr, journeyinfo_help, progname);
438 if (options.subcommand == "journeyroute"sv) fprintf(stderr, journeyroute_help, progname);
439 if (options.subcommand == "journeys"sv) fprintf(stderr, journeys_help, progname);
440 if (options.subcommand == "schedule"sv) fprintf(stderr, schedule_help, progname);
441 exit(0);
442 }
443
444 if (options.subcommand == "joparoute"sv)
445 jopaRouteValidateOptions(progname, &options);
446 if (options.subcommand == "journeyinfo"sv)
447 journeyInfoValidateOptions(progname, &options);
448 if (options.subcommand == "journeyroute"sv)
449 journeyRouteValidateOptions(progname, &options);
450 if (options.subcommand == "journeys"sv)
451 journeysValidateOptions(progname, &options);
452 if (options.subcommand == "schedule"sv)
453 scheduleValidateOptions(progname, &options);
454
455 return options;
456}
diff --git a/src/querykv1/cliopts.hpp b/src/querykv1/cliopts.hpp
new file mode 100644
index 0000000..df8630e
--- /dev/null
+++ b/src/querykv1/cliopts.hpp
@@ -0,0 +1,35 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_CLIOPTS_HPP
4#define OEUF_QUERYKV1_CLIOPTS_HPP
5
6#include <vector>
7
// X-macro table of the long command-line options. Each row is
//   X(name, req/opt/no arg, long, short)
// where `name` becomes a field of Options, `long` is the long option name and
// `short` is the short option character (0 when there is none). The second
// column presumably holds getopt_long's has_arg constants -- confirm against
// the option parser.
//
// The column legend is deliberately kept out of the macro body: a comment line
// without a trailing backslash would terminate the #define and leave the X()
// rows stranded at file scope.
#define LONG_OPTIONS \
  X(kv1_file_path,        required_argument, "kv1",     0  ) \
  X(line_planning_number, required_argument, "line",    0  ) \
  X(journey_number,       required_argument, "journey", 0  ) \
  X(journey_pattern_code, required_argument, "jopa",    0  ) \
  X(begin_stop_code,      required_argument, "begin",   0  ) \
  X(end_stop_code,        required_argument, "end",     0  ) \
  X(help,                 no_argument,       "help",    'h')

// X-macro table of the options that only have a short form.
#define SHORT_OPTIONS \
  X(output_file_path, required_argument, nullptr, 'o')

// Parsed command line. Every option listed in LONG_OPTIONS and SHORT_OPTIONS
// gets one `const char *` field named after its first column; all fields start
// out as nullptr.
struct Options {
  const char *subcommand = nullptr;
  std::vector<const char *> positional;
#define X(name, argument, long_, short_) const char *name = nullptr;
  LONG_OPTIONS
  SHORT_OPTIONS
#undef X
};
29
30extern const char *opt_set;
31extern const char *opt_unset;
32
33Options parseOptions(int argc, char *argv[]);
34
35#endif // OEUF_QUERYKV1_CLIOPTS_HPP
diff --git a/src/querykv1/daterange.cpp b/src/querykv1/daterange.cpp
new file mode 100644
index 0000000..5ce42bf
--- /dev/null
+++ b/src/querykv1/daterange.cpp
@@ -0,0 +1,91 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include "daterange.hpp"
4
5static std::chrono::year_month_day nextDay(std::chrono::year_month_day ymd) {
6 return std::chrono::sys_days(ymd) + std::chrono::days(1);
7}
8
9// DateRange expresses the date range [from, thru].
10DateRange::Iterator &DateRange::Iterator::operator++() {
11 ymd_ = nextDay(ymd_);
12 return *this;
13}
14
15std::chrono::year_month_day DateRange::Iterator::operator*() const {
16 return ymd_;
17}
18
19std::chrono::year_month_day DateRange::Iterator::ymd() const {
20 return ymd_;
21}
22
23DateRange::Iterator::Iterator(std::chrono::year_month_day ymd) : ymd_(ymd) {}
24
25DateRange::DateRange(std::chrono::year_month_day from, std::chrono::year_month_day thru)
26 : from_(from), thru_(thru)
27{}
28
29DateRange::Iterator DateRange::begin() const {
30 return DateRange::Iterator(from_);
31}
32
33DateRange::Iterator DateRange::end() const {
34 return DateRange::Iterator(nextDay(thru_));
35}
36
37bool DateRange::valid() const {
38 return from_ <= thru_;
39}
40
41std::chrono::year_month_day DateRange::from() const {
42 return from_;
43}
44
45std::chrono::year_month_day DateRange::thru() const {
46 return thru_;
47}
48
49bool operator==(const DateRange::Iterator a, const DateRange::Iterator b) {
50 return *a == *b;
51}
52
53DateRangeSeq::DateRangeSeq(std::initializer_list<DateRange> ranges)
54 : DateRangeSeq(ranges.begin(), ranges.end())
55{}
56
57DateRangeSeq DateRangeSeq::clampFrom(std::chrono::year_month_day from) const {
58 std::vector<DateRange> new_ranges;
59 new_ranges.reserve(ranges_.size());
60 for (const DateRange range : ranges_) {
61 if (range.from() < from) {
62 if (range.thru() < from)
63 continue;
64 new_ranges.emplace_back(from, range.thru());
65 }
66 new_ranges.push_back(range);
67 }
68 return DateRangeSeq(new_ranges.begin(), new_ranges.end());
69}
70
71DateRangeSeq DateRangeSeq::clampThru(std::chrono::year_month_day thru) const {
72 std::vector<DateRange> new_ranges;
73 new_ranges.reserve(ranges_.size());
74 for (const DateRange range : ranges_) {
75 if (range.thru() > thru) {
76 if (range.from() > thru)
77 continue;
78 new_ranges.emplace_back(range.from(), thru);
79 }
80 new_ranges.push_back(range);
81 }
82 return DateRangeSeq(new_ranges.begin(), new_ranges.end());
83}
84
85std::vector<DateRange>::const_iterator DateRangeSeq::begin() const {
86 return ranges_.begin();
87}
88
89std::vector<DateRange>::const_iterator DateRangeSeq::end() const {
90 return ranges_.end();
91}
diff --git a/src/querykv1/daterange.hpp b/src/querykv1/daterange.hpp
new file mode 100644
index 0000000..e34c39c
--- /dev/null
+++ b/src/querykv1/daterange.hpp
@@ -0,0 +1,118 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_DATERANGE_HPP
4#define OEUF_QUERYKV1_DATERANGE_HPP
5
6#include <cassert>
7#include <chrono>
8#include <concepts>
9#include <iterator>
10#include <utility>
11#include <vector>
12
13// DateRange expresses the date range [from, thru].
14class DateRange {
15 public:
16 class Iterator {
17 friend class DateRange;
18
19 public:
20 Iterator &operator++();
21
22 std::chrono::year_month_day operator*() const;
23 std::chrono::year_month_day ymd() const;
24
25 private:
26 explicit Iterator(std::chrono::year_month_day ymd);
27
28 std::chrono::year_month_day ymd_;
29 };
30
31 explicit DateRange(std::chrono::year_month_day from, std::chrono::year_month_day thru);
32
33 Iterator begin() const;
34 Iterator end() const;
35 bool valid() const;
36 std::chrono::year_month_day from() const;
37 std::chrono::year_month_day thru() const;
38
39 private:
40 std::chrono::year_month_day from_;
41 std::chrono::year_month_day thru_;
42};
43
44bool operator==(const DateRange::Iterator a, const DateRange::Iterator b);
45
46template<typename Tp, typename T>
47concept DerefsTo = requires(Tp p) {
48 { *p } -> std::convertible_to<T>;
49};
50
51class DateRangeSeq {
52 // The way LE and GE are ordered makes a difference for how the sorting
53 // (insertion based on lower_bound) works. Do not carelessly reorder this.
54 enum LeGe {
55 GE, // >=
56 LE, // <=
57 };
58
59 std::vector<DateRange> ranges_;
60
61 public:
62 template<std::input_iterator InputIt>
63 requires DerefsTo<InputIt, DateRange>
64 explicit DateRangeSeq(InputIt begin, InputIt end) {
65 // We convert every inclusive date range [x, y] into (x, >=) and (y, <=)
66 // and put these into a list, using binary search to make sure that these
67 // stay ordered. We then reduce this list, removing tautological
68 // predicates, giving us a final list of ranges that do not overlap.
69
70 std::vector<std::pair<std::chrono::year_month_day, LeGe>> preds;
71
72 size_t n = 0;
73 for (auto it = begin; it != end; it++) {
74 auto &range = *it;
75 if (!range.valid()) continue;
76
77 auto a = std::make_pair(range.from(), GE);
78 auto b = std::make_pair(range.thru(), LE);
79 preds.insert(std::lower_bound(preds.begin(), preds.end(), a), a);
80 preds.insert(std::lower_bound(preds.begin(), preds.end(), b), b);
81
82 n++;
83 }
84
85 if (preds.empty())
86 return;
87
88 assert(preds.size() >= 2);
89 assert(preds.front().second == GE);
90 assert(preds.back().second == LE);
91
92 std::chrono::year_month_day begin_ymd = preds[0].first;
93 for (size_t i = 1; i < preds.size(); i++) {
94 if (preds[i].second == LE && (i + 1 == preds.size() || preds[i + 1].second == GE)) {
95 std::chrono::year_month_day end_ymd = preds[i].first;
96 if (!ranges_.empty() && ranges_.back().thru() == begin_ymd)
97 ranges_.back() = DateRange(ranges_.back().from(), end_ymd);
98 else
99 ranges_.push_back(DateRange(begin_ymd, end_ymd));
100 if (i + 1 != preds.size()) {
101 begin_ymd = preds[i + 1].first;
102 i++;
103 }
104 }
105 }
106 }
107
108 explicit DateRangeSeq(std::initializer_list<DateRange> ranges);
109
110 DateRangeSeq clampFrom(std::chrono::year_month_day from) const;
111 DateRangeSeq clampThru(std::chrono::year_month_day thru) const;
112
113 public:
114 std::vector<DateRange>::const_iterator begin() const;
115 std::vector<DateRange>::const_iterator end() const;
116};
117
118#endif // OEUF_QUERYKV1_DATERANGE_HPP
diff --git a/src/querykv1/grammar.abnf b/src/querykv1/grammar.abnf
new file mode 100644
index 0000000..1c93760
--- /dev/null
+++ b/src/querykv1/grammar.abnf
@@ -0,0 +1,44 @@
1; This grammar does *not* allow fields to contain LF, unless the entire content
2; of the field is quoted. The file is simply rejected otherwise.
3; I took the liberty to take some inspiration from the somewhat similar IETF RFC 4180.
4
5document = [header NEWLINE] (comment / record / empty-line) *(NEWLINE (comment / record / empty-line)) [NEWLINE] / header
6
7header = OPENBRACK *NOTCRLF
8comment = SEMICOLON *NOTCRLF
9
10empty-line = *WHITESPACE
11
12record = field *(PIPE field)
13field = *WHITESPACE field-data *WHITESPACE
14field-data = escaped / unescaped
15
16; Unescaped fields are also allowed to contain double quotes,
17; they are just not interpreted in any special way.
18escaped = DQUOTE *(TEXTDATA / WHITESPACE / NEWLINE / PIPE / 2DQUOTE) DQUOTE
19unescaped = [TEXTDATA *(*WHITESPACE (TEXTDATA / DQUOTE))]
20
21HTAB = %x09 ; <horizontal tab, "\t">
22LF = %x0A ; <line feed, "\n">
23VTAB = %x0B ; <vertical tab, "\v">
24FF = %x0C ; <form feed, "\f">
25CR = %x0D ; <carriage return, "\r">
26SPACE = %x20 ; <space, " ">
27DQUOTE = %x22 ; "
28SEMICOLON = %x3B ; ;
29OPENBRACK = %x5B ; [
30PIPE = %x7C ; |
31
32; All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE, OPENBRACK
33; Semicolon is included, as comments are only defined as 'lines starting with a semicolon'.
34; So it should be fine if a semicolon is part of a field, the rest of the line would not
35; be interpreted as a comment in that case.
36TEXTDATA = %x00-08 / %x0E-1F / %x21 / %x23-5A / %x5C-7B / %x7D-10FFFF
37
38; Not including LF here even though TMI8/KV1 does not officially consider it
39; a newline, as newlines are defined as 'CR optionally followed by LF'
40WHITESPACE = SPACE / FF / HTAB / VTAB
41
42; All codepoints excluding CR and LF
43NOTCRLF = %x00-09 / %x0B-0C / %x0E-10FFFF
44NEWLINE = CR [LF]
diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf
new file mode 100644
index 0000000..94f8cde
--- /dev/null
+++ b/src/querykv1/grammar.ebnf
@@ -0,0 +1,47 @@
1/* This grammar does allow fields to contain stray LFs, not after any specific
2 * CR. I took the liberty to take some inspiration from the somewhat similar
3 * IETF RFC 4180.
4 */
5document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header
6
7header ::= OPENBRACK NOTCR*
8comment ::= SEMICOLON NOTCR*
9
10empty-line ::= WHITESPACE*
11
12record ::= field (PIPE field)*
13field ::= WHITESPACE* field-data WHITESPACE*
14field-data ::= DQUOTE escaped DQUOTE | unescaped
15
16/* Unescaped fields are also allowed to contain double quotes, they are just
17 * not interpreted in any special way.
18 */
19escaped ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)*
20unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)?
21
22HTAB ::= #x09 /* <horizontal tab, "\t"> */
23LF ::= #x0A /* <line feed, "\n"> */
24VTAB ::= #x0B /* <vertical tab, "\v"> */
25FF ::= #x0C /* <form feed, "\f"> */
26CR ::= #x0D /* <carriage return, "\r"> */
27SPACE ::= #x20 /* <space, " "> */
28DQUOTE ::= #x22 /* " */
29SEMICOLON ::= #x3B /* ; */
30OPENBRACK ::= #x5B /* [ */
31PIPE ::= #x7C /* | */
32
33/* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE, OPENBRACK.
34 * Semicolon is included, as comments are only defined as 'lines starting with
35 * a semicolon'. So it should be fine if a semicolon is part of a field, the
36 * rest of the line would not be interpreted as a comment in that case.
37 */
38TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF]
39
40/* Including LF here as TMI8/KV1 does not consider it a newline,
41 * as newlines are defined as 'CR optionally followed by LF'
42 */
43WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB
44
45/* All codepoints excluding CR (LF is allowed here) */
46NOTCR ::= [#x00-#x0C#x0E-#x10FFFF]
47NEWLINE ::= CR LF?
diff --git a/src/querykv1/grammar.ebnf.bak b/src/querykv1/grammar.ebnf.bak
new file mode 100644
index 0000000..b5acbf5
--- /dev/null
+++ b/src/querykv1/grammar.ebnf.bak
@@ -0,0 +1,23 @@
1document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header
2header ::= OPENBRACK NOTCRLF*
3comment ::= SEMICOLON NOTCRLF*
4empty-line ::= WHITESPACE*
5record ::= field (PIPE field)*
6field ::= WHITESPACE* field-data WHITESPACE*
7field-data ::= escaped | unescaped
8escaped ::= DQUOTE (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* DQUOTE
9unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)?
10HTAB ::= #x09
11LF ::= #x0A
12VTAB ::= #x0B
13FF ::= #x0C
14CR ::= #x0D
15SPACE ::= #x20
16DQUOTE ::= #x22
17SEMICOLON ::= #x3B
18OPENBRACK ::= #x5B
19PIPE ::= #x7C
20WHITESPACE ::= SPACE | FF | HTAB | VTAB
21NOTCRLF ::= [#x00-#x09#x0B-#x0C#x0E-#x10FFFF]
22TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF]
23NEWLINE ::= CR LF?
diff --git a/src/querykv1/joparoute.cpp b/src/querykv1/joparoute.cpp
new file mode 100644
index 0000000..94ed359
--- /dev/null
+++ b/src/querykv1/joparoute.cpp
@@ -0,0 +1,102 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <cstdio>
4#include <iostream>
5#include <string_view>
6
7#include "joparoute.hpp"
8
9using namespace std::string_view_literals;
10
11void jopaRoute(const Options &options, Kv1Records &records, Kv1Index &index) {
12 FILE *out = stdout;
13 if (options.output_file_path != "-"sv)
14 out = fopen(options.output_file_path, "wb");
15 if (!out) {
16 fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno));
17 exit(EXIT_FAILURE);
18 }
19
20 const std::string data_owner_code = "CXX";
21 Kv1JourneyPattern::Key jopa_key(
22 // Of course it is bad to hardcode this, but we really have no time to make
23 // everything nice and dynamic. We're only working with CXX data anyway,
24 // and provide no support for the 'Schedules and Passing Times' KV1
25 // variant.
26 data_owner_code,
27 options.line_planning_number,
28 options.journey_pattern_code);
29
30 const Kv1JourneyPattern *jopa = index.journey_patterns[jopa_key];
31 if (!jopa) {
32 std::cerr << "Journey pattern not found" << std::endl;
33 return;
34 }
35 const Kv1Line *line = jopa->p_line;
36
37 struct Point {
38 bool is_stop = false;
39 const Kv1JourneyPatternTimingLink *jopatili = nullptr;
40 const Kv1Link *link = nullptr;
41 const Kv1Point *point = nullptr;
42 double distance_since_start_of_link = 0;
43 double distance_since_start_of_journey = 0;
44 };
45 std::vector<Point> points;
46
47 for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) {
48 const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i];
49 if (jopatili->key.line_planning_number == jopa->key.line_planning_number
50 && jopatili->key.journey_pattern_code == jopa->key.journey_pattern_code) {
51 const Kv1Link::Key link_key(data_owner_code, jopatili->user_stop_code_begin,
52 jopatili->user_stop_code_end, line->transport_type);
53 const Kv1Link *link = index.links[link_key];
54 const Kv1UserStopPoint::Key link_begin_key(data_owner_code, jopatili->user_stop_code_begin);
55 const Kv1UserStopPoint::Key link_end_key(data_owner_code, jopatili->user_stop_code_end);
56 const Kv1UserStopPoint *link_begin = index.user_stop_points[link_begin_key];
57 const Kv1UserStopPoint *link_end = index.user_stop_points[link_end_key];
58
59 points.emplace_back(true, jopatili, link, link_begin->p_point, 0);
60
61 for (size_t j = 0; j < records.point_on_links.size(); j++) {
62 Kv1PointOnLink *pool = &records.point_on_links[j];
63 if (pool->key.user_stop_code_begin == jopatili->user_stop_code_begin
64 && pool->key.user_stop_code_end == jopatili->user_stop_code_end
65 && pool->key.transport_type == jopatili->p_line->transport_type) {
66 points.emplace_back(false, jopatili, link, pool->p_point, pool->distance_since_start_of_link);
67 }
68 }
69
70 points.emplace_back(true, jopatili, link, link_end->p_point, link->distance);
71 }
72 }
73
74 std::sort(points.begin(), points.end(), [](Point &a, Point &b) {
75 if (a.jopatili->key.timing_link_order != b.jopatili->key.timing_link_order)
76 return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order;
77 return a.distance_since_start_of_link < b.distance_since_start_of_link;
78 });
79
80 double distance_since_start_of_journey = 0;
81 for (size_t i = 0; i < points.size(); i++) {
82 Point *p = &points[i];
83 if (i > 0) {
84 Point *prev = &points[i - 1];
85 if (p->link != prev->link) {
86 distance_since_start_of_journey += prev->link->distance;
87 }
88 }
89 p->distance_since_start_of_journey = distance_since_start_of_journey + p->distance_since_start_of_link;
90 }
91
92 fputs("is_stop,link_usrstop_begin,link_usrstop_end,point_code,rd_x,rd_y,distance_since_start_of_link,distance_since_start_of_journey\n", out);
93 for (const auto &point : points) {
94 fprintf(out, "%s,%s,%s,%s,%f,%f,%f,%f\n",
95 point.is_stop ? "true" : "false",
96 point.jopatili->user_stop_code_begin.c_str(), point.jopatili->user_stop_code_end.c_str(),
97 point.point->key.point_code.c_str(), point.point->location_x_ew, point.point->location_y_ns,
98 point.distance_since_start_of_link, point.distance_since_start_of_journey);
99 }
100
101 if (options.output_file_path != "-"sv) fclose(out);
102}
diff --git a/src/querykv1/joparoute.hpp b/src/querykv1/joparoute.hpp
new file mode 100644
index 0000000..ade94e8
--- /dev/null
+++ b/src/querykv1/joparoute.hpp
@@ -0,0 +1,13 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_JOPAROUTE_HPP
4#define OEUF_QUERYKV1_JOPAROUTE_HPP
5
6#include <tmi8/kv1_types.hpp>
7#include <tmi8/kv1_index.hpp>
8
9#include "cliopts.hpp"
10
11void jopaRoute(const Options &options, Kv1Records &records, Kv1Index &index);
12
13#endif // OEUF_QUERYKV1_JOPAROUTE_HPP
diff --git a/src/querykv1/journeyinfo.cpp b/src/querykv1/journeyinfo.cpp
new file mode 100644
index 0000000..bd29490
--- /dev/null
+++ b/src/querykv1/journeyinfo.cpp
@@ -0,0 +1,64 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <iostream>
4
5#include "journeyinfo.hpp"
6
7void journeyInfo(const Options &options, Kv1Records &records, Kv1Index &index) {
8 std::cout << "Info for journey " << options.line_planning_number
9 << "/" << options.journey_number << std::endl;
10
11 std::unordered_map<std::string, const Kv1UserStopPoint *> usrstops;
12 for (size_t i = 0; i < records.user_stop_points.size(); i++) {
13 const Kv1UserStopPoint *usrstop = &records.user_stop_points[i];
14 usrstops[usrstop->key.user_stop_code] = usrstop;
15 }
16
17 for (const auto &pujo : records.public_journeys) {
18 if (pujo.key.line_planning_number != options.line_planning_number
19 || std::to_string(pujo.key.journey_number) != options.journey_number)
20 continue;
21
22 std::vector<const Kv1JourneyPatternTimingLink *> timing_links;
23 for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) {
24 const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i];
25 if (jopatili->key.line_planning_number != options.line_planning_number
26 || jopatili->key.journey_pattern_code != pujo.journey_pattern_code)
27 continue;
28 timing_links.push_back(jopatili);
29 }
30
31 std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) -> bool {
32 return a->key.timing_link_order < b->key.timing_link_order;
33 });
34 auto begin_stop = timing_links.front()->user_stop_code_begin;
35 auto end_stop = timing_links.back()->user_stop_code_end;
36
37 const auto *begin = usrstops[begin_stop];
38 const auto *end = usrstops[end_stop];
39
40 std::cout << " Journey pattern: " << pujo.key.line_planning_number
41 << "/" << pujo.journey_pattern_code << std::endl
42 << " Begin stop: " << begin_stop
43 << "; name: " << std::quoted(begin->name)
44 << "; town: " << std::quoted(begin->town) << std::endl
45 << " End stop: " << end_stop
46 << "; name: " << std::quoted(end->name)
47 << "; town: " << std::quoted(end->town) << std::endl;
48
49 const auto *begin_star = begin->p_user_stop_area;
50 const auto *end_star = end->p_user_stop_area;
51 if (begin_star)
52 std::cout << " Begin stop area: " << begin_star->key.user_stop_area_code
53 << "; name: " << std::quoted(begin_star->name)
54 << ", town: " << std::quoted(begin_star->town)
55 << std::endl;
56 if (end_star)
57 std::cout << " End stop area: " << end_star->key.user_stop_area_code
58 << "; name: " << std::quoted(end_star->name)
59 << ", town: " << std::quoted(end_star->town)
60 << std::endl;
61
62 break;
63 }
64}
diff --git a/src/querykv1/journeyinfo.hpp b/src/querykv1/journeyinfo.hpp
new file mode 100644
index 0000000..2a2118d
--- /dev/null
+++ b/src/querykv1/journeyinfo.hpp
@@ -0,0 +1,13 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_JOURNEYINFO_HPP
4#define OEUF_QUERYKV1_JOURNEYINFO_HPP
5
6#include <tmi8/kv1_types.hpp>
7#include <tmi8/kv1_index.hpp>
8
9#include "cliopts.hpp"
10
11void journeyInfo(const Options &options, Kv1Records &records, Kv1Index &index);
12
13#endif // OEUF_QUERYKV1_JOURNEYINFO_HPP
diff --git a/src/querykv1/journeyroute.cpp b/src/querykv1/journeyroute.cpp
new file mode 100644
index 0000000..013ea1c
--- /dev/null
+++ b/src/querykv1/journeyroute.cpp
@@ -0,0 +1,96 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <iostream>
4#include <string_view>
5
6#include "journeyroute.hpp"
7
8using namespace std::string_view_literals;
9
10void journeyRoute(const Options &options, Kv1Records &records, Kv1Index &index) {
11 FILE *out = stdout;
12 if (options.output_file_path != "-"sv)
13 out = fopen(options.output_file_path, "wb");
14 if (!out) {
15 fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno));
16 exit(EXIT_FAILURE);
17 }
18
19 for (auto &pujo : records.public_journeys) {
20 if (pujo.key.line_planning_number == options.line_planning_number && std::to_string(pujo.key.journey_number) == options.journey_number) {
21 fprintf(stderr, "Got PUJO %s/%s:\n", options.line_planning_number, options.journey_number);
22 fprintf(stderr, " Day type: %s\n", pujo.key.day_type.c_str());
23 auto &pegr = *pujo.p_period_group;
24 fprintf(stderr, " PEGR Code: %s\n", pegr.key.period_group_code.c_str());
25 fprintf(stderr, " PEGR Description: %s\n", pegr.description.c_str());
26 fprintf(stderr, " SPECDAY Code: %s\n", pujo.key.specific_day_code.c_str());
27 auto &timdemgrp = *pujo.p_time_demand_group;
28
29 for (auto &pegrval : records.period_group_validities) {
30 if (pegrval.key.period_group_code == pegr.key.period_group_code) {
31 fprintf(stderr, "Got PEGRVAL for PEGR %s\n", pegr.key.period_group_code.c_str());
32 std::cerr << " Valid from: " << pegrval.key.valid_from << std::endl;
33 std::cerr << " Valid thru: " << pegrval.valid_thru << std::endl;
34 }
35 }
36
37 struct Point {
38 Kv1JourneyPatternTimingLink *jopatili = nullptr;
39 Kv1TimeDemandGroupRunTime *timdemrnt = nullptr;
40 double distance_since_start_of_link = 0;
41 double rd_x = 0;
42 double rd_y = 0;
43 double total_time_s = 0;
44 };
45 std::vector<Point> points;
46
47 for (size_t i = 0; i < records.time_demand_group_run_times.size(); i++) {
48 Kv1TimeDemandGroupRunTime *timdemrnt = &records.time_demand_group_run_times[i];
49 if (timdemrnt->key.line_planning_number == timdemgrp.key.line_planning_number
50 && timdemrnt->key.journey_pattern_code == timdemgrp.key.journey_pattern_code
51 && timdemrnt->key.time_demand_group_code == timdemgrp.key.time_demand_group_code) {
52 Kv1JourneyPatternTimingLink *jopatili = timdemrnt->p_journey_pattern_timing_link;
53 for (auto &pool : records.point_on_links) {
54 if (pool.key.user_stop_code_begin == timdemrnt->user_stop_code_begin
55 && pool.key.user_stop_code_end == timdemrnt->user_stop_code_end
56 && pool.key.transport_type == jopatili->p_line->transport_type) {
57 points.emplace_back(
58 jopatili,
59 timdemrnt,
60 pool.distance_since_start_of_link,
61 pool.p_point->location_x_ew,
62 pool.p_point->location_y_ns
63 );
64 }
65 }
66 }
67 }
68
69 std::sort(points.begin(), points.end(), [](Point &a, Point &b) {
70 if (a.jopatili->key.timing_link_order != b.jopatili->key.timing_link_order)
71 return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order;
72 return a.distance_since_start_of_link < b.distance_since_start_of_link;
73 });
74
75 double total_time_s = 0;
76 for (size_t i = 0; i < points.size(); i++) {
77 Point *p = &points[i];
78 p->total_time_s = total_time_s;
79 if (i > 0) {
80 Point *prev = &points[i - 1];
81 if (p->timdemrnt != prev->timdemrnt) {
82 total_time_s += prev->timdemrnt->total_drive_time_s;
83 prev->total_time_s = total_time_s;
84 }
85 }
86 }
87
88 fputs("rd_x,rd_y,total_time_s,is_timing_stop\n", out);
89 for (const auto &point : points) {
90 fprintf(out, "%f,%f,%f,%d\n", point.rd_x, point.rd_y, point.total_time_s, point.jopatili->is_timing_stop);
91 }
92 }
93 }
94
95 if (options.output_file_path != "-"sv) fclose(out);
96}
diff --git a/src/querykv1/journeyroute.hpp b/src/querykv1/journeyroute.hpp
new file mode 100644
index 0000000..ccd996c
--- /dev/null
+++ b/src/querykv1/journeyroute.hpp
@@ -0,0 +1,13 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_JOURNEYROUTE_HPP
4#define OEUF_QUERYKV1_JOURNEYROUTE_HPP
5
6#include <tmi8/kv1_types.hpp>
7#include <tmi8/kv1_index.hpp>
8
9#include "cliopts.hpp"
10
11void journeyRoute(const Options &options, Kv1Records &records, Kv1Index &index);
12
13#endif // OEUF_QUERYKV1_JOURNEYROUTE_HPP
diff --git a/src/querykv1/journeys.cpp b/src/querykv1/journeys.cpp
new file mode 100644
index 0000000..96566b2
--- /dev/null
+++ b/src/querykv1/journeys.cpp
@@ -0,0 +1,95 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <iostream>
4#include <map>
5#include <string_view>
6#include <unordered_set>
7
8#include "journeys.hpp"
9
10using namespace std::string_view_literals;
11
12void journeys(const Options &options, Kv1Records &records, Kv1Index &index) {
13 const std::string_view want_begin_stop_code(options.begin_stop_code);
14 const std::string_view want_end_stop_code(options.end_stop_code);
15
16 FILE *out = stdout;
17 if (options.output_file_path != "-"sv)
18 out = fopen(options.output_file_path, "wb");
19 if (!out) {
20 fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno));
21 exit(EXIT_FAILURE);
22 }
23
24 std::cerr << "Generating journeys for " << options.line_planning_number << ", going from stop "
25 << options.begin_stop_code << " to " << options.end_stop_code << std::endl;
26
27 std::unordered_map<std::string, const Kv1UserStopPoint *> usrstops;
28 for (size_t i = 0; i < records.user_stop_points.size(); i++) {
29 const Kv1UserStopPoint *usrstop = &records.user_stop_points[i];
30 usrstops[usrstop->key.user_stop_code] = usrstop;
31 }
32
33 std::unordered_set<std::string> journey_pattern_codes;
34 for (const auto &jopa : records.journey_patterns) {
35 if (jopa.key.line_planning_number != options.line_planning_number)
36 continue;
37 journey_pattern_codes.insert(jopa.key.journey_pattern_code);
38 }
39
40 std::unordered_map<std::string, std::vector<const Kv1JourneyPatternTimingLink *>> jopatilis;
41 for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) {
42 const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i];
43 if (jopatili->key.line_planning_number != options.line_planning_number
44 || !journey_pattern_codes.contains(jopatili->key.journey_pattern_code))
45 continue;
46 jopatilis[jopatili->key.journey_pattern_code].push_back(jopatili);
47 }
48
49 std::unordered_set<std::string> valid_jopas;
50 for (auto &[journey_pattern_code, timing_links] : jopatilis) {
51 std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) -> bool {
52 return a->key.timing_link_order < b->key.timing_link_order;
53 });
54 auto begin_stop = timing_links.front()->user_stop_code_begin;
55 auto end_stop = timing_links.back()->user_stop_code_end;
56
57 const auto *begin = usrstops[begin_stop];
58 const auto *end = usrstops[end_stop];
59
60 bool begin_stop_ok = false;
61 if (want_begin_stop_code.starts_with("stop:"))
62 begin_stop_ok = want_begin_stop_code.substr(5) == begin_stop;
63 else if (want_begin_stop_code.starts_with("star:"))
64 begin_stop_ok = want_begin_stop_code.substr(5) == begin->user_stop_area_code;
65
66 bool end_stop_ok = false;
67 if (want_end_stop_code.starts_with("stop:"))
68 end_stop_ok = want_end_stop_code.substr(5) == end_stop;
69 else if (want_end_stop_code.starts_with("star:"))
70 end_stop_ok = want_end_stop_code.substr(5) == end->user_stop_area_code;
71
72 if (begin_stop_ok && end_stop_ok) {
73 valid_jopas.insert(journey_pattern_code);
74 }
75 }
76
77 std::map<int, std::pair<std::string, std::string>> valid_journeys;
78 for (const auto &pujo : records.public_journeys) {
79 if (pujo.key.line_planning_number == options.line_planning_number
80 && valid_jopas.contains(pujo.journey_pattern_code)) {
81 valid_journeys[pujo.key.journey_number] = {
82 pujo.time_demand_group_code,
83 pujo.journey_pattern_code,
84 };
85 }
86 }
87
88 fputs("journey_number,time_demand_group_code,journey_pattern_code\n", out);
89 for (const auto &[journey_number, timdemgrp_jopa] : valid_journeys) {
90 const auto &[time_demand_group_code, journey_pattern_code] = timdemgrp_jopa;
91 fprintf(out, "%d,%s,%s\n", journey_number, time_demand_group_code.c_str(), journey_pattern_code.c_str());
92 }
93
94 if (options.output_file_path != "-"sv) fclose(out);
95}
diff --git a/src/querykv1/journeys.hpp b/src/querykv1/journeys.hpp
new file mode 100644
index 0000000..cf615c7
--- /dev/null
+++ b/src/querykv1/journeys.hpp
@@ -0,0 +1,13 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_JOURNEYS_HPP
4#define OEUF_QUERYKV1_JOURNEYS_HPP
5
6#include <tmi8/kv1_types.hpp>
7#include <tmi8/kv1_index.hpp>
8
9#include "cliopts.hpp"
10
11void journeys(const Options &options, Kv1Records &records, Kv1Index &index);
12
13#endif // OEUF_QUERYKV1_JOURNEYS_HPP
diff --git a/src/querykv1/main.cpp b/src/querykv1/main.cpp
new file mode 100644
index 0000000..6c606ba
--- /dev/null
+++ b/src/querykv1/main.cpp
@@ -0,0 +1,198 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <chrono>
4#include <cstdio>
5#include <string>
6#include <string_view>
7#include <vector>
8
9#include <tmi8/kv1_types.hpp>
10#include <tmi8/kv1_index.hpp>
11#include <tmi8/kv1_lexer.hpp>
12#include <tmi8/kv1_parser.hpp>
13
14#include "cliopts.hpp"
15#include "joparoute.hpp"
16#include "journeyinfo.hpp"
17#include "journeyroute.hpp"
18#include "journeys.hpp"
19#include "schedule.hpp"
20
21using namespace std::string_view_literals;
22
23using TimingClock = std::conditional_t<
24 std::chrono::high_resolution_clock::is_steady,
25 std::chrono::high_resolution_clock,
26 std::chrono::steady_clock>;
27
// Reads the entire file at `path` -- or standard input when `path` is "-" --
// and returns its contents. Progress is reported on stderr. Exits the process
// with status 1 when the file cannot be opened or a read error occurs.
std::string readKv1(const char *path) {
  const bool from_stdin = std::string_view(path) == "-";

  FILE *in = stdin;
  if (from_stdin) fputs("Reading KV1 from standard input\n", stderr);
  else in = fopen(path, "rb");
  if (!in) {
    fprintf(stderr, "Open %s: %s\n", path, strerrordesc_np(errno));
    exit(1);
  }

  char buf[4096];
  std::string data;
  while (!feof(in) && !ferror(in)) {
    const size_t n_read = fread(buf, sizeof(char), sizeof(buf), in);
    data.append(buf, n_read);
  }
  if (ferror(in)) {
    if (from_stdin)
      fputs("Error when reading from stdin\n", stderr);
    else
      fprintf(stderr, "Error reading from file \"%s\"\n", path);
    exit(1);
  }
  // size() is a size_t, for which %zu (not %lu) is the matching conversion.
  fprintf(stderr, "Read %zu bytes\n", data.size());

  // stdin is not ours to close.
  if (!from_stdin)
    fclose(in);

  return data;
}
57
58std::vector<Kv1Token> lex(const char *path) {
59 std::string data = readKv1(path);
60
61 auto start = TimingClock::now();
62 Kv1Lexer lexer(data);
63 lexer.lex();
64 auto end = TimingClock::now();
65
66 std::chrono::duration<double> elapsed{end - start};
67 double bytes = static_cast<double>(data.size()) / 1'000'000;
68 double speed = bytes / elapsed.count();
69
70 if (!lexer.errors.empty()) {
71 fputs("Lexer reported errors:\n", stderr);
72 for (const auto &error : lexer.errors)
73 fprintf(stderr, "- %s\n", error.c_str());
74 exit(1);
75 }
76
77 fprintf(stderr, "Got %lu tokens\n", lexer.tokens.size());
78 fprintf(stderr, "Duration: %f s\n", elapsed.count());
79 fprintf(stderr, "Speed: %f MB/s\n", speed);
80
81 return std::move(lexer.tokens);
82}
83
84bool parse(const char *path, Kv1Records &into) {
85 std::vector<Kv1Token> tokens = lex(path);
86
87 Kv1Parser parser(tokens, into);
88 parser.parse();
89
90 bool ok = true;
91 if (!parser.gerrors.empty()) {
92 ok = false;
93 fputs("Parser reported errors:\n", stderr);
94 for (const auto &error : parser.gerrors)
95 fprintf(stderr, "- %s\n", error.c_str());
96 }
97 if (!parser.warns.empty()) {
98 fputs("Parser reported warnings:\n", stderr);
99 for (const auto &warn : parser.warns)
100 fprintf(stderr, "- %s\n", warn.c_str());
101 }
102
103 fprintf(stderr, "Parsed %lu records\n", into.size());
104
105 return ok;
106}
107
108void printParsedRecords(const Kv1Records &records) {
109 fputs("Parsed records:\n", stderr);
110 fprintf(stderr, " organizational_units: %lu\n", records.organizational_units.size());
111 fprintf(stderr, " higher_organizational_units: %lu\n", records.higher_organizational_units.size());
112 fprintf(stderr, " user_stop_points: %lu\n", records.user_stop_points.size());
113 fprintf(stderr, " user_stop_areas: %lu\n", records.user_stop_areas.size());
114 fprintf(stderr, " timing_links: %lu\n", records.timing_links.size());
115 fprintf(stderr, " links: %lu\n", records.links.size());
116 fprintf(stderr, " lines: %lu\n", records.lines.size());
117 fprintf(stderr, " destinations: %lu\n", records.destinations.size());
118 fprintf(stderr, " journey_patterns: %lu\n", records.journey_patterns.size());
119 fprintf(stderr, " concession_financer_relations: %lu\n", records.concession_financer_relations.size());
120 fprintf(stderr, " concession_areas: %lu\n", records.concession_areas.size());
121 fprintf(stderr, " financers: %lu\n", records.financers.size());
122 fprintf(stderr, " journey_pattern_timing_links: %lu\n", records.journey_pattern_timing_links.size());
123 fprintf(stderr, " points: %lu\n", records.points.size());
124 fprintf(stderr, " point_on_links: %lu\n", records.point_on_links.size());
125 fprintf(stderr, " icons: %lu\n", records.icons.size());
126 fprintf(stderr, " notices: %lu\n", records.notices.size());
127 fprintf(stderr, " notice_assignments: %lu\n", records.notice_assignments.size());
128 fprintf(stderr, " time_demand_groups: %lu\n", records.time_demand_groups.size());
129 fprintf(stderr, " time_demand_group_run_times: %lu\n", records.time_demand_group_run_times.size());
130 fprintf(stderr, " period_groups: %lu\n", records.period_groups.size());
131 fprintf(stderr, " specific_days: %lu\n", records.specific_days.size());
132 fprintf(stderr, " timetable_versions: %lu\n", records.timetable_versions.size());
133 fprintf(stderr, " public_journeys: %lu\n", records.public_journeys.size());
134 fprintf(stderr, " period_group_validities: %lu\n", records.period_group_validities.size());
135 fprintf(stderr, " exceptional_operating_days: %lu\n", records.exceptional_operating_days.size());
136 fprintf(stderr, " schedule_versions: %lu\n", records.schedule_versions.size());
137 fprintf(stderr, " public_journey_passing_times: %lu\n", records.public_journey_passing_times.size());
138 fprintf(stderr, " operating_days: %lu\n", records.operating_days.size());
139}
140
141void printIndexSize(const Kv1Index &index) {
142 fputs("Index size:\n", stderr);
143 fprintf(stderr, " organizational_units: %lu\n", index.organizational_units.size());
144 fprintf(stderr, " user_stop_points: %lu\n", index.user_stop_points.size());
145 fprintf(stderr, " user_stop_areas: %lu\n", index.user_stop_areas.size());
146 fprintf(stderr, " timing_links: %lu\n", index.timing_links.size());
147 fprintf(stderr, " links: %lu\n", index.links.size());
148 fprintf(stderr, " lines: %lu\n", index.lines.size());
149 fprintf(stderr, " destinations: %lu\n", index.destinations.size());
150 fprintf(stderr, " journey_patterns: %lu\n", index.journey_patterns.size());
151 fprintf(stderr, " concession_financer_relations: %lu\n", index.concession_financer_relations.size());
152 fprintf(stderr, " concession_areas: %lu\n", index.concession_areas.size());
153 fprintf(stderr, " financers: %lu\n", index.financers.size());
154 fprintf(stderr, " journey_pattern_timing_links: %lu\n", index.journey_pattern_timing_links.size());
155 fprintf(stderr, " points: %lu\n", index.points.size());
156 fprintf(stderr, " point_on_links: %lu\n", index.point_on_links.size());
157 fprintf(stderr, " icons: %lu\n", index.icons.size());
158 fprintf(stderr, " notices: %lu\n", index.notices.size());
159 fprintf(stderr, " time_demand_groups: %lu\n", index.time_demand_groups.size());
160 fprintf(stderr, " time_demand_group_run_times: %lu\n", index.time_demand_group_run_times.size());
161 fprintf(stderr, " period_groups: %lu\n", index.period_groups.size());
162 fprintf(stderr, " specific_days: %lu\n", index.specific_days.size());
163 fprintf(stderr, " timetable_versions: %lu\n", index.timetable_versions.size());
164 fprintf(stderr, " public_journeys: %lu\n", index.public_journeys.size());
165 fprintf(stderr, " period_group_validities: %lu\n", index.period_group_validities.size());
166 fprintf(stderr, " exceptional_operating_days: %lu\n", index.exceptional_operating_days.size());
167 fprintf(stderr, " schedule_versions: %lu\n", index.schedule_versions.size());
168 fprintf(stderr, " public_journey_passing_times: %lu\n", index.public_journey_passing_times.size());
169 fprintf(stderr, " operating_days: %lu\n", index.operating_days.size());
170}
171
172int main(int argc, char *argv[]) {
173 Options options = parseOptions(argc, argv);
174
175 Kv1Records records;
176 if (!parse(options.kv1_file_path, records)) {
177 fputs("Error parsing records, exiting\n", stderr);
178 return EXIT_FAILURE;
179 }
180 printParsedRecords(records);
181 fputs("Indexing...\n", stderr);
182 Kv1Index index(&records);
183 fprintf(stderr, "Indexed %lu records\n", index.size());
184 // Only notice assignments are not indexed. If this equality is not valid,
185 // then this means that we had duplicate keys or that something else went
186 // wrong. That would really not be great.
187 assert(index.size() == records.size() - records.notice_assignments.size());
188 printIndexSize(index);
189 fputs("Linking records...\n", stderr);
190 kv1LinkRecords(index);
191 fputs("Done linking\n", stderr);
192
193 if (options.subcommand == "joparoute"sv) jopaRoute(options, records, index);
194 if (options.subcommand == "journeyroute"sv) journeyRoute(options, records, index);
195 if (options.subcommand == "journeys"sv) journeys(options, records, index);
196 if (options.subcommand == "journeyinfo"sv) journeyInfo(options, records, index);
197 if (options.subcommand == "schedule"sv) schedule(options, records, index);
198}
diff --git a/src/querykv1/schedule.cpp b/src/querykv1/schedule.cpp
new file mode 100644
index 0000000..2bcfe0a
--- /dev/null
+++ b/src/querykv1/schedule.cpp
@@ -0,0 +1,63 @@
1// vim:set sw=2 ts=2 sts et:
2
#include <cerrno>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

#include "daterange.hpp"
#include "schedule.hpp"
10
11using namespace std::string_view_literals;
12
13void schedule(const Options &options, Kv1Records &records, Kv1Index &index) {
14 FILE *out = stdout;
15 if (options.output_file_path != "-"sv)
16 out = fopen(options.output_file_path, "wb");
17 if (!out) {
18 fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno));
19 exit(EXIT_FAILURE);
20 }
21
22 std::cerr << "Generating schedule for " << options.line_planning_number << std::endl;
23
24 std::unordered_multimap<std::string, Kv1PeriodGroupValidity> period_group_validities;
25 for (const auto &pegr : records.period_group_validities)
26 period_group_validities.insert({ pegr.key.period_group_code, pegr });
27 std::unordered_multimap<std::string, Kv1PublicJourney> public_journeys;
28 for (const auto &pujo : records.public_journeys)
29 public_journeys.insert({ pujo.key.timetable_version_code, pujo });
30
31 std::cout << "line_planning_number,journey_number,date,departure_time" << std::endl;
32 for (const auto &tive : records.timetable_versions) {
33 std::vector<DateRange> tive_pegrval_ranges;
34
35 auto pegrval_range = period_group_validities.equal_range(tive.key.period_group_code);
36 for (auto it = pegrval_range.first; it != pegrval_range.second; it++) {
37 const auto &[_, pegrval] = *it;
38 tive_pegrval_ranges.emplace_back(pegrval.key.valid_from, pegrval.valid_thru);
39 }
40
41 DateRangeSeq seq(tive_pegrval_ranges.begin(), tive_pegrval_ranges.end());
42 seq = seq.clampFrom(tive.valid_from);
43 if (tive.valid_thru)
44 seq = seq.clampThru(*tive.valid_thru);
45
46 for (const auto &range : seq) for (auto date : range) {
47 auto weekday = std::chrono::year_month_weekday(std::chrono::sys_days(date)).weekday();
48
49 auto pujo_range = public_journeys.equal_range(tive.key.timetable_version_code);
50 for (auto itt = pujo_range.first; itt != pujo_range.second; itt++) {
51 const auto &[_, pujo] = *itt;
52
53 if (pujo.key.line_planning_number == options.line_planning_number && pujo.key.day_type.size() == 7
54 && pujo.key.day_type[weekday.iso_encoding() - 1] == static_cast<char>('0' + weekday.iso_encoding())) {
55 std::cout << pujo.key.line_planning_number << "," << pujo.key.journey_number << ","
56 << date << "," << pujo.departure_time << std::endl;
57 }
58 }
59 }
60 }
61
62 if (options.output_file_path != "-"sv) fclose(out);
63}
diff --git a/src/querykv1/schedule.hpp b/src/querykv1/schedule.hpp
new file mode 100644
index 0000000..100bd4c
--- /dev/null
+++ b/src/querykv1/schedule.hpp
@@ -0,0 +1,13 @@
1// vim:set sw=2 ts=2 sts et:
2
3#ifndef OEUF_QUERYKV1_SCHEDULE_HPP
4#define OEUF_QUERYKV1_SCHEDULE_HPP
5
6#include <tmi8/kv1_types.hpp>
7#include <tmi8/kv1_index.hpp>
8
9#include "cliopts.hpp"
10
11void schedule(const Options &options, Kv1Records &records, Kv1Index &index);
12
13#endif // OEUF_QUERYKV1_SCHEDULE_HPP
diff --git a/src/recvkv6/.envrc b/src/recvkv6/.envrc
new file mode 100644
index 0000000..694e74f
--- /dev/null
+++ b/src/recvkv6/.envrc
@@ -0,0 +1,2 @@
1source_env ../../
2export DEVMODE=1
diff --git a/src/recvkv6/Makefile b/src/recvkv6/Makefile
new file mode 100644
index 0000000..12ff7fb
--- /dev/null
+++ b/src/recvkv6/Makefile
@@ -0,0 +1,21 @@
# Taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
#
# Warnings are only promoted to errors when DEVMODE is set (see .envrc).
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
  -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
  -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
  -D_GLIBCXX_ASSERTIONS \
  -fstrict-flex-arrays=3 \
  -fstack-clash-protection -fstack-protector-strong
LDFLAGS=-lzmq -larrow -lparquet -lprometheus-cpp-pull -lprometheus-cpp-core -lz -ltmi8 -Wl,-z,defs \
  -Wl,-z,nodlopen -Wl,-z,noexecstack \
  -Wl,-z,relro -Wl,-z,now

recvkv6: main.cpp
	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

# -f keeps `make clean` from failing when the binary has not been built yet.
.PHONY: clean
clean:
	rm -f recvkv6
diff --git a/src/recvkv6/main.cpp b/src/recvkv6/main.cpp
new file mode 100644
index 0000000..2ac3669
--- /dev/null
+++ b/src/recvkv6/main.cpp
@@ -0,0 +1,1300 @@
1// vim:set sw=2 ts=2 sts et:
2
3#include <array>
4#include <cassert>
5#include <chrono>
6#include <csignal>
7#include <cstring>
8#include <filesystem>
9#include <format>
10#include <fstream>
11#include <iostream>
12#include <optional>
13#include <stack>
14#include <string>
15#include <sstream>
16#include <vector>
17
18#include <zlib.h>
19#include <zmq.h>
20
21#include <nlohmann/json.hpp>
22
23#include <prometheus/counter.h>
24#include <prometheus/exposer.h>
25#include <prometheus/histogram.h>
26#include <prometheus/registry.h>
27
28#include <rapidxml/rapidxml.hpp>
29
30#include <tmi8/kv6_parquet.hpp>
31
32#define CHUNK 16384
33
34struct RawMessage {
35 public:
36 // Takes ownership of envelope and body
37 RawMessage(zmq_msg_t envelope, zmq_msg_t body)
38 : envelope(envelope), body(body)
39 {}
40
41 // Prevent copying
42 RawMessage(const RawMessage &) = delete;
43 RawMessage &operator=(RawMessage const &) = delete;
44
45 std::string_view getEnvelope() {
46 return static_cast<const char *>(zmq_msg_data(&envelope));
47 }
48
49 char *getBody() {
50 return static_cast<char *>(zmq_msg_data(&body));
51 }
52
53 size_t getBodySize() {
54 return zmq_msg_size(&body);
55 }
56
57 ~RawMessage() {
58 zmq_msg_close(&envelope);
59 zmq_msg_close(&body);
60 }
61
62 private:
63 zmq_msg_t envelope;
64 zmq_msg_t body;
65};
66
67std::optional<RawMessage> recvMsg(void *socket) {
68 while (true) {
69 zmq_msg_t envelope, body;
70 int rc = zmq_msg_init(&envelope);
71 assert(rc == 0);
72 rc = zmq_msg_init(&body);
73 assert(rc == 0);
74
75 rc = zmq_msg_recv(&envelope, socket, 0);
76 if (rc == -1) return std::nullopt;
77
78 int more;
79 size_t more_size = sizeof(more);
80 rc = zmq_getsockopt(socket, ZMQ_RCVMORE, &more, &more_size);
81 if (!more) {
82 zmq_msg_close(&envelope);
83 zmq_msg_close(&body);
84 continue;
85 }
86
87 rc = zmq_msg_recv(&body, socket, 0);
88 if (rc == -1) return std::nullopt;
89
90 rc = zmq_getsockopt(socket, ZMQ_RCVMORE, &more, &more_size);
91 assert(!more);
92
93 return std::make_optional<RawMessage>(envelope, body);
94 }
95}
96
97// Ensures that <return value>[output_size] == 0
98char *decompress(char *raw, unsigned int input_size, unsigned int &output_size) {
99 assert(input_size <= UINT32_MAX);
100
101 z_stream strm;
102 strm.next_in = reinterpret_cast<unsigned char *>(raw);
103 strm.avail_in = input_size;
104 strm.zalloc = Z_NULL;
105 strm.zfree = Z_NULL;
106 strm.opaque = Z_NULL;
107 int rc = inflateInit2(&strm, 32);
108 assert(rc == Z_OK);
109
110 unsigned int buf_cap = CHUNK;
111 unsigned int buf_len = 0;
112 char *buf = static_cast<char *>(malloc(CHUNK));
113 do {
114 if (buf_len + CHUNK > buf_cap) {
115 assert(buf_cap <= UINT32_MAX);
116 buf_cap *= 2;
117 buf = static_cast<char *>(realloc(buf, buf_cap));
118 }
119 strm.avail_out = buf_cap - buf_len;
120 strm.next_out = reinterpret_cast<unsigned char *>(buf + buf_len);
121
122 unsigned long old_total = strm.total_out;
123 rc = inflate(&strm, Z_FINISH);
124 unsigned progress = static_cast<unsigned int>(strm.total_out - old_total);
125 buf_len += progress;
126 assert(progress != 0 || rc == Z_STREAM_END);
127 } while (strm.total_in < input_size);
128
129 if (buf_len == buf_cap) {
130 buf = static_cast<char *>(realloc(buf, buf_len + 1));
131 }
132 buf[buf_len] = 0;
133 output_size = buf_len;
134
135 rc = inflateEnd(&strm);
136 assert(rc == Z_OK);
137
138 return buf;
139}
140
141struct Date {
142 int16_t year = 0;
143 uint8_t month = 0;
144 uint8_t day = 0;
145
146 static bool parse(Date &dest, std::string_view src) {
147 dest.year = 0, dest.month = 0, dest.day = 0;
148
149 int16_t y_mul_fac = 1;
150 bool extended = false;
151
152 size_t plus = src.find('+');
153 if (plus != std::string_view::npos) {
154 extended = true;
155 src = src.substr(1); // remove plus sign from the start
156 }
157 if (!extended) {
158 size_t min_or_dash = src.find('-');
159 if (min_or_dash == std::string_view::npos) return false;
160 if (min_or_dash == 0) {
161 y_mul_fac = -1; // it's a minus sign
162 src = src.substr(1); // remove minus sign at the start
163 }
164 }
165
166 int y_chars = 0;
167 while (src.size() > 0 && src[0] >= '0' && src[0] <= '9') {
168 dest.year = static_cast<int16_t>(dest.year * 10 + src[0] - '0');
169 src = src.substr(1);
170 y_chars++;
171 }
172 if (src.size() == 0) { dest.year = 0; return false; }
173 if (src[0] != '-') { dest.year = 0; return false; }
174 src = src.substr(1); // remove dash
175 if (y_chars < 4 || (y_chars > 4 && !extended)) { dest.year = 0; return false; }
176 dest.year *= y_mul_fac;
177
178 bool rest_correct = src.size() == 5
179 && src[0] >= '0' && src[0] <= '9'
180 && src[1] >= '0' && src[1] <= '9'
181 && src[3] >= '0' && src[3] <= '9'
182 && src[4] >= '0' && src[4] <= '9';
183 if (!rest_correct) { dest.year = 0; return false; }
184 dest.month = static_cast<uint8_t>((src[0] - '0') * 10 + src[1] - '0');
185 dest.day = static_cast<uint8_t>((src[3] - '0') * 10 + src[4] - '0');
186 if (dest.month > 12 || dest.day > 31) {
187 dest.year = 0, dest.month = 0, dest.day = 0;
188 return false;
189 }
190 return true;
191 }
192
193 std::string toString() const {
194 if (year < 0 || year > 9999 || month < 0 || month > 12 || day < 0 || day > 31)
195 throw std::invalid_argument("one or more date components (year, month, day) out of range");
196 char data[11] = "XXXX-XX-XX";
197 sprintf(data, "%04u-%02u-%02u", year, month, day);
198 return data;
199 }
200
201 std::chrono::days toUnixDays() const {
202 std::chrono::year_month_day ymd{std::chrono::year(year), std::chrono::month(month), std::chrono::day(day)};
203 // This is valid since C++20: as of C++20, the system clock is defined to measure the
204 // Unix Time, the amount of seconds since Thursday 1 January 1970, without leap seconds.
205 std::chrono::days since_epoch = std::chrono::sys_days(ymd).time_since_epoch();
206 return since_epoch;
207 }
208};
209
// A time of day with second resolution.
struct Time {
  uint8_t hour = 0;
  uint8_t minute = 0;
  uint8_t second = 0;

  // Parses a time of the form "hh:mm:ss" with hh <= 23, mm <= 59, ss <= 59.
  //
  // Returns true and fills dest on success. On any failure, returns false and
  // leaves dest with all components set to zero. (Malformed input used to
  // leave dest untouched while out-of-range input zeroed it; both now behave
  // the same way, matching Date::parse and Tzd::parse.)
  static bool parse(Time &dest, std::string_view src) {
    dest.hour = 0, dest.minute = 0, dest.second = 0;

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const bool okay = src.size() == 8
      && is_digit(src[0]) && is_digit(src[1])
      && src[2] == ':'
      && is_digit(src[3]) && is_digit(src[4])
      && src[5] == ':'
      && is_digit(src[6]) && is_digit(src[7]);
    if (!okay) return false;

    const int hour = (src[0] - '0') * 10 + (src[1] - '0');
    const int minute = (src[3] - '0') * 10 + (src[4] - '0');
    const int second = (src[6] - '0') * 10 + (src[7] - '0');
    if (hour > 23 || minute > 59 || second > 59) return false;

    dest.hour = static_cast<uint8_t>(hour);
    dest.minute = static_cast<uint8_t>(minute);
    dest.second = static_cast<uint8_t>(second);
    return true;
  }

  // Formats the time as "hh:mm:ss".
  // Throws std::invalid_argument when a component is out of range.
  std::string toString() const {
    // The components are unsigned, so only their upper bounds need checking.
    if (hour > 23 || minute > 59 || second > 59)
      throw std::invalid_argument("one or more time components (hour, minute, second) out of range");
    char data[16];
    snprintf(data, sizeof(data), "%02d:%02d:%02d", hour, minute, second);
    return data;
  }
};
244
// Time zone designator: the offset from UTC, stored in minutes.
struct Tzd {
  int16_t minutes = 0;

  // Parses "Z" (zero offset) or an offset of the form "+hh:mm" / "-hh:mm"
  // with hh <= 23 and mm <= 59. Returns true and stores the signed offset in
  // dest on success; returns false with dest.minutes set to zero otherwise.
  static bool parse(Tzd &dest, std::string_view src) {
    dest.minutes = 0;

    if (src.empty()) return false;
    if (src == "Z") return true;

    const char sign = src.front();
    if (sign != '+' && sign != '-') return false;
    const std::string_view hhmm = src.substr(1);

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    if (hhmm.size() != 5) return false;
    if (!is_digit(hhmm[0]) || !is_digit(hhmm[1])) return false;
    if (hhmm[2] != ':') return false;
    if (!is_digit(hhmm[3]) || !is_digit(hhmm[4])) return false;

    const int hh = (hhmm[0] - '0') * 10 + (hhmm[1] - '0');
    const int mm = (hhmm[3] - '0') * 10 + (hhmm[4] - '0');
    if (hh > 23 || mm > 59) return false;

    const int magnitude = 60 * hh + mm;
    dest.minutes = static_cast<int16_t>(sign == '-' ? -magnitude : magnitude);
    return true;
  }

  // Formats the offset as "Z" when it is zero, and as "+hh:mm" / "-hh:mm"
  // otherwise. Throws std::invalid_argument when the offset is 24 hours or
  // more in either direction.
  std::string toString() const {
    if (minutes == 0)
      return "Z";

    const int magnitude = abs(minutes);
    const int hh = magnitude / 60;
    const int mm = magnitude % 60;
    if (hh > 23 || mm > 59)
      throw std::invalid_argument("offset out of range");

    char data[7] = "+XX:XX";
    sprintf(data, "%c%02u:%02u", minutes < 0 ? '-' : '+',
            static_cast<unsigned>(hh), static_cast<unsigned>(mm));
    return data;
  }
};
288
289struct Timestamp {
290 Date date;
291 Tzd off;
292 Time time;
293
294 static bool parse(Timestamp &dest, std::string_view src) {
295 size_t t = src.find('T');
296 if (t == std::string_view::npos || t + 1 >= src.size()) return false;
297
298 std::string_view date = src.substr(0, t);
299 std::string_view time_and_tzd = src.substr(t + 1);
300 if (time_and_tzd.size() < 9) return false;
301 if (!Date::parse(dest.date, date)) return false;
302
303 std::string_view time = time_and_tzd.substr(0, 8);
304 std::string_view tzd = time_and_tzd.substr(8);
305 if (!Time::parse(dest.time, time)) return false;
306 return Tzd::parse(dest.off, tzd);
307 }
308
309 std::string toString() const {
310 return date.toString() + "T" + time.toString() + off.toString();
311 }
312
313 std::chrono::seconds toUnixSeconds() const {
314 std::chrono::year_month_day ymd(std::chrono::year(date.year),
315 std::chrono::month(date.month),
316 std::chrono::day(date.day));
317 std::chrono::sys_days sys_days(ymd);
318 std::chrono::time_point<std::chrono::utc_clock, std::chrono::days> utc_days(sys_days.time_since_epoch());
319 std::chrono::utc_seconds utc_seconds = std::chrono::time_point_cast<std::chrono::seconds>(utc_days);
320 utc_seconds += std::chrono::hours(time.hour) + std::chrono::minutes(time.minute) +
321 std::chrono::seconds(time.second) - std::chrono::minutes(off.minutes);
322 std::chrono::sys_seconds sys_seconds = std::chrono::utc_clock::to_sys(utc_seconds);
323 std::chrono::seconds unix = sys_seconds.time_since_epoch();
324 return unix;
325 }
326};
327
// XML namespace URL that elements are compared against when deciding whether
// they are TMI8 KV6 elements.
static const std::string_view TMI8_XML_NS{"http://bison.connekt.nl/tmi8/kv6/msg"};
329
// The kinds of KV6 records. The numeric values double as indices into the
// per-type tables in this file, so they must stay contiguous.
enum Kv6RecordType {
  KV6T_UNKNOWN = 0,
  KV6T_DELAY,      // 1
  KV6T_INIT,       // 2
  KV6T_ARRIVAL,    // 3
  KV6T_ON_STOP,    // 4
  KV6T_DEPARTURE,  // 5
  KV6T_ON_ROUTE,   // 6
  KV6T_ON_PATH,    // 7
  KV6T_OFF_ROUTE,  // 8
  KV6T_END,        // 9

  // Always keep this updated to correspond to the
  // first and last elements of the enumeration!
  _KV6T_FIRST_TYPE = KV6T_UNKNOWN,
  _KV6T_LAST_TYPE = KV6T_END,
};
346
// Bit flags, one per KV6 record field. Sets of fields are represented by
// OR-ing flags together.
enum Kv6Field {
  KV6F_NONE                          = 0,
  KV6F_DATA_OWNER_CODE               = 1 << 0,
  KV6F_LINE_PLANNING_NUMBER          = 1 << 1,
  KV6F_OPERATING_DAY                 = 1 << 2,
  KV6F_JOURNEY_NUMBER                = 1 << 3,
  KV6F_REINFORCEMENT_NUMBER          = 1 << 4,
  KV6F_TIMESTAMP                     = 1 << 5,
  KV6F_SOURCE                        = 1 << 6,
  KV6F_PUNCTUALITY                   = 1 << 7,
  KV6F_USER_STOP_CODE                = 1 << 8,
  KV6F_PASSAGE_SEQUENCE_NUMBER       = 1 << 9,
  KV6F_VEHICLE_NUMBER                = 1 << 10,
  KV6F_BLOCK_CODE                    = 1 << 11,
  KV6F_WHEELCHAIR_ACCESSIBLE         = 1 << 12,
  KV6F_NUMBER_OF_COACHES             = 1 << 13,
  KV6F_RD_Y                          = 1 << 14,
  KV6F_RD_X                          = 1 << 15,
  KV6F_DISTANCE_SINCE_LAST_USER_STOP = 1 << 16,
};
367
368static constexpr Kv6Field KV6T_REQUIRED_FIELDS[_KV6T_LAST_TYPE + 1] = {
369 // KV6T_UNKNOWN
370 KV6F_NONE,
371 // KV6T_DELAY
372 static_cast<Kv6Field>(
373 KV6F_DATA_OWNER_CODE
374 | KV6F_LINE_PLANNING_NUMBER
375 | KV6F_OPERATING_DAY
376 | KV6F_JOURNEY_NUMBER
377 | KV6F_REINFORCEMENT_NUMBER
378 | KV6F_TIMESTAMP
379 | KV6F_SOURCE
380 | KV6F_PUNCTUALITY),
381 // KV6T_INIT
382 static_cast<Kv6Field>(
383 KV6F_DATA_OWNER_CODE
384 | KV6F_LINE_PLANNING_NUMBER
385 | KV6F_OPERATING_DAY
386 | KV6F_JOURNEY_NUMBER
387 | KV6F_REINFORCEMENT_NUMBER
388 | KV6F_TIMESTAMP
389 | KV6F_SOURCE
390 | KV6F_USER_STOP_CODE
391 | KV6F_PASSAGE_SEQUENCE_NUMBER
392 | KV6F_VEHICLE_NUMBER
393 | KV6F_BLOCK_CODE
394 | KV6F_WHEELCHAIR_ACCESSIBLE
395 | KV6F_NUMBER_OF_COACHES),
396 // KV6T_ARRIVAL
397 static_cast<Kv6Field>(
398 KV6F_DATA_OWNER_CODE
399 | KV6F_LINE_PLANNING_NUMBER
400 | KV6F_OPERATING_DAY
401 | KV6F_JOURNEY_NUMBER
402 | KV6F_REINFORCEMENT_NUMBER
403 | KV6F_USER_STOP_CODE
404 | KV6F_PASSAGE_SEQUENCE_NUMBER
405 | KV6F_TIMESTAMP
406 | KV6F_SOURCE
407 | KV6F_VEHICLE_NUMBER
408 | KV6F_PUNCTUALITY),
409 // KV6T_ON_STOP
410 static_cast<Kv6Field>(
411 KV6F_DATA_OWNER_CODE
412 | KV6F_LINE_PLANNING_NUMBER
413 | KV6F_OPERATING_DAY
414 | KV6F_JOURNEY_NUMBER
415 | KV6F_REINFORCEMENT_NUMBER
416 | KV6F_USER_STOP_CODE
417 | KV6F_PASSAGE_SEQUENCE_NUMBER
418 | KV6F_TIMESTAMP
419 | KV6F_SOURCE
420 | KV6F_VEHICLE_NUMBER
421 | KV6F_PUNCTUALITY),
422 // KV6T_DEPARTURE
423 static_cast<Kv6Field>(
424 KV6F_DATA_OWNER_CODE
425 | KV6F_LINE_PLANNING_NUMBER
426 | KV6F_OPERATING_DAY
427 | KV6F_JOURNEY_NUMBER
428 | KV6F_REINFORCEMENT_NUMBER
429 | KV6F_USER_STOP_CODE
430 | KV6F_PASSAGE_SEQUENCE_NUMBER
431 | KV6F_TIMESTAMP
432 | KV6F_SOURCE
433 | KV6F_VEHICLE_NUMBER
434 | KV6F_PUNCTUALITY),
435 // KV6T_ON_ROUTE
436 static_cast<Kv6Field>(
437 KV6F_DATA_OWNER_CODE
438 | KV6F_LINE_PLANNING_NUMBER
439 | KV6F_OPERATING_DAY
440 | KV6F_JOURNEY_NUMBER
441 | KV6F_REINFORCEMENT_NUMBER
442 | KV6F_USER_STOP_CODE
443 | KV6F_PASSAGE_SEQUENCE_NUMBER
444 | KV6F_TIMESTAMP
445 | KV6F_SOURCE
446 | KV6F_VEHICLE_NUMBER
447 | KV6F_PUNCTUALITY
448 | KV6F_RD_X
449 | KV6F_RD_Y),
450 // KV6T_ON_PATH
451 KV6F_NONE,
452 // KV6T_OFF_ROUTE
453 static_cast<Kv6Field>(
454 KV6F_DATA_OWNER_CODE
455 | KV6F_LINE_PLANNING_NUMBER
456 | KV6F_OPERATING_DAY
457 | KV6F_JOURNEY_NUMBER
458 | KV6F_REINFORCEMENT_NUMBER
459 | KV6F_TIMESTAMP
460 | KV6F_SOURCE
461 | KV6F_USER_STOP_CODE
462 | KV6F_PASSAGE_SEQUENCE_NUMBER
463 | KV6F_VEHICLE_NUMBER
464 | KV6F_RD_X
465 | KV6F_RD_Y),
466 // KV6T_END
467 static_cast<Kv6Field>(
468 KV6F_DATA_OWNER_CODE
469 | KV6F_LINE_PLANNING_NUMBER
470 | KV6F_OPERATING_DAY
471 | KV6F_JOURNEY_NUMBER
472 | KV6F_REINFORCEMENT_NUMBER
473 | KV6F_TIMESTAMP
474 | KV6F_SOURCE
475 | KV6F_USER_STOP_CODE
476 | KV6F_PASSAGE_SEQUENCE_NUMBER
477 | KV6F_VEHICLE_NUMBER),
478};
479
480static constexpr Kv6Field KV6T_OPTIONAL_FIELDS[_KV6T_LAST_TYPE + 1] = {
481 // KV6T_UNKNOWN
482 KV6F_NONE,
483 // KV6T_DELAY
484 KV6F_NONE,
485 // KV6T_INIT
486 KV6F_NONE,
487 // KV6T_ARRIVAL
488 static_cast<Kv6Field>(KV6F_RD_X | KV6F_RD_Y),
489 // KV6T_ON_STOP
490 static_cast<Kv6Field>(KV6F_RD_X | KV6F_RD_Y),
491 // KV6T_DEPARTURE
492 static_cast<Kv6Field>(KV6F_RD_X | KV6F_RD_Y),
493 // KV6T_ON_ROUTE
494 KV6F_DISTANCE_SINCE_LAST_USER_STOP,
495 // KV6T_ON_PATH
496 KV6F_NONE,
497 // KV6T_OFF_ROUTE
498 KV6F_NONE,
499 // KV6T_END
500 KV6F_NONE,
501};
502
503struct Kv6Record {
504 Kv6RecordType type = KV6T_UNKNOWN;
505 Kv6Field presence = KV6F_NONE;
506 Kv6Field next = KV6F_NONE;
507 std::string data_owner_code;
508 std::string line_planning_number;
509 std::string source;
510 std::string user_stop_code;
511 std::string wheelchair_accessible;
512 Date operating_day;
513 Timestamp timestamp;
514 uint32_t block_code = 0;
515 uint32_t journey_number = 0;
516 uint32_t vehicle_number = 0;
517 int32_t rd_x = 0;
518 int32_t rd_y = 0;
519 // The TMI8 specification is unclear: this field
520 // might actually be called distancesincelaststop
521 uint32_t distance_since_last_user_stop = 0;
522 uint16_t passage_sequence_number = 0;
523 int16_t punctuality = 0;
524 uint8_t number_of_coaches = 0;
525 uint8_t reinforcement_number = 0;
526
527 void markPresent(Kv6Field field) {
528 presence = static_cast<Kv6Field>(presence | field);
529 }
530
531 void removeUnsupportedFields() {
532 Kv6Field required_fields = KV6T_REQUIRED_FIELDS[type];
533 Kv6Field optional_fields = KV6T_OPTIONAL_FIELDS[type];
534 Kv6Field supported_fields = static_cast<Kv6Field>(required_fields | optional_fields);
535 presence = static_cast<Kv6Field>(presence & supported_fields);
536 }
537
538 bool valid() {
539 Kv6Field required_fields = KV6T_REQUIRED_FIELDS[type];
540 Kv6Field optional_fields = KV6T_OPTIONAL_FIELDS[type];
541 Kv6Field supported_fields = static_cast<Kv6Field>(required_fields | optional_fields);
542
543 Kv6Field required_field_presence = static_cast<Kv6Field>(presence & required_fields);
544 Kv6Field unsupported_field_presence = static_cast<Kv6Field>(presence & ~supported_fields);
545
546 return required_field_presence == required_fields && !unsupported_field_presence;
547 }
548};
549
// Bit flags, one per field of Tmi8VvTmPushInfo.
enum Tmi8VvTmPushInfoField {
  TMI8F_NONE          = 0,
  TMI8F_SUBSCRIBER_ID = 1 << 0,
  TMI8F_VERSION       = 1 << 1,
  TMI8F_DOSSIER_NAME  = 1 << 2,
  TMI8F_TIMESTAMP     = 1 << 3,
};
557
558struct Tmi8VvTmPushInfo {
559 Tmi8VvTmPushInfoField next = TMI8F_NONE;
560 Tmi8VvTmPushInfoField presence = TMI8F_NONE;
561 std::string subscriber_id;
562 std::string version;
563 std::string dossier_name;
564 Timestamp timestamp;
565 std::vector<Kv6Record> messages;
566
567 void markPresent(Tmi8VvTmPushInfoField field) {
568 presence = static_cast<Tmi8VvTmPushInfoField>(presence | field);
569 }
570
571 bool valid() {
572 const Tmi8VvTmPushInfoField REQUIRED_FIELDS =
573 static_cast<Tmi8VvTmPushInfoField>(
574 TMI8F_SUBSCRIBER_ID
575 | TMI8F_VERSION
576 | TMI8F_DOSSIER_NAME
577 | TMI8F_TIMESTAMP);
578 return (presence & REQUIRED_FIELDS) == REQUIRED_FIELDS;
579 }
580};
581
582static const std::array<std::string_view, _KV6T_LAST_TYPE + 1> KV6_POS_INFO_RECORD_TYPES = {
583 "UNKNOWN", "DELAY", "INIT", "ARRIVAL", "ONSTOP", "DEPARTURE", "ONROUTE", "ONPATH", "OFFROUTE", "END",
584};
585
586std::optional<std::string_view> findKv6PosInfoRecordTypeName(Kv6RecordType type) {
587 if (type > _KV6T_LAST_TYPE)
588 return std::nullopt;
589 return KV6_POS_INFO_RECORD_TYPES[type];
590}
591
592const std::array<std::tuple<std::string_view, Kv6Field>, 17> KV6_POS_INFO_RECORD_FIELDS = {{
593 { "dataownercode", KV6F_DATA_OWNER_CODE },
594 { "lineplanningnumber", KV6F_LINE_PLANNING_NUMBER },
595 { "operatingday", KV6F_OPERATING_DAY },
596 { "journeynumber", KV6F_JOURNEY_NUMBER },
597 { "reinforcementnumber", KV6F_REINFORCEMENT_NUMBER },
598 { "timestamp", KV6F_TIMESTAMP },
599 { "source", KV6F_SOURCE },
600 { "punctuality", KV6F_PUNCTUALITY },
601 { "userstopcode", KV6F_USER_STOP_CODE },
602 { "passagesequencenumber", KV6F_PASSAGE_SEQUENCE_NUMBER },
603 { "vehiclenumber", KV6F_VEHICLE_NUMBER },
604 { "blockcode", KV6F_BLOCK_CODE },
605 { "wheelchairaccessible", KV6F_WHEELCHAIR_ACCESSIBLE },
606 { "numberofcoaches", KV6F_NUMBER_OF_COACHES },
607 { "rd-y", KV6F_RD_Y },
608 { "rd-x", KV6F_RD_X },
609 { "distancesincelastuserstop", KV6F_DISTANCE_SINCE_LAST_USER_STOP },
610}};
611
612// Returns the maximum amount of digits such that it is guaranteed that
613// a corresponding amount of repeated 9's can be represented by the type.
614template<std::integral T>
615constexpr size_t maxDigits() {
616 size_t digits = 0;
617 for (T x = std::numeric_limits<T>::max(); x != 0; x /= 10) digits++;
618 return digits - 1;
619}
620
621template<size_t MaxDigits, std::unsigned_integral T>
622constexpr bool parseUnsigned(T &out, std::string_view src) {
623 static_assert(MaxDigits <= maxDigits<T>());
624 if (src.size() > MaxDigits) return false;
625 T res = 0;
626 while (src.size() > 0) {
627 if (src[0] < '0' || src[0] > '9') return false;
628 res = static_cast<T>(res * 10 + src[0] - '0');
629 src = src.substr(1);
630 }
631 out = res;
632 return true;
633}
634
635template<size_t MaxDigits, std::signed_integral T>
636constexpr bool parseSigned(T &out, std::string_view src) {
637 static_assert(MaxDigits <= maxDigits<T>());
638 if (src.size() == 0) return false;
639 bool negative = src[0] == '-';
640 if (negative) src = src.substr(1);
641 if (src.size() > MaxDigits) return false;
642 T res = 0;
643 while (src.size() > 0) {
644 if (src[0] < '0' || src[0] > '9') return false;
645 res = static_cast<T>(res * 10 + src[0] - '0');
646 src = src.substr(1);
647 }
648 out = negative ? -res : res;
649 return true;
650}
651
// One XML namespace declaration, linked to the declarations that were in
// scope before it.
struct Xmlns {
  // The previously declared namespace, or null at the end of the chain.
  const Xmlns *next;
  // The declared prefix; empty for a default namespace declaration.
  std::string_view prefix;
  // The namespace URL the prefix stands for.
  std::string_view url;
};
657
658std::optional<std::string_view> resolve(std::string_view prefix, const Xmlns *nss) {
659 while (nss)
660 if (nss->prefix == prefix)
661 return nss->url;
662 else
663 nss = nss->next;
664 return std::nullopt;
665}
666
667template<typename T>
668void withXmlnss(const rapidxml::xml_attribute<> *attr, const Xmlns *nss, const T &fn) {
669 while (attr) {
670 std::string_view name(attr->name(), attr->name_size());
671 if (name.starts_with("xmlns")) {
672 if (name.size() == 5) { // just xmlns
673 Xmlns ns0 = {
674 .next = nss,
675 .url = std::string_view(attr->value(), attr->value_size()),
676 };
677 withXmlnss(attr->next_attribute(), &ns0, fn);
678 return;
679 } else if (name.size() > 6 && name[5] == ':') { // xmlns:<something>
680 Xmlns ns0 = {
681 .next = nss,
682 .prefix = name.substr(6),
683 .url = std::string_view(attr->value(), attr->value_size()),
684 };
685 withXmlnss(attr->next_attribute(), &ns0, fn);
686 return;
687 }
688 }
689 attr = attr->next_attribute();
690 }
691 fn(nss);
692}
693
694template<typename T>
695void ifResolvable(const rapidxml::xml_node<> &node, const Xmlns *nss, const T &fn) {
696 std::string_view name(node.name(), node.name_size());
697 std::string_view ns;
698 size_t colon = name.find(':');
699
700 if (colon != std::string_view::npos) {
701 if (colon >= name.size() - 1) // last character
702 return;
703 ns = name.substr(0, colon);
704 name = name.substr(colon + 1);
705 }
706
707 withXmlnss(node.first_attribute(), nss, [&](const Xmlns *nss) {
708 std::optional<std::string_view> ns_url = resolve(ns, nss);
709 if (!ns_url && !ns.empty()) return;
710 if (!ns_url) fn(std::string_view(), name, nss);
711 else fn(*ns_url, name, nss);
712 });
713}
714
715template<typename T>
716void ifTmi8Element(const rapidxml::xml_node<> &node, const Xmlns *nss, const T &fn) {
717 ifResolvable(node, nss, [&](std::string_view ns_url, std::string_view name, const Xmlns *nss) {
718 if (node.type() == rapidxml::node_element && (ns_url.empty() || ns_url == TMI8_XML_NS)) fn(name, nss);
719 });
720}
721
722bool onlyTextElement(const rapidxml::xml_node<> &node) {
723 return node.type() == rapidxml::node_element
724 && node.first_node()
725 && node.first_node() == node.last_node()
726 && node.first_node()->type() == rapidxml::node_data;
727}
728
729std::string_view getValue(const rapidxml::xml_node<> &node) {
730 return std::string_view(node.value(), node.value_size());
731}
732
// Copies `val` into `into` when it is at most `max_len` bytes long.
//
// Returns true on success. When `val` is too long, returns false and leaves
// `into` untouched.
bool parseStringValue(std::string &into, size_t max_len, std::string_view val) {
  const bool fits = val.size() <= max_len;
  if (fits)
    into.assign(val);
  return fits;
}
739
740struct Kv6Parser {
741 std::stringstream &errs;
742 std::stringstream &warns;
743
744 void error(std::string_view msg) {
745 errs << msg << '\n';
746 }
747
748 void warn(std::string_view msg) {
749 warns << msg << '\n';
750 }
751
752#define PERRASSERT(msg, ...) do { if (!(__VA_ARGS__)) { error(msg); return; } } while (false)
753#define PWARNASSERT(msg, ...) do { if (!(__VA_ARGS__)) { warn(msg); return; } } while (false)
754
755 std::optional<Kv6Record> parseKv6PosInfoRecord(Kv6RecordType type, const rapidxml::xml_node<> &node, const Xmlns *nss) {
756 Kv6Record fields = { .type = type };
757 for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) {
758 ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) {
759 for (const auto &[fname, field] : KV6_POS_INFO_RECORD_FIELDS) {
760 if (field == KV6F_NONE)
761 continue;
762 if (fname == name) {
763 PWARNASSERT("Expected KV6 record field element to only contain data",
764 onlyTextElement(*child));
765 std::string_view childval = getValue(*child);
766 switch (field) {
767 case KV6F_DATA_OWNER_CODE:
768 PWARNASSERT("Invalid value for dataownercode",
769 parseStringValue(fields.data_owner_code, 10, childval));
770 break;
771 case KV6F_LINE_PLANNING_NUMBER:
772 PWARNASSERT("Invalid value for lineplanningnumber",
773 parseStringValue(fields.line_planning_number, 10, childval));
774 break;
775 case KV6F_OPERATING_DAY:
776 PWARNASSERT("Invalid value for operatatingday: not a valid date",
777 Date::parse(fields.operating_day, childval));
778 break;
779 case KV6F_JOURNEY_NUMBER:
780 PWARNASSERT("Invalid value for journeynumber:"
781 " not a valid unsigned number with at most six digits",
782 parseUnsigned<6>(fields.journey_number, childval));
783 break;
784 case KV6F_REINFORCEMENT_NUMBER:
785 PWARNASSERT("Invalid value for reinforcementnumber:"
786 " not a valid unsigned number with at most two digits",
787 parseUnsigned<2>(fields.reinforcement_number, childval));
788 break;
789 case KV6F_TIMESTAMP:
790 PWARNASSERT("Invalid value for timestamp: not a valid timestamp",
791 Timestamp::parse(fields.timestamp, childval));
792 break;
793 case KV6F_SOURCE:
794 PWARNASSERT("Invalid value for source:"
795 " not a valid string of at most 10 bytes",
796 parseStringValue(fields.source, 10, childval));
797 break;
798 case KV6F_PUNCTUALITY:
799 PWARNASSERT("Invalid value for punctuality:"
800 " not a valid signed number with at most four digits",
801 parseSigned<4>(fields.punctuality, childval));
802 break;
803 case KV6F_USER_STOP_CODE:
804 PWARNASSERT("Invalid value for userstopcode:"
805 " not a valid string of at most 10 bytes",
806 parseStringValue(fields.user_stop_code, 10, childval));
807 break;
808 case KV6F_PASSAGE_SEQUENCE_NUMBER:
809 PWARNASSERT("Invalid value for passagesequencenumber:"
810 " not a valid unsigned number with at most four digits",
811 parseUnsigned<4>(fields.passage_sequence_number, childval));
812 break;
813 case KV6F_VEHICLE_NUMBER:
814 PWARNASSERT("Invalid value for vehiclenumber:"
815 " not a valid unsigned number with at most six digits",
816 parseUnsigned<6>(fields.vehicle_number, childval));
817 break;
818 case KV6F_BLOCK_CODE:
819 PWARNASSERT("Invalid value for blockcode:"
820 " not a valid unsigned number with at most eight digits",
821 parseUnsigned<8>(fields.block_code, childval));
822 break;
823 case KV6F_WHEELCHAIR_ACCESSIBLE:
824 PWARNASSERT("Invalid value for wheelchairaccessible:"
825 " not a valid value for wheelchair accessibility",
826 childval == "ACCESSIBLE"
827 || childval == "NOTACCESSIBLE"
828 || childval == "UNKNOWN");
829 fields.wheelchair_accessible = childval;
830 break;
831 case KV6F_NUMBER_OF_COACHES:
832 PWARNASSERT("Invalid for numberofcoaches:"
833 " not a valid unsigned number with at most two digits",
834 parseUnsigned<2>(fields.number_of_coaches, childval));
835 break;
836 case KV6F_RD_X:
837 PWARNASSERT("Invalid value for rd-x:"
838 " not a valid signed number with at most six digits",
839 parseSigned<6>(fields.rd_x, childval));
840 break;
841 case KV6F_RD_Y:
842 PWARNASSERT("Invalid value for rd-y:"
843 " not a valid signed number with at most six digits",
844 parseSigned<6>(fields.rd_y, childval));
845 break;
846 case KV6F_DISTANCE_SINCE_LAST_USER_STOP:
847 PWARNASSERT("Invalid value for distancesincelastuserstop:"
848 " not a valid unsigned number with at most five digits",
849 parseUnsigned<5>(fields.distance_since_last_user_stop, childval));
850 break;
851 case KV6F_NONE:
852 error("NONE field type case should be unreachable in parseKv6PosInfoRecord");
853 return;
854 }
855 fields.markPresent(field);
856 break;
857 }
858 }
859 });
860 }
861
862 fields.removeUnsupportedFields();
863
864 if (!fields.valid())
865 return std::nullopt;
866 return fields;
867 }
868
869 std::vector<Kv6Record> parseKv6PosInfo(const rapidxml::xml_node<> &node, const Xmlns *nss) {
870 std::vector<Kv6Record> records;
871 for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) {
872 ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) {
873 for (auto type = _KV6T_FIRST_TYPE;
874 type != _KV6T_LAST_TYPE;
875 type = static_cast<Kv6RecordType>(type + 1)) {
876 if (type == KV6T_UNKNOWN)
877 continue;
878 if (KV6_POS_INFO_RECORD_TYPES[type] == name) {
879 auto record = parseKv6PosInfoRecord(type, *child, nss);
880 if (record) {
881 records.push_back(*record);
882 }
883 }
884 }
885 });
886 }
887 return records;
888 }
889
890 std::optional<Tmi8VvTmPushInfo> parseVvTmPush(const rapidxml::xml_node<> &node, const Xmlns *nss) {
891 Tmi8VvTmPushInfo info;
892 for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) {
893 ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) {
894 if (name == "Timestamp") {
895 PERRASSERT("Invalid value for Timestamp: Bad format", onlyTextElement(*child));
896 PERRASSERT("Invalid value for Timestamp: Invalid timestamp", Timestamp::parse(info.timestamp, getValue(*child)));
897 info.markPresent(TMI8F_TIMESTAMP);
898 } else if (name == "SubscriberID") {
899 PERRASSERT("Invalid value for SubscriberID: Bad format", onlyTextElement(*child));
900 info.subscriber_id = getValue(*child);
901 info.markPresent(TMI8F_SUBSCRIBER_ID);
902 } else if (name == "Version") {
903 PERRASSERT("Invalid value for Version: Bad format", onlyTextElement(*child));
904 info.version = getValue(*child);
905 info.markPresent(TMI8F_VERSION);
906 } else if (name == "DossierName") {
907 PERRASSERT("Invalid value for DossierName: Bad format", onlyTextElement(*child));
908 info.dossier_name = getValue(*child);
909 info.markPresent(TMI8F_DOSSIER_NAME);
910 } else if (name == "KV6posinfo") {
911 info.messages = parseKv6PosInfo(*child, nss);
912 }
913 });
914 }
915
916 if (!info.valid())
917 return std::nullopt;
918 return info;
919 }
920
921 std::optional<Tmi8VvTmPushInfo> parse(const rapidxml::xml_document<> &doc) {
922 std::optional<Tmi8VvTmPushInfo> msg;
923 for (const rapidxml::xml_node<> *node = doc.first_node(); node; node = node->next_sibling()) {
924 ifTmi8Element(*node, nullptr /* nss */, [&](std::string_view name, const Xmlns *nss) {
925 if (name == "VV_TM_PUSH") {
926 if (msg) {
927 error("Duplicated VV_TM_PUSH");
928 return;
929 }
930 msg = parseVvTmPush(*node, nss);
931 if (!msg) {
932 error("Invalid VV_TM_PUSH");
933 }
934 }
935 });
936 }
937 if (!msg)
938 error("Expected to find VV_TM_PUSH");
939 return msg;
940 }
941};
942
943std::optional<Tmi8VvTmPushInfo> parseXml(const rapidxml::xml_document<> &doc, std::stringstream &errs, std::stringstream &warns) {
944 Kv6Parser parser = { errs, warns };
945 return parser.parse(doc);
946}
947
948struct Metrics {
949 prometheus::Counter &messages_counter_ok;
950 prometheus::Counter &messages_counter_error;
951 prometheus::Counter &messages_counter_warning;
952 prometheus::Counter &rows_written_counter;
953 prometheus::Histogram &records_hist;
954 prometheus::Histogram &message_parse_hist;
955 prometheus::Histogram &payload_size_hist;
956
957 using BucketBoundaries = prometheus::Histogram::BucketBoundaries;
958
959 enum class ParseStatus {
960 OK,
961 WARNING,
962 ERROR,
963 };
964
965 Metrics(std::shared_ptr<prometheus::Registry> registry) :
966 Metrics(registry, prometheus::BuildCounter()
967 .Name("kv6_vv_tm_push_messages_total")
968 .Help("Number of KV6 VV_TM_PUSH messages received")
969 .Register(*registry))
970 {}
971
972 void addMeasurement(std::chrono::duration<double> took_secs, size_t payload_size, size_t records, ParseStatus parsed) {
973 double millis = took_secs.count() * 1000.0;
974
975 if (parsed == ParseStatus::OK) messages_counter_ok.Increment();
976 else if (parsed == ParseStatus::WARNING) messages_counter_warning.Increment();
977 else if (parsed == ParseStatus::ERROR) messages_counter_error.Increment();
978 records_hist.Observe(static_cast<double>(records));
979 message_parse_hist.Observe(millis);
980 payload_size_hist.Observe(static_cast<double>(payload_size));
981 }
982
983 void rowsWritten(int64_t rows) {
984 rows_written_counter.Increment(static_cast<double>(rows));
985 }
986
987 private:
988 Metrics(std::shared_ptr<prometheus::Registry> registry,
989 prometheus::Family<prometheus::Counter> &messages_counter) :
990 messages_counter_ok(messages_counter
991 .Add({{ "status", "ok" }})),
992 messages_counter_error(messages_counter
993 .Add({{ "status", "error" }})),
994 messages_counter_warning(messages_counter
995 .Add({{ "status", "warning" }})),
996 rows_written_counter(prometheus::BuildCounter()
997 .Name("kv6_vv_tm_push_records_written")
998 .Help("Numer of VV_TM_PUSH records written to disk")
999 .Register(*registry)
1000 .Add({})),
1001 records_hist(prometheus::BuildHistogram()
1002 .Name("kv6_vv_tm_push_records_amount")
1003 .Help("Number of KV6 VV_TM_PUSH records")
1004 .Register(*registry)
1005 .Add({}, BucketBoundaries{ 5.0, 10.0, 20.0, 50.0, 100.0, 250.0, 500.0 })),
1006 message_parse_hist(prometheus::BuildHistogram()
1007 .Name("kv6_vv_tm_push_message_parse_millis")
1008 .Help("Milliseconds taken to parse KV6 VV_TM_PUSH messages")
1009 .Register(*registry)
1010 .Add({}, BucketBoundaries{ 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 100.0, 1000.0, 2000.0 })),
1011 payload_size_hist(prometheus::BuildHistogram()
1012 .Name("kv6_payload_size")
1013 .Help("Sizes of KV6 ZeroMQ message payloads")
1014 .Register(*registry)
1015 .Add({}, BucketBoundaries{ 500.0, 1000.0, 2500.0, 5000.0, 10000.0, 25000.0, 50000.0 }))
1016 {}
1017};
1018
1019// Note: it *must* hold that decompressed[size] == 0
1020std::optional<Tmi8VvTmPushInfo> parseMsg(char *decompressed, size_t size, Metrics &metrics, std::stringstream &errs, std::stringstream &warns) {
1021 auto start = std::chrono::steady_clock::now();
1022
1023 std::optional<Tmi8VvTmPushInfo> info;
1024
1025 if (decompressed[size] != 0) {
1026 errs << "Not parsing: missing null terminator" << '\n';
1027 } else {
1028 rapidxml::xml_document<> doc;
1029 constexpr int PARSE_FLAGS = rapidxml::parse_trim_whitespace
1030 | rapidxml::parse_no_string_terminators
1031 | rapidxml::parse_validate_closing_tags;
1032
1033 try {
1034 doc.parse<PARSE_FLAGS>(decompressed);
1035 info = parseXml(doc, errs, warns);
1036 } catch (const rapidxml::parse_error &err) {
1037 errs << "XML parsing failed" << '\n';
1038 }
1039 }
1040
1041 auto end = std::chrono::steady_clock::now();
1042 std::chrono::duration<double> took = end - start;
1043
1044 if (info)
1045 if (warns.view().empty())
1046 metrics.addMeasurement(took, size, info->messages.size(), Metrics::ParseStatus::OK);
1047 else
1048 metrics.addMeasurement(took, size, info->messages.size(), Metrics::ParseStatus::WARNING);
1049 else
1050 metrics.addMeasurement(took, size, 0, Metrics::ParseStatus::ERROR);
1051
1052 return info;
1053}
1054
// Set to a non-zero value by onSigIntOrTerm() to request that the main loop
// stops. The flag is written from a signal handler, so it is declared as
// `volatile sig_atomic_t`: the type for which the language guarantees that
// such a write is well-defined.
volatile sig_atomic_t terminate = 0;

// Signal handler for SIGINT/SIGTERM: only raises the termination flag.
void onSigIntOrTerm(int /* signum */) {
  terminate = 1;
}
1060
1061arrow::Result<std::shared_ptr<arrow::Table>> getTable(const std::vector<Kv6Record> &messages, size_t &rows_written) {
1062 ParquetBuilder builder;
1063
1064 for (const auto &msg : messages) {
1065 Kv6Field present = msg.presence;
1066 Kv6Field required = KV6T_REQUIRED_FIELDS[msg.type];
1067 Kv6Field optional = KV6T_OPTIONAL_FIELDS[msg.type];
1068 if ((~msg.presence & required) != 0) {
1069 std::cout << "Invalid message: not all required fields present; skipping" << std::endl;
1070 continue;
1071 }
1072 Kv6Field used = static_cast<Kv6Field>(present & (required | optional));
1073 rows_written++;
1074
1075 // RD-X and RD-Y fix: some datatypes have these fields marked as required, but still give option
1076 // of not providing these fields by setting them to -1. We want this normalized, where these
1077 // fields are instead simply marked as not present.
1078 if ((used & KV6F_RD_X) && msg.rd_x == -1)
1079 used = static_cast<Kv6Field>(used & ~KV6F_RD_X);
1080 if ((used & KV6F_RD_Y) && msg.rd_y == -1)
1081 used = static_cast<Kv6Field>(used & ~KV6F_RD_Y);
1082
1083 ARROW_RETURN_NOT_OK(builder.types.Append(*findKv6PosInfoRecordTypeName(msg.type)));
1084 ARROW_RETURN_NOT_OK(used & KV6F_DATA_OWNER_CODE
1085 ? builder.data_owner_codes.Append(msg.data_owner_code)
1086 : builder.data_owner_codes.AppendNull());
1087 ARROW_RETURN_NOT_OK(used & KV6F_LINE_PLANNING_NUMBER
1088 ? builder.line_planning_numbers.Append(msg.line_planning_number)
1089 : builder.line_planning_numbers.AppendNull());
1090 ARROW_RETURN_NOT_OK(used & KV6F_OPERATING_DAY
1091 ? builder.operating_days.Append(static_cast<int32_t>(msg.operating_day.toUnixDays().count()))
1092 : builder.operating_days.AppendNull());
1093 ARROW_RETURN_NOT_OK(used & KV6F_JOURNEY_NUMBER
1094 ? builder.journey_numbers.Append(msg.journey_number)
1095 : builder.journey_numbers.AppendNull());
1096 ARROW_RETURN_NOT_OK(used & KV6F_REINFORCEMENT_NUMBER
1097 ? builder.reinforcement_numbers.Append(msg.reinforcement_number)
1098 : builder.reinforcement_numbers.AppendNull());
1099 ARROW_RETURN_NOT_OK(used & KV6F_TIMESTAMP
1100 ? builder.timestamps.Append(msg.timestamp.toUnixSeconds().count())
1101 : builder.timestamps.AppendNull());
1102 ARROW_RETURN_NOT_OK(used & KV6F_SOURCE
1103 ? builder.sources.Append(msg.source)
1104 : builder.sources.AppendNull());
1105 ARROW_RETURN_NOT_OK(used & KV6F_PUNCTUALITY
1106 ? builder.punctualities.Append(msg.punctuality)
1107 : builder.punctualities.AppendNull());
1108 ARROW_RETURN_NOT_OK(used & KV6F_USER_STOP_CODE
1109 ? builder.user_stop_codes.Append(msg.user_stop_code)
1110 : builder.user_stop_codes.AppendNull());
1111 ARROW_RETURN_NOT_OK(used & KV6F_PASSAGE_SEQUENCE_NUMBER
1112 ? builder.passage_sequence_numbers.Append(msg.passage_sequence_number)
1113 : builder.passage_sequence_numbers.AppendNull());
1114 ARROW_RETURN_NOT_OK(used & KV6F_VEHICLE_NUMBER
1115 ? builder.vehicle_numbers.Append(msg.vehicle_number)
1116 : builder.vehicle_numbers.AppendNull());
1117 ARROW_RETURN_NOT_OK(used & KV6F_BLOCK_CODE
1118 ? builder.block_codes.Append(msg.block_code)
1119 : builder.block_codes.AppendNull());
1120 ARROW_RETURN_NOT_OK(used & KV6F_WHEELCHAIR_ACCESSIBLE
1121 ? builder.wheelchair_accessibles.Append(msg.wheelchair_accessible)
1122 : builder.wheelchair_accessibles.AppendNull());
1123 ARROW_RETURN_NOT_OK(used & KV6F_NUMBER_OF_COACHES
1124 ? builder.number_of_coaches.Append(msg.number_of_coaches)
1125 : builder.number_of_coaches.AppendNull());
1126 ARROW_RETURN_NOT_OK(used & KV6F_RD_Y
1127 ? builder.rd_ys.Append(msg.rd_y)
1128 : builder.rd_ys.AppendNull());
1129 ARROW_RETURN_NOT_OK(used & KV6F_RD_X
1130 ? builder.rd_xs.Append(msg.rd_x)
1131 : builder.rd_xs.AppendNull());
1132 ARROW_RETURN_NOT_OK(used & KV6F_DISTANCE_SINCE_LAST_USER_STOP
1133 ? builder.distance_since_last_user_stops.Append(msg.distance_since_last_user_stop)
1134 : builder.distance_since_last_user_stops.AppendNull());
1135 }
1136
1137 return builder.getTable();
1138}
1139
1140std::tuple<int64_t, int64_t> getMinMaxTimestamp(const std::vector<Kv6Record> &messages) {
1141 if (messages.size() == 0)
1142 return { 0, 0 };
1143 int64_t min = std::numeric_limits<int64_t>::max();
1144 int64_t max = 0;
1145 for (const auto &message : messages) {
1146 if (~message.presence & KV6F_TIMESTAMP)
1147 continue;
1148 int64_t seconds = message.timestamp.toUnixSeconds().count();
1149 if (seconds < min)
1150 min = seconds;
1151 if (seconds > max)
1152 max = seconds;
1153 }
1154 if (min == std::numeric_limits<decltype(min)>::max())
1155 return { 0, 0 }; // this is stupid
1156 return { min, max };
1157}
1158
1159arrow::Status writeParquet(const std::vector<Kv6Record> &messages, Metrics &metrics) {
1160 size_t rows_written = 0;
1161 ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Table> table, getTable(messages, rows_written));
1162
1163 auto timestamp = std::chrono::round<std::chrono::seconds>(std::chrono::utc_clock::now());
1164 std::string filename = std::format("oeuf-{:%FT%T%Ez}.parquet", timestamp);
1165 ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*table, filename));
1166 std::cout << "Wrote Parquet file " << filename << std::endl;
1167
1168 auto [min_timestamp, max_timestamp] = getMinMaxTimestamp(messages);
1169 std::ofstream metaf(filename + ".meta.json.part", std::ios::binary);
1170 nlohmann::json meta{
1171 { "min_timestamp", min_timestamp },
1172 { "max_timestamp", max_timestamp },
1173 { "rows_written", rows_written },
1174 };
1175 metaf << meta;
1176 metaf.close();
1177 std::filesystem::rename(filename + ".meta.json.part", filename + ".meta.json");
1178
1179 metrics.rowsWritten(rows_written);
1180
1181 return arrow::Status::OK();
1182}
1183
// Point in time on the monotonic clock; used to track when output was last
// written.
using SteadyTime = std::chrono::steady_clock::time_point;
1185
1186std::string dumpFailedMsg(std::string_view txt, std::string_view errs, std::string_view warns) {
1187 auto timestamp = std::chrono::round<std::chrono::seconds>(std::chrono::utc_clock::now());
1188 std::string filename = std::format("oeuf-error-{:%FT%T%Ez}.txt", timestamp);
1189 std::ofstream dumpf(filename, std::ios::binary);
1190 dumpf << "======= ERROR MESSAGES ========" << std::endl;
1191 dumpf << errs;
1192 dumpf << "======= WARNING MESSAGES ======" << std::endl;
1193 dumpf << warns;
1194 dumpf << "======= RECEIVED MESSAGE ======" << std::endl;
1195 dumpf << txt << std::endl;
1196 dumpf.close();
1197 return filename;
1198}
1199
1200void handleMsg(RawMessage &msg, Metrics &metrics, SteadyTime &last_output, std::vector<Kv6Record> &msg_buf) {
1201 unsigned int decompressed_size = 0;
1202 if (msg.getBodySize() > std::numeric_limits<unsigned int>::max())
1203 std::cout << "parseMsg failed due to too large message" << std::endl;
1204 char *decompressed = decompress(msg.getBody(), static_cast<unsigned int>(msg.getBodySize()), decompressed_size);
1205
1206 std::stringstream errs;
1207 std::stringstream warns;
1208 // We know that decompressed[decompressed_size] == 0 because decompress() ensures this.
1209 auto parsed_msg = parseMsg(decompressed, decompressed_size, metrics, errs, warns);
1210 if (parsed_msg) {
1211 const Tmi8VvTmPushInfo &info = *parsed_msg;
1212 auto new_msgs_it = info.messages.begin();
1213 while (new_msgs_it != info.messages.end()) {
1214 size_t remaining_space = MAX_PARQUET_CHUNK - msg_buf.size();
1215 size_t new_msgs_left = info.messages.end() - new_msgs_it;
1216 auto new_msgs_start = new_msgs_it;
1217 auto new_msgs_end = new_msgs_start + std::min(remaining_space, new_msgs_left);
1218 new_msgs_it = new_msgs_end;
1219 msg_buf.insert(msg_buf.end(), new_msgs_start, new_msgs_end);
1220
1221 bool time_expired = std::chrono::steady_clock::now() - last_output > std::chrono::minutes(5);
1222 if (msg_buf.size() >= MAX_PARQUET_CHUNK || (new_msgs_it == info.messages.end() && time_expired)) {
1223 arrow::Status status = writeParquet(msg_buf, metrics);
1224 if (!status.ok())
1225 std::cout << "Writing Parquet file failed: " << status << std::endl;
1226 msg_buf.clear();
1227 last_output = std::chrono::steady_clock::now();
1228 }
1229 }
1230 if (!errs.view().empty() || !warns.view().empty()) {
1231 std::filesystem::path dump_file = dumpFailedMsg(std::string_view(decompressed, decompressed_size), errs.str(), warns.str());
1232 std::cout << "parseMsg finished with warnings: details dumped to " << dump_file << std::endl;
1233 }
1234 } else {
1235 std::filesystem::path dump_file = dumpFailedMsg(std::string_view(decompressed, decompressed_size), errs.str(), warns.str());
1236 std::cout << "parseMsg failed: error details dumped to " << dump_file << std::endl;
1237 }
1238 free(decompressed);
1239}
1240
1241int main(int argc, char *argv[]) {
1242 std::cout << "Working directory: " << std::filesystem::current_path() << std::endl;
1243
1244 const char *metrics_addr = getenv("METRICS_ADDR");
1245 if (!metrics_addr || strlen(metrics_addr) == 0) {
1246 std::cout << "Error: no METRICS_ADDR set!" << std::endl;
1247 exit(EXIT_FAILURE);
1248 }
1249 prometheus::Exposer exposer{metrics_addr};
1250
1251 bool prod = false;
1252 const char *prod_env = getenv("NDOV_PRODUCTION");
1253 if (prod_env && strcmp(prod_env, "true") == 0) prod = true;
1254
1255 void *zmq_context = zmq_ctx_new();
1256 void *zmq_subscriber = zmq_socket(zmq_context, ZMQ_SUB);
1257 int rc = zmq_connect(zmq_subscriber, prod ? "tcp://pubsub.ndovloket.nl:7658" : "tcp://pubsub.besteffort.ndovloket.nl:7658");
1258 assert(rc == 0);
1259
1260 const char *topic = "/CXX/KV6posinfo";
1261 rc = zmq_setsockopt(zmq_subscriber, ZMQ_SUBSCRIBE, topic, strlen(topic));
1262 assert(rc == 0);
1263
1264 signal(SIGINT, onSigIntOrTerm);
1265 signal(SIGTERM, onSigIntOrTerm);
1266
1267 SteadyTime last_output = std::chrono::steady_clock::now();
1268
1269 auto registry = std::make_shared<prometheus::Registry>();
1270 Metrics metrics(registry);
1271 exposer.RegisterCollectable(registry);
1272
1273 std::vector<Kv6Record> msg_buf;
1274 while (!terminate) {
1275 std::optional<RawMessage> msg = recvMsg(zmq_subscriber);
1276 if (!msg) {
1277 if (!terminate)
1278 perror("recvMsg");
1279 continue;
1280 }
1281 handleMsg(*msg, metrics, last_output, msg_buf);
1282 }
1283
1284 std::cout << "Terminating" << std::endl;
1285 if (msg_buf.size() > 0) {
1286 arrow::Status status = writeParquet(msg_buf, metrics);
1287 if (!status.ok()) std::cout << "Writing final Parquet file failed: " << status << std::endl;
1288 else std::cout << "Final data written" << std::endl;
1289 msg_buf.clear();
1290 }
1291
1292 if (zmq_close(zmq_subscriber))
1293 perror("zmq_close");
1294 if (zmq_ctx_destroy(zmq_context))
1295 perror("zmq_ctx_destroy");
1296
1297 std::cout << "Bye" << std::endl;
1298
1299 return 0;
1300}