From 17a3ea880402338420699e03bcb24181e4ff3924 Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Thu, 2 May 2024 20:27:40 +0200 Subject: Initial commit Based on dc4ba6a --- .envrc | 1 + .gitattributes | 1 + .gitignore | 7 + LICENSE | 287 ++++++ README.txt | 13 + all-deterministic.sh | 3 + build-all.sh | 3 + flake.lock | 79 ++ flake.nix | 137 +++ lib/libtmi8/.envrc | 3 + lib/libtmi8/.gitignore | 3 + lib/libtmi8/Makefile | 41 + lib/libtmi8/flake.lock | 58 ++ lib/libtmi8/flake.nix | 42 + lib/libtmi8/include/tmi8/kv1_index.hpp | 135 +++ lib/libtmi8/include/tmi8/kv1_lexer.hpp | 46 + lib/libtmi8/include/tmi8/kv1_parser.hpp | 87 ++ lib/libtmi8/include/tmi8/kv1_types.hpp | 1528 ++++++++++++++++++++++++++++++ lib/libtmi8/include/tmi8/kv6_parquet.hpp | 46 + lib/libtmi8/src/kv1_index.cpp | 461 +++++++++ lib/libtmi8/src/kv1_lexer.cpp | 152 +++ lib/libtmi8/src/kv1_parser.cpp | 1258 ++++++++++++++++++++++++ lib/libtmi8/src/kv1_types.cpp | 773 +++++++++++++++ lib/libtmi8/src/kv6_parquet.cpp | 102 ++ module/default.nix | 118 +++ script/archiver/default.nix | 15 + script/archiver/oeuf-archiver.sh | 31 + script/synckv6/default.nix | 15 + script/synckv6/oeuf-synckv6.sh | 43 + src/augmentkv6/.envrc | 2 + src/augmentkv6/Makefile | 21 + src/augmentkv6/main.cpp | 510 ++++++++++ src/bundleparquet/.envrc | 2 + src/bundleparquet/Makefile | 21 + src/bundleparquet/main.cpp | 213 +++++ src/bundleparquet/spliturl.cpp | 203 ++++ src/bundleparquet/spliturl.hpp | 11 + src/filterkv6/.envrc | 2 + src/filterkv6/Makefile | 21 + src/filterkv6/main.cpp | 106 +++ src/querykv1/.envrc | 2 + src/querykv1/.gitignore | 1 + src/querykv1/Makefile | 28 + src/querykv1/cliopts.cpp | 456 +++++++++ src/querykv1/cliopts.hpp | 35 + src/querykv1/daterange.cpp | 91 ++ src/querykv1/daterange.hpp | 118 +++ src/querykv1/grammar.abnf | 44 + src/querykv1/grammar.ebnf | 47 + src/querykv1/grammar.ebnf.bak | 23 + src/querykv1/joparoute.cpp | 102 ++ src/querykv1/joparoute.hpp | 13 + src/querykv1/journeyinfo.cpp | 64 ++ 
src/querykv1/journeyinfo.hpp | 13 + src/querykv1/journeyroute.cpp | 96 ++ src/querykv1/journeyroute.hpp | 13 + src/querykv1/journeys.cpp | 95 ++ src/querykv1/journeys.hpp | 13 + src/querykv1/main.cpp | 198 ++++ src/querykv1/schedule.cpp | 63 ++ src/querykv1/schedule.hpp | 13 + src/recvkv6/.envrc | 2 + src/recvkv6/Makefile | 21 + src/recvkv6/main.cpp | 1300 +++++++++++++++++++++++++ 64 files changed, 9451 insertions(+) create mode 100644 .envrc create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.txt create mode 100755 all-deterministic.sh create mode 100755 build-all.sh create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 lib/libtmi8/.envrc create mode 100644 lib/libtmi8/.gitignore create mode 100644 lib/libtmi8/Makefile create mode 100644 lib/libtmi8/flake.lock create mode 100644 lib/libtmi8/flake.nix create mode 100644 lib/libtmi8/include/tmi8/kv1_index.hpp create mode 100644 lib/libtmi8/include/tmi8/kv1_lexer.hpp create mode 100644 lib/libtmi8/include/tmi8/kv1_parser.hpp create mode 100644 lib/libtmi8/include/tmi8/kv1_types.hpp create mode 100644 lib/libtmi8/include/tmi8/kv6_parquet.hpp create mode 100644 lib/libtmi8/src/kv1_index.cpp create mode 100644 lib/libtmi8/src/kv1_lexer.cpp create mode 100644 lib/libtmi8/src/kv1_parser.cpp create mode 100644 lib/libtmi8/src/kv1_types.cpp create mode 100644 lib/libtmi8/src/kv6_parquet.cpp create mode 100644 module/default.nix create mode 100644 script/archiver/default.nix create mode 100755 script/archiver/oeuf-archiver.sh create mode 100644 script/synckv6/default.nix create mode 100755 script/synckv6/oeuf-synckv6.sh create mode 100644 src/augmentkv6/.envrc create mode 100644 src/augmentkv6/Makefile create mode 100644 src/augmentkv6/main.cpp create mode 100644 src/bundleparquet/.envrc create mode 100644 src/bundleparquet/Makefile create mode 100644 src/bundleparquet/main.cpp create mode 100644 src/bundleparquet/spliturl.cpp create 
mode 100644 src/bundleparquet/spliturl.hpp create mode 100644 src/filterkv6/.envrc create mode 100644 src/filterkv6/Makefile create mode 100644 src/filterkv6/main.cpp create mode 100644 src/querykv1/.envrc create mode 100644 src/querykv1/.gitignore create mode 100644 src/querykv1/Makefile create mode 100644 src/querykv1/cliopts.cpp create mode 100644 src/querykv1/cliopts.hpp create mode 100644 src/querykv1/daterange.cpp create mode 100644 src/querykv1/daterange.hpp create mode 100644 src/querykv1/grammar.abnf create mode 100644 src/querykv1/grammar.ebnf create mode 100644 src/querykv1/grammar.ebnf.bak create mode 100644 src/querykv1/joparoute.cpp create mode 100644 src/querykv1/joparoute.hpp create mode 100644 src/querykv1/journeyinfo.cpp create mode 100644 src/querykv1/journeyinfo.hpp create mode 100644 src/querykv1/journeyroute.cpp create mode 100644 src/querykv1/journeyroute.hpp create mode 100644 src/querykv1/journeys.cpp create mode 100644 src/querykv1/journeys.hpp create mode 100644 src/querykv1/main.cpp create mode 100644 src/querykv1/schedule.cpp create mode 100644 src/querykv1/schedule.hpp create mode 100644 src/recvkv6/.envrc create mode 100644 src/recvkv6/Makefile create mode 100644 src/recvkv6/main.cpp diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..3550a30 --- /dev/null +++ b/.envrc @@ -0,0 +1 @@ +use flake diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d92417a --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.tif filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9d7718b --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.direnv/ +src/augmentkv6/augmentkv6 +src/bundleparquet/bundleparquet +src/filterkv6/filterkv6 +src/querykv1/querykv1 +src/recvkv6/recvkv6 +result* diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4153cd3 --- /dev/null +++ b/LICENSE @@ -0,0 +1,287 @@ + EUROPEAN UNION PUBLIC LICENCE v. 
1.2 + EUPL © the European Union 2007, 2016 + +This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined +below) which is provided under the terms of this Licence. Any use of the Work, +other than as authorised under this Licence is prohibited (to the extent such +use is covered by a right of the copyright holder of the Work). + +The Work is provided under the terms of this Licence when the Licensor (as +defined below) has placed the following notice immediately following the +copyright notice for the Work: + + Licensed under the EUPL + +or has expressed by any other means his willingness to license under the EUPL. + +1. Definitions + +In this Licence, the following terms have the following meaning: + +- ‘The Licence’: this Licence. + +- ‘The Original Work’: the work or software distributed or communicated by the + Licensor under this Licence, available as Source Code and also as Executable + Code as the case may be. + +- ‘Derivative Works’: the works or software that could be created by the + Licensee, based upon the Original Work or modifications thereof. This Licence + does not define the extent of modification or dependence on the Original Work + required in order to classify a work as a Derivative Work; this extent is + determined by copyright law applicable in the country mentioned in Article 15. + +- ‘The Work’: the Original Work or its Derivative Works. + +- ‘The Source Code’: the human-readable form of the Work which is the most + convenient for people to study and modify. + +- ‘The Executable Code’: any code which has generally been compiled and which is + meant to be interpreted by a computer as a program. + +- ‘The Licensor’: the natural or legal person that distributes or communicates + the Work under the Licence. + +- ‘Contributor(s)’: any natural or legal person who modifies the Work under the + Licence, or otherwise contributes to the creation of a Derivative Work. 
+ +- ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of + the Work under the terms of the Licence. + +- ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, + renting, distributing, communicating, transmitting, or otherwise making + available, online or offline, copies of the Work or providing access to its + essential functionalities at the disposal of any other natural or legal + person. + +2. Scope of the rights granted by the Licence + +The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, +sublicensable licence to do the following, for the duration of copyright vested +in the Original Work: + +- use the Work in any circumstance and for all usage, +- reproduce the Work, +- modify the Work, and make Derivative Works based upon the Work, +- communicate to the public, including the right to make available or display + the Work or copies thereof to the public and perform publicly, as the case may + be, the Work, +- distribute the Work or copies thereof, +- lend and rent the Work or copies thereof, +- sublicense rights in the Work or copies thereof. + +Those rights can be exercised on any media, supports and formats, whether now +known or later invented, as far as the applicable law permits so. + +In the countries where moral rights apply, the Licensor waives his right to +exercise his moral right to the extent allowed by law in order to make effective +the licence of the economic rights here above listed. + +The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to +any patents held by the Licensor, to the extent necessary to make use of the +rights granted on the Work under this Licence. + +3. Communication of the Source Code + +The Licensor may provide the Work either in its Source Code form, or as +Executable Code. 
If the Work is provided as Executable Code, the Licensor +provides in addition a machine-readable copy of the Source Code of the Work +along with each copy of the Work that the Licensor distributes or indicates, in +a notice following the copyright notice attached to the Work, a repository where +the Source Code is easily and freely accessible for as long as the Licensor +continues to distribute or communicate the Work. + +4. Limitations on copyright + +Nothing in this Licence is intended to deprive the Licensee of the benefits from +any exception or limitation to the exclusive rights of the rights owners in the +Work, of the exhaustion of those rights or of other applicable limitations +thereto. + +5. Obligations of the Licensee + +The grant of the rights mentioned above is subject to some restrictions and +obligations imposed on the Licensee. Those obligations are the following: + +Attribution right: The Licensee shall keep intact all copyright, patent or +trademarks notices and all notices that refer to the Licence and to the +disclaimer of warranties. The Licensee must include a copy of such notices and a +copy of the Licence with every copy of the Work he/she distributes or +communicates. The Licensee must cause any Derivative Work to carry prominent +notices stating that the Work has been modified and the date of modification. + +Copyleft clause: If the Licensee distributes or communicates copies of the +Original Works or Derivative Works, this Distribution or Communication will be +done under the terms of this Licence or of a later version of this Licence +unless the Original Work is expressly distributed only under this version of the +Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee +(becoming Licensor) cannot offer or impose any additional terms or conditions on +the Work or Derivative Work that alter or restrict the terms of the Licence. 
+ +Compatibility clause: If the Licensee Distributes or Communicates Derivative +Works or copies thereof based upon both the Work and another work licensed under +a Compatible Licence, this Distribution or Communication can be done under the +terms of this Compatible Licence. For the sake of this clause, ‘Compatible +Licence’ refers to the licences listed in the appendix attached to this Licence. +Should the Licensee's obligations under the Compatible Licence conflict with +his/her obligations under this Licence, the obligations of the Compatible +Licence shall prevail. + +Provision of Source Code: When distributing or communicating copies of the Work, +the Licensee will provide a machine-readable copy of the Source Code or indicate +a repository where this Source will be easily and freely available for as long +as the Licensee continues to distribute or communicate the Work. + +Legal Protection: This Licence does not grant permission to use the trade names, +trademarks, service marks, or names of the Licensor, except as required for +reasonable and customary use in describing the origin of the Work and +reproducing the content of the copyright notice. + +6. Chain of Authorship + +The original Licensor warrants that the copyright in the Original Work granted +hereunder is owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each Contributor warrants that the copyright in the modifications he/she brings +to the Work are owned by him/her or licensed to him/her and that he/she has the +power and authority to grant the Licence. + +Each time You accept the Licence, the original Licensor and subsequent +Contributors grant You a licence to their contributions to the Work, under the +terms of this Licence. + +7. Disclaimer of Warranty + +The Work is a work in progress, which is continuously improved by numerous +Contributors. 
It is not a finished work and may therefore contain defects or +‘bugs’ inherent to this type of development. + +For the above reason, the Work is provided under the Licence on an ‘as is’ basis +and without warranties of any kind concerning the Work, including without +limitation merchantability, fitness for a particular purpose, absence of defects +or errors, accuracy, non-infringement of intellectual property rights other than +copyright as stated in Article 6 of this Licence. + +This disclaimer of warranty is an essential part of the Licence and a condition +for the grant of any rights to the Work. + +8. Disclaimer of Liability + +Except in the cases of wilful misconduct or damages directly caused to natural +persons, the Licensor will in no event be liable for any direct or indirect, +material or moral, damages of any kind, arising out of the Licence or of the use +of the Work, including without limitation, damages for loss of goodwill, work +stoppage, computer failure or malfunction, loss of data or any commercial +damage, even if the Licensor has been advised of the possibility of such damage. +However, the Licensor will be liable under statutory product liability laws as +far such laws apply to the Work. + +9. Additional agreements + +While distributing the Work, You may choose to conclude an additional agreement, +defining obligations or services consistent with this Licence. However, if +accepting obligations, You may act only on your own behalf and on your sole +responsibility, not on behalf of the original Licensor or any other Contributor, +and only if You agree to indemnify, defend, and hold each Contributor harmless +for any liability incurred by, or claims asserted against such Contributor by +the fact You have accepted any warranty or additional liability. + +10. 
Acceptance of the Licence + +The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ +placed under the bottom of a window displaying the text of this Licence or by +affirming consent in any other similar way, in accordance with the rules of +applicable law. Clicking on that icon indicates your clear and irrevocable +acceptance of this Licence and all of its terms and conditions. + +Similarly, you irrevocably accept this Licence and all of its terms and +conditions by exercising any rights granted to You by Article 2 of this Licence, +such as the use of the Work, the creation by You of a Derivative Work or the +Distribution or Communication by You of the Work or copies thereof. + +11. Information to the public + +In case of any Distribution or Communication of the Work by means of electronic +communication by You (for example, by offering to download the Work from a +remote location) the distribution channel or media (for example, a website) must +at least provide to the public the information requested by the applicable law +regarding the Licensor, the Licence and the way it may be accessible, concluded, +stored and reproduced by the Licensee. + +12. Termination of the Licence + +The Licence and the rights granted hereunder will terminate automatically upon +any breach by the Licensee of the terms of the Licence. + +Such a termination will not terminate the licences of any person who has +received the Work from the Licensee under the Licence, provided such persons +remain in full compliance with the Licence. + +13. Miscellaneous + +Without prejudice of Article 9 above, the Licence represents the complete +agreement between the Parties as to the Work. + +If any provision of the Licence is invalid or unenforceable under applicable +law, this will not affect the validity or enforceability of the Licence as a +whole. Such provision will be construed or reformed so as necessary to make it +valid and enforceable. 
+ +The European Commission may publish other linguistic versions or new versions of +this Licence or updated versions of the Appendix, so far this is required and +reasonable, without reducing the scope of the rights granted by the Licence. New +versions of the Licence will be published with a unique version number. + +All linguistic versions of this Licence, approved by the European Commission, +have identical value. Parties can take advantage of the linguistic version of +their choice. + +14. Jurisdiction + +Without prejudice to specific agreement between parties, + +- any litigation resulting from the interpretation of this License, arising + between the European Union institutions, bodies, offices or agencies, as a + Licensor, and any Licensee, will be subject to the jurisdiction of the Court + of Justice of the European Union, as laid down in article 272 of the Treaty on + the Functioning of the European Union, + +- any litigation arising between other parties and resulting from the + interpretation of this License, will be subject to the exclusive jurisdiction + of the competent court where the Licensor resides or conducts its primary + business. + +15. Applicable Law + +Without prejudice to specific agreement between parties, + +- this Licence shall be governed by the law of the European Union Member State + where the Licensor has his seat, resides or has his registered office, + +- this licence shall be governed by Belgian law if the Licensor has no seat, + residence or registered office inside a European Union Member State. + +Appendix + +‘Compatible Licences’ according to Article 5 EUPL are: + +- GNU General Public License (GPL) v. 2, v. 3 +- GNU Affero General Public License (AGPL) v. 3 +- Open Software License (OSL) v. 2.1, v. 3.0 +- Eclipse Public License (EPL) v. 1.0 +- CeCILL v. 2.0, v. 2.1 +- Mozilla Public Licence (MPL) v. 2 +- GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 +- Creative Commons Attribution-ShareAlike v. 
3.0 Unported (CC BY-SA 3.0) for + works other than software +- European Union Public Licence (EUPL) v. 1.1, v. 1.2 +- Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong + Reciprocity (LiLiQ-R+). + +The European Commission may update this Appendix to later versions of the above +licences without producing a new version of the EUPL, as long as they provide +the rights granted in Article 2 of this Licence and protect the covered Source +Code from exclusive appropriation. + +All other changes or additions to this Appendix require the production of a new +EUPL version. diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..469f5b4 --- /dev/null +++ b/README.txt @@ -0,0 +1,13 @@ +## Copyright Notice and License + +Copyright 2024 Rutger Broekhoff. +Licensed under the EUPL. + +An English copy of version 1.2 of the EUPL license can be found in the LICENSE +file. If you wish to read the license in another one of the (currently) 23 +official languages of the European Union, you can! You may find your version at + https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 + +For now, the code is licensed under the EUPL-1.2 license. If the time comes +that I seriously need to think about what license to use, this may change. +Nevertheless, I expect that this project will retain a copyleft license. 
\ No newline at end of file diff --git a/all-deterministic.sh b/all-deterministic.sh new file mode 100755 index 0000000..5a857ef --- /dev/null +++ b/all-deterministic.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +nix flake show --json | jq -r '.packages.[].[].name | values | ".#\(.)"' | xargs nix build --rebuild diff --git a/build-all.sh b/build-all.sh new file mode 100755 index 0000000..ea45126 --- /dev/null +++ b/build-all.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +nix flake show --json | jq -r '.packages.[].[].name | values | ".#\(.)"' | xargs nix build diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..3b8b6fe --- /dev/null +++ b/flake.lock @@ -0,0 +1,79 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1701680307, + "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", + "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", + "revCount": 88, + "type": "tarball", + "url": "https://api.flakehub.com/f/pinned/numtide/flake-utils/0.1.88%2Brev-4022d587cbbfd70fe950c1e2083a02621806a725/018c340d-3287-7c66-818b-f2f646a808e3/source.tar.gz" + }, + "original": { + "type": "tarball", + "url": "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz" + } + }, + "libtmi8": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1, + "narHash": "sha256-ST9E5LANnA7IV5cY0fbk+MSukaOczxnkXV1/IH7ps4U=", + "path": "./lib/libtmi8", + "type": "path" + }, + "original": { + "path": "./lib/libtmi8", + "type": "path" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1702346276, + "narHash": "sha256-eAQgwIWApFQ40ipeOjVSoK4TEHVd6nbSd9fApiHIw5A=", + "rev": "cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7", + "revCount": 553141, + "type": "tarball", + "url": "https://api.flakehub.com/f/pinned/NixOs/nixpkgs/0.2311.553141%2Brev-cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7/018c652c-2ff2-777b-bade-dae9c2abe1e1/source.tar.gz" + }, + "original": { 
+ "type": "tarball", + "url": "https://flakehub.com/f/NixOs/nixpkgs/%2A.tar.gz" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "libtmi8": "libtmi8", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..df5fffb --- /dev/null +++ b/flake.nix @@ -0,0 +1,137 @@ +{ + inputs = { + nixpkgs.url = "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz"; + flake-utils.url = "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz"; + libtmi8 = { + url = "path:./lib/libtmi8"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "flake-utils"; + }; + }; + + outputs = { self, nixpkgs, flake-utils, libtmi8, ... 
}@inputs: + { + nixosModules = rec { + oeuf = import ./module self; + default = oeuf; + }; + } // flake-utils.lib.eachDefaultSystem + (system: + let + libtmi8Overlay = final: prev: { oeuf-libtmi8 = libtmi8.packages.${system}.oeuf-libtmi8; }; + + pkgs = import nixpkgs { + inherit system; + overlays = [ libtmi8Overlay ]; + }; + boostPkg = pkgs.boost182; + + inherit (pkgs.gcc13) stdenv; + + oeuf-augmentkv6 = stdenv.mkDerivation { + name = "oeuf-augmentkv6"; + src = ./.; + + nativeBuildInputs = with pkgs; [ gcc13 boostPkg ]; + buildInputs = with pkgs; [ arrow-cpp oeuf-libtmi8 ]; + buildPhase = '' + cd src/augmentkv6 + make augmentkv6 + ''; + + installPhase = '' + mkdir -p $out/bin + cp augmentkv6 $out/bin/oeuf-augmentkv6 + ''; + }; + + oeuf-filterkv6 = stdenv.mkDerivation { + name = "oeuf-filterkv6"; + src = ./.; + + nativeBuildInputs = with pkgs; [ gcc13 ]; + buildInputs = with pkgs; [ arrow-cpp oeuf-libtmi8 ]; + buildPhase = '' + cd src/filterkv6 + make filterkv6 + ''; + + installPhase = '' + mkdir -p $out/bin + cp filterkv6 $out/bin/oeuf-filterkv6 + ''; + }; + + oeuf-bundleparquet = stdenv.mkDerivation { + name = "oeuf-bundleparquet"; + src = ./.; + + nativeBuildInputs = with pkgs; [ gcc13 ]; + buildInputs = with pkgs; [ arrow-cpp curl nlohmann_json prometheus-cpp zlib oeuf-libtmi8 ]; + buildPhase = '' + cd src/bundleparquet + make bundleparquet + ''; + + installPhase = '' + mkdir -p $out/bin + cp bundleparquet $out/bin/oeuf-bundleparquet + ''; + }; + + oeuf-querykv1 = stdenv.mkDerivation { + name = "oeuf-querykv1"; + src = ./.; + + nativeBuildInputs = with pkgs; [ gcc13 ]; + buildInputs = with pkgs; [ oeuf-libtmi8 boostPkg ]; + buildPhase = '' + cd src/querykv1 + make querykv1 + ''; + + installPhase = '' + mkdir -p $out/bin + cp querykv1 $out/bin/oeuf-querykv1 + ''; + }; + + oeuf-recvkv6 = stdenv.mkDerivation { + name = "oeuf-recvkv6"; + src = ./.; + + nativeBuildInputs = with pkgs; [ gcc13 ]; + buildInputs = with pkgs; [ zeromq zlib arrow-cpp nlohmann_json 
prometheus-cpp rapidxml oeuf-libtmi8 ]; + buildPhase = '' + cd src/recvkv6 + make recvkv6 + ''; + + installPhase = '' + mkdir -p $out/bin + cp recvkv6 $out/bin/oeuf-recvkv6 + ''; + }; + + oeuf-archiver = import ./script/archiver { + pkgs = pkgs // { inherit oeuf-bundleparquet; }; + }; + + oeuf-synckv6 = import ./script/synckv6 { inherit pkgs; }; + in + { + packages.oeuf-archiver = oeuf-archiver; + packages.oeuf-augmentkv6 = oeuf-augmentkv6; + packages.oeuf-synckv6 = oeuf-synckv6; + packages.oeuf-filterkv6 = oeuf-filterkv6; + packages.oeuf-bundleparquet = oeuf-bundleparquet; + packages.oeuf-querykv1 = oeuf-querykv1; + packages.oeuf-recvkv6 = oeuf-recvkv6; + + devShells.default = pkgs.mkShell { + inputsFrom = [ oeuf-bundleparquet oeuf-querykv1 oeuf-recvkv6 ]; + }; + + formatter = pkgs.nixpkgs-fmt; + }); +} diff --git a/lib/libtmi8/.envrc b/lib/libtmi8/.envrc new file mode 100644 index 0000000..4e0d702 --- /dev/null +++ b/lib/libtmi8/.envrc @@ -0,0 +1,3 @@ +use flake + +export DEVMODE=1 diff --git a/lib/libtmi8/.gitignore b/lib/libtmi8/.gitignore new file mode 100644 index 0000000..f6b8cf6 --- /dev/null +++ b/lib/libtmi8/.gitignore @@ -0,0 +1,3 @@ +src/*.o +libtmi8.a +libtmi8.so diff --git a/lib/libtmi8/Makefile b/lib/libtmi8/Makefile new file mode 100644 index 0000000..52a9807 --- /dev/null +++ b/lib/libtmi8/Makefile @@ -0,0 +1,41 @@ +# Taken from: +# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide +# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01, +# 2023. [Online]. 
Available: +# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html +CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer -fPIC -Iinclude $(if $(DEVMODE),-Werror,)\ + -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ + -D_GLIBCXX_ASSERTIONS \ + -fstrict-flex-arrays=3 \ + -fstack-clash-protection -fstack-protector-strong +LDFLAGS=-larrow -lparquet -Wl,-z,defs \ + -Wl,-z,nodlopen -Wl,-z,noexecstack \ + -Wl,-z,relro -Wl,-z,now +DESTDIR=/usr/local + +LIBHDRS=include/tmi8/kv1_index.hpp include/tmi8/kv1_lexer.hpp include/tmi8/kv1_parser.hpp include/tmi8/kv1_types.hpp include/tmi8/kv6_parquet.hpp +LIBSRCS=src/kv1_index.cpp src/kv1_lexer.cpp src/kv1_parser.cpp src/kv1_types.cpp src/kv6_parquet.cpp +LIBOBJS=$(patsubst %.cpp,%.o,$(LIBSRCS)) + +.PHONY: all install libtmi8 clean +all: libtmi8 + +libtmi8: libtmi8.a libtmi8.so + +clean: + rm -f libtmi8.a libtmi8.so $(LIBOBJS) + +install: libtmi8.a libtmi8.so $(LIBHDRS) + install -D -m644 include/tmi8/* -t $(DESTDIR)/include/tmi8 + install -D -m644 libtmi8.a -t $(DESTDIR)/lib + install -D -m644 libtmi8.so -t $(DESTDIR)/lib + +src/%.o: src/%.cpp $(LIBHDRS) + $(CXX) -c -o $@ $< $(CXXFLAGS) + +libtmi8.a: $(LIBOBJS) + $(AR) rcs $@ $^ + +libtmi8.so: $(LIBOBJS) + $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) diff --git a/lib/libtmi8/flake.lock b/lib/libtmi8/flake.lock new file mode 100644 index 0000000..5ff7d5d --- /dev/null +++ b/lib/libtmi8/flake.lock @@ -0,0 +1,58 @@ +{ + "nodes": { + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1701680307, + "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", + "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", + "revCount": 88, + "type": "tarball", + "url": "https://api.flakehub.com/f/pinned/numtide/flake-utils/0.1.88+rev-4022d587cbbfd70fe950c1e2083a02621806a725/018c340d-3287-7c66-818b-f2f646a808e3/source.tar.gz" + }, + "original": { + "type": "tarball", + "url": 
"https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1701539137, + "narHash": "sha256-nVO/5QYpf1GwjvtpXhyxx5M3U/WN0MwBro4Lsk+9mL0=", + "rev": "933d7dc155096e7575d207be6fb7792bc9f34f6d", + "revCount": 552571, + "type": "tarball", + "url": "https://api.flakehub.com/f/pinned/NixOs/nixpkgs/0.2311.552571+rev-933d7dc155096e7575d207be6fb7792bc9f34f6d/018c3242-a93c-7779-8d13-ddba0a38d24a/source.tar.gz" + }, + "original": { + "type": "tarball", + "url": "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz" + } + }, + "root": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/lib/libtmi8/flake.nix b/lib/libtmi8/flake.nix new file mode 100644 index 0000000..2ae7fc9 --- /dev/null +++ b/lib/libtmi8/flake.nix @@ -0,0 +1,42 @@ +{ + inputs = { + nixpkgs.url = "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz"; + flake-utils.url = "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz"; + }; + + outputs = { self, nixpkgs, flake-utils, ... 
}@inputs: + flake-utils.lib.eachDefaultSystem + (system: + let + pkgs = import nixpkgs { + inherit system; + overlays = [ ]; + }; + + inherit (pkgs.gcc13) stdenv; + + oeuf-libtmi8 = stdenv.mkDerivation { + name = "oeuf-libtmi8"; + src = pkgs.lib.cleanSource ./.; + + nativeBuildInputs = with pkgs; [ gcc13 ]; + buildInputs = with pkgs; [ arrow-cpp boost182 ]; + buildPhase = '' + make libtmi8 + ''; + + installPhase = '' + make install DESTDIR="$out" + ''; + }; + in + { + packages.oeuf-libtmi8 = oeuf-libtmi8; + + devShells.default = pkgs.mkShell { + inputsFrom = [ oeuf-libtmi8 ]; + }; + + formatter = pkgs.nixpkgs-fmt; + }); +} diff --git a/lib/libtmi8/include/tmi8/kv1_index.hpp b/lib/libtmi8/include/tmi8/kv1_index.hpp new file mode 100644 index 0000000..621acf6 --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_index.hpp @@ -0,0 +1,135 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_LIBTMI8_KV1_INDEX_HPP +#define OEUF_LIBTMI8_KV1_INDEX_HPP + +#include + +#include + +#include + +struct Kv1Index { + Kv1Records *records; + + explicit Kv1Index(Kv1Records *records); + + std::unordered_map< + Kv1OrganizationalUnit::Key, + Kv1OrganizationalUnit *, + boost::hash> organizational_units; + std::unordered_map< + Kv1HigherOrganizationalUnit::Key, + Kv1HigherOrganizationalUnit *, + boost::hash> higher_organizational_units; + std::unordered_map< + Kv1UserStopPoint::Key, + Kv1UserStopPoint *, + boost::hash> user_stop_points; + std::unordered_map< + Kv1UserStopArea::Key, + Kv1UserStopArea *, + boost::hash> user_stop_areas; + std::unordered_map< + Kv1TimingLink::Key, + Kv1TimingLink *, + boost::hash> timing_links; + std::unordered_map< + Kv1Link::Key, + Kv1Link *, + boost::hash> links; + std::unordered_map< + Kv1Line::Key, + Kv1Line *, + boost::hash> lines; + std::unordered_map< + Kv1Destination::Key, + Kv1Destination *, + boost::hash> destinations; + std::unordered_map< + Kv1JourneyPattern::Key, + Kv1JourneyPattern *, + boost::hash> journey_patterns; + std::unordered_map< + 
Kv1ConcessionFinancerRelation::Key, + Kv1ConcessionFinancerRelation *, + boost::hash> concession_financer_relations; + std::unordered_map< + Kv1ConcessionArea::Key, + Kv1ConcessionArea *, + boost::hash> concession_areas; + std::unordered_map< + Kv1Financer::Key, + Kv1Financer *, + boost::hash> financers; + std::unordered_map< + Kv1JourneyPatternTimingLink::Key, + Kv1JourneyPatternTimingLink *, + boost::hash> journey_pattern_timing_links; + std::unordered_map< + Kv1Point::Key, + Kv1Point *, + boost::hash> points; + std::unordered_map< + Kv1PointOnLink::Key, + Kv1PointOnLink *, + boost::hash> point_on_links; + std::unordered_map< + Kv1Icon::Key, + Kv1Icon *, + boost::hash> icons; + std::unordered_map< + Kv1Notice::Key, + Kv1Notice *, + boost::hash> notices; + std::unordered_map< + Kv1TimeDemandGroup::Key, + Kv1TimeDemandGroup *, + boost::hash> time_demand_groups; + std::unordered_map< + Kv1TimeDemandGroupRunTime::Key, + Kv1TimeDemandGroupRunTime *, + boost::hash> time_demand_group_run_times; + std::unordered_map< + Kv1PeriodGroup::Key, + Kv1PeriodGroup *, + boost::hash> period_groups; + std::unordered_map< + Kv1SpecificDay::Key, + Kv1SpecificDay *, + boost::hash> specific_days; + std::unordered_map< + Kv1TimetableVersion::Key, + Kv1TimetableVersion *, + boost::hash> timetable_versions; + std::unordered_map< + Kv1PublicJourney::Key, + Kv1PublicJourney *, + boost::hash> public_journeys; + std::unordered_map< + Kv1PeriodGroupValidity::Key, + Kv1PeriodGroupValidity *, + boost::hash> period_group_validities; + std::unordered_map< + Kv1ExceptionalOperatingDay::Key, + Kv1ExceptionalOperatingDay *, + boost::hash> exceptional_operating_days; + std::unordered_map< + Kv1ScheduleVersion::Key, + Kv1ScheduleVersion *, + boost::hash> schedule_versions; + std::unordered_map< + Kv1PublicJourneyPassingTimes::Key, + Kv1PublicJourneyPassingTimes *, + boost::hash> public_journey_passing_times; + std::unordered_map< + Kv1OperatingDay::Key, + Kv1OperatingDay *, + boost::hash> 
operating_days; + + size_t size() const; +}; + +void kv1LinkRecords(Kv1Index &index); + +#endif // OEUF_LIBTMI8_KV1_INDEX_HPP diff --git a/lib/libtmi8/include/tmi8/kv1_lexer.hpp b/lib/libtmi8/include/tmi8/kv1_lexer.hpp new file mode 100644 index 0000000..df6a57c --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_lexer.hpp @@ -0,0 +1,46 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_LIBTMI8_KV1_LEXER_HPP +#define OEUF_LIBTMI8_KV1_LEXER_HPP + +#include +#include +#include +#include +#include +#include + +enum Kv1TokenType { + KV1_TOKEN_CELL, + KV1_TOKEN_ROW_END, +}; +struct Kv1Token { Kv1TokenType type; std::string data; }; + +struct Kv1Lexer { + std::vector errors; + std::vector tokens; + + explicit Kv1Lexer(std::string_view input); + + void lex(); + + private: + // Does not eat newline character. + void eatRestOfLine(); + void lexOptionalHeader(); + void lexOptionalComment(); + + static bool isWhitespace(int c); + + void readQuotedColumn(); + void readUnquotedColumn(); + void lexRow(); + // Returns true when a line ending was consumed. + bool eatWhitespace(); + + std::string_view input; + std::string_view slice; + std::string colbuf; +}; + +#endif // OEUF_LIBTMI8_KV1_LEXER_HPP diff --git a/lib/libtmi8/include/tmi8/kv1_parser.hpp b/lib/libtmi8/include/tmi8/kv1_parser.hpp new file mode 100644 index 0000000..ccd8ec6 --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_parser.hpp @@ -0,0 +1,87 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_LIBTMI8_KV1_PARSER_HPP +#define OEUF_LIBTMI8_KV1_PARSER_HPP + +#include +#include +#include +#include +#include + +#include +#include + +struct Kv1Parser { + explicit Kv1Parser(std::vector tokens, Kv1Records &parse_into); + + void parse(); + + private: + // Method pointer to a method of Kv1Parser (i.e. a function that takes + // 'this'; is not static) that takes no arguments and also does not return + // anything. 
+ using ParseFunc = void (Kv1Parser::*)(); + static const std::unordered_map type_parsers; + + bool atEnd() const; + void eatRowEnds(); + const Kv1Token *cur() const; + const std::string *eatCell(std::string_view parsing_what); + std::string parseHeader(); + void eatRestOfRow(); + + void requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value); + std::optional requireBoolean(std::string_view field, bool mandatory, std::string_view value); + std::optional requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value); + std::optional requireRgbColor(std::string_view field, bool mandatory, std::string_view value); + std::optional requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value); + + std::string eatString(std::string_view field, bool mandatory, size_t max_length); + std::optional eatBoolean(std::string_view field, bool mandatory); + std::optional eatNumber(std::string_view field, bool mandatory, size_t max_digits); + std::optional eatRgbColor(std::string_view field, bool mandatory); + std::optional eatRdCoord(std::string_view field, bool mandatory, size_t min_digits); + + void parseOrganizationalUnit(); + void parseHigherOrganizationalUnit(); + void parseUserStopPoint(); + void parseUserStopArea(); + void parseTimingLink(); + void parseLink(); + void parseLine(); + void parseDestination(); + void parseJourneyPattern(); + void parseConcessionFinancerRelation(); + void parseConcessionArea(); + void parseFinancer(); + void parseJourneyPatternTimingLink(); + void parsePoint(); + void parsePointOnLink(); + void parseIcon(); + void parseNotice(); + void parseNoticeAssignment(); + void parseTimeDemandGroup(); + void parseTimeDemandGroupRunTime(); + void parsePeriodGroup(); + void parseSpecificDay(); + void parseTimetableVersion(); + void parsePublicJourney(); + void parsePeriodGroupValidity(); + void parseExceptionalOperatingDay(); + void 
parseScheduleVersion(); + void parsePublicJourneyPassingTimes(); + void parseOperatingDay(); + + size_t pos = 0; + std::vector tokens; + const std::chrono::time_zone *amsterdam = std::chrono::locate_zone("Europe/Amsterdam"); + + public: + std::vector warns; + std::vector global_errors; + std::vector record_errors; + Kv1Records &records; +}; + +#endif // OEUF_LIBTMI8_KV1_PARSER_HPP diff --git a/lib/libtmi8/include/tmi8/kv1_types.hpp b/lib/libtmi8/include/tmi8/kv1_types.hpp new file mode 100644 index 0000000..d4a0760 --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_types.hpp @@ -0,0 +1,1528 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_LIBTMI8_KV1_TYPES_HPP +#define OEUF_LIBTMI8_KV1_TYPES_HPP + +#include +#include +#include +#include +#include + +struct Kv1OrganizationalUnit; +struct Kv1HigherOrganizationalUnit; +struct Kv1UserStopPoint; +struct Kv1UserStopArea; +struct Kv1TimingLink; +struct Kv1Link; +struct Kv1Line; +struct Kv1Destination; +struct Kv1JourneyPattern; +struct Kv1ConcessionFinancerRelation; +struct Kv1ConcessionArea; +struct Kv1Financer; +struct Kv1JourneyPatternTimingLink; +struct Kv1Point; +struct Kv1PointOnLink; +struct Kv1Icon; +struct Kv1Notice; +struct Kv1NoticeAssignment; +struct Kv1TimeDemandGroup; +struct Kv1TimeDemandGroupRunTime; +struct Kv1PeriodGroup; +struct Kv1SpecificDay; +struct Kv1TimetableVersion; +struct Kv1PublicJourney; +struct Kv1PeriodGroupValidity; +struct Kv1ExceptionalOperatingDay; +struct Kv1ScheduleVersion; +struct Kv1PublicJourneyPassingTimes; +struct Kv1OperatingDay; + +struct Kv1Records { + std::vector organizational_units; + std::vector higher_organizational_units; + std::vector user_stop_points; + std::vector user_stop_areas; + std::vector timing_links; + std::vector links; + std::vector lines; + std::vector destinations; + std::vector journey_patterns; + std::vector concession_financer_relations; + std::vector concession_areas; + std::vector financers; + std::vector journey_pattern_timing_links; + std::vector 
points; + std::vector point_on_links; + std::vector icons; + std::vector notices; + std::vector notice_assignments; + std::vector time_demand_groups; + std::vector time_demand_group_run_times; + std::vector period_groups; + std::vector specific_days; + std::vector timetable_versions; + std::vector public_journeys; + std::vector period_group_validities; + std::vector exceptional_operating_days; + std::vector schedule_versions; + std::vector public_journey_passing_times; + std::vector operating_days; + + size_t size() const; +}; + +// These definitions implement TMI8, KV1 Dienstregeling (Timetable) version +// 8.3.0.2 (release), published by BISON on January 8, 2020. +// (Filename: tmi8 dienstregeling (kv 1) v8.3.0.2, release.docx) +// +// This specification and other BISON specifications, as well as other +// supplementary information, can be found on BISON's website: +// https://bison.dova.nu/ +// +// The specification that was used to create these definitions was downloaded +// from the following address: +// https://bison.dova.nu/sites/default/files/bestanden/tmi8_dienstregeling_kv_1_v8.3.0.2_release.pdf +// +// The KV1 table structure and the corresponding documentation describing the +// relevant tables and fields, as presented here, is derived from the original +// specification. Most documentation is a manually translated version of the +// documentation as present in the specification. The specification is licensed +// under CC BY-ND 3.0. The exact text of this license can be found on +// https://creativecommons.org/licenses/by-nd/3.0/nl/. + +// KV1 Table 1: Organizational Unit [ORUN] (MANDATORY) +// +// A collection of trips with the same validity features. An organizational +// unit can be part of a 'higher' unit. +// +// An organizational unit is defined as a unit for which the planning of trips +// is compiled.
When defining the organizational units, it is important that +// all trips within the package have a homogeneous validity (school holidays, +// shopping Sundays, foreign bank holidays). +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1OrganizationalUnit { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code); + }; + + Key key; + // Mandatory, at most 50 characters. + std::string name; + // Mandatory, at most 10 characters. + std::string organizational_unit_type; + // Optional, at most 255 characters. + std::string description; +}; + +// KV1 Table 2: Higher Organizational Unit [ORUNORUN] (OPTIONAL) +// +// An in the hierarchy higher-ordered organizational unit for the purpose of +// (among others) recording of (deviating) validities on the high level. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1HigherOrganizationalUnit { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Parent, higher organizational unit + // that is referred to. + std::string organizational_unit_code_parent; + // Mandatory (key), at most 10 characters. Child, lower organizational unit. + std::string organizational_unit_code_child; + // Mandatory (key), at most 10 characters. [YYYY-MM-DD] Starting date of the + // hierarchical relation (can be a fixed value, e.g. 2006-12-31). 
+ std::chrono::year_month_day valid_from; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code_parent, + std::string organizational_unit_code_child, + std::chrono::year_month_day valid_from); + }; + + Key key; + + Kv1OrganizationalUnit *p_organizational_unit_parent = nullptr; + Kv1OrganizationalUnit *p_organizational_unit_child = nullptr; +}; + +// KV1 Table 3: User Stop Point [USRSTOP] +// +// Stop or other point (e.g. Bridge, functioning as info for the bridge keeper) +// for which times are recorded in the planning system of the transit operator. +// +// Coordinates of a UserStopPoint are recorded as Point. When defining +// UserStopPoints, it is important that the coordinates can be unambiguously +// and verifiably recorded. For a stop, the coordinates of the stop sign are +// recorded. If there is no stop sign, the end of the bus stop (where the bus +// normally halts) is recorded as the coordinate of the stop. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1UserStopPoint { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Stop number in domain of operator. + std::string user_stop_code; + + explicit Key(std::string data_owner_code, + std::string user_stop_code); + }; + + Key key; + // Optional, at most 10 characters. Stop number in domain of integrator, + // (initially) equal to UserStopCode. + std::string timing_point_code; + // Mandatory, at most 5 characters. Boolean indicator whether USRSTOP is used + // as boarding stop, true by default. False for e.g. dummy stop for bridge + // keeper. + bool get_in = true; + // Mandatory, at most 5 characters. Boolean indicator whether USRSTOP is used + // as alighting stop. + bool get_out = false; + // Mandatory, at most 50 characters. Stop name. 
+ std::string name; + // Mandatory, at most 50 characters. Town name. + std::string town; + // Optional, at most 10 characters. Reference to StopArea of which the + // UserStop is part. + std::string user_stop_area_code; + // Mandatory, at most 10 characters. Platform indication/letter. The '-' + // value is used to indicate that this is not applicable. + std::string stop_side_code; + // Mandatory, at most 5 digits. Minimal stop duration for boarding and + // alighting, zero by default. In seconds. + double minimal_stop_time_s = 0; + // Optional, at most 3 digits. Length of stop platform. + std::optional stop_side_length; + // Optional, at most 255 characters. + std::string description; + // Mandatory, at most 10 characters. USRSTOPTYPE. Indicates the stop kind. + std::string user_stop_type; + // Optional, at most 30 characters. Nationally unique stop number. + std::string quay_code; + + Kv1UserStopArea *p_user_stop_area = nullptr; + Kv1Point *p_point = nullptr; +}; + +// KV1 Table 4: User Stop Area [USRSTAR] +// +// A StopArea is a collection of stops, which have the same name for passengers +// and logically belong together. (E.g. a bus station or transfer point.) Stops +// lying opposite each other can also form a StopArea. +// +// Used for display of all stops in a stop area on an overview display and for +// announcement of stop names (stops on both sides of the street share the same +// name). +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1UserStopArea { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Code of StopArea following coding + // of operator, e.g. PlaceCode. + std::string user_stop_area_code; + + explicit Key(std::string data_owner_code, + std::string user_stop_area_code); + }; + + Key key; + // Mandatory, at most 50 characters.
+ std::string name; + // Mandatory, at most 50 characters. + std::string town; + // Mandatory, at most 255 characters. + std::string description; +}; + +// KV1 Table 5: Timing Link [TILI] +// +// Link between two points which have the feature 'stop' or 'timing point'. A +// Timing Link is set between all stops and other timing points (e.g. for the +// bridge) which make part of a journey pattern. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1TimingLink { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Stop number in the domain of + // DataOwner (here: the operator). + std::string user_stop_code_begin; + // Mandatory (key), at most 10 characters. Stop number in the domain of + // DataOwner (here: the operator). + std::string user_stop_code_end; + + explicit Key(std::string data_owner_code, + std::string user_stop_code_begin, + std::string user_stop_code_end); + }; + + Key key; + // Optional, at most 5 digits. Minimal trip time (in seconds). + std::optional minimal_drive_time_s; + // Optional, at most 255 characters. + std::string description; + + Kv1UserStopPoint *p_user_stop_begin = nullptr; + Kv1UserStopPoint *p_user_stop_end = nullptr; +}; + +// KV1 Table 6: Link [LINK] +// +// A route link describes the connection between two points on the physical path +// of a route. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Link { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Stop code in the domain of + // DataOwner (here: the operator). + std::string user_stop_code_begin; + // Mandatory (key), at most 10 characters.
Stop code in the domain of + // DataOwner (here: the operator). + std::string user_stop_code_end; + // Mandatory (key), at most 5 characters. Modality for which the distance + // applies, see BISON enumeration E9. + // TODO: Check if BISON enumeration E9 can be put into an enum. + std::string transport_type; + + explicit Key(std::string data_owner_code, + std::string user_stop_code_begin, + std::string user_stop_code_end, + std::string transport_type); + }; + + Key key; + // Mandatory, at most 6 digits. Length of the link (in meters). + double distance = 0; + // Optional, at most 255 characters. + std::string description; + + Kv1UserStopPoint *p_user_stop_begin = nullptr; + Kv1UserStopPoint *p_user_stop_end = nullptr; +}; + +struct RgbColor { + uint8_t r, g, b = 0; +}; + +// KV1 Table 7: Line [LINE] +// +// A line is a collection of routes/journey patterns which is publicly known +// under a shared number. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Line { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Unique system line number in the + // domain of DataOwner. + std::string line_planning_number; + + explicit Key(std::string data_owner_code, + std::string line_planning_number); + }; + + Key key; + // Mandatory, at most 4 characters. Line number for the public, incl. S/N + // indications. + std::string line_public_number; + // Mandatory, at most 50 characters. + std::string line_name; + // Mandatory, at most three digits. Should be in the range [0, 400). + // Only processing Connexxion's KV1 export, however, shows us that this range + // constraint is not honored in practice. That is why we also don't care. + short line_ve_tag_number = 0; + // Optional, at most 255 characters. + std::string description; + // Mandatory, at most 5 characters.
Modality, see BISON enumeration E9. + // TODO: Check if BISON enumeration E9 can be put into an enum. + std::string transport_type; + // Optional, at most 4 digits. Symbol / image for the line. Reference to ICON + // table. + std::optional line_icon; + // Optional, at most four characters. Background color for the line. + // Hexadecimal representation following RGB coding. Always six characters + // (RRGGBB), only numbers and/or capital letters. + std::optional line_color; + // Optional, at most four characters. Foreground color for the line. + // Hexadecimal representation following RGB coding. Always six characters + // (RRGGBB), only numbers and/or capital letters. + std::optional line_text_color; + + Kv1Icon *p_line_icon = nullptr; +}; + +// KV1 Table 8: Destination [DEST] +// +// A destination shows the place/district/description of the route for the +// passenger. Intermediate and detail destinations of a journey pattern are +// shown under a single destination code, together with the primary destination. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Destination { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string dest_code; + + explicit Key(std::string data_owner_code, + std::string dest_code); + }; + + Key key; + // Mandatory, at most 50 characters. Full destination (e.g. compiled from + // primary, detail or intermediate destination). + std::string dest_name_full; + // Mandatory, at most 24 characters. Primary / intermediate destination in + // enumeration / final destination if 1 line is used. + std::string dest_name_main; + // Optional, at most 24 characters. Detail/secondary or intermediate + // destination for primary destination, final destination (for intermediate + // destination on line 1).
+ std::string dest_name_detail; + // Mandatory, at most 5 characters. Boolean which indicates whether + // DestNameDetail must always be shown (e.g. because this contains an + // important intermediate destination.) + bool relevant_dest_name_detail = false; + // Mandatory, at most 21 characters. Primary destination in 21 characters. + std::string dest_name_main_21; + // Optional, at most 21 characters. Detail/secondary/intermediate destination + // in 21 characters. + std::string dest_name_detail_21; + // Mandatory, at most 19 characters. Primary destination in 19 characters. + std::string dest_name_main_19; + // Optional, at most 19 characters. Detail/secondary/intermediate destination + // in 19 characters. + std::string dest_name_detail_19; + // Mandatory, at most 16 characters. Primary destination in 16 characters. + std::string dest_name_main_16; + // Optional, at most 16 characters. Detail/secondary/intermediate destination + // in 16 characters. + std::string dest_name_detail_16; + // Optional, at most 4 digits. Symbol/image for the destination. Reference to + // the ICON table. + std::optional dest_icon; + // Optional, at most 6 characters. Background color for the destination. + // Hexadecimal representation following RGB coding. Always six characters + // (RRGGBB), only six digits and/or capital letters. + std::optional dest_color; + // Optional, at most 30 characters (WTF?). Foreground color for the + // destination. Hexadecimal representation following RGB coding. Always six + // characters (RRGGBB), only six digits and/or capital letters. + std::optional dest_text_color; +}; + +// KV1 Table 9: Journey Pattern [JOPA] +// +// The journey pattern describes the route from start to end point as an ordered +// list of stops and links between stops/timing points. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1JourneyPattern { + struct Key { + // Mandatory (key), at most 10 characters.
Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string line_planning_number; + // Mandatory (key), at most 10 characters. + std::string journey_pattern_code; + + explicit Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code); + }; + + Key key; + // Mandatory, at most 10 characters. Refers to a journey pattern type + // (JOPATYPE). + std::string journey_pattern_type; + // Mandatory, at most 1 character. One of [1, 2, A, B]. + char direction = 0; + // Optional, at most 255 characters. + std::string description; + + Kv1Line *p_line = nullptr; +}; + +// KV1 Table 10: Concession Financer Relation [CONFINREL] +// +// Concession financer relation (mainly parcel). Smallest unit for which data +// about a concession can be captured in relation to a financer and/or +// concession. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1ConcessionFinancerRelation { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Parcel code. + std::string con_fin_rel_code; + + explicit Key(std::string data_owner_code, + std::string con_fin_rel_code); + }; + + Key key; + // Mandatory, at most 10 characters. Concession code. + std::string concession_area_code; + // Optional, at most 10 characters. Code of financer/client of the parcel. + std::string financer_code; + + Kv1ConcessionArea *p_concession_area = nullptr; + Kv1Financer *p_financer = nullptr; +}; + +// KV1 Table 11: Concession Area [CONAREA] +// +// Concession (area). +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1ConcessionArea { + struct Key { + // Mandatory (key), at most 10 characters. 
Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Code of the concession. + std::string concession_area_code; + + explicit Key(std::string data_owner_code, + std::string concession_area_code); + }; + + Key key; + // Mandatory, at most 255 characters. + std::string description; +}; + +// KV1 Table 12: Financer [FINANCER] (OPTIONAL) +// +// Financer of a parcel. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Financer { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string financer_code; + + explicit Key(std::string data_owner_code, + std::string financer_code); + }; + + Key key; + // Mandatory, at most 255 characters. + std::string description; +}; + +// KV1 Table 13: Journey Pattern Timing Link [JOPATILI] +// +// Compilation of journey pattern from logical links (between pairs of +// stops/timing points). Features such as the destination code, the public line +// number, the concession financer relation (parcel) and product formula are +// set per connection. Moreover, a color and/or image linked to the line +// destination and the use of the (first) stop as boarding/alighting stop can +// be set per link. +// +// Timing Link: A timing link is a stop, set by the transit operator, where a +// bus / public transit vehicle may never depart earlier than set in the +// timetable. +// +// A logical link may never occur more than once in a journey pattern. +// Therefore, the combination of LinePlanningNumber, JourneyPatternCode, +// UserStopCodeBegin and UserStopCodeEnd must be unique in JOPATILI. 
+// +// The values of GetIn and GetOut are normally copied from the corresponding +// stop in the USRSTOP table, but can be overruled per journey pattern if so +// desired. +// +// An Icon or (Text)Color set here overrules the general value of the +// corresponding line (Line) or destination (Destination). +// +// A value of ShowFlexibleTrip or ProductFormulaType in PUJO or PUJOPASS +// overrules the value in JOPATILI. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1JourneyPatternTimingLink { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string line_planning_number; + // Mandatory (key), at most 10 characters. + std::string journey_pattern_code; + // Mandatory (key), at most 3 digits. + short timing_link_order = 0; + + explicit Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code, + short timing_link_order); + }; + + Key key; + // Mandatory, at most 10 characters. Stop number in the domain of the + // DataOwner (here: the transit operator). + std::string user_stop_code_begin; + // Mandatory, at most 10 characters. Stop number in the domain of the + // DataOwner (here: the transit operator). + std::string user_stop_code_end; + // Mandatory, at most 10 characters. Concession financer relation / parcel + // (smallest unit). + std::string con_fin_rel_code; + // Mandatory, at most 10 characters. The destination (incl. intermediate + // destinations) as these are shown at the first stop of the journey pattern + // link. + std::string dest_code; + // Mandatory, at most 5 characters. Boolean which indicates whether the first + // stop of the connection is a timing stop. Indicator is at least "true" at + // first stop of a line and at waiting stops.
+ bool is_timing_stop = false; + // Optional, at most 4 characters. Public line number which must be shown on + // displays from the first stop of the journey pattern link (e.g. Line number + // + S). This is important when a deviating public line number applies from a + // certain point on forward. Normally, the public line number of the + // corresponding line is shown. + std::string display_public_line; + // Optional, at most 4 digits. Enumeration E10 (see section 2.5). A public + // transit service which distinguishes itself by a set of unique features, + // that is offered to the passenger as distinct (a marketing aspect). + // TODO: Check if we can turn BISON enumeration E10 into an enum + std::optional product_formula_type; + // Mandatory, at most 5 characters. Boolean indicator whether UserStopBegin + // is used as a boarding stop in this journey pattern. Usually equal to the + // value of the corresponding USRSTOP. + bool get_in = false; + // Mandatory, at most 5 characters. Boolean indicator whether UserStopBegin + // is used as an alighting stop in this journey pattern. Usually equal to the + // value of the corresponding USRSTOP. + bool get_out = false; + // Optional, at most 8 characters. Indicates whether the transit operator + // wants a not explicitly planned trip (i.e. a trip that only operates after + // reservation such as a 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to + // be shown on displays. Values according enumeration E21: TRUE (always), + // FALSE (never), REALTIME (only when tracking trip). + // TODO: Check if we can turn BISON enumeration E21 into an enum + std::string show_flexible_trip; + // Optional, at most 4 digits. Symbol / image for display of the line + // destination at the journey stop passing. Reference to the ICON table. + std::optional line_dest_icon; + // Optional, at most 6 characters. Background color for display of the line + // destination at a journey stop passing. 
Hexadecimal representation + // following RGB coding. Always six characters (RRGGBB), only numbers and/or + // capital letters. + std::optional line_dest_color; + // Optional, at most 6 characters. Foreground color for display of the line + // destination at a journey stop passing. Hexadecimal representation + // following RGB coding. Always six characters (RRGGBB), only numbers and/or + // capital letters. + std::optional line_dest_text_color; + + Kv1Line *p_line = nullptr; + Kv1JourneyPattern *p_journey_pattern = nullptr; + Kv1UserStopPoint *p_user_stop_begin = nullptr; + Kv1UserStopPoint *p_user_stop_end = nullptr; + Kv1ConcessionFinancerRelation *p_con_fin_rel = nullptr; + Kv1Destination *p_dest = nullptr; + Kv1Icon *p_line_dest_icon = nullptr; +}; + +// KV1 Table 14: Point [POINT] +// +// A point is the smallest location which can be referred to within the public +// transit network. Every stop (USRSTOP) is a point. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Point { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string point_code; + + explicit Key(std::string data_owner_code, + std::string point_code); + }; + + Key key; + // Mandatory, at most 10 characters. Refers to the POINTTYPE table. + std::string point_type; + // Mandatory, at most 10 characters. Refers to the GEOSYSTYPE table. Only + // allowed to have the value "RD" (rijksdriehoekstelsel; the national Dutch + // coordinate system). + std::string coordinate_system_type; + // Mandatory, at most 15 characters. X position in the RD coordinate system, + // in meters (at least 6 digits). + double location_x_ew = 0; + // Mandatory, at most 15 characters. Y position in the RD coordinate system, + // in meters (at least 6 digits).
+ double location_y_ns = 0; + // Optional, at most 15 characters. + // NOTE: the standard (presumably wrongly) indicates this field as having + // alphanumeric contents. + std::optional location_z; + // Optional, at most 255 characters. + std::string description; +}; + +// KV1 Table 15: Point on Link [POOL] +// +// A point that is used to geographically describe the trajectory between two +// stops. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1PointOnLink { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Stop number in the domain of the + // DataOwner (here: transit operator). + std::string user_stop_code_begin; + // Mandatory (key), at most 10 characters. Stop number in the domain of the + // DataOwner (here: transit operator). + std::string user_stop_code_end; + // Mandatory (key), at most 10 characters. Code from the road manager for KAR + // points. For curve points of the DataOwner (often the transit operator). + std::string point_data_owner_code; + // Mandatory (key), at most 10 characters. + std::string point_code; + // Mandatory (key), at most 5 characters. Modality for which the distance + // applies, see BISON enumeration E9. + std::string transport_type; + + explicit Key(std::string data_owner_code, + std::string user_stop_code_begin, + std::string user_stop_code_end, + std::string point_data_owner_code, + std::string point_code, + std::string transport_type); + }; + + Key key; + // Mandatory, at most 5 digits. Distance in meters relative to the start of + // the link. + double distance_since_start_of_link = 0; + // Optional, at most 4 digits. Crossing speed for a public transit vehicle + // from the previous point (on a link) in m/s. + std::optional segment_speed_mps = 0; + // Optional, at most 4 digits.
Comfort speed for a public transit vehicle on + // the curve point. + std::optional local_point_speed_mps = 0; + // Optional, at most 255 characters. + std::string description; + + Kv1UserStopPoint *p_user_stop_begin = nullptr; + Kv1UserStopPoint *p_user_stop_end = nullptr; + Kv1Point *p_point = nullptr; +}; + +// KV1 Table 16: Icon [ICON] +// +// Table with images which can be referred to from DEST.DestIcon, LINE.LineIcon +// and JOPATILI.LineDestIcon to load the correct image. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Icon { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 4 digits. Reference from other tables for the + // requested image. + short icon_number = 0; + + explicit Key(std::string data_owner_code, + short icon_number); + }; + + Key key; + // Mandatory, at most 1024 characters. Absolute URI to a publically available + // location from which the image can be loaded. The extension of the file + // indicates the image type. + // Supported file types are: GIF (.gif), JPEG (.jpg, .jpeg), + // PNG (.png), SVG (.svg) + // Supported protocols are: HTTP, HTTPS, FTP + // Prefer to not use any capital letters. Examples: + // - http://bison.dova.nu/images/logo.png + // - https://bison.dova.nu/images/logo.png + // - ftp://ftp.dova.nu/images/logo.png + std::string icon_uri; +}; + +// KV1 Table 17: Notice [NOTICE] (OPTIONAL) +// +// A (reusable) text with supplementary information about exceptions / +// clarifications for a line, journey pattern etc. +// +// Usage is optional; when there are no clarifying texts, the NOTICE table does +// not need to be provided in a KV1 set. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1Notice { + struct Key { + // Mandatory (key), at most 10 characters. 
Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 20 characters. Identification of Notice (remark, + // clarifying text). + std::string notice_code; + + explicit Key(std::string data_owner_code, + std::string notice_code); + }; + + Key key; + // Mandatory, at most 1024 characters. Content, text. Contains contact + // information such as telephone number, web address and reservation time for + // 'call buses' (belbussen) and other demand-based transit. + std::string notice_content; +}; + +// KV1 Table 18: Notice Assignment [NTCASSGNM] (OPTIONAL) +// +// Linking table in which Notice (remark, clarfiying text) is assigned to a +// line, journey pattern, stops within a journey pattern, journey etc. Notice +// Assignment contains all logical key elements of the corresponding objects to +// which a Notice can be assigned. +// +// Different attributes are required for the Notice Assignment, depending on +// the type object to which the Notice is assigned. In the following table +// structure, this is indicated as 'Only relevant for ...'. This means that +// fields for other object types in the Notice Assignment can be ignored. +// +// Moreover, it can also occur that not all key fields of the linked table are +// of interest (content-wise) for recording the Notice. +// +// Both matters are summarised in this overview: +// +// -------------------------------------------------------- +// AssignedObject PUJO PUJOPASS LINE JOPATILI +// -------------------------------------------------------- +// DataOwnerCode........... x ...... x ...... x ..... x ... +// TimetableVersionCode ... o ............................. +// OrganizationalUnitCode . o ...... o .................... +// ScheduleCode .................... o .................... +// ScheduleTypeCode ................ o .................... +// PeriodGroupCode ........ o ............................. +// SpecificDayCode ........ 
o ............................. +// DayType ................ o ............................. +// LinePlanningNumber ..... x ...... x ...... x ..... x ... +// JourneyNumber .......... x ...... x .................... +// StopOrder ....................... o .............. o ... +// JourneyPatternCode ............................... x ... +// TimingLinkOrder .................................. o ... +// UserStopCode .................... o .............. o ... +// -------------------------------------------------------- +// +// Legend: +// x - Mandatory. The Notice for this object type is always depndent on the +// value of the attribute. +// o - Optional. The Notice can be independent of the value of this +// attribute for this object type. +// - Attribute is no key field for this object type and can be +// ignored when processed. +// +// Usage of Notice Assignment is optional in KV1. If there are no clarifying +// texts, then the Notice Assignment table is not required to be present in the +// provided KV1 set. +// +// This table is part of the core data tables, which are common for all KV1 +// variants. +struct Kv1NoticeAssignment { + // Mandatory, at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory, at most 20 characters. Notice that is assigned. + std::string notice_code; + // Mandatory, at most 8 characters. Object type to which Notice is assigned. + std::string assigned_object; + // Optional, at most 10 characters. Only relevant for PUJO. + std::string timetable_version_code; + // Optional, at most 10 characters. Only relevant for PUJO and PUJOPASS. + std::string organizational_unit_code; + // Optional, at most 10 characters. Only relevant for PUJOPASS. + std::string schedule_code; + // Optional, at most 10 characters. Only relevant for PUJOPASS. + std::string schedule_type_code; + // Optional, at most 10 characters. Only relevant for PUJO. 
+ std::string period_group_code; + // Optional, at most 10 characters. Only relevant for PUJO. + std::string specific_day_code; + // Optional, at most 10 characters. Only relevant for PUJO. + // [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, Thu, Fri, Sat, Sun. + // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun. + std::string day_type; + // Mandatory, at most 10 characters. Mandatory for all object types. + std::string line_planning_number; + // Optional (for all object types except PUJO and PUJOPASS), at most 6 + // digits. Only relevant for PUJO and PUJOPASS. Must be in the range + // [0-1000000). + std::optional journey_number; + // Optional, at most 4 digits. Only relevant for PUJOPASS and JOPATILI. + std::optional stop_order; + // Optional (for all object types except JOPATILI), at most 4 digits. Only + // relevant for JOPATILI. + std::string journey_pattern_code; + // Optional (at most 3 digits). Only relevant for JOPATILI. + std::optional timing_link_order; + // Optional (at most 10 characters). Only relevant for PUJOPASS and JOPATILI. + // For JOPATILI, this correspond to the first stop of the link. + std::string user_stop_code; + + Kv1Notice *p_notice = nullptr; +}; + +// KV1 Table 19: Time Demand Group [TIMDEMGRP] +// +// A time demand group is a grouping of the run time distribution from stop to +// stop, for a journey pattern (from start to end point). +// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1TimeDemandGroup { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string line_planning_number; + // Mandatory (key), at most 10 characters. Refers to the JOPATILI table. + std::string journey_pattern_code; + // Mandatory (key), at most 10 characters. Defines the code for the time + // demand group. 
(NOTE: this is not entirely made clear by the specification. + // This claim must be verified.) + std::string time_demand_group_code; + + explicit Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code, + std::string time_demand_group_code); + }; + + Key key; + + Kv1Line *p_line = nullptr; + Kv1JourneyPattern *p_journey_pattern = nullptr; +}; + +// KV1 Table 20: Time Demand Group Run Time [TIMDEMRNT] +// +// The run time structure/distribution for all timing links of a journey +// pattern or a time demand group. +// +// Optional run time elements are, when these are present, used to more +// accurately calculate expected departure times based on punctuality +// deviations. +// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1TimeDemandGroupRunTime { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string line_planning_number; + // Mandatory (key), at most 10 characters. Refers to the JOPATILI table. + std::string journey_pattern_code; + // Mandatory (key), at most 10 characters. Refers to the TIMDEMGRP table. + std::string time_demand_group_code; + // Mandatory (key), at most 3 digits. Reference number of a link within the + // journey pattern (a link can occur more than once within a journey + // pattern). + short timing_link_order = 0; + + explicit Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code, + std::string time_demand_group_code, + short timing_link_order); + }; + + Key key; + // Mandatory, at most 10 characters. Refers to the first stop of the link. + std::string user_stop_code_begin; + // Mandatory, at most 10 characters. Refers to the last stop of the link. + std::string user_stop_code_end; + // Mandatory, at most 5 digits. 
Planned total run time on link for time + // demand group: (Departure time end stop - departure time begin stop) + // corresponding to the time demand group. In seconds. + double total_drive_time_s = 0; + // Mandatory, at most 5 digits. Planned minimal run time on link for time + // demand group. Often calculated as: (Arrival time end stop - arrival time + // begin stop) corresponding to the time demand group. In seconds. + double drive_time_s = 0; + // Optional, at most 5 digits. Expected/planned delay/congestion on link for + // time demand group. In seconds. + std::optional expected_delay_s; + // Optional, at most 5 digits. Layover/catch-up time. Gives play in the + // timetable. In seconds. + // LayOverTime = TotDriveTime - DriveTime + ExpectedDelay - StopWaitTime. + std::optional layover_time; + // Mandatory, at most 5 digits. Planned stop waiting time at the final stop + // of the link for the time demand group. Determined based on the difference + // between the departure time and arrival time at this stop. Is zero when no + // waiting time is planned for this stop. In seconds. + double stop_wait_time = 0; + // Optional, at most 5 digits. Planned minimal stop time for + // boarding/alighting of passengers at the final stop of the link for the + // time demand group. Application: at hub stops with a planned waiting time, + // the difference between the planned waiting time and the minimum stop time + // is the layover/catch-up time. In seconds. + std::optional minimum_stop_time; + + Kv1Line *p_line = nullptr; + Kv1UserStopPoint *p_user_stop_begin = nullptr; + Kv1UserStopPoint *p_user_stop_end = nullptr; + Kv1JourneyPattern *p_journey_pattern = nullptr; + Kv1TimeDemandGroup *p_time_demand_group = nullptr; + Kv1JourneyPatternTimingLink *p_journey_pattern_timing_link = nullptr; +}; + +// KV1 Table 21: Period Group [PEGR] +// +// Period group is an indication of a 'homogeneous period' during the year, +// i.e. 
a period in which the schedule has the same composition w.r.t. +// frequencies and run times. +// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1PeriodGroup { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string period_group_code; + + explicit Key(std::string data_owner_code, + std::string period_group_code); + }; + + Key key; + // Optional, at most 255 characters. + std::string description; +}; + +// KV1 Table 22: Specific Day [SPECDAY] +// +// A specific day is a feature of a day for which a deviating service level is +// provided, respective to a normal day of the week. +// +// E.g. shopping Sundays (koopzondagen, if not every Sunday), New Year's Eve +// (oudejaarsdag), foreign bank holidays (as applicable). +// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1SpecificDay { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Default: "NORMAL". + std::string specific_day_code; + + explicit Key(std::string data_owner_code, + std::string specific_day_code); + }; + + Key key; + // Mandatory, at most 50 characters. + std::string name; + // Optional, at most 255 characters. + std::string description; +}; + +// KV1 Table 23: Timetable Version [TIVE] +// +// A timetable version bundles all planned activities for an organizational +// unit. For the public schedule, these are trips, routes, run times etc. +// +// When processing a new Timetable Version, it is checked if another TIVE with +// the same key has already been processed. If this is the case, ValidFrom must +// be equal to the starting date of the previously provided set. The new set +// replaces the older one.
A package with a new starting date is only processed +// if another TimetableVersionCode is used. +// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1TimetableVersion { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + // Mandatory (key), at most 10 characters. + std::string timetable_version_code; + // Mandatory (key), at most 10 characters. + std::string period_group_code; + // Mandatory (key), at most 10 characters. Default: "NORMAL". + std::string specific_day_code; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string timetable_version_code, + std::string period_group_code, + std::string specific_day_code); + }; + + Key key; + // Mandatory, at most 10 characters. Date on which the timetable goes into + // effect, following the YYYY-MM-DD format. + std::chrono::year_month_day valid_from; + // Mandatory, at most 10 characters. Value: "PUBT". + std::string timetable_version_type; + // Optional, at most 10 characters. Date on which the timetable goes out of + // effect, following the YYYY-MM-DD format. + std::optional valid_thru; + // Optional, at most 255 characters. Should be null/empty. + std::string description; + + Kv1OrganizationalUnit *p_organizational_unit = nullptr; + Kv1PeriodGroup *p_period_group = nullptr; + Kv1SpecificDay *p_specific_day = nullptr; +}; + +// KV1 Table 24: Public Journey [PUJO] +// +// Public journeys are journeys that are operated by a public transit +// organization and are accessible to the passenger. +// +// Business rules: +// - If ShowFlexibleTrip or ProductFormulaType is set in a record of this +// table, this takes precedence over the value as in the corresponding +// JOPATILI entry.
+// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1PublicJourney { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string timetable_version_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + // Mandatory (key), at most 10 characters. + std::string period_group_code; + // Mandatory (key), at most 10 characters. + std::string specific_day_code; + // Mandatory (key), at most 7 characters. + // [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, Thu, Fri, Sat, Sun. + // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun. + // TODO: See if we can make this into a more concrete type + std::string day_type; + // Mandatory (key), at most 10 characters. + std::string line_planning_number; + // Mandatory (key), at most 6 digits. Must be in the range [0-1000000). + int journey_number = 0; + + explicit Key(std::string data_owner_code, + std::string timetable_version_code, + std::string organizational_unit_code, + std::string period_group_code, + std::string specific_day_code, + std::string day_type, + std::string line_planning_number, + int journey_number); + }; + + Key key; + // Mandatory, at most 10 characters. + std::string time_demand_group_code; + // Mandatory, at most 10 characters. + std::string journey_pattern_code; + // Mandatory, at most 8 characters. Format: "HH:MM:SS". + std::chrono::hh_mm_ss departure_time; + // Mandatory, at most 13 characters. Values as in BISON enumeration E3. + // Allowed are: "ACCESSIBLE", "NOTACCESSIBLE" and "UNKNOWN". + // TODO: See if we can fit BISON enumeration E3 into an enum + std::string wheelchair_accessible; + // Mandatory, at most 5 characters. Boolean. Value "true": journey is + // operated by DataOwner. Value "false": journey is operated by a different + // DataOwner.
Indicator is meant for a line that is operated jointly by + // multiple transit operators. The indicator is used to be able to match the + // journey operation (KV6, KV19 etc.); only journeys for which the indicator + // is "true" can be expected to have corresponding current/real-time + // information, although "true" doesn't necessarily mean that this + // current/real-time information will (always) become available. + bool data_owner_is_operator = false; + // Mandatory, at most 5 characters. Boolean. Indicates whether + // current/real-time journey information may be expected for the + // corresponding journey ("true" or "false"). + bool planned_monitored = false; + // Optional, at most 4 digits. BISON enumeration E10. Intended to allow + // capturing transit mode features at the journey level. + // TODO: See if we can make BISON enumeration E10 into an enum + std::optional product_formula_type; + // Optional, at most 8 characters. Indicates whether the transit operator + // wants that a not-explicitly planned trip (i.e. a journey that only runs on + // reservation, e.g. 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to be + // shown on displays. Values following BISON enumeration E21: TRUE (always), + // FALSE (never), REALTIME (only when journey is tracked). + // TODO: See if we can make BISON enumeration E21 into an enum + std::string show_flexible_trip; + + Kv1TimetableVersion *p_timetable_version = nullptr; + Kv1OrganizationalUnit *p_organizational_unit = nullptr; + Kv1PeriodGroup *p_period_group = nullptr; + Kv1SpecificDay *p_specific_day = nullptr; + Kv1Line *p_line = nullptr; + Kv1TimeDemandGroup *p_time_demand_group = nullptr; + Kv1JourneyPattern *p_journey_pattern = nullptr; +}; + +// KV1 Table 25: Period Group Validity [PEGRVAL] +// +// Validities (multiple from-thru data) of a period group. +// +// This table is part of the KV1 variant "validities and time demand groups". 
+struct Kv1PeriodGroupValidity { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + // Mandatory (key), at most 10 characters. + std::string period_group_code; + // Mandatory (key), at most 10 characters. Date of the start of the validity + // period. Format: "YYYY-MM-DD". + std::chrono::year_month_day valid_from; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string period_group_code, + std::chrono::year_month_day valid_from); + }; + + Key key; + // Mandatory, at most 10 characters. Date of the end of the validity period. + // Format: "YYYY-MM-DD". + std::chrono::year_month_day valid_thru; + + Kv1OrganizationalUnit *p_organizational_unit = nullptr; + Kv1PeriodGroup *p_period_group = nullptr; +}; + +// KV1 Table 26: Exceptional Operating Day [EXCOPDAY] +// +// Contains exceptional validity dates, for which the service runs following a +// different day type (such as another day of the week or a different period). +// +// This table is part of the KV1 variant "validities and time demand groups". +struct Kv1ExceptionalOperatingDay { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. Organization unit for which an + // exceptional day validity applies. + std::string organizational_unit_code; + // Mandatory (key), at most 23 characters. Date (+ time) for which the + // exceptional validity applies. Format: "YYYYMMDDThh:mm:ssTZD". + std::chrono::sys_seconds valid_date; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code, + std::chrono::sys_seconds valid_date); + }; + + Key key; + // Mandatory, at most 7 characters. 
The exceptional day type that applies on + // a calendar day: [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, + // Thu, Fri, Sat, Sun. + // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun. + // TODO: See if we can make this into a more concrete type + std::string day_type_as_on; + // Mandatory, at most 10 characters. Specific day service level to which the + // exceptional day validity refers. + std::string specific_day_code; + // Optional, at most 10 characters. An exceptional day validity can be + // related to the service level of another period (e.g. the school holiday + // schedule). This exceptional period reference is set here. + // + // E.g. on Good Friday or the day after Ascension day, transit runs according + // to the holiday season schedule, while transit runs following the winter + // package in the surrounding days. + std::string period_group_code; + // Optional, at most 255 characters. + std::string description; + + Kv1OrganizationalUnit *p_organizational_unit = nullptr; + Kv1SpecificDay *p_specific_day = nullptr; + Kv1PeriodGroup *p_period_group = nullptr; +}; + +// KV1 Table 27: Schedule Version [SCHEDVERS] +// +// A schedule version bundles the planned activities for an organisation unit +// per day type. The journeys with passing times and corresponding routes are +// for the public timetable. +// +// When processing a new Schedule Version, it is checked if another SCHEDVERS +// with the same key has already been processed. If this is the case, ValidFrom +// must be equal to the starting date of the previously provided set. The new +// set replaces the older one. A package with a new starting date is only +// processed if another Schedule Code is used. +// +// This table is part of the KV1 variant "schedules and passing times". +struct Kv1ScheduleVersion { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1).
+ std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + // Mandatory (key), at most 10 characters. A unique code in combination with + // the ScheduleTypeCode of the package within the ORUN. + std::string schedule_code; + // Mandatory (key), at most 10 characters. Code for the Schedule Type (Day Type). + std::string schedule_type_code; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string schedule_code, + std::string schedule_type_code); + }; + + Key key; + // Mandatory, at most 10 characters. Date on which the schedule goes into + // effect. Format: "YYYY-MM-DD". + std::chrono::year_month_day valid_from; + // Optional, at most 10 characters. Date on which the schedule goes out of + // effect. Format: "YYYY-MM-DD". + std::optional valid_thru; + // Optional, at most 255 characters. Should be empty/null. + std::string description; + + Kv1OrganizationalUnit *p_organizational_unit = nullptr; +}; + +// KV1 Table 28: Public Journey Passing Times [PUJOPASS] +// +// Public journey with arrival and departure times at all stops (and other +// timing points). +// +// Business rules: +// - If ShowFlexibleTrip or ProductFormulaType is set here, then this takes +// precedence over the value in the corresponding JOPATILI record. +// - All stop passings of a public journey refer to the same journey pattern +// (JOPA)! +// +// This table is part of the KV1 variant "schedules and passing times". +struct Kv1PublicJourneyPassingTimes { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + // Mandatory (key), at most 10 characters. A unique code in combination with + // the ScheduleTypeCode of the package within the ORUN. 
+ std::string schedule_code; + // Mandatory (key), at most 10 characters. Code for the Schedule Type (e.g. + // Day Type). + std::string schedule_type_code; + // Mandatory (key), at most 10 characters. + std::string line_planning_number; + // Mandatory (key), at most 6 digits. Must be in the range [0-1000000). + int journey_number = 0; + // Mandatory (key), at most 4 digits. + short stop_order = 0; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string schedule_code, + std::string schedule_type_code, + std::string line_planning_number, + int journey_number, + short stop_order); + }; + + Key key; + // Mandatory, at most 10 characters. + std::string journey_pattern_code; + // Mandatory, at most 10 characters. + std::string user_stop_code; + // Mandatory (except for the first stop of a journey), at most 8 digits. Not + // compulsory for the first stop of a journey. Format: "HH:MM:SS". + std::optional> target_arrival_time; + // Mandatory (except for the last stop of a journey), at most 8 digits. Not + // compulsory for the last stop of a journey. Format: "HH:MM:SS". + std::optional> target_departure_time; + // Mandatory, at most 13 characters. Values as in BISON enumeration E3. + // Allowed are: "ACCESSIBLE", "NOTACCESSIBLE" and "UNKNOWN". + // TODO: See if we can fit BISON enumeration E3 into an enum + std::string wheelchair_accessible; + // Mandatory, at most 5 characters. Boolean. Value "true": journey is + // operated by DataOwner. Value "false": journey is operated by a different + // DataOwner. Indicator is meant for a line that is operated jointly by + // multiple transit operators. The indicator is used to be able to match the + // journey operation (KV6, KV19 etc.); only journeys for which the indicator + // is "true" can be expected to have corresponding current/real-time + // information, although "true" doesn't necessarily mean that this + // current/real-time information will (always) become available.
+ bool data_owner_is_operator = false; + // Mandatory, at most 5 characters. Boolean. Indicates whether + // current/real-time journey information may be expected for the + // corresponding journey ("true" or "false"). + bool planned_monitored = false; + // Optional, at most 4 digits. BISON enumeration E10. Intended to allow + // capturing transit mode features at the journey level. + // TODO: See if we can make BISON enumeration E10 into an enum + std::optional product_formula_type; + // Optional, at most 8 characters. Indicates whether the transit operator + // wants that a not-explicitly planned trip (i.e. a journey that only runs on + // reservation, e.g. 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to be + // shown on displays. Values following BISON enumeration E21: TRUE (always), + // FALSE (never), REALTIME (only when journey is tracked). + // TODO: See if we can make BISON enumeration E21 into an enum + std::string show_flexible_trip; + + Kv1OrganizationalUnit *p_organizational_unit = nullptr; + Kv1ScheduleVersion *p_schedule_version = nullptr; + Kv1Line *p_line = nullptr; + Kv1JourneyPattern *p_journey_pattern = nullptr; + Kv1UserStopPoint *p_user_stop = nullptr; +}; + +// KV1 Table 29: Operating Day [OPERDAY] +// +// Contains the operational calendar. Which package (schedule version) applies +// is specified per day, per organisation unit. +// +// This table is part of the KV1 variant "schedules and passing times". +struct Kv1OperatingDay { + struct Key { + // Mandatory (key), at most 10 characters. Transport operator (from list as + // defined in BISON enumeration E1). + std::string data_owner_code; + // Mandatory (key), at most 10 characters. + std::string organizational_unit_code; + // Mandatory (key), at most 10 characters. + std::string schedule_code; + // Mandatory (key), at most 10 characters. + std::string schedule_type_code; + // Mandatory (key), at most 10 characters. Date on which the package + // (schedule version) applies. 
Format: "YYYY-MM-DD". + std::chrono::year_month_day valid_date; + + explicit Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string schedule_code, + std::string schedule_type_code, + std::chrono::year_month_day valid_date); + }; + + Key key; + // Optional, at most 255 characters. + std::string description; + + Kv1OrganizationalUnit *p_organizational_unit = nullptr; + Kv1ScheduleVersion *p_schedule_version = nullptr; +}; + +bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b); +bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b); +bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b); +bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b); +bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b); +bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b); +bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b); +bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b); +bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b); +bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b); +bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b); +bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b); +bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b); +bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b); +bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b); +bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b); +bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b); +bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b); +bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const 
Kv1TimeDemandGroupRunTime::Key &b); +bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b); +bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b); +bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b); +bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b); +bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b); +bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b); +bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b); +bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b); +bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b); + +size_t hash_value(const Kv1OrganizationalUnit::Key &k); +size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k); +size_t hash_value(const Kv1UserStopPoint::Key &k); +size_t hash_value(const Kv1UserStopArea::Key &k); +size_t hash_value(const Kv1TimingLink::Key &k); +size_t hash_value(const Kv1Link::Key &k); +size_t hash_value(const Kv1Line::Key &k); +size_t hash_value(const Kv1Destination::Key &k); +size_t hash_value(const Kv1JourneyPattern::Key &k); +size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k); +size_t hash_value(const Kv1ConcessionArea::Key &k); +size_t hash_value(const Kv1Financer::Key &k); +size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k); +size_t hash_value(const Kv1Point::Key &k); +size_t hash_value(const Kv1PointOnLink::Key &k); +size_t hash_value(const Kv1Icon::Key &k); +size_t hash_value(const Kv1Notice::Key &k); +size_t hash_value(const Kv1TimeDemandGroup::Key &k); +size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k); +size_t hash_value(const Kv1PeriodGroup::Key &k); +size_t hash_value(const Kv1SpecificDay::Key &k); +size_t hash_value(const Kv1TimetableVersion::Key &k); +size_t hash_value(const 
Kv1PublicJourney::Key &k); +size_t hash_value(const Kv1PeriodGroupValidity::Key &k); +size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k); +size_t hash_value(const Kv1ScheduleVersion::Key &k); +size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k); +size_t hash_value(const Kv1OperatingDay::Key &k); + +#endif // OEUF_LIBTMI8_KV1_TYPES_HPP diff --git a/lib/libtmi8/include/tmi8/kv6_parquet.hpp b/lib/libtmi8/include/tmi8/kv6_parquet.hpp new file mode 100644 index 0000000..33b57ca --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv6_parquet.hpp @@ -0,0 +1,46 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_LIBTMI8_KV6_PARQUET_HPP +#define OEUF_LIBTMI8_KV6_PARQUET_HPP + +#include + +#include +#include +#include + +static const size_t MAX_PARQUET_CHUNK = 10000; + +struct ParquetBuilder { + ParquetBuilder(); + arrow::Result> getTable(); + + std::shared_ptr schema; + + arrow::StringBuilder types; + arrow::StringBuilder data_owner_codes; + arrow::StringBuilder line_planning_numbers; + arrow::Date32Builder operating_days; + arrow::UInt32Builder journey_numbers; + arrow::UInt8Builder reinforcement_numbers; + arrow::TimestampBuilder timestamps{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::default_memory_pool()}; + arrow::StringBuilder sources; + arrow::Int16Builder punctualities; + arrow::StringBuilder user_stop_codes; + arrow::UInt16Builder passage_sequence_numbers; + arrow::UInt32Builder vehicle_numbers; + arrow::UInt32Builder block_codes; + arrow::StringBuilder wheelchair_accessibles; + arrow::UInt8Builder number_of_coaches; + arrow::Int32Builder rd_ys; + arrow::Int32Builder rd_xs; + arrow::UInt32Builder distance_since_last_user_stops; +}; + +[[nodiscard]] +arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename); + +[[nodiscard]] +arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename); + +#endif // OEUF_LIBTMI8_KV6_PARQUET_HPP diff --git 
a/lib/libtmi8/src/kv1_index.cpp b/lib/libtmi8/src/kv1_index.cpp new file mode 100644 index 0000000..23e9596 --- /dev/null +++ b/lib/libtmi8/src/kv1_index.cpp @@ -0,0 +1,461 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +Kv1Index::Kv1Index(Kv1Records *records) : records(records) { + organizational_units.reserve(records->organizational_units.size()); + for (size_t i = 0; i < records->organizational_units.size(); i++) { + auto *it = &records->organizational_units[i]; + organizational_units[it->key] = it; + } + higher_organizational_units.reserve(records->higher_organizational_units.size()); + for (size_t i = 0; i < records->higher_organizational_units.size(); i++) { + auto *it = &records->higher_organizational_units[i]; + higher_organizational_units[it->key] = it; + } + user_stop_points.reserve(records->user_stop_points.size()); + for (size_t i = 0; i < records->user_stop_points.size(); i++) { + auto *it = &records->user_stop_points[i]; + user_stop_points[it->key] = it; + } + user_stop_areas.reserve(records->user_stop_areas.size()); + for (size_t i = 0; i < records->user_stop_areas.size(); i++) { + auto *it = &records->user_stop_areas[i]; + user_stop_areas[it->key] = it; + } + timing_links.reserve(records->timing_links.size()); + for (size_t i = 0; i < records->timing_links.size(); i++) { + auto *it = &records->timing_links[i]; + timing_links[it->key] = it; + } + links.reserve(records->links.size()); + for (size_t i = 0; i < records->links.size(); i++) { + auto *it = &records->links[i]; + links[it->key] = it; + } + lines.reserve(records->lines.size()); + for (size_t i = 0; i < records->lines.size(); i++) { + auto *it = &records->lines[i]; + lines[it->key] = it; + } + destinations.reserve(records->destinations.size()); + for (size_t i = 0; i < records->destinations.size(); i++) { + auto *it = &records->destinations[i]; + destinations[it->key] = it; + } + journey_patterns.reserve(records->journey_patterns.size()); + for (size_t i = 0; i < 
records->journey_patterns.size(); i++) { + auto *it = &records->journey_patterns[i]; + journey_patterns[it->key] = it; + } + concession_financer_relations.reserve(records->concession_financer_relations.size()); + for (size_t i = 0; i < records->concession_financer_relations.size(); i++) { + auto *it = &records->concession_financer_relations[i]; + concession_financer_relations[it->key] = it; + } + concession_areas.reserve(records->concession_areas.size()); + for (size_t i = 0; i < records->concession_areas.size(); i++) { + auto *it = &records->concession_areas[i]; + concession_areas[it->key] = it; + } + financers.reserve(records->financers.size()); + for (size_t i = 0; i < records->financers.size(); i++) { + auto *it = &records->financers[i]; + financers[it->key] = it; + } + journey_pattern_timing_links.reserve(records->journey_pattern_timing_links.size()); + for (size_t i = 0; i < records->journey_pattern_timing_links.size(); i++) { + auto *it = &records->journey_pattern_timing_links[i]; + journey_pattern_timing_links[it->key] = it; + } + points.reserve(records->points.size()); + for (size_t i = 0; i < records->points.size(); i++) { + auto *it = &records->points[i]; + points[it->key] = it; + } + point_on_links.reserve(records->point_on_links.size()); + for (size_t i = 0; i < records->point_on_links.size(); i++) { + auto *it = &records->point_on_links[i]; + point_on_links[it->key] = it; + } + icons.reserve(records->icons.size()); + for (size_t i = 0; i < records->icons.size(); i++) { + auto *it = &records->icons[i]; + icons[it->key] = it; + } + notices.reserve(records->notices.size()); + for (size_t i = 0; i < records->notices.size(); i++) { + auto *it = &records->notices[i]; + notices[it->key] = it; + } + time_demand_groups.reserve(records->time_demand_groups.size()); + for (size_t i = 0; i < records->time_demand_groups.size(); i++) { + auto *it = &records->time_demand_groups[i]; + time_demand_groups[it->key] = it; + } + 
time_demand_group_run_times.reserve(records->time_demand_group_run_times.size()); + for (size_t i = 0; i < records->time_demand_group_run_times.size(); i++) { + auto *it = &records->time_demand_group_run_times[i]; + time_demand_group_run_times[it->key] = it; + } + period_groups.reserve(records->period_groups.size()); + for (size_t i = 0; i < records->period_groups.size(); i++) { + auto *it = &records->period_groups[i]; + period_groups[it->key] = it; + } + specific_days.reserve(records->specific_days.size()); + for (size_t i = 0; i < records->specific_days.size(); i++) { + auto *it = &records->specific_days[i]; + specific_days[it->key] = it; + } + timetable_versions.reserve(records->timetable_versions.size()); + for (size_t i = 0; i < records->timetable_versions.size(); i++) { + auto *it = &records->timetable_versions[i]; + timetable_versions[it->key] = it; + } + public_journeys.reserve(records->public_journeys.size()); + for (size_t i = 0; i < records->public_journeys.size(); i++) { + auto *it = &records->public_journeys[i]; + public_journeys[it->key] = it; + } + period_group_validities.reserve(records->period_group_validities.size()); + for (size_t i = 0; i < records->period_group_validities.size(); i++) { + auto *it = &records->period_group_validities[i]; + period_group_validities[it->key] = it; + } + exceptional_operating_days.reserve(records->exceptional_operating_days.size()); + for (size_t i = 0; i < records->exceptional_operating_days.size(); i++) { + auto *it = &records->exceptional_operating_days[i]; + exceptional_operating_days[it->key] = it; + } + schedule_versions.reserve(records->schedule_versions.size()); + for (size_t i = 0; i < records->schedule_versions.size(); i++) { + auto *it = &records->schedule_versions[i]; + schedule_versions[it->key] = it; + } + public_journey_passing_times.reserve(records->public_journey_passing_times.size()); + for (size_t i = 0; i < records->public_journey_passing_times.size(); i++) { + auto *it = 
&records->public_journey_passing_times[i]; + public_journey_passing_times[it->key] = it; + } + operating_days.reserve(records->operating_days.size()); + for (size_t i = 0; i < records->operating_days.size(); i++) { + auto *it = &records->operating_days[i]; + operating_days[it->key] = it; + } +} + +size_t Kv1Index::size() const { + return organizational_units.size() + + higher_organizational_units.size() + + user_stop_points.size() + + user_stop_areas.size() + + timing_links.size() + + links.size() + + lines.size() + + destinations.size() + + journey_patterns.size() + + concession_financer_relations.size() + + concession_areas.size() + + financers.size() + + journey_pattern_timing_links.size() + + points.size() + + point_on_links.size() + + icons.size() + + notices.size() + + time_demand_groups.size() + + time_demand_group_run_times.size() + + period_groups.size() + + specific_days.size() + + timetable_versions.size() + + public_journeys.size() + + period_group_validities.size() + + exceptional_operating_days.size() + + schedule_versions.size() + + public_journey_passing_times.size() + + operating_days.size(); +} + +void kv1LinkRecords(Kv1Index &index) { + for (auto &orunorun : index.records->higher_organizational_units) { + Kv1OrganizationalUnit::Key orun_parent_key( + orunorun.key.data_owner_code, + orunorun.key.organizational_unit_code_parent); + Kv1OrganizationalUnit::Key orun_child_key( + orunorun.key.data_owner_code, + orunorun.key.organizational_unit_code_child); + orunorun.p_organizational_unit_parent = index.organizational_units[orun_parent_key]; + orunorun.p_organizational_unit_child = index.organizational_units[orun_child_key]; + } + for (auto &usrstop : index.records->user_stop_points) { + Kv1Point::Key point_key( + usrstop.key.data_owner_code, + usrstop.key.user_stop_code); + usrstop.p_point = index.points[point_key]; + if (!usrstop.user_stop_area_code.empty()) { + Kv1UserStopArea::Key usrstar_key( + usrstop.key.data_owner_code, + 
usrstop.user_stop_area_code); + usrstop.p_user_stop_area = index.user_stop_areas[usrstar_key]; + } + } + for (auto &tili : index.records->timing_links) { + Kv1UserStopPoint::Key usrstop_begin_key( + tili.key.data_owner_code, + tili.key.user_stop_code_begin); + Kv1UserStopPoint::Key usrstop_end_key( + tili.key.data_owner_code, + tili.key.user_stop_code_end); + tili.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; + tili.p_user_stop_end = index.user_stop_points[usrstop_end_key]; + } + for (auto &link : index.records->links) { + Kv1UserStopPoint::Key usrstop_begin_key( + link.key.data_owner_code, + link.key.user_stop_code_begin); + Kv1UserStopPoint::Key usrstop_end_key( + link.key.data_owner_code, + link.key.user_stop_code_end); + link.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; + link.p_user_stop_end = index.user_stop_points[usrstop_end_key]; + } + for (auto &line : index.records->lines) { + if (!line.line_icon) + continue; + Kv1Icon::Key icon_key( + line.key.data_owner_code, + *line.line_icon); + line.p_line_icon = index.icons[icon_key]; + } + for (auto &jopa : index.records->journey_patterns) { + Kv1Line::Key line_key( + jopa.key.data_owner_code, + jopa.key.line_planning_number); + jopa.p_line = index.lines[line_key]; + } + for (auto &confinrel : index.records->concession_financer_relations) { + Kv1ConcessionArea::Key conarea_key( + confinrel.key.data_owner_code, + confinrel.concession_area_code); + confinrel.p_concession_area = index.concession_areas[conarea_key]; + if (!confinrel.financer_code.empty()) { + Kv1Financer::Key financer_key( + confinrel.key.data_owner_code, + confinrel.financer_code); + confinrel.p_financer = index.financers[financer_key]; + } + } + for (auto &jopatili : index.records->journey_pattern_timing_links) { + Kv1Line::Key line_key( + jopatili.key.data_owner_code, + jopatili.key.line_planning_number); + Kv1JourneyPattern::Key jopa_key( + jopatili.key.data_owner_code, + jopatili.key.line_planning_number, + 
jopatili.key.journey_pattern_code); + Kv1UserStopPoint::Key usrstop_begin_key( + jopatili.key.data_owner_code, + jopatili.user_stop_code_begin); + Kv1UserStopPoint::Key usrstop_end_key( + jopatili.key.data_owner_code, + jopatili.user_stop_code_end); + Kv1ConcessionFinancerRelation::Key confinrel_key( + jopatili.key.data_owner_code, + jopatili.con_fin_rel_code); + Kv1Destination::Key dest_key( + jopatili.key.data_owner_code, + jopatili.dest_code); + jopatili.p_line = index.lines[line_key]; + jopatili.p_journey_pattern = index.journey_patterns[jopa_key]; + jopatili.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; + jopatili.p_user_stop_end = index.user_stop_points[usrstop_end_key]; + jopatili.p_con_fin_rel = index.concession_financer_relations[confinrel_key]; + jopatili.p_dest = index.destinations[dest_key]; + if (jopatili.line_dest_icon) { + Kv1Icon::Key icon_key{ + jopatili.key.data_owner_code, + *jopatili.line_dest_icon, + }; + jopatili.p_line_dest_icon = index.icons[icon_key]; + } + } + for (auto &pool : index.records->point_on_links) { + Kv1UserStopPoint::Key usrstop_begin_key( + pool.key.data_owner_code, + pool.key.user_stop_code_begin); + Kv1UserStopPoint::Key usrstop_end_key( + pool.key.data_owner_code, + pool.key.user_stop_code_end); + Kv1Point::Key point_key( + pool.key.point_data_owner_code, + pool.key.point_code); + pool.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; + pool.p_user_stop_end = index.user_stop_points[usrstop_end_key]; + pool.p_point = index.points[point_key]; + } + for (auto &ntcassgnm : index.records->notice_assignments) { + Kv1Notice::Key notice_key( + ntcassgnm.data_owner_code, + ntcassgnm.notice_code); + ntcassgnm.p_notice = index.notices[notice_key]; + } + for (auto &timdemgrp : index.records->time_demand_groups) { + Kv1Line::Key line_key( + timdemgrp.key.data_owner_code, + timdemgrp.key.line_planning_number); + Kv1JourneyPattern::Key jopa_key( + timdemgrp.key.data_owner_code, + 
timdemgrp.key.line_planning_number, + timdemgrp.key.journey_pattern_code); + timdemgrp.p_line = index.lines[line_key]; + timdemgrp.p_journey_pattern = index.journey_patterns[jopa_key]; + } + for (auto &timdemrnt : index.records->time_demand_group_run_times) { + Kv1Line::Key line_key( + timdemrnt.key.data_owner_code, + timdemrnt.key.line_planning_number); + Kv1JourneyPattern::Key jopa_key( + timdemrnt.key.data_owner_code, + timdemrnt.key.line_planning_number, + timdemrnt.key.journey_pattern_code); + Kv1TimeDemandGroup::Key timdemgrp_key( + timdemrnt.key.data_owner_code, + timdemrnt.key.line_planning_number, + timdemrnt.key.journey_pattern_code, + timdemrnt.key.time_demand_group_code); + Kv1UserStopPoint::Key usrstop_begin_key( + timdemrnt.key.data_owner_code, + timdemrnt.user_stop_code_begin); + Kv1UserStopPoint::Key usrstop_end_key( + timdemrnt.key.data_owner_code, + timdemrnt.user_stop_code_end); + Kv1JourneyPatternTimingLink::Key jopatili_key( + timdemrnt.key.data_owner_code, + timdemrnt.key.line_planning_number, + timdemrnt.key.journey_pattern_code, + timdemrnt.key.timing_link_order); + timdemrnt.p_line = index.lines[line_key]; + timdemrnt.p_user_stop_end = index.user_stop_points[usrstop_end_key]; + timdemrnt.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; + timdemrnt.p_journey_pattern = index.journey_patterns[jopa_key]; + timdemrnt.p_time_demand_group = index.time_demand_groups[timdemgrp_key]; + timdemrnt.p_journey_pattern_timing_link = index.journey_pattern_timing_links[jopatili_key]; + } + for (auto &tive : index.records->timetable_versions) { + Kv1OrganizationalUnit::Key orun_key( + tive.key.data_owner_code, + tive.key.organizational_unit_code); + Kv1PeriodGroup::Key pegr_key( + tive.key.data_owner_code, + tive.key.period_group_code); + Kv1SpecificDay::Key specday_key( + tive.key.data_owner_code, + tive.key.specific_day_code); + tive.p_organizational_unit = index.organizational_units[orun_key]; + tive.p_period_group = 
index.period_groups[pegr_key]; + tive.p_specific_day = index.specific_days[specday_key]; + } + for (auto &pujo : index.records->public_journeys) { + Kv1TimetableVersion::Key tive_key( + pujo.key.data_owner_code, + pujo.key.organizational_unit_code, + pujo.key.timetable_version_code, + pujo.key.period_group_code, + pujo.key.specific_day_code); + Kv1OrganizationalUnit::Key orun_key( + pujo.key.data_owner_code, + pujo.key.organizational_unit_code); + Kv1PeriodGroup::Key pegr_key( + pujo.key.data_owner_code, + pujo.key.period_group_code); + Kv1SpecificDay::Key specday_key( + pujo.key.data_owner_code, + pujo.key.specific_day_code); + Kv1Line::Key line_key( + pujo.key.data_owner_code, + pujo.key.line_planning_number); + Kv1TimeDemandGroup::Key timdemgrp_key( + pujo.key.data_owner_code, + pujo.key.line_planning_number, + pujo.journey_pattern_code, + pujo.time_demand_group_code); + Kv1JourneyPattern::Key jopa_key( + pujo.key.data_owner_code, + pujo.key.line_planning_number, + pujo.journey_pattern_code); + pujo.p_timetable_version = index.timetable_versions[tive_key]; + pujo.p_organizational_unit = index.organizational_units[orun_key]; + pujo.p_period_group = index.period_groups[pegr_key]; + pujo.p_specific_day = index.specific_days[specday_key]; + pujo.p_line = index.lines[line_key]; + pujo.p_time_demand_group = index.time_demand_groups[timdemgrp_key]; + pujo.p_journey_pattern = index.journey_patterns[jopa_key]; + } + for (auto &pegrval : index.records->period_group_validities) { + Kv1OrganizationalUnit::Key orun_key( + pegrval.key.data_owner_code, + pegrval.key.organizational_unit_code); + Kv1PeriodGroup::Key pegr_key( + pegrval.key.data_owner_code, + pegrval.key.period_group_code); + pegrval.p_organizational_unit = index.organizational_units[orun_key]; + pegrval.p_period_group = index.period_groups[pegr_key]; + } + for (auto &excopday : index.records->exceptional_operating_days) { + Kv1OrganizationalUnit::Key orun_key( + excopday.key.data_owner_code, + 
excopday.key.organizational_unit_code); + Kv1SpecificDay::Key specday_key( + excopday.key.data_owner_code, + excopday.specific_day_code); + Kv1PeriodGroup::Key pegr_key( + excopday.key.data_owner_code, + excopday.period_group_code); + excopday.p_organizational_unit = index.organizational_units[orun_key]; + excopday.p_specific_day = index.specific_days[specday_key]; + excopday.p_period_group = index.period_groups[pegr_key]; + } + for (auto &schedvers : index.records->schedule_versions) { + Kv1OrganizationalUnit::Key orun_key( + schedvers.key.data_owner_code, + schedvers.key.organizational_unit_code); + schedvers.p_organizational_unit = index.organizational_units[orun_key]; + } + for (auto &pujopass : index.records->public_journey_passing_times) { + Kv1OrganizationalUnit::Key orun_key( + pujopass.key.data_owner_code, + pujopass.key.organizational_unit_code); + Kv1ScheduleVersion::Key schedvers_key( + pujopass.key.data_owner_code, + pujopass.key.organizational_unit_code, + pujopass.key.schedule_code, + pujopass.key.schedule_type_code); + Kv1Line::Key line_key( + pujopass.key.data_owner_code, + pujopass.key.line_planning_number); + Kv1JourneyPattern::Key jopa_key( + pujopass.key.data_owner_code, + pujopass.key.line_planning_number, + pujopass.journey_pattern_code); + Kv1UserStopPoint::Key usrstop_key( + pujopass.key.data_owner_code, + pujopass.user_stop_code); + pujopass.p_organizational_unit = index.organizational_units[orun_key]; + pujopass.p_schedule_version = index.schedule_versions[schedvers_key]; + pujopass.p_line = index.lines[line_key]; + pujopass.p_journey_pattern = index.journey_patterns[jopa_key]; + pujopass.p_user_stop = index.user_stop_points[usrstop_key]; + } + for (auto &operday : index.records->operating_days) { + Kv1OrganizationalUnit::Key orun_key( + operday.key.data_owner_code, + operday.key.organizational_unit_code); + Kv1ScheduleVersion::Key schedvers_key( + operday.key.data_owner_code, + operday.key.organizational_unit_code, + 
operday.key.schedule_code, + operday.key.schedule_type_code); + operday.p_organizational_unit = index.organizational_units[orun_key]; + operday.p_schedule_version = index.schedule_versions[schedvers_key]; + } +} diff --git a/lib/libtmi8/src/kv1_lexer.cpp b/lib/libtmi8/src/kv1_lexer.cpp new file mode 100644 index 0000000..028127b --- /dev/null +++ b/lib/libtmi8/src/kv1_lexer.cpp @@ -0,0 +1,152 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +Kv1Lexer::Kv1Lexer(std::string_view input) + : input(input), slice(input) +{} + +// Does not eat newline character. +void Kv1Lexer::eatRestOfLine() { + size_t end = slice.size(); + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '\r' || slice[i] == '\n') { + end = i; + break; + } + } + slice = slice.substr(end); +} + +void Kv1Lexer::lexOptionalHeader() { + if (slice.starts_with('[')) eatRestOfLine(); +} + +void Kv1Lexer::lexOptionalComment() { + if (slice.starts_with(';')) eatRestOfLine(); +} + +inline bool Kv1Lexer::isWhitespace(int c) { + return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; +} + +void Kv1Lexer::readQuotedColumn() { + Kv1Token token{ .type = KV1_TOKEN_CELL }; + + if (slice.size() == 0 || slice[0] != '"') { + errors.push_back("(internal error) readQuotedColumn: slice[0] != '\"'"); + return; + } + slice = slice.substr(1); + while (true) { + size_t quote = slice.find('"'); + if (quote == std::string_view::npos) { + errors.push_back("readQuotedColumn: no matching closing quote found"); + return; + } + if (quote+1 == slice.size() || slice[quote + 1] != '"') { + token.data.append(slice.substr(0, quote)); + break; + } + token.data.append(slice.substr(0, quote + 1)); + slice = slice.substr(quote + 2); + } + + size_t end = slice.size(); + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') { + end = i; + break; + } + if (!isWhitespace(slice[i])) { + errors.push_back("readQuotedColumn: encountered non-whitespace character after closing 
quote"); + return; + } + } + if (end != std::string_view::npos) slice = slice.substr(end); + else slice = slice.substr(slice.size()); + + tokens.push_back(std::move(token)); +} + +void Kv1Lexer::readUnquotedColumn() { + size_t end = slice.size(); + size_t content_end = 0; + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') { + end = i; + break; + } else if (!isWhitespace(slice[i])) { + content_end = i + 1; + } + } + tokens.emplace_back(KV1_TOKEN_CELL, std::string(slice.substr(0, content_end))); + if (end != std::string_view::npos) slice = slice.substr(end); + else slice = slice.substr(slice.size()); +} + +void Kv1Lexer::lexRow() { + size_t cols = 0; + while (slice.size() > 0 && slice[0] != '\r' && slice[0] != '\n') { + if (slice[0] == '"') readQuotedColumn(); + else readUnquotedColumn(); + if (!errors.empty()) return; + cols++; + if (slice.size() != 0) { + if (slice[0] == '|') { + slice = slice.substr(1); + // A newline/eof right after pipe? That means an empty field at the end + // of the record, we also want to emit that as a token. + if (slice.size() == 0 || slice[0] == '\r' || slice[0] == '\n') { + tokens.push_back({ .type = KV1_TOKEN_CELL }); + } + } else if (slice[0] == '\r') { + if (slice.size() > 1 && slice[1] == '\n') slice = slice.substr(2); + else slice = slice.substr(1); + break; + } else if (slice[0] == '\n') { + slice = slice.substr(1); + break; + } else { + errors.push_back("lexRow: expected CR, LF or |"); + return; + } + } + } + tokens.push_back({ .type = KV1_TOKEN_ROW_END }); +} + +// Returns true when a line ending was consumed. 
+bool Kv1Lexer::eatWhitespace() { + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '\r') { + slice = slice.substr(i + 1); + if (slice.size() > 1 && slice[i + 1] == '\n') + slice = slice.substr(i + 2); + return true; + } + if (slice[i] == '\n') { + slice = slice.substr(i + 1); + return true; + } + + if (slice[i] != ' ' && slice[i] != '\f' && slice[i] != '\t' && slice[i] != '\v') { + slice = slice.substr(i); + return false; + } + } + return false; +} + +void Kv1Lexer::lex() { + lexOptionalHeader(); + eatWhitespace(); + + while (errors.empty() && !slice.empty()) { + lexOptionalComment(); + bool newline = eatWhitespace(); + if (newline) continue; + // We are now either (1) at the end of the file or (2) at the start of some column data + if (errors.empty()) lexRow(); + } +} diff --git a/lib/libtmi8/src/kv1_parser.cpp b/lib/libtmi8/src/kv1_parser.cpp new file mode 100644 index 0000000..ac0c6bf --- /dev/null +++ b/lib/libtmi8/src/kv1_parser.cpp @@ -0,0 +1,1258 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +using rune = uint32_t; + +static size_t decodeUtf8Cp(std::string_view s, rune *dest = nullptr) { + rune res = 0xFFFD; + size_t length = 1; + + if (s.size() == 0) + return 0; + const uint8_t *b = reinterpret_cast(s.data()); + if (!(b[0] & 0x80)) + res = static_cast(b[0]); + else if ((b[0] & 0xE0) == 0xC0) { + length = 2; + if (s.size() >= 2 && (b[1] & 0xC0) == 0x80) { + res = static_cast(b[0] & ~0xC0) << 6; + res |= static_cast(b[1] & ~0x80); + } + } else if ((b[0] & 0xF0) == 0xE0) { + length = 3; + if (s.size() >= 3 && (b[1] & 0xC0) == 0x80 && (b[2] & 0xC0) == 0x80) { + res = static_cast(b[0] & ~0xE0) << 12; + res |= static_cast(b[1] & ~0x80) << 6; + res |= static_cast(b[2] & ~0x80); + } + } else if (b[0] == 0xF0) { + length = 4; + if (s.size() >= 4 && (b[1] & 0xC0) == 0x80 && (b[2] & 0xC0) == 0x80 && (b[3] & 0xC0) == 0x80) { + res = static_cast(b[0] & ~0xF0) << 18; + res |= static_cast(b[1] & ~0x80) << 12; + res |= static_cast(b[2] & ~0x80) << 6; + res 
|= static_cast(b[3] & ~0x80); + } + } + + if (dest) + *dest = res; + return length; +} + +// Counts the number of codepoints in a valid UTF-8 string. Returns SIZE_MAX if +// the string contains invalid UTF-8 codepoints. +static size_t stringViewLengthUtf8(std::string_view sv) { + size_t codepoints = 0; + while (sv.size() > 0) { + size_t codepoint_size = decodeUtf8Cp(sv); + if (codepoint_size == 0) return SIZE_MAX; + codepoints++; + sv = sv.substr(codepoint_size); + } + return codepoints; +} + +Kv1Parser::Kv1Parser(std::vector tokens, Kv1Records &parse_into) + : tokens(std::move(tokens)), + records(parse_into) +{} + +bool Kv1Parser::atEnd() const { + return pos >= tokens.size(); +} + +void Kv1Parser::eatRowEnds() { + while (!atEnd() && tokens[pos].type == KV1_TOKEN_ROW_END) pos++; +} + +const Kv1Token *Kv1Parser::cur() const { + if (atEnd()) return nullptr; + return &tokens[pos]; +} + +const std::string *Kv1Parser::eatCell(std::string_view parsing_what) { + const Kv1Token *tok = cur(); + if (!tok) { + record_errors.push_back(std::format("Expected cell but got end of file when parsing {}", parsing_what)); + return nullptr; + } + if (tok->type == KV1_TOKEN_ROW_END) { + record_errors.push_back(std::format("Expected cell but got end of row when parsing {}", parsing_what)); + return nullptr; + } + pos++; + return &tok->data; +} + +void Kv1Parser::requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value) { + if (value.empty() && mandatory) { + record_errors.push_back(std::format("{} has length zero but is required", field)); + return; + } + size_t codepoints = stringViewLengthUtf8(value); + if (codepoints == SIZE_MAX) { + global_errors.push_back(std::format("{} contains invalid UTF-8 code points", field)); + return; + } + if (codepoints > max_length) { + record_errors.push_back(std::format("{} has length ({}) that is greater than maximum length ({})", + field, value.size(), max_length)); + } +} + +static inline std::optional 
parseBoolean(std::string_view src) { + if (src == "1") return true; + if (src == "0") return false; + if (src == "true") return true; + if (src == "false") return false; + return std::nullopt; +} + +std::optional Kv1Parser::requireBoolean(std::string_view field, bool mandatory, std::string_view value) { + if (value.empty()) { + if (mandatory) + record_errors.push_back(std::format("{} is required, but has no value", field)); + return std::nullopt; + } + auto parsed = parseBoolean(value); + if (!parsed.has_value()) + record_errors.push_back(std::format("{} should have value \"1\", \"0\", \"true\" or \"false\"", field)); + return parsed; +} + +static inline size_t countDigits(long x) { + size_t digits = 0; + while (x != 0) { digits++; x /= 10; } + return digits; +} + +std::optional Kv1Parser::requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value) { + if (value.empty()) { + if (mandatory) + record_errors.push_back(std::format("{} has no value but is required", field)); + return std::nullopt; + } + + double parsed; + auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed); + if (ec != std::errc()) { + record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field)); + return std::nullopt; + } + if (ptr != value.data() + value.size()) { + record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field)); + return std::nullopt; + } + + size_t digits = countDigits(static_cast(parsed)); + if (digits > max_digits) { + record_errors.push_back(std::format("{} contains more digits (in the integral part) ({}) than allowed ({})", + field, digits, max_digits)); + return std::nullopt; + } + + return parsed; +} + +static inline bool isHexDigit(char c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'); +} + +static inline uint8_t fromHex(char c) { + if (c >= '0' && c <= '9') return static_cast(c - '0'); + 
else if (c >= 'A' && c <= 'F') return static_cast(c - 'A' + 10); + return 0; +} + +static std::optional parseRgbColor(std::string_view src) { + bool valid = src.size() == 6 + && isHexDigit(src[0]) && isHexDigit(src[1]) + && isHexDigit(src[2]) && isHexDigit(src[3]) + && isHexDigit(src[4]) && isHexDigit(src[5]); + if (!valid) return std::nullopt; + uint8_t r = static_cast(fromHex(src[0]) << 4) + fromHex(src[1]); + uint8_t g = static_cast(fromHex(src[2]) << 4) + fromHex(src[3]); + uint8_t b = static_cast(fromHex(src[4]) << 4) + fromHex(src[5]); + return RgbColor{ r, g, b }; +} + +std::optional Kv1Parser::requireRgbColor(std::string_view field, bool mandatory, std::string_view value) { + if (value.empty()) { + if (mandatory) + record_errors.push_back(std::format("{} is required, but has no value", field)); + return std::nullopt; + } + auto parsed = parseRgbColor(value); + if (!parsed.has_value()) + record_errors.push_back(std::format("{} should be an RGB color, i.e. a sequence of six hexadecimally represented nibbles", field)); + return parsed; +} + +std::optional Kv1Parser::requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value) { + if (value.empty()) { + if (mandatory) + record_errors.push_back(std::format("{} is required, but has no value", field)); + return std::nullopt; + } + if (value.size() > 15) { + record_errors.push_back(std::format("{} may not have more than 15 characters", field)); + return std::nullopt; + } + + double parsed; + auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed); + if (ec != std::errc()) { + record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field)); + return std::nullopt; + } + if (ptr != value.data() + value.size()) { + record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field)); + return std::nullopt; + } + + size_t digits = 
countDigits(static_cast(parsed)); + if (digits < min_digits) { + record_errors.push_back(std::format("{} contains less digits (in the integral part) ({}) than required ({}) [value: {}]", + field, digits, min_digits, value)); + return std::nullopt; + } + + return parsed; +} + +std::string Kv1Parser::eatString(std::string_view field, bool mandatory, size_t max_length) { + auto value = eatCell(field); + if (!record_errors.empty()) return {}; + requireString(field, mandatory, max_length, *value); + return std::move(*value); +} + +std::optional Kv1Parser::eatBoolean(std::string_view field, bool mandatory) { + auto value = eatCell(field); + if (!record_errors.empty()) return {}; + return requireBoolean(field, mandatory, *value); +} + +std::optional Kv1Parser::eatNumber(std::string_view field, bool mandatory, size_t max_digits) { + auto value = eatCell(field); + if (!record_errors.empty()) return {}; + return requireNumber(field, mandatory, max_digits, *value); +} + +std::optional Kv1Parser::eatRgbColor(std::string_view field, bool mandatory) { + auto value = eatCell(field); + if (!record_errors.empty()) return {}; + return requireRgbColor(field, mandatory, *value); +} + +std::optional Kv1Parser::eatRdCoord(std::string_view field, bool mandatory, size_t min_digits) { + auto value = eatCell(field); + if (!record_errors.empty()) return {}; + return requireRdCoord(field, mandatory, min_digits, *value); +} + +std::string Kv1Parser::parseHeader() { + auto record_type = eatString("
.Recordtype", true, 10); + auto version_number = eatString("
.VersionNumber", true, 2); + auto implicit_explicit = eatString("
.Implicit/Explicit", true, 1); + if (!record_errors.empty()) return {}; + + if (version_number != "1") { + record_errors.push_back("
.VersionNumber should be 1"); + return ""; + } + if (implicit_explicit != "I") { + record_errors.push_back("
.Implicit/Explicit should be 'I'"); + return ""; + } + + return record_type; +} + +void Kv1Parser::eatRestOfRow() { + while (!atEnd() && cur()->type != KV1_TOKEN_ROW_END) pos++; +} + +void Kv1Parser::parse() { + while (!atEnd()) { + eatRowEnds(); + if (atEnd()) return; + + std::string record_type = parseHeader(); + if (!record_errors.empty()) break; + if (!type_parsers.contains(record_type)) { + warns.push_back(std::format("Recordtype ({}) is bad or names a record type that this program cannot process", + record_type)); + eatRestOfRow(); + continue; + } + + ParseFunc parseType = Kv1Parser::type_parsers.at(record_type); + (this->*parseType)(); + if (cur() && cur()->type != KV1_TOKEN_ROW_END) { + record_errors.push_back(std::format("Parser function for Recordtype ({}) did not eat all record fields", + record_type)); + eatRestOfRow(); + } + if (!record_errors.empty()) { + global_errors.insert(global_errors.end(), record_errors.begin(), record_errors.end()); + record_errors.clear(); + } + } +} + +void Kv1Parser::parseOrganizationalUnit() { + auto data_owner_code = eatString("ORUN.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("ORUN.OrganizationalUnitCode", true, 10); + auto name = eatString("ORUN.Name", true, 50); + auto organizational_unit_type = eatString("ORUN.OrganizationalUnitType", true, 10); + auto description = eatString("ORUN.Description", false, 255); + if (!record_errors.empty()) return; + + records.organizational_units.emplace_back( + Kv1OrganizationalUnit::Key( + data_owner_code, + organizational_unit_code), + name, + organizational_unit_type, + description); +} + +static inline bool isDigit(char c) { + return c >= '0' && c <= '9'; +} + +// Parse a string of the format YYYY-MM-DD. 
+static std::optional parseYyyymmdd(std::string_view src) { + bool valid = src.size() == 10 + && isDigit(src[0]) && isDigit(src[1]) + && isDigit(src[2]) && isDigit(src[3]) && src[4] == '-' + && isDigit(src[5]) && isDigit(src[6]) && src[7] == '-' + && isDigit(src[8]) && isDigit(src[9]); + if (!valid) return std::nullopt; + int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0'; + int month = (src[5] - '0') * 10 + src[6] - '0'; + int day = (src[8] - '0') * 10 + src[9] - '0'; + return std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day); +} + +// Parse a string of the format HH:MM:SS. +static std::optional> parseHhmmss(std::string_view src) { + bool valid = src.size() == 8 + && isDigit(src[0]) && isDigit(src[1]) && src[2] == ':' + && isDigit(src[3]) && isDigit(src[4]) && src[5] == ':' + && isDigit(src[6]) && isDigit(src[7]); + if (!valid) return std::nullopt; + int hh = (src[0] - '0') * 10 + src[1] - '0'; + int mm = (src[3] - '0') * 10 + src[4] - '0'; + int ss = (src[6] - '0') * 10 + src[7] - '0'; + // The check for the hour not being greater than 32 comes from the fact the + // specification explicitly allows hours greater than 23, noting that the + // period 24:00-32:00 is equivalent to 00:00-08:00 in the next day, for + // exploitation of two days. 
+ if (hh > 32 || mm > 59 || ss > 59) return std::nullopt; + return std::chrono::hh_mm_ss(std::chrono::hours(hh) + std::chrono::minutes(mm) + std::chrono::seconds(ss)); +} + +static std::optional parseDateTime(std::string_view src, const std::chrono::time_zone *amsterdam, std::string_view *error = nullptr) { +#define ERROR(err) do { if (error) *error = err; return std::nullopt; } while (0) + if (src.size() > 23) ERROR("timestamp string is too big"); + if (src.size() < 17) ERROR("timestamp string is too small"); + + bool valid_year = isDigit(src[0]) && isDigit(src[1]) && isDigit(src[2]) && isDigit(src[3]); + if (!valid_year) ERROR("year has bad format"); + + size_t month_off = src[4] == '-' ? 5 : 4; + size_t day_off = src[month_off + 2] == '-' ? month_off + 3 : month_off + 2; + size_t time_off = day_off + 2; + if (src[time_off] != 'T' && src[time_off] != ' ') + ERROR("missing date/time separator"); + size_t tzd_off = time_off + 9; + // For clarity, TZD stands for Time Zone Designator. It often takes the form + // of Z (Zulu, UTC+00:00) or as an offset from UTC in hours and minutes, + // formatted as +|-HH:MM (e.g. +01:00, -12:00). 
+ + if (time_off + 8 >= src.size()) ERROR("bad format, not enough space for hh:mm:ss"); + + int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0'; + int month = (src[month_off] - '0') * 10 + src[month_off + 1] - '0'; + int day = (src[day_off] - '0') * 10 + src[day_off + 1] - '0'; + int hour = (src[time_off + 1] - '0') * 10 + src[time_off + 2] - '0'; + int minute = (src[time_off + 4] - '0') * 10 + src[time_off + 5] - '0'; + int second = (src[time_off + 7] - '0') * 10 + src[time_off + 8] - '0'; + + auto date = std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day); + auto time = std::chrono::hours(hour) + std::chrono::minutes(minute) + std::chrono::seconds(second); + + std::chrono::sys_seconds unix_start_of_day; + if (tzd_off < src.size()) { + unix_start_of_day = std::chrono::sys_days(date); + } else { + auto local_days = std::chrono::local_days(date); + std::chrono::zoned_seconds zoned_start_of_day = std::chrono::zoned_time(amsterdam, local_days); + unix_start_of_day = std::chrono::sys_seconds(zoned_start_of_day); + } + + std::chrono::minutes offset(0); + if (tzd_off + 1 == src.size() && src[tzd_off] != 'Z') { + ERROR("bad TZD (missing Zulu indicator)"); + } else if (tzd_off + 6 == src.size()) { + bool valid_tzd = (src[tzd_off] == '+' || src[tzd_off] == '-') + && isDigit(src[tzd_off + 1]) && isDigit(src[tzd_off + 2]) && src[tzd_off + 3] == ':' + && isDigit(src[tzd_off + 4]) && isDigit(src[tzd_off + 5]); + if (!valid_tzd) ERROR("bad offset TZD format (expected +|-hh:mm)"); + int sign = src[tzd_off] == '-' ? 
-1 : 1; + int tzd_hh = (src[tzd_off + 1] - '0') * 10 + src[tzd_off + 2] - '0'; + int tzd_mm = (src[tzd_off + 3] - '0') * 10 + src[tzd_off + 4] - '0'; + offset = sign * std::chrono::minutes(tzd_hh * 60 + tzd_mm); + } else if (tzd_off < src.size()) { + // There is a TZD but we literally have no clue how to parse it :/ + ERROR("cannot parse TZD of unexpected length"); + } + + return unix_start_of_day + time - offset; +#undef ERROR +} + +void Kv1Parser::parseHigherOrganizationalUnit() { + auto data_owner_code = eatString("ORUNORUN.DataOwnerCode", true, 10); + auto organizational_unit_code_parent = eatString("ORUNORUN.OrganizationalUnitCodeParent", true, 10); + auto organizational_unit_code_child = eatString("ORUNORUN.OrganizationalUnitCodeChild", true, 10); + auto valid_from_raw = eatString("ORUNORUN.ValidFrom", true, 10); + if (!record_errors.empty()) return; + + auto valid_from = parseYyyymmdd(valid_from_raw); + if (!valid_from) { + record_errors.push_back("ORUNORUN.ValidFrom has invalid format, should be YYYY-MM-DD"); + return; + } + + records.higher_organizational_units.emplace_back( + Kv1HigherOrganizationalUnit::Key( + data_owner_code, + organizational_unit_code_parent, + organizational_unit_code_child, + *valid_from)); +} + +void Kv1Parser::parseUserStopPoint() { + auto data_owner_code = eatString ("USRSTOP.DataOwnerCode", true, 10); + auto user_stop_code = eatString ("USRSTOP.UserStopCode", true, 10); + auto timing_point_code = eatString ("USRSTOP.TimingPointCode", false, 10); + auto get_in = eatBoolean("USRSTOP.GetIn", true ); + auto get_out = eatBoolean("USRSTOP.GetOut", true ); + eatCell ("USRSTOP." ); + auto name = eatString ("USRSTOP.Name", true, 50); + auto town = eatString ("USRSTOP.Town", true, 50); + auto user_stop_area_code = eatString ("USRSTOP.UserStopAreaCode", false, 10); + auto stop_side_code = eatString ("USRSTOP.StopSideCode", true, 10); + eatCell ("USRSTOP." ); + eatCell ("USRSTOP." 
); + auto minimal_stop_time = eatNumber ("USRSTOP.MinimalStopTime", true, 5); + auto stop_side_length = eatNumber ("USRSTOP.StopSideLength", false, 3); + auto description = eatString ("USRSTOP.Description", false, 255); + auto user_stop_type = eatString ("USRSTOP.UserStopType", true, 10); + auto quay_code = eatString ("USRSTOP.QuayCode", false, 30); + if (!record_errors.empty()) return; + + records.user_stop_points.emplace_back( + Kv1UserStopPoint::Key( + data_owner_code, + user_stop_code), + timing_point_code, + *get_in, + *get_out, + name, + town, + user_stop_area_code, + stop_side_code, + *minimal_stop_time, + stop_side_length, + description, + user_stop_type, + quay_code); +} + +void Kv1Parser::parseUserStopArea() { + auto data_owner_code = eatString("USRSTAR.DataOwnerCode", true, 10); + auto user_stop_area_code = eatString("USRSTAR.UserStopAreaCode", true, 10); + auto name = eatString("USRSTAR.Name", true, 50); + auto town = eatString("USRSTAR.Town", true, 50); + eatCell ("USRSTAR." ); + eatCell ("USRSTAR." 
); + auto description = eatString("USRSTAR.Description", false, 255); + if (!record_errors.empty()) return; + + records.user_stop_areas.emplace_back( + Kv1UserStopArea::Key( + data_owner_code, + user_stop_area_code), + name, + town, + description); +} + +void Kv1Parser::parseTimingLink() { + auto data_owner_code = eatString("TILI.DataOwnerCode", true, 10); + auto user_stop_code_begin = eatString("TILI.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("TILI.UserStopCodeEnd", true, 10); + auto minimal_drive_time = eatNumber("TILI.MinimalDriveTime", false, 5); + auto description = eatString("TILI.Description", false, 255); + if (!record_errors.empty()) return; + + records.timing_links.emplace_back( + Kv1TimingLink::Key( + data_owner_code, + user_stop_code_begin, + user_stop_code_end), + minimal_drive_time, + description); +} + +void Kv1Parser::parseLink() { + auto data_owner_code = eatString("LINK.DataOwnerCode", true, 10); + auto user_stop_code_begin = eatString("LINK.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("LINK.UserStopCodeEnd", true, 10); + eatCell("LINK." 
); + auto distance = eatNumber("LINK.Distance", true, 6); + auto description = eatString("LINK.Description", false, 255); + auto transport_type = eatString("LINK.TransportType", true, 5); + if (!record_errors.empty()) return; + + records.links.emplace_back( + Kv1Link::Key( + data_owner_code, + user_stop_code_begin, + user_stop_code_end, + transport_type), + *distance, + description); +} + +void Kv1Parser::parseLine() { + auto data_owner_code = eatString ("LINE.DataOwnerCode", true, 10); + auto line_planning_number = eatString ("LINE.LinePlanningNumber", true, 10); + auto line_public_number = eatString ("LINE.LinePublicNumber", true, 4); + auto line_name = eatString ("LINE.LineName", true, 50); + auto line_ve_tag_number = eatNumber ("LINE.LineVeTagNumber", true, 3); + auto description = eatString ("LINE.Description", false, 255); + auto transport_type = eatString ("LINE.TransportType", true, 5); + auto line_icon = eatNumber ("LINE.LineIcon", false, 4); + auto line_color = eatRgbColor("LINE.LineColor", false ); + auto line_text_color = eatRgbColor("LINE.LineTextColor", false ); + if (!record_errors.empty()) return; + + // NOTE: This check, although it should be performed to comply with the + // specification, is not actually honored by transit operators (such as + // Connexxion) :/ That's enough reason to keep it disabled here for now. 
+ // if (*line_ve_tag_number < 0 || *line_ve_tag_number > 399) { + // record_errors.push_back(std::format("LINE.LineVeTagNumber is out of range [0-399] with value {}", *line_ve_tag_number)); + // return; + // } + if (*line_ve_tag_number != static_cast(*line_ve_tag_number)) + record_errors.push_back("LINE.LineVeTagNumber should be an integer"); + if (line_icon && *line_icon != static_cast(*line_icon)) + record_errors.push_back("LINE.LineIcon should be an integer"); + if (!record_errors.empty()) return; + + records.lines.emplace_back( + Kv1Line::Key( + data_owner_code, + line_planning_number), + line_public_number, + line_name, + static_cast(*line_ve_tag_number), + description, + transport_type, + static_cast>(line_icon), + line_color, + line_text_color); +} + +void Kv1Parser::parseDestination() { + auto data_owner_code = eatString ("DEST.DataOwnerCode", true, 10); + auto dest_code = eatString ("DEST.DestCode", true, 10); + auto dest_name_full = eatString ("DEST.DestNameFull", true, 50); + auto dest_name_main = eatString ("DEST.DestNameMain", true, 24); + auto dest_name_detail = eatString ("DEST.DestNameDetail", false, 24); + auto relevant_dest_name_detail = eatBoolean ("DEST.RelevantDestNameDetail", true ); + auto dest_name_main_21 = eatString ("DEST.DestNameMain21", true, 21); + auto dest_name_detail_21 = eatString ("DEST.DestNameDetail21", false, 21); + auto dest_name_main_19 = eatString ("DEST.DestNameMain19", true, 19); + auto dest_name_detail_19 = eatString ("DEST.DestNameDetail19", false, 19); + auto dest_name_main_16 = eatString ("DEST.DestNameMain16", true, 16); + auto dest_name_detail_16 = eatString ("DEST.DestNameDetail16", false, 16); + auto dest_icon = eatNumber ("DEST.DestIcon", false, 4); + auto dest_color = eatRgbColor("DEST.DestColor", false ); + // NOTE: Deviating from the offical KV1 specification here. 
It specifies that + // the maximum length for this field should be 30, but then proceeds to + // specify that it should contain a RGB value comprising of three + // hexadecimally encoded octets, i.e. six characters. We assume that the + // latter is correct and the intended interpretation. + auto dest_text_color = eatRgbColor("DEST.DestTextColor", false ); + if (!record_errors.empty()) return; + + if (dest_icon && *dest_icon != static_cast(*dest_icon)) { + record_errors.push_back("DEST.DestIcon should be an integer"); + return; + } + + records.destinations.emplace_back( + Kv1Destination::Key( + data_owner_code, + dest_code), + dest_name_full, + dest_name_main, + dest_name_detail, + *relevant_dest_name_detail, + dest_name_main_21, + dest_name_detail_21, + dest_name_main_19, + dest_name_detail_19, + dest_name_main_16, + dest_name_detail_16, + dest_icon, + dest_color, + dest_text_color); +} + +void Kv1Parser::parseJourneyPattern() { + auto data_owner_code = eatString("JOPA.DataOwnerCode", true, 10); + auto line_planning_number = eatString("JOPA.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString("JOPA.JourneyPatternCode", true, 10); + auto journey_pattern_type = eatString("JOPA.JourneyPatternType", true, 10); + auto direction = eatString("JOPA.Direction", true, 1); + auto description = eatString("JOPA.Description", false, 255); + if (!record_errors.empty()) return; + + if (direction != "1" && direction != "2" && direction != "A" && direction != "B") { + record_errors.push_back("JOPA.Direction should be in [1, 2, A, B]"); + return; + } + + records.journey_patterns.emplace_back( + Kv1JourneyPattern::Key( + data_owner_code, + line_planning_number, + journey_pattern_code), + journey_pattern_type, + direction[0], + description); +} + +void Kv1Parser::parseConcessionFinancerRelation() { + auto data_owner_code = eatString("CONFINREL.DataOwnerCode", true, 10); + auto con_fin_rel_code = eatString("CONFINREL.ConFinRelCode", true, 10); + auto 
concession_area_code = eatString("CONFINREL.ConcessionAreaCode", true, 10); + auto financer_code = eatString("CONFINREL.FinancerCode", false, 10); + if (!record_errors.empty()) return; + + records.concession_financer_relations.emplace_back( + Kv1ConcessionFinancerRelation::Key( + data_owner_code, + con_fin_rel_code), + concession_area_code, + financer_code); +} + +void Kv1Parser::parseConcessionArea() { + auto data_owner_code = eatString("CONAREA.DataOwnerCode", true, 10); + auto concession_area_code = eatString("CONAREA.ConcessionAreaCode", true, 10); + auto description = eatString("CONAREA.Description", true, 255); + if (!record_errors.empty()) return; + + records.concession_areas.emplace_back( + Kv1ConcessionArea::Key( + data_owner_code, + concession_area_code), + description); +} + +void Kv1Parser::parseFinancer() { + auto data_owner_code = eatString("FINANCER.DataOwnerCode", true, 10); + auto financer_code = eatString("FINANCER.FinancerCode", true, 10); + auto description = eatString("FINANCER.Description", true, 255); + if (!record_errors.empty()) return; + + records.financers.emplace_back( + Kv1Financer::Key( + data_owner_code, + financer_code), + description); +} + +void Kv1Parser::parseJourneyPatternTimingLink() { + auto data_owner_code = eatString ("JOPATILI.DataOwnerCode", true, 10); + auto line_planning_number = eatString ("JOPATILI.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString ("JOPATILI.JourneyPatternCode", true, 10); + auto timing_link_order = eatNumber ("JOPATILI.TimingLinkOrder", true, 3); + auto user_stop_code_begin = eatString ("JOPATILI.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString ("JOPATILI.UserStopCodeEnd", true, 10); + auto con_fin_rel_code = eatString ("JOPATILI.ConFinRelCode", true, 10); + auto dest_code = eatString ("JOPATILI.DestCode", true, 10); + eatCell ("JOPATILI." 
); + auto is_timing_stop = eatBoolean ("JOPATILI.IsTimingStop", true ); + auto display_public_line = eatString ("JOPATILI.DisplayPublicLine", false, 4); + auto product_formula_type = eatNumber ("JOPATILI.ProductFormulaType", false, 4); + auto get_in = eatBoolean ("JOPATILI.GetIn", true ); + auto get_out = eatBoolean ("JOPATILI.GetOut", true ); + auto show_flexible_trip = eatString ("JOPATILI.ShowFlexibleTrip", false, 8); + auto line_dest_icon = eatNumber ("JOPATILI.LineDestIcon", false, 4); + auto line_dest_color = eatRgbColor("JOPATILI.LineDestColor", false ); + auto line_dest_text_color = eatRgbColor("JOPATILI.LineDestTextColor", false ); + if (!record_errors.empty()) return; + + if (line_dest_icon && *line_dest_icon != static_cast(*line_dest_icon)) + record_errors.push_back("JOPATILI.LineDestIcon should be an integer"); + if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && + show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") + record_errors.push_back("JOPATILI.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); + if (!record_errors.empty()) return; + + records.journey_pattern_timing_links.emplace_back( + Kv1JourneyPatternTimingLink::Key( + data_owner_code, + line_planning_number, + journey_pattern_code, + static_cast(*timing_link_order)), + user_stop_code_begin, + user_stop_code_end, + con_fin_rel_code, + dest_code, + *is_timing_stop, + display_public_line, + product_formula_type, + *get_in, + *get_out, + show_flexible_trip, + line_dest_icon, + line_dest_color, + line_dest_text_color); +} + +void Kv1Parser::parsePoint() { + auto data_owner_code = eatString("POINT.DataOwnerCode", true, 10); + auto point_code = eatString("POINT.PointCode", true, 10); + eatCell ("POINT." ); + auto point_type = eatString("POINT.PointType", true, 10); + auto coordinate_system_type = eatString("POINT.CoordinateSystemType", true, 10); + // NOTE: We deviate from the specification here once again. 
The specification + // notes that LocationX_EW should contain 'at least 6 positions'. Assuming + // that this is referring to the amount of digits, we have to lower this to + // 4. Otherwise, some positions in the Netherlands and Belgium are + // unrepresentable. + auto location_x_ew = eatRdCoord("POINT.LocationX_EW", true, 4); + auto location_y_ew = eatRdCoord("POINT.LocationX_EW", true, 6); + auto location_z = eatRdCoord("POINT.LocationZ", false, 0); + auto description = eatString ("POINT.Description", false, 255); + if (!record_errors.empty()) return; + + records.points.emplace_back( + Kv1Point::Key( + std::move(data_owner_code), + std::move(point_code)), + std::move(point_type), + std::move(coordinate_system_type), + *location_x_ew, + *location_y_ew, + location_z, + std::move(description)); +} + +void Kv1Parser::parsePointOnLink() { + auto data_owner_code = eatString("POOL.DataOwnerCode", true, 10); + auto user_stop_code_begin = eatString("POOL.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("POOL.UserStopCodeEnd", true, 10); + eatCell ("POOL." 
); + auto point_data_owner_code = eatString("POOL.PointDataOwnerCode", true, 10); + auto point_code = eatString("POOL.PointCode", true, 10); + auto distance_since_start_of_link = eatNumber("POOL.DistanceSinceStartOfLink", true, 5); + auto segment_speed = eatNumber("POOL.SegmentSpeed", false, 4); + auto local_point_speed = eatNumber("POOL.LocalPointSpeed", false, 4); + auto description = eatString("POOL.Description", false, 255); + auto transport_type = eatString("POOL.TransportType", true, 5); + if (!record_errors.empty()) return; + + records.point_on_links.emplace_back( + Kv1PointOnLink::Key( + data_owner_code, + user_stop_code_begin, + user_stop_code_end, + point_data_owner_code, + point_code, + transport_type), + *distance_since_start_of_link, + segment_speed, + local_point_speed, + std::move(description)); +} + +void Kv1Parser::parseIcon() { + auto data_owner_code = eatString("ICON.DataOwnerCode", true, 10); + auto icon_number = eatNumber("ICON.IconNumber", true, 4); + auto icon_uri = eatString("ICON.IconURI", true, 1024); + if (!record_errors.empty()) return; + + if (*icon_number != static_cast(*icon_number)) { + record_errors.push_back("ICON.IconNumber should be an integer"); + return; + } + + records.icons.emplace_back( + Kv1Icon::Key( + data_owner_code, + static_cast(*icon_number)), + icon_uri); +} + +void Kv1Parser::parseNotice() { + auto data_owner_code = eatString("NOTICE.DataOwnerCode", true, 10); + auto notice_code = eatString("NOTICE.NoticeCode", true, 20); + auto notice_content = eatString("NOTICE.NoticeContent", true, 1024); + if (!record_errors.empty()) return; + + records.notices.emplace_back( + Kv1Notice::Key( + data_owner_code, + notice_code), + notice_content); +} + +void Kv1Parser::parseNoticeAssignment() { + auto data_owner_code = eatString("NTCASSGNM.DataOwnerCode", true, 10); + auto notice_code = eatString("NTCASSGNM.NoticeCode", true, 20); + auto assigned_object = eatString("NTCASSGNM.AssignedObject", true, 8); + auto 
timetable_version_code = eatString("NTCASSGNM.TimetableVersionCode", false, 10); + auto organizational_unit_code = eatString("NTCASSGNM.OrganizationalUnitCode", false, 10); + auto schedule_code = eatString("NTCASSGNM.ScheduleCode", false, 10); + auto schedule_type_code = eatString("NTCASSGNM.ScheduleTypeCode", false, 10); + auto period_group_code = eatString("NTCASSGNM.PeriodGroupCode", false, 10); + auto specific_day_code = eatString("NTCASSGNM.SpecificDayCode", false, 10); + auto day_type = eatString("NTCASSGNM.DayType", false, 7); + auto line_planning_number = eatString("NTCASSGNM.LinePlanningNumber", true, 10); + auto journey_number = eatNumber("NTCASSGNM.JourneyNumber", false, 6); + auto stop_order = eatNumber("NTCASSGNM.StopOrder", false, 4); + auto journey_pattern_code = eatString("NTCASSGNM.JourneyPatternCode", false, 10); + auto timing_link_order = eatNumber("NTCASSGNM.TimingLinkOrder", false, 3); + auto user_stop_code = eatString("NTCASSGNM.UserStopCode", false, 10); + if (!record_errors.empty()) return; + + if (journey_number && *journey_number != static_cast(*journey_number)) + record_errors.push_back("NTCASSGNM.JourneyNumber should be an integer"); + if (journey_number && (*journey_number < 0 || *journey_number > 999'999)) + record_errors.push_back("NTCASSGNM.JourneyNumber should be within the range [0-999999]"); + if (stop_order && *stop_order != static_cast(*stop_order)) + record_errors.push_back("NTCASSGNM.StopOrder should be an integer"); + if (!journey_number && (assigned_object == "PUJO" || assigned_object == "PUJOPASS")) + record_errors.push_back("NTCASSGNM.JourneyNumber is required for AssignedObject PUJO/PUJOPASS"); + if (journey_pattern_code.empty() && assigned_object == "JOPATILI") + record_errors.push_back("NTCASSGNM.JourneyPatternCode is required for AssignedObject JOPATILI"); + if (!record_errors.empty()) return; + + records.notice_assignments.emplace_back( + data_owner_code, + notice_code, + assigned_object, + timetable_version_code, + 
organizational_unit_code, + schedule_code, + schedule_type_code, + period_group_code, + specific_day_code, + day_type, + line_planning_number, + static_cast>(journey_number), + static_cast>(stop_order), + journey_pattern_code, + timing_link_order, + user_stop_code); +} + +void Kv1Parser::parseTimeDemandGroup() { + auto data_owner_code = eatString("TIMDEMGRP.DataOwnerCode", true, 10); + auto line_planning_number = eatString("TIMDEMGRP.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString("TIMDEMGRP.JourneyPatternCode", true, 10); + auto time_demand_group_code = eatString("TIMDEMGRP.TimeDemandGroupCode", true, 10); + if (!record_errors.empty()) return; + + records.time_demand_groups.emplace_back( + Kv1TimeDemandGroup::Key( + data_owner_code, + line_planning_number, + journey_pattern_code, + time_demand_group_code)); +} + +void Kv1Parser::parseTimeDemandGroupRunTime() { + auto data_owner_code = eatString("TIMDEMRNT.DataOwnerCode", true, 10); + auto line_planning_number = eatString("TIMDEMRNT.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString("TIMDEMRNT.JourneyPatternCode", true, 10); + auto time_demand_group_code = eatString("TIMDEMRNT.TimeDemandGroupCode", true, 10); + auto timing_link_order = eatNumber("TIMDEMRNT.TimingLinkOrder", true, 3); + auto user_stop_code_begin = eatString("TIMDEMRNT.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("TIMDEMRNT.UserStopCodeEnd", true, 10); + auto total_drive_time = eatNumber("TIMDEMRNT.TotalDriveTime", true, 5); + auto drive_time = eatNumber("TIMDEMRNT.DriveTime", true, 5); + auto expected_delay = eatNumber("TIMDEMRNT.ExpectedDelay", false, 5); + auto layover_time = eatNumber("TIMDEMRNT.LayOverTime", false, 5); + auto stop_wait_time = eatNumber("TIMDEMRNT.StopWaitTime", true, 5); + auto minimum_stop_time = eatNumber("TIMDEMRNT.MinimumStopTime", false, 5); + if (!record_errors.empty()) return; + + if (timing_link_order && *timing_link_order != 
static_cast(*timing_link_order)) { + record_errors.push_back("TIMDEMRNT.TimingLinkOrder should be an integer"); + return; + } + + records.time_demand_group_run_times.emplace_back( + Kv1TimeDemandGroupRunTime::Key( + data_owner_code, + line_planning_number, + journey_pattern_code, + time_demand_group_code, + static_cast(*timing_link_order)), + user_stop_code_begin, + user_stop_code_end, + *total_drive_time, + *drive_time, + expected_delay, + layover_time, + *stop_wait_time, + minimum_stop_time); +} + +void Kv1Parser::parsePeriodGroup() { + auto data_owner_code = eatString("PEGR.DataOwnerCode", true, 10); + auto period_group_code = eatString("PEGR.PeriodGroupCode", true, 10); + auto description = eatString("PEGR.Description", false, 255); + if (!record_errors.empty()) return; + + records.period_groups.emplace_back( + Kv1PeriodGroup::Key( + data_owner_code, + period_group_code), + description); +} + +void Kv1Parser::parseSpecificDay() { + auto data_owner_code = eatString("SPECDAY.DataOwnerCode", true, 10); + auto specific_day_code = eatString("SPECDAY.SpecificDayCode", true, 10); + auto name = eatString("SPECDAY.Name", true, 50); + auto description = eatString("SPECDAY.Description", false, 255); + if (!record_errors.empty()) return; + + records.specific_days.emplace_back( + Kv1SpecificDay::Key( + data_owner_code, + specific_day_code), + name, + description); +} + +void Kv1Parser::parseTimetableVersion() { + auto data_owner_code = eatString("TIVE.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("TIVE.OrganizationalUnitCode", true, 10); + auto timetable_version_code = eatString("TIVE.TimetableVersionCode", true, 10); + auto period_group_code = eatString("TIVE.PeriodGroupCode", true, 10); + auto specific_day_code = eatString("TIVE.SpecificDayCode", true, 10); + auto valid_from_raw = eatString("TIVE.ValidFrom", true, 10); + auto timetable_version_type = eatString("TIVE.TimetableVersionType", true, 10); + auto valid_thru_raw = 
eatString("TIVE.ValidThru", false, 10); + auto description = eatString("TIVE.Description", false, 255); + if (!record_errors.empty()) return; + + auto valid_from = parseYyyymmdd(valid_from_raw); + if (!valid_from) + record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD"); + // ValidThru is optional: it is only parsed (and only reported on) when a + // value was actually supplied. + std::optional valid_thru; + if (!valid_thru_raw.empty()) { + valid_thru = parseYyyymmdd(valid_thru_raw); + if (!valid_thru) { + record_errors.push_back("TIVE.ValidThru has invalid format, should be YYYY-MM-DD"); + } + } + if (!description.empty()) + record_errors.push_back("TIVE.Description should be empty"); + if (!record_errors.empty()) return; + + records.timetable_versions.emplace_back( + Kv1TimetableVersion::Key( + data_owner_code, + organizational_unit_code, + timetable_version_code, + period_group_code, + specific_day_code), + *valid_from, + timetable_version_type, + valid_thru, + description); +} + +void Kv1Parser::parsePublicJourney() { + auto data_owner_code = eatString ("PUJO.DataOwnerCode", true, 10); + auto timetable_version_code = eatString ("PUJO.TimetableVersionCode", true, 10); + auto organizational_unit_code = eatString ("PUJO.OrganizationalUnitCode", true, 10); + auto period_group_code = eatString ("PUJO.PeriodGroupCode", true, 10); + auto specific_day_code = eatString ("PUJO.SpecificDayCode", true, 10); + auto day_type = eatString ("PUJO.DayType", true, 7); + auto line_planning_number = eatString ("PUJO.LinePlanningNumber", true, 10); + auto journey_number = eatNumber ("PUJO.JourneyNumber", true, 6); + auto time_demand_group_code = eatString ("PUJO.TimeDemandGroupCode", true, 10); + auto journey_pattern_code = eatString ("PUJO.JourneyPatternCode", true, 10); + auto departure_time_raw = eatString ("PUJO.DepartureTime", true, 8); + auto wheelchair_accessible = eatString ("PUJO.WheelChairAccessible", true, 13); + auto data_owner_is_operator = eatBoolean("PUJO.DataOwnerIsOperator", true ); + auto planned_monitored = eatBoolean("PUJO.PlannedMonitored", 
true ); + auto product_formula_type = eatNumber ("PUJO.ProductFormulaType", false, 4); + auto show_flexible_trip = eatString ("PUJO.ShowFlexibleTrip", false, 8); + if (!record_errors.empty()) return; + + auto departure_time = parseHhmmss(departure_time_raw); + if (!departure_time) + record_errors.push_back("PUJO.DepartureTime has a bad format"); + if (*journey_number < 0 || *journey_number > 999'999) + record_errors.push_back("PUJO.JourneyNumber should be within the range [0-999999]"); + if (*journey_number != static_cast(*journey_number)) + record_errors.push_back("PUJO.JourneyNumber should be an integer"); + if (product_formula_type && *product_formula_type != static_cast(*product_formula_type)) + record_errors.push_back("PUJO.ProductFormulaType should be an integer"); + if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN") + record_errors.push_back("PUJO.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]"); + if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && + show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") + record_errors.push_back("PUJO.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); + if (!record_errors.empty()) return; + + records.public_journeys.emplace_back( + Kv1PublicJourney::Key( + data_owner_code, + timetable_version_code, + organizational_unit_code, + period_group_code, + specific_day_code, + day_type, + line_planning_number, + static_cast(*journey_number)), + time_demand_group_code, + journey_pattern_code, + *departure_time, + wheelchair_accessible, + *data_owner_is_operator, + *planned_monitored, + product_formula_type, + show_flexible_trip); +} + +void Kv1Parser::parsePeriodGroupValidity() { + auto data_owner_code = eatString("PEGRVAL.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("PEGRVAL.OrganizationalUnitCode", true, 10); + auto period_group_code = 
eatString("PEGRVAL.PeriodGroupCode", true, 10); + auto valid_from_raw = eatString("PEGRVAL.ValidFrom", true, 10); + auto valid_thru_raw = eatString("PEGRVAL.ValidThru", true, 10); + if (!record_errors.empty()) return; + + auto valid_from = parseYyyymmdd(valid_from_raw); + auto valid_thru = parseYyyymmdd(valid_thru_raw); + if (!valid_from) + record_errors.push_back("PEGRVAL.ValidFrom has invalid format, should be YYYY-MM-DD"); + if (!valid_thru) + record_errors.push_back("PEGRVAL.ValidThru has invalid format, should be YYYY-MM-DD"); + if (!record_errors.empty()) return; + + records.period_group_validities.emplace_back( + Kv1PeriodGroupValidity::Key( + data_owner_code, + organizational_unit_code, + period_group_code, + *valid_from), + *valid_thru); +} + +void Kv1Parser::parseExceptionalOperatingDay() { + auto data_owner_code = eatString("EXCOPDAY.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("EXCOPDAY.OrganizationalUnitCode", true, 10); + auto valid_date_raw = eatString("EXCOPDAY.ValidDate", true, 23); + auto day_type_as_on = eatString("EXCOPDAY.DayTypeAsOn", true, 7); + auto specific_day_code = eatString("EXCOPDAY.SpecificDayCode", true, 10); + auto period_group_code = eatString("EXCOPDAY.PeriodGroupCode", false, 10); + auto description = eatString("EXCOPDAY.Description", false, 255); + if (!record_errors.empty()) return; + + std::string_view error; + auto valid_date = parseDateTime(valid_date_raw, amsterdam, &error); + if (!valid_date) { + record_errors.push_back(std::format("EXCOPDAY.ValidDate has an bad format (value: {}): {}", valid_date_raw, error)); + return; + } + + records.exceptional_operating_days.emplace_back( + Kv1ExceptionalOperatingDay::Key( + data_owner_code, + organizational_unit_code, + *valid_date), + day_type_as_on, + specific_day_code, + period_group_code, + description); +} + +void Kv1Parser::parseScheduleVersion() { + auto data_owner_code = eatString("SCHEDVERS.DataOwnerCode", true, 10); + auto organizational_unit_code 
= eatString("SCHEDVERS.OrganizationalUnitCode", true, 10); + auto schedule_code = eatString("SCHEDVERS.ScheduleCode", true, 10); + auto schedule_type_code = eatString("SCHEDVERS.ScheduleTypeCode", true, 10); + auto valid_from_raw = eatString("SCHEDVERS.ValidFrom", true, 10); + auto valid_thru_raw = eatString("SCHEDVERS.ValidThru", false, 10); + auto description = eatString("SCHEDVERS.Description", false, 255); + if (!record_errors.empty()) return; + + auto valid_from = parseYyyymmdd(valid_from_raw); + if (!valid_from) + record_errors.push_back("SCHEDVERS.ValidFrom has invalid format, should be YYYY-MM-DD"); + std::optional valid_thru; + // ValidThru is optional: it is only parsed and validated when a value was supplied. + if (!valid_thru_raw.empty()) { + valid_thru = parseYyyymmdd(valid_thru_raw); + if (!valid_thru) { + record_errors.push_back("SCHEDVERS.ValidThru has invalid format, should be YYYY-MM-DD"); + } + } + if (!description.empty()) + record_errors.push_back("SCHEDVERS.Description should be empty"); + if (!record_errors.empty()) return; + + records.schedule_versions.emplace_back( + Kv1ScheduleVersion::Key( + data_owner_code, + organizational_unit_code, + schedule_code, + schedule_type_code), + *valid_from, + valid_thru, + description); +} + +void Kv1Parser::parsePublicJourneyPassingTimes() { + auto data_owner_code = eatString ("PUJOPASS.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString ("PUJOPASS.OrganizationalUnitCode", true, 10); + auto schedule_code = eatString ("PUJOPASS.ScheduleCode", true, 10); + auto schedule_type_code = eatString ("PUJOPASS.ScheduleTypeCode", true, 10); + auto line_planning_number = eatString ("PUJOPASS.LinePlanningNumber", true, 10); + auto journey_number = eatNumber ("PUJOPASS.JourneyNumber", true, 6); + auto stop_order = eatNumber ("PUJOPASS.StopOrder", true, 4); + auto journey_pattern_code = eatString ("PUJOPASS.JourneyPatternCode", true, 10); + auto user_stop_code = eatString ("PUJOPASS.UserStopCode", true, 10); + auto target_arrival_time_raw = eatString ("PUJOPASS.TargetArrivalTime",
false, 8); + auto target_departure_time_raw = eatString ("PUJOPASS.TargetDepartureTime", false, 8); + auto wheelchair_accessible = eatString ("PUJOPASS.WheelChairAccessible", true, 13); + auto data_owner_is_operator = eatBoolean("PUJOPASS.DataOwnerIsOperator", true ); + auto planned_monitored = eatBoolean("PUJOPASS.PlannedMonitored", true ); + auto product_formula_type = eatNumber ("PUJOPASS.ProductFormulaType", false, 4); + auto show_flexible_trip = eatString ("PUJOPASS.ShowFlexibleTrip", false, 8); + if (!record_errors.empty()) return; + + if (*journey_number < 0 || *journey_number > 999'999) + record_errors.push_back("PUJOPASS.JourneyNumber should be within the range [0-999999]"); + if (*journey_number != static_cast(*journey_number)) + record_errors.push_back("PUJOPASS.JourneyNumber should be an integer"); + if (*stop_order != static_cast(*stop_order)) + record_errors.push_back("PUJOPASS.StopOrder should be an integer"); + if (product_formula_type && *product_formula_type != static_cast(*product_formula_type)) + record_errors.push_back("PUJOPASS.ProductFormulaType should be an integer"); + if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN") + record_errors.push_back("PUJOPASS.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]"); + if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && + show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") + record_errors.push_back("PUJOPASS.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); + std::optional> target_arrival_time; + if (!target_arrival_time_raw.empty()) { + target_arrival_time = parseHhmmss(target_arrival_time_raw); + if (!target_arrival_time) { + record_errors.push_back("PUJOPASS.TargetArrivalTime has invalid format, should be HH:MM:SS"); + } + } + std::optional> target_departure_time; + if (!target_departure_time_raw.empty()) { + target_departure_time = 
parseHhmmss(target_departure_time_raw); + if (!target_departure_time) { + record_errors.push_back("PUJOPASS.TargetDepartureTime has invalid format, should be HH:MM:SS"); + } + } + if (!record_errors.empty()) return; + + records.public_journey_passing_times.emplace_back( + Kv1PublicJourneyPassingTimes::Key( + data_owner_code, + organizational_unit_code, + schedule_code, + schedule_type_code, + line_planning_number, + static_cast(*journey_number), + static_cast(*stop_order)), + journey_pattern_code, + user_stop_code, + target_arrival_time, + target_departure_time, + wheelchair_accessible, + *data_owner_is_operator, + *planned_monitored, + product_formula_type, + show_flexible_trip); +} + +void Kv1Parser::parseOperatingDay() { + auto data_owner_code = eatString("OPERDAY.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("OPERDAY.OrganizationalUnitCode", true, 10); + auto schedule_code = eatString("OPERDAY.ScheduleCode", true, 10); + auto schedule_type_code = eatString("OPERDAY.ScheduleTypeCode", true, 10); + auto valid_date_raw = eatString("OPERDAY.ValidDate", true, 10); + auto description = eatString("OPERDAY.Description", false, 255); + if (!record_errors.empty()) return; + + auto valid_date = parseYyyymmdd(valid_date_raw); + if (!valid_date) + record_errors.push_back("OPERDAY.ValidDate has invalid format, should be YYYY-MM-DD"); + if (!record_errors.empty()) return; + + records.operating_days.emplace_back( + Kv1OperatingDay::Key( + data_owner_code, + organizational_unit_code, + schedule_code, + schedule_type_code, + *valid_date), + description); +} + +const std::unordered_map Kv1Parser::type_parsers{ + { "ORUN", &Kv1Parser::parseOrganizationalUnit }, + { "ORUNORUN", &Kv1Parser::parseHigherOrganizationalUnit }, + { "USRSTOP", &Kv1Parser::parseUserStopPoint }, + { "USRSTAR", &Kv1Parser::parseUserStopArea }, + { "TILI", &Kv1Parser::parseTimingLink }, + { "LINK", &Kv1Parser::parseLink }, + { "LINE", &Kv1Parser::parseLine }, + { "DEST", 
&Kv1Parser::parseDestination }, + { "JOPA", &Kv1Parser::parseJourneyPattern }, + { "CONFINREL", &Kv1Parser::parseConcessionFinancerRelation }, + { "CONAREA", &Kv1Parser::parseConcessionArea }, + { "FINANCER", &Kv1Parser::parseFinancer }, + { "JOPATILI", &Kv1Parser::parseJourneyPatternTimingLink }, + { "POINT", &Kv1Parser::parsePoint }, + { "POOL", &Kv1Parser::parsePointOnLink }, + { "ICON", &Kv1Parser::parseIcon }, + { "NOTICE", &Kv1Parser::parseNotice }, + { "NTCASSGNM", &Kv1Parser::parseNoticeAssignment }, + { "TIMDEMGRP", &Kv1Parser::parseTimeDemandGroup }, + { "TIMDEMRNT", &Kv1Parser::parseTimeDemandGroupRunTime }, + { "PEGR", &Kv1Parser::parsePeriodGroup }, + { "SPECDAY", &Kv1Parser::parseSpecificDay }, + { "TIVE", &Kv1Parser::parseTimetableVersion }, + { "PUJO", &Kv1Parser::parsePublicJourney }, + { "PEGRVAL", &Kv1Parser::parsePeriodGroupValidity }, + { "EXCOPDAY", &Kv1Parser::parseExceptionalOperatingDay }, + { "SCHEDVERS", &Kv1Parser::parseScheduleVersion }, + { "PUJOPASS", &Kv1Parser::parsePublicJourneyPassingTimes }, + { "OPERDAY", &Kv1Parser::parseOperatingDay }, +}; diff --git a/lib/libtmi8/src/kv1_types.cpp b/lib/libtmi8/src/kv1_types.cpp new file mode 100644 index 0000000..49e306e --- /dev/null +++ b/lib/libtmi8/src/kv1_types.cpp @@ -0,0 +1,773 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +#include + +size_t Kv1Records::size() const { + return organizational_units.size() + + higher_organizational_units.size() + + user_stop_points.size() + + user_stop_areas.size() + + timing_links.size() + + links.size() + + lines.size() + + destinations.size() + + journey_patterns.size() + + concession_financer_relations.size() + + concession_areas.size() + + financers.size() + + journey_pattern_timing_links.size() + + points.size() + + point_on_links.size() + + icons.size() + + notices.size() + + notice_assignments.size() + + time_demand_groups.size() + + time_demand_group_run_times.size() + + period_groups.size() + + specific_days.size() + + 
timetable_versions.size() + + public_journeys.size() + + period_group_validities.size() + + exceptional_operating_days.size() + + schedule_versions.size() + + public_journey_passing_times.size() + + operating_days.size(); +} + +Kv1OrganizationalUnit::Key::Key( + std::string data_owner_code, + std::string organizational_unit_code) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)) +{} + +Kv1HigherOrganizationalUnit::Key::Key( + std::string data_owner_code, + std::string organizational_unit_code_parent, + std::string organizational_unit_code_child, + std::chrono::year_month_day valid_from) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code_parent(std::move(organizational_unit_code_parent)), + organizational_unit_code_child(std::move(organizational_unit_code_child)), + valid_from(valid_from) +{} + +Kv1UserStopPoint::Key::Key( + std::string data_owner_code, + std::string user_stop_code) + : data_owner_code(std::move(data_owner_code)), + user_stop_code(std::move(user_stop_code)) +{} + +Kv1UserStopArea::Key::Key( + std::string data_owner_code, + std::string user_stop_area_code) + : data_owner_code(std::move(data_owner_code)), + user_stop_area_code(std::move(user_stop_area_code)) +{} + +Kv1TimingLink::Key::Key( + std::string data_owner_code, + std::string user_stop_code_begin, + std::string user_stop_code_end) + : data_owner_code(std::move(data_owner_code)), + user_stop_code_begin(std::move(user_stop_code_begin)), + user_stop_code_end(std::move(user_stop_code_end)) +{} + +Kv1Link::Key::Key(std::string data_owner_code, + std::string user_stop_code_begin, + std::string user_stop_code_end, + std::string transport_type) + : data_owner_code(std::move(data_owner_code)), + user_stop_code_begin(std::move(user_stop_code_begin)), + user_stop_code_end(std::move(user_stop_code_end)), + transport_type(std::move(transport_type)) +{} + +Kv1Line::Key::Key(std::string data_owner_code, + std::string 
line_planning_number) + : data_owner_code(std::move(data_owner_code)), + line_planning_number(std::move(line_planning_number)) +{} + +Kv1Destination::Key::Key(std::string data_owner_code, + std::string dest_code) + : data_owner_code(std::move(data_owner_code)), + dest_code(std::move(dest_code)) +{} + +Kv1JourneyPattern::Key::Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code) + : data_owner_code(std::move(data_owner_code)), + line_planning_number(std::move(line_planning_number)), + journey_pattern_code(std::move(journey_pattern_code)) +{} + +Kv1ConcessionFinancerRelation::Key::Key(std::string data_owner_code, + std::string con_fin_rel_code) + : data_owner_code(std::move(data_owner_code)), + con_fin_rel_code(std::move(con_fin_rel_code)) +{} + +Kv1ConcessionArea::Key::Key(std::string data_owner_code, + std::string concession_area_code) + : data_owner_code(std::move(data_owner_code)), + concession_area_code(std::move(concession_area_code)) +{} + +Kv1Financer::Key::Key(std::string data_owner_code, + std::string financer_code) + : data_owner_code(std::move(data_owner_code)), + financer_code(std::move(financer_code)) +{} + +// By-value string parameters are moved into the members; timing_link_order is a plain short and is copied. +Kv1JourneyPatternTimingLink::Key::Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code, + short timing_link_order) + : data_owner_code(std::move(data_owner_code)), + line_planning_number(std::move(line_planning_number)), + journey_pattern_code(std::move(journey_pattern_code)), + timing_link_order(timing_link_order) +{} + +Kv1Point::Key::Key(std::string data_owner_code, + std::string point_code) + : data_owner_code(std::move(data_owner_code)), + point_code(std::move(point_code)) +{} + +Kv1PointOnLink::Key::Key(std::string data_owner_code, + std::string user_stop_code_begin, + std::string user_stop_code_end, + std::string point_data_owner_code, + std::string point_code, + std::string transport_type) + : data_owner_code(std::move(data_owner_code)), +
user_stop_code_begin(std::move(user_stop_code_begin)), + user_stop_code_end(std::move(user_stop_code_end)), + point_data_owner_code(std::move(point_data_owner_code)), + point_code(std::move(point_code)), + transport_type(std::move(transport_type)) +{} + +Kv1Icon::Key::Key(std::string data_owner_code, + short icon_number) + : data_owner_code(std::move(data_owner_code)), + icon_number(icon_number) +{} + +Kv1Notice::Key::Key(std::string data_owner_code, + std::string notice_code) + : data_owner_code(std::move(data_owner_code)), + notice_code(std::move(notice_code)) +{} + +Kv1TimeDemandGroup::Key::Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code, + std::string time_demand_group_code) + : data_owner_code(std::move(data_owner_code)), + line_planning_number(std::move(line_planning_number)), + journey_pattern_code(std::move(journey_pattern_code)), + time_demand_group_code(std::move(time_demand_group_code)) +{} + +Kv1TimeDemandGroupRunTime::Key::Key(std::string data_owner_code, + std::string line_planning_number, + std::string journey_pattern_code, + std::string time_demand_group_code, + short timing_link_order) + : data_owner_code(std::move(data_owner_code)), + line_planning_number(std::move(line_planning_number)), + journey_pattern_code(std::move(journey_pattern_code)), + time_demand_group_code(std::move(time_demand_group_code)), + timing_link_order(std::move(timing_link_order)) +{} + +Kv1PeriodGroup::Key::Key(std::string data_owner_code, + std::string period_group_code) + : data_owner_code(std::move(data_owner_code)), + period_group_code(std::move(period_group_code)) +{} + +Kv1SpecificDay::Key::Key(std::string data_owner_code, + std::string specific_day_code) + : data_owner_code(std::move(data_owner_code)), + specific_day_code(std::move(specific_day_code)) +{} + +Kv1TimetableVersion::Key::Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string timetable_version_code, + std::string 
period_group_code, + std::string specific_day_code) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)), + timetable_version_code(std::move(timetable_version_code)), + period_group_code(std::move(period_group_code)), + specific_day_code(std::move(specific_day_code)) +{} + +Kv1PublicJourney::Key::Key(std::string data_owner_code, + std::string timetable_version_code, + std::string organizational_unit_code, + std::string period_group_code, + std::string specific_day_code, + std::string day_type, + std::string line_planning_number, + int journey_number) + : data_owner_code(std::move(data_owner_code)), + timetable_version_code(std::move(timetable_version_code)), + organizational_unit_code(std::move(organizational_unit_code)), + period_group_code(std::move(period_group_code)), + specific_day_code(std::move(specific_day_code)), + day_type(std::move(day_type)), + line_planning_number(std::move(line_planning_number)), + journey_number(journey_number) +{} + +Kv1PeriodGroupValidity::Key::Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string period_group_code, + std::chrono::year_month_day valid_from) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)), + period_group_code(std::move(period_group_code)), + valid_from(valid_from) +{} + +Kv1ExceptionalOperatingDay::Key::Key(std::string data_owner_code, + std::string organizational_unit_code, + std::chrono::sys_seconds valid_date) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)), + valid_date(valid_date) +{} + +Kv1ScheduleVersion::Key::Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string schedule_code, + std::string schedule_type_code) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)), + 
schedule_code(std::move(schedule_code)), + schedule_type_code(std::move(schedule_type_code)) +{} + +Kv1PublicJourneyPassingTimes::Key::Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string schedule_code, + std::string schedule_type_code, + std::string line_planning_number, + int journey_number, + short stop_order) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)), + schedule_code(std::move(schedule_code)), + schedule_type_code(std::move(schedule_type_code)), + line_planning_number(std::move(line_planning_number)), + journey_number(journey_number), + stop_order(stop_order) +{} + +Kv1OperatingDay::Key::Key(std::string data_owner_code, + std::string organizational_unit_code, + std::string schedule_code, + std::string schedule_type_code, + std::chrono::year_month_day valid_date) + : data_owner_code(std::move(data_owner_code)), + organizational_unit_code(std::move(organizational_unit_code)), + schedule_code(std::move(schedule_code)), + schedule_type_code(std::move(schedule_type_code)), + valid_date(valid_date) +{} + +bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code; +} + +bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b) { + // valid_from is part of this key and is fed into hash_value(), so it must be compared here as well. + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code_parent == b.organizational_unit_code_parent + && a.organizational_unit_code_child == b.organizational_unit_code_child + && a.valid_from == b.valid_from; +} + +bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.user_stop_code == b.user_stop_code; +} + +bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.user_stop_area_code ==
b.user_stop_area_code; +} + +bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.user_stop_code_begin == b.user_stop_code_begin + && a.user_stop_code_end == b.user_stop_code_end; +} + +bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.user_stop_code_begin == b.user_stop_code_begin + && a.user_stop_code_end == b.user_stop_code_end + && a.transport_type == b.transport_type; +} + +bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number; +} + +bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.dest_code == b.dest_code; +} + +bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number + && a.journey_pattern_code == b.journey_pattern_code; +} + +bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.con_fin_rel_code == b.con_fin_rel_code; +} + +bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.concession_area_code == b.concession_area_code; +} + +bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.financer_code == b.financer_code; +} + +bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number + && a.journey_pattern_code == b.journey_pattern_code + && a.timing_link_order == b.timing_link_order; +} + 
+bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.point_code == b.point_code; +} + +bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.user_stop_code_begin == b.user_stop_code_begin + && a.user_stop_code_end == b.user_stop_code_end + && a.point_data_owner_code == b.point_data_owner_code + && a.point_code == b.point_code + && a.transport_type == b.transport_type; +} + +bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.icon_number == b.icon_number; +} + +bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.notice_code == b.notice_code; +} + +bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number + && a.journey_pattern_code == b.journey_pattern_code + && a.time_demand_group_code == b.time_demand_group_code; +} + +bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const Kv1TimeDemandGroupRunTime::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number + && a.journey_pattern_code == b.journey_pattern_code + && a.time_demand_group_code == b.time_demand_group_code + && a.timing_link_order == b.timing_link_order; +} + +bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.period_group_code == b.period_group_code; +} + +bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.specific_day_code == b.specific_day_code; +} + +bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b) { + return a.data_owner_code == 
b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.timetable_version_code == b.timetable_version_code + && a.period_group_code == b.period_group_code + && a.specific_day_code == b.specific_day_code; +} + +bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.timetable_version_code == b.timetable_version_code + && a.organizational_unit_code == b.organizational_unit_code + && a.period_group_code == b.period_group_code + && a.specific_day_code == b.specific_day_code + && a.day_type == b.day_type + && a.line_planning_number == b.line_planning_number + && a.journey_number == b.journey_number; +} + +bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.period_group_code == b.period_group_code + && a.valid_from == b.valid_from; +} + +bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.valid_date == b.valid_date; +} + +bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.schedule_code == b.schedule_code + && a.schedule_type_code == b.schedule_type_code; +} + +bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.schedule_code == b.schedule_code + && a.schedule_type_code == b.schedule_type_code + && a.line_planning_number == b.line_planning_number + && a.journey_number == b.journey_number + && a.stop_order == b.stop_order; +} 
+ +bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.schedule_code == b.schedule_code + && a.schedule_type_code == b.schedule_type_code + && a.valid_date == b.valid_date; +} + +namespace std::chrono { + static size_t hash_value(const year_month_day &ymd) { + size_t seed = 0; + + boost::hash_combine(seed, int(ymd.year())); + boost::hash_combine(seed, unsigned(ymd.month())); + boost::hash_combine(seed, unsigned(ymd.day())); + + return seed; + } + + static size_t hash_value(const sys_seconds &s) { + return boost::hash()(s.time_since_epoch().count()); + } +} + +size_t hash_value(const Kv1OrganizationalUnit::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + + return seed; +} + +size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code_parent); + boost::hash_combine(seed, k.organizational_unit_code_child); + boost::hash_combine(seed, k.valid_from); + + return seed; +} + +size_t hash_value(const Kv1UserStopPoint::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.user_stop_code); + + return seed; +} + +size_t hash_value(const Kv1UserStopArea::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.user_stop_area_code); + + return seed; +} + +size_t hash_value(const Kv1TimingLink::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.user_stop_code_begin); + boost::hash_combine(seed, k.user_stop_code_end); + + return seed; +} + +size_t hash_value(const Kv1Link::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + 
boost::hash_combine(seed, k.user_stop_code_begin); + boost::hash_combine(seed, k.user_stop_code_end); + boost::hash_combine(seed, k.transport_type); + + return seed; +} + +size_t hash_value(const Kv1Line::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + + return seed; +} + +size_t hash_value(const Kv1Destination::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.dest_code); + + return seed; +} + +size_t hash_value(const Kv1JourneyPattern::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + + return seed; +} + +size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.con_fin_rel_code); + + return seed; +} + +size_t hash_value(const Kv1ConcessionArea::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.concession_area_code); + + return seed; +} + +size_t hash_value(const Kv1Financer::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.financer_code); + + return seed; +} + +size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + boost::hash_combine(seed, k.timing_link_order); + + return seed; +} + +size_t hash_value(const Kv1Point::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.point_code); + + return seed; +} + +size_t hash_value(const Kv1PointOnLink::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + 
boost::hash_combine(seed, k.user_stop_code_begin); + boost::hash_combine(seed, k.user_stop_code_end); + boost::hash_combine(seed, k.point_data_owner_code); + boost::hash_combine(seed, k.point_code); + boost::hash_combine(seed, k.transport_type); + + return seed; +} + +size_t hash_value(const Kv1Icon::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.icon_number); + + return seed; +} + +size_t hash_value(const Kv1Notice::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.notice_code); + + return seed; +} + +size_t hash_value(const Kv1TimeDemandGroup::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + boost::hash_combine(seed, k.time_demand_group_code); + + return seed; +} + +size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + boost::hash_combine(seed, k.time_demand_group_code); + boost::hash_combine(seed, k.timing_link_order); + + return seed; +} + +size_t hash_value(const Kv1PeriodGroup::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.period_group_code); + + return seed; +} + +size_t hash_value(const Kv1SpecificDay::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.specific_day_code); + + return seed; +} + +size_t hash_value(const Kv1TimetableVersion::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.timetable_version_code); + boost::hash_combine(seed, k.period_group_code); + boost::hash_combine(seed, 
k.specific_day_code); + + return seed; +} + +size_t hash_value(const Kv1PublicJourney::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.timetable_version_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.period_group_code); + boost::hash_combine(seed, k.specific_day_code); + boost::hash_combine(seed, k.day_type); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_number); + + return seed; +} + +size_t hash_value(const Kv1PeriodGroupValidity::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.period_group_code); + boost::hash_combine(seed, k.valid_from); + + return seed; +} + +size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.valid_date); + + return seed; +} + +size_t hash_value(const Kv1ScheduleVersion::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.schedule_code); + boost::hash_combine(seed, k.schedule_type_code); + + return seed; +} + +size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.schedule_code); + boost::hash_combine(seed, k.schedule_type_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_number); + boost::hash_combine(seed, k.stop_order); + + return seed; +} + +size_t hash_value(const Kv1OperatingDay::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, 
k.organizational_unit_code); + boost::hash_combine(seed, k.schedule_code); + boost::hash_combine(seed, k.schedule_type_code); + boost::hash_combine(seed, k.valid_date); + + return seed; +} diff --git a/lib/libtmi8/src/kv6_parquet.cpp b/lib/libtmi8/src/kv6_parquet.cpp new file mode 100644 index 0000000..ca70b7f --- /dev/null +++ b/lib/libtmi8/src/kv6_parquet.cpp @@ -0,0 +1,102 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +ParquetBuilder::ParquetBuilder() { + std::shared_ptr field_type, field_data_owner_code, field_line_planning_number, field_operating_day, + field_journey_number, field_reinforcement_number, field_timestamp, field_source, + field_punctuality, field_user_stop_code, field_passage_sequence_number, + field_vehicle_number, field_block_code, field_wheelchair_accessible, + field_number_of_coaches, field_rd_y, field_rd_x, field_distance_since_last_user_stop; + field_type = arrow::field("type", arrow::utf8()); + field_data_owner_code = arrow::field("data_owner_code", arrow::utf8()); + field_line_planning_number = arrow::field("line_planning_number", arrow::utf8()); + field_operating_day = arrow::field("operating_day", arrow::date32()); + field_journey_number = arrow::field("journey_number", arrow::uint32()); + field_reinforcement_number = arrow::field("reinforcement_number", arrow::uint8()); + field_timestamp = arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::SECOND)); + field_source = arrow::field("source", arrow::utf8()); + field_punctuality = arrow::field("punctuality", arrow::int16()); + field_user_stop_code = arrow::field("user_stop_code", arrow::utf8()); + field_passage_sequence_number = arrow::field("passage_sequence_number", arrow::uint16()); + field_vehicle_number = arrow::field("vehicle_number", arrow::uint32()); + field_block_code = arrow::field("block_code", arrow::uint32()); + field_wheelchair_accessible = arrow::field("wheelchair_accessible", arrow::utf8()); + field_number_of_coaches = arrow::field("number_of_coaches", 
arrow::uint8()); + field_rd_y = arrow::field("rd_y", arrow::int32()); + field_rd_x = arrow::field("rd_x", arrow::int32()); + field_distance_since_last_user_stop = arrow::field("distance_since_last_user_stop", arrow::uint32()); + + schema = arrow::schema({ field_type, field_data_owner_code, field_line_planning_number, + field_operating_day, field_journey_number, + field_reinforcement_number, field_timestamp, field_source, + field_punctuality, field_user_stop_code, + field_passage_sequence_number, field_vehicle_number, + field_block_code, field_wheelchair_accessible, + field_number_of_coaches, field_rd_y, field_rd_x, + field_distance_since_last_user_stop }); +} + +arrow::Result> ParquetBuilder::getTable() { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr types, types.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr data_owner_codes, data_owner_codes.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr line_planning_numbers, line_planning_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr operating_days, operating_days.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr journey_numbers, journey_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr reinforcement_numbers, reinforcement_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr timestamps, timestamps.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr sources, sources.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr punctualities, punctualities.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr user_stop_codes, user_stop_codes.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr passage_sequence_numbers, passage_sequence_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr vehicle_numbers, vehicle_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr block_codes, block_codes.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr wheelchair_accessibles, wheelchair_accessibles.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr number_of_coaches, number_of_coaches.Finish()); + 
ARROW_ASSIGN_OR_RAISE(std::shared_ptr rd_ys, rd_ys.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr rd_xs, rd_xs.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr distance_since_last_user_stops, distance_since_last_user_stops.Finish()); + + std::vector> columns = { types, data_owner_codes, line_planning_numbers, operating_days, + journey_numbers, reinforcement_numbers, timestamps, sources, + punctualities, user_stop_codes, passage_sequence_numbers, + vehicle_numbers, block_codes, wheelchair_accessibles, + number_of_coaches, rd_ys, rd_xs, + distance_since_last_user_stops }; + return arrow::Result(arrow::Table::Make(schema, columns)); +} + +arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename) { + std::shared_ptr props = parquet::WriterProperties::Builder() + .compression(arrow::Compression::ZSTD) + ->created_by("oeuf-libtmi8") + ->version(parquet::ParquetVersion::PARQUET_2_6) + ->data_page_version(parquet::ParquetDataPageVersion::V2) + ->max_row_group_length(MAX_PARQUET_CHUNK) + ->build(); + + std::shared_ptr arrow_props = parquet::ArrowWriterProperties::Builder() + .store_schema()->build(); + + std::shared_ptr out_file; + std::string filename_str = filename; + ARROW_ASSIGN_OR_RAISE(out_file, arrow::io::FileOutputStream::Open(filename_str + ".part")); + + ARROW_ASSIGN_OR_RAISE(auto writer, + parquet::arrow::FileWriter::Open(*rbr.schema(), arrow::default_memory_pool(), out_file, props, arrow_props)); + for (const auto &batchr : rbr) { + ARROW_ASSIGN_OR_RAISE(auto batch, batchr); + ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); + } + ARROW_RETURN_NOT_OK(writer->Close()); + ARROW_RETURN_NOT_OK(out_file->Close()); + + std::filesystem::rename(filename_str + ".part", filename); + + return arrow::Status::OK(); +} + +arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename) { + auto tbr = arrow::TableBatchReader(table); + return writeArrowRecordsAsParquetFile(tbr, 
filename); +} diff --git a/module/default.nix b/module/default.nix new file mode 100644 index 0000000..c891ceb --- /dev/null +++ b/module/default.nix @@ -0,0 +1,118 @@ +flake: { lib, config, pkgs, ... }: +with lib; +let + inherit (flake.packages.${pkgs.stdenv.hostPlatform.system}) oeuf-recvkv6; + inherit (flake.packages.${pkgs.stdenv.hostPlatform.system}) oeuf-archiver; + + cfg = config.services.oeuf-recvkv6; + archiverCfg = config.services.oeuf-archiver; +in +{ + options.services.oeuf-recvkv6 = { + enable = mkEnableOption "oeuf-recvkv6"; + ndovProduction = mkEnableOption "usage of the NDOV Loket production ZeroMQ server"; + metricsAddr = mkOption { + type = types.str; + }; + }; + + options.services.oeuf-archiver = with types; { + enable = mkEnableOption "oeuf-archiver"; + s3 = mkOption { + type = submodule { + options = { + accessKeyIDFile = mkOption { + type = str; + }; + secretAccessKeyFile = mkOption { + type = str; + }; + provider = mkOption { + type = str; + }; + region = mkOption { + type = str; + }; + endpoint = mkOption { + type = str; + }; + bucket = mkOption { + type = str; + }; + }; + }; + }; + prometheusPushURL = mkOption { + type = str; + }; + supplementaryServiceGroups = mkOption { + type = listOf str; + }; + }; + + config = mkIf (cfg.enable || archiverCfg.enable) (mkMerge [ + { + users.users.oeuf = { + description = "oeuf service user"; + isSystemUser = true; + group = "oeuf"; + }; + + users.groups.oeuf = { }; + } + (mkIf cfg.enable { + systemd.services.oeuf-recvkv6 = { + after = [ "network-online.target" ]; + wantedBy = [ "multi-user.target" ]; + environment = { + METRICS_ADDR = cfg.metricsAddr; + NDOV_PRODUCTION = lib.boolToString cfg.ndovProduction; + }; + serviceConfig = { + User = config.users.users.oeuf.name; + Group = config.users.users.oeuf.group; + Restart = "always"; + StateDirectory = "oeuf"; + WorkingDirectory = "/var/lib/oeuf"; + ExecStart = "${lib.getBin oeuf-recvkv6}/bin/oeuf-recvkv6"; + }; + }; + }) + (mkIf archiverCfg.enable { + 
systemd.timers.oeuf-archiver = { + wantedBy = [ "timers.target" ]; + partOf = [ "oeuf-archiver.service" ]; + timerConfig = { + OnBootSec = "5m"; + OnUnitActiveSec = "5m"; + Unit = "oeuf-archiver.service"; + }; + }; + + systemd.services.oeuf-archiver = { + after = [ "network-online.target" ]; + environment = { + S3_PROVIDER = archiverCfg.s3.provider; + S3_REGION = archiverCfg.s3.region; + S3_ENDPOINT = archiverCfg.s3.endpoint; + S3_BUCKET = archiverCfg.s3.bucket; + PROMETHEUS_PUSH_URL = archiverCfg.prometheusPushURL; + }; + script = '' + export S3_ACCESS_KEY_ID="$(cat ${archiverCfg.s3.accessKeyIDFile})" + export S3_SECRET_ACCESS_KEY="$(cat ${archiverCfg.s3.secretAccessKeyFile})" + ${lib.getBin oeuf-archiver}/bin/oeuf-archiver + ''; + serviceConfig = { + Type = "oneshot"; + User = config.users.users.oeuf.name; + Group = config.users.users.oeuf.group; + SupplementaryGroups = archiverCfg.supplementaryServiceGroups; + StateDirectory = "oeuf"; + WorkingDirectory = "/var/lib/oeuf"; + AmbientCapabilities = "CAP_NET_BIND_SERVICE"; + }; + }; + }) + ]); +} diff --git a/script/archiver/default.nix b/script/archiver/default.nix new file mode 100644 index 0000000..4a464e0 --- /dev/null +++ b/script/archiver/default.nix @@ -0,0 +1,15 @@ +{ pkgs ? 
import { } }: with pkgs; + +stdenv.mkDerivation { + name = "oeuf-archiver"; + src = ./.; + + buildInputs = [ bash rclone oeuf-bundleparquet ]; + nativeBuildInputs = [ makeWrapper ]; + installPhase = '' + mkdir -p $out/bin + cp oeuf-archiver.sh $out/bin/oeuf-archiver + wrapProgram $out/bin/oeuf-archiver \ + --prefix PATH : ${lib.makeBinPath [ bash rclone oeuf-bundleparquet ]} + ''; +} diff --git a/script/archiver/oeuf-archiver.sh b/script/archiver/oeuf-archiver.sh new file mode 100755 index 0000000..478d4d9 --- /dev/null +++ b/script/archiver/oeuf-archiver.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -eux +set -o pipefail + +# This option prevents the loop from running +# if it does not match any files +shopt -s nullglob + +oeuf-bundleparquet + +export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID" +set +x # Don't print the secret access key to the log +export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY" +set -x + +for file in ./merged/oeuf-*.parquet; do + rclone move \ + --s3-provider "$S3_PROVIDER" \ + --s3-region "$S3_REGION" \ + --s3-endpoint "$S3_ENDPOINT" \ + --s3-env-auth \ + $file.meta.json :s3:$S3_BUCKET \ + && \ + rclone move \ + --s3-provider "$S3_PROVIDER" \ + --s3-region "$S3_REGION" \ + --s3-endpoint "$S3_ENDPOINT" \ + --s3-env-auth \ + $file :s3:$S3_BUCKET +done diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix new file mode 100644 index 0000000..95a9331 --- /dev/null +++ b/script/synckv6/default.nix @@ -0,0 +1,15 @@ +{ pkgs ? 
import { } }: with pkgs; + +stdenv.mkDerivation { + name = "oeuf-synckv6"; + src = ./.; + + buildInputs = [ bash rclone ]; + nativeBuildInputs = [ makeWrapper ]; + installPhase = '' + mkdir -p $out/bin + cp oeuf-synckv6.sh $out/bin/oeuf-synckv6 + wrapProgram $out/bin/oeuf-synckv6 \ + --prefix PATH : ${lib.makeBinPath [ bash rclone ]} + ''; +} diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh new file mode 100755 index 0000000..6b24347 --- /dev/null +++ b/script/synckv6/oeuf-synckv6.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +set -eu +set -o pipefail + +export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID" +export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY" + +set +x +all_files=() +declare -A metafiles + +while IFS=' ' read -r size filename; do + if [[ "$filename" == *.parquet.meta.json ]]; then + metafiles["$filename"]=1 + else + all_files+=($filename) + fi +done < <(rclone ls \ + --s3-provider "$S3_PROVIDER" \ + --s3-region "$S3_REGION" \ + --s3-endpoint "$S3_ENDPOINT" \ + --s3-env-auth \ + :s3:$S3_BUCKET) + +files=() +for filename in "${all_files[@]}"; do + if [[ -v metafiles["$filename.meta.json"] ]]; then + files+=($filename) + fi +done + +echo "Found ${#files[@]} relevant KV6 Parquet files" +echo "Synching this directory with these files" + +printf "%s\n" "${files[@]}" | rclone copy \ + --s3-provider "$S3_PROVIDER" \ + --s3-region "$S3_REGION" \ + --s3-endpoint "$S3_ENDPOINT" \ + --s3-env-auth \ + --progress \ + --files-from - \ + :s3:$S3_BUCKET ./ diff --git a/src/augmentkv6/.envrc b/src/augmentkv6/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/augmentkv6/.envrc @@ -0,0 +1,2 @@ +source_env ../../ +export DEVMODE=1 diff --git a/src/augmentkv6/Makefile b/src/augmentkv6/Makefile new file mode 100644 index 0000000..cebb291 --- /dev/null +++ b/src/augmentkv6/Makefile @@ -0,0 +1,21 @@ +# Taken from: +# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide +# for C and C++,” OpenSSF Best 
Practices Working Group. Accessed: Dec. 01, +# 2023. [Online]. Available: +# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html +CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\ + -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ + -D_GLIBCXX_ASSERTIONS \ + -fstrict-flex-arrays=3 \ + -fstack-clash-protection -fstack-protector-strong +LDFLAGS=-larrow -larrow_acero -larrow_dataset -lparquet -ltmi8 -Wl,-z,defs \ + -Wl,-z,nodlopen -Wl,-z,noexecstack \ + -Wl,-z,relro -Wl,-z,now + +augmentkv6: main.cpp + $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +.PHONY: clean +clean: + rm augmentkv6 diff --git a/src/augmentkv6/main.cpp b/src/augmentkv6/main.cpp new file mode 100644 index 0000000..81a54d3 --- /dev/null +++ b/src/augmentkv6/main.cpp @@ -0,0 +1,510 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace std::string_view_literals; + +namespace ac = arrow::acero; +namespace ds = arrow::dataset; +namespace cp = arrow::compute; +using namespace arrow; + +using TimingClock = std::conditional_t< + std::chrono::high_resolution_clock::is_steady, + std::chrono::high_resolution_clock, + std::chrono::steady_clock>; + +std::string readKv1() { + fputs("Reading KV1 from standard input\n", stderr); + + char buf[4096]; + std::string data; + while (!feof(stdin) && !ferror(stdin)) { + size_t read = fread(buf, sizeof(char), 4096, stdin); + data.append(buf, read); + } + if (ferror(stdin)) { + fputs("Error when reading from stdin\n", stderr); + exit(1); + } + fprintf(stderr, "Read %lu bytes\n", data.size()); + + return data; +} + +std::vector lex() { + std::string data = readKv1(); + + auto start = TimingClock::now(); + 
Kv1Lexer lexer(data); + lexer.lex(); + auto end = TimingClock::now(); + + std::chrono::duration elapsed{end - start}; + double bytes = static_cast(data.size()) / 1'000'000; + double speed = bytes / elapsed.count(); + + if (!lexer.errors.empty()) { + fputs("Lexer reported errors:\n", stderr); + for (const auto &error : lexer.errors) + fprintf(stderr, "- %s\n", error.c_str()); + exit(1); + } + + fprintf(stderr, "Got %lu tokens\n", lexer.tokens.size()); + fprintf(stderr, "Duration: %f s\n", elapsed.count()); + fprintf(stderr, "Speed: %f MB/s\n", speed); + + return std::move(lexer.tokens); +} + +bool parse(Kv1Records &into) { + std::vector tokens = lex(); + + Kv1Parser parser(tokens, into); + parser.parse(); + + bool ok = true; + if (!parser.gerrors.empty()) { + ok = false; + fputs("Parser reported errors:\n", stderr); + for (const auto &error : parser.gerrors) + fprintf(stderr, "- %s\n", error.c_str()); + } + if (!parser.warns.empty()) { + fputs("Parser reported warnings:\n", stderr); + for (const auto &warn : parser.warns) + fprintf(stderr, "- %s\n", warn.c_str()); + } + + fprintf(stderr, "Parsed %lu records\n", into.size()); + + return ok; +} + +void printParsedRecords(const Kv1Records &records) { + fputs("Parsed records:\n", stderr); + fprintf(stderr, " organizational_units: %lu\n", records.organizational_units.size()); + fprintf(stderr, " higher_organizational_units: %lu\n", records.higher_organizational_units.size()); + fprintf(stderr, " user_stop_points: %lu\n", records.user_stop_points.size()); + fprintf(stderr, " user_stop_areas: %lu\n", records.user_stop_areas.size()); + fprintf(stderr, " timing_links: %lu\n", records.timing_links.size()); + fprintf(stderr, " links: %lu\n", records.links.size()); + fprintf(stderr, " lines: %lu\n", records.lines.size()); + fprintf(stderr, " destinations: %lu\n", records.destinations.size()); + fprintf(stderr, " journey_patterns: %lu\n", records.journey_patterns.size()); + fprintf(stderr, " concession_financer_relations: 
%lu\n", records.concession_financer_relations.size()); + fprintf(stderr, " concession_areas: %lu\n", records.concession_areas.size()); + fprintf(stderr, " financers: %lu\n", records.financers.size()); + fprintf(stderr, " journey_pattern_timing_links: %lu\n", records.journey_pattern_timing_links.size()); + fprintf(stderr, " points: %lu\n", records.points.size()); + fprintf(stderr, " point_on_links: %lu\n", records.point_on_links.size()); + fprintf(stderr, " icons: %lu\n", records.icons.size()); + fprintf(stderr, " notices: %lu\n", records.notices.size()); + fprintf(stderr, " notice_assignments: %lu\n", records.notice_assignments.size()); + fprintf(stderr, " time_demand_groups: %lu\n", records.time_demand_groups.size()); + fprintf(stderr, " time_demand_group_run_times: %lu\n", records.time_demand_group_run_times.size()); + fprintf(stderr, " period_groups: %lu\n", records.period_groups.size()); + fprintf(stderr, " specific_days: %lu\n", records.specific_days.size()); + fprintf(stderr, " timetable_versions: %lu\n", records.timetable_versions.size()); + fprintf(stderr, " public_journeys: %lu\n", records.public_journeys.size()); + fprintf(stderr, " period_group_validities: %lu\n", records.period_group_validities.size()); + fprintf(stderr, " exceptional_operating_days: %lu\n", records.exceptional_operating_days.size()); + fprintf(stderr, " schedule_versions: %lu\n", records.schedule_versions.size()); + fprintf(stderr, " public_journey_passing_times: %lu\n", records.public_journey_passing_times.size()); + fprintf(stderr, " operating_days: %lu\n", records.operating_days.size()); +} + +void printIndexSize(const Kv1Index &index) { + fputs("Index size:\n", stderr); + fprintf(stderr, " organizational_units: %lu\n", index.organizational_units.size()); + fprintf(stderr, " user_stop_points: %lu\n", index.user_stop_points.size()); + fprintf(stderr, " user_stop_areas: %lu\n", index.user_stop_areas.size()); + fprintf(stderr, " timing_links: %lu\n", index.timing_links.size()); + 
fprintf(stderr, " links: %lu\n", index.links.size()); + fprintf(stderr, " lines: %lu\n", index.lines.size()); + fprintf(stderr, " destinations: %lu\n", index.destinations.size()); + fprintf(stderr, " journey_patterns: %lu\n", index.journey_patterns.size()); + fprintf(stderr, " concession_financer_relations: %lu\n", index.concession_financer_relations.size()); + fprintf(stderr, " concession_areas: %lu\n", index.concession_areas.size()); + fprintf(stderr, " financers: %lu\n", index.financers.size()); + fprintf(stderr, " journey_pattern_timing_links: %lu\n", index.journey_pattern_timing_links.size()); + fprintf(stderr, " points: %lu\n", index.points.size()); + fprintf(stderr, " point_on_links: %lu\n", index.point_on_links.size()); + fprintf(stderr, " icons: %lu\n", index.icons.size()); + fprintf(stderr, " notices: %lu\n", index.notices.size()); + fprintf(stderr, " time_demand_groups: %lu\n", index.time_demand_groups.size()); + fprintf(stderr, " time_demand_group_run_times: %lu\n", index.time_demand_group_run_times.size()); + fprintf(stderr, " period_groups: %lu\n", index.period_groups.size()); + fprintf(stderr, " specific_days: %lu\n", index.specific_days.size()); + fprintf(stderr, " timetable_versions: %lu\n", index.timetable_versions.size()); + fprintf(stderr, " public_journeys: %lu\n", index.public_journeys.size()); + fprintf(stderr, " period_group_validities: %lu\n", index.period_group_validities.size()); + fprintf(stderr, " exceptional_operating_days: %lu\n", index.exceptional_operating_days.size()); + fprintf(stderr, " schedule_versions: %lu\n", index.schedule_versions.size()); + fprintf(stderr, " public_journey_passing_times: %lu\n", index.public_journey_passing_times.size()); + fprintf(stderr, " operating_days: %lu\n", index.operating_days.size()); +} + +struct BasicJourneyKey { + std::string data_owner_code; + std::string line_planning_number; + int journey_number; + + auto operator<=>(const BasicJourneyKey &) const = default; +}; + +size_t hash_value(const 
BasicJourneyKey &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_number); + + return seed; +} + +using BasicJourneyKeySet = std::unordered_set>; + +arrow::Result basicJourneys(std::shared_ptr table) { + ac::TableSourceNodeOptions table_source_node_options(table); + ac::Declaration table_source("table_source", std::move(table_source_node_options)); + auto aggregate_options = ac::AggregateNodeOptions{ + /* .aggregates = */ {}, + /* .keys = */ { "data_owner_code", "line_planning_number", "journey_number" }, + }; + ac::Declaration aggregate("aggregate", { std::move(table_source) }, std::move(aggregate_options)); + + std::shared_ptr result; + ARROW_ASSIGN_OR_RAISE(result, ac::DeclarationToTable(std::move(aggregate))); + + std::shared_ptr data_owner_codes = result->GetColumnByName("data_owner_code"); + std::shared_ptr line_planning_numbers = result->GetColumnByName("line_planning_number"); + std::shared_ptr journey_numbers = result->GetColumnByName("journey_number"); + + int i_data_owner_codes_chunk = 0; + int i_journey_numbers_chunk = 0; + int i_line_planning_numbers_chunk = 0; + int i_in_data_owner_codes_chunk = 0; + int i_in_journey_numbers_chunk = 0; + int i_in_line_planning_numbers_chunk = 0; + + BasicJourneyKeySet journeys; + + for (int64_t i = 0; i < result->num_rows(); i++) { + auto data_owner_codes_chunk = std::static_pointer_cast(data_owner_codes->chunk(i_data_owner_codes_chunk)); + auto line_planning_numbers_chunk = std::static_pointer_cast(line_planning_numbers->chunk(i_line_planning_numbers_chunk)); + auto journey_numbers_chunk = std::static_pointer_cast(journey_numbers->chunk(i_journey_numbers_chunk)); + + std::string_view data_owner_code = data_owner_codes_chunk->Value(i_in_data_owner_codes_chunk); + std::string_view line_planning_number = line_planning_numbers_chunk->Value(i_in_line_planning_numbers_chunk); + uint32_t journey_number = 
journey_numbers_chunk->Value(i_in_journey_numbers_chunk); + + journeys.emplace( + std::string(data_owner_code), + std::string(line_planning_number), + journey_number + ); + + i_in_data_owner_codes_chunk++; + i_in_line_planning_numbers_chunk++; + i_in_journey_numbers_chunk++; + if (i_in_data_owner_codes_chunk >= data_owner_codes_chunk->length()) { + i_data_owner_codes_chunk++; + i_in_data_owner_codes_chunk = 0; + } + if (i_in_line_planning_numbers_chunk >= line_planning_numbers_chunk->length()) { + i_line_planning_numbers_chunk++; + i_in_line_planning_numbers_chunk = 0; + } + if (i_in_journey_numbers_chunk >= journey_numbers_chunk->length()) { + i_journey_numbers_chunk++; + i_in_journey_numbers_chunk = 0; + } + } + + return journeys; +} + +struct DistanceKey { + BasicJourneyKey journey; + std::string last_passed_user_stop_code; + + auto operator<=>(const DistanceKey &) const = default; +}; + +size_t hash_value(const DistanceKey &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.journey); + boost::hash_combine(seed, k.last_passed_user_stop_code); + + return seed; +} + +struct DistanceTimingLink { + const Kv1JourneyPatternTimingLink *jopatili; + double distance_since_start_of_journey = 0; // at the start of the link +}; + +using DistanceMap = std::unordered_map>; + +// Returns a map, where +// DataOwnerCode + LinePlanningNumber + JourneyNumber + UserStopCode -> +// Distance of Last User Stop +DistanceMap makeDistanceMap(Kv1Records &records, Kv1Index &index, BasicJourneyKeySet &journeys) { + std::unordered_map< + Kv1JourneyPattern::Key, + std::vector, + boost::hash> jopatili_index; + std::unordered_map< + BasicJourneyKey, + const Kv1PublicJourney *, + boost::hash> journey_index; + for (size_t i = 0; i < records.public_journeys.size(); i++) { + const Kv1PublicJourney *pujo = &records.public_journeys[i]; + + BasicJourneyKey journey_key( + pujo->key.data_owner_code, + pujo->key.line_planning_number, + pujo->key.journey_number); + + if 
(journeys.contains(journey_key)) { + journey_index[journey_key] = pujo; + + Kv1JourneyPattern::Key jopa_key( + pujo->key.data_owner_code, + pujo->key.line_planning_number, + pujo->journey_pattern_code); + jopatili_index[jopa_key] = {}; + } + } + + for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { + const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; + Kv1JourneyPattern::Key jopa_key( + jopatili->key.data_owner_code, + jopatili->key.line_planning_number, + jopatili->key.journey_pattern_code); + if (jopatili_index.contains(jopa_key)) { + jopatili_index[jopa_key].push_back(DistanceTimingLink(jopatili, 0)); + } + } + + for (auto &[jopa_key, timing_links] : jopatili_index) { + std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) { + return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order; + }); + + const std::string transport_type = index.journey_patterns[jopa_key]->p_line->transport_type; + + for (size_t i = 1; i < timing_links.size(); i++) { + DistanceTimingLink *timing_link = &timing_links[i]; + DistanceTimingLink *prev_timing_link = &timing_links[i - 1]; + + const Kv1Link::Key link_key( + prev_timing_link->jopatili->key.data_owner_code, + prev_timing_link->jopatili->user_stop_code_begin, + prev_timing_link->jopatili->user_stop_code_end, + transport_type); + double link_distance = index.links[link_key]->distance; + timing_link->distance_since_start_of_journey = + prev_timing_link->distance_since_start_of_journey + link_distance; + } + } + + // DataOwnerCode + LinePlanningNumber + JourneyNumber + UserStopCode -> + // Distance of Last User Stop + DistanceMap distance_map; + + for (const auto &journey : journeys) { + const Kv1PublicJourney *pujo = journey_index[journey]; + if (pujo == nullptr) { + std::cerr << "Warning: No PUJO found for [" << journey.data_owner_code << "] " + << journey.line_planning_number << "/" << journey.journey_number << std::endl; + continue; + } + 
Kv1JourneyPattern::Key jopa_key( + pujo->key.data_owner_code, + pujo->key.line_planning_number, + pujo->journey_pattern_code); + for (const auto &timing_link : jopatili_index[jopa_key]) { + DistanceKey key(journey, timing_link.jopatili->user_stop_code_begin); + distance_map[key] = timing_link.distance_since_start_of_journey; + } + } + + return distance_map; +} + +arrow::Result> augment( + std::shared_ptr table, + const DistanceMap &distance_map +) { + for (int i = 0; i < table->num_columns(); i++) { + if (table->column(i)->num_chunks() > 1) { + std::stringstream ss; + ss << "Error: Expected column " << i + << " (" << table->ColumnNames()[i] << ") to have 1 chunk, got " + << table->column(i)->num_chunks(); + return arrow::Status::Invalid(ss.str()); + } + } + + auto data_owner_codes = std::static_pointer_cast(table->GetColumnByName("data_owner_code")->chunk(0)); + auto line_planning_numbers = std::static_pointer_cast(table->GetColumnByName("line_planning_number")->chunk(0)); + auto journey_numbers = std::static_pointer_cast(table->GetColumnByName("journey_number")->chunk(0)); + auto user_stop_codes = std::static_pointer_cast(table->GetColumnByName("user_stop_code")->chunk(0)); + auto distance_since_last_user_stops = std::static_pointer_cast(table->GetColumnByName("distance_since_last_user_stop")->chunk(0)); + auto timestamps = std::static_pointer_cast(table->GetColumnByName("timestamp")->chunk(0)); + + auto timestamps_type = table->schema()->GetFieldByName("timestamp")->type(); + if (timestamps_type->id() != arrow::Type::TIMESTAMP) + return arrow::Status::Invalid("Field 'timestamp' does not have expected type TIMESTAMP"); + if (std::static_pointer_cast(timestamps_type)->unit() != arrow::TimeUnit::MILLI) + return arrow::Status::Invalid("Field 'timestamp' does not have unit MILLI"); + if (!std::static_pointer_cast(timestamps_type)->timezone().empty()) + return arrow::Status::Invalid("Field 'timestamp' should have empty time zone name"); + + std::shared_ptr 
field_distance_since_start_of_journey = + arrow::field("distance_since_start_of_journey", arrow::uint32()); + std::shared_ptr field_day_of_week = + arrow::field("timestamp_iso_day_of_week", arrow::int64()); + std::shared_ptr field_date = + arrow::field("timestamp_date", arrow::date32()); + std::shared_ptr field_local_time = + arrow::field("timestamp_local_time", arrow::time32(arrow::TimeUnit::SECOND)); + arrow::UInt32Builder distance_since_start_of_journey_builder; + arrow::Int64Builder day_of_week_builder; + arrow::Date32Builder date_builder; + arrow::Time32Builder local_time_builder(arrow::time32(arrow::TimeUnit::SECOND), arrow::default_memory_pool()); + + const std::chrono::time_zone *amsterdam = std::chrono::locate_zone("Europe/Amsterdam"); + + for (int64_t i = 0; i < table->num_rows(); i++) { + DistanceKey key( + BasicJourneyKey( + std::string(data_owner_codes->Value(i)), + std::string(line_planning_numbers->Value(i)), + journey_numbers->Value(i)), + std::string(user_stop_codes->Value(i))); + + uint32_t distance_since_last_user_stop = distance_since_last_user_stops->Value(i); + if (distance_map.contains(key)) { + uint32_t total_distance = distance_since_last_user_stop + static_cast(distance_map.at(key)); + ARROW_RETURN_NOT_OK(distance_since_start_of_journey_builder.Append(total_distance)); + } else { + ARROW_RETURN_NOT_OK(distance_since_start_of_journey_builder.AppendNull()); + } + + // Welp, this has gotten a bit complicated! 
+ std::chrono::sys_seconds timestamp(std::chrono::floor(std::chrono::milliseconds(timestamps->Value(i)))); + std::chrono::zoned_seconds zoned_timestamp(amsterdam, timestamp); + std::chrono::local_seconds local_timestamp(zoned_timestamp); + std::chrono::local_days local_date = std::chrono::floor(local_timestamp); + std::chrono::year_month_day date(local_date); + std::chrono::weekday day_of_week(local_date); + std::chrono::hh_mm_ss time(local_timestamp - local_date); + std::chrono::sys_days unix_date(date); + + int64_t iso_day_of_week = day_of_week.iso_encoding(); + int32_t unix_days = static_cast(unix_date.time_since_epoch().count()); + int32_t secs_since_midnight = static_cast(std::chrono::seconds(time).count()); + + ARROW_RETURN_NOT_OK(day_of_week_builder.Append(iso_day_of_week)); + ARROW_RETURN_NOT_OK(date_builder.Append(unix_days)); + ARROW_RETURN_NOT_OK(local_time_builder.Append(secs_since_midnight)); + } + + ARROW_ASSIGN_OR_RAISE(auto distance_since_start_of_journey_col_chunk, distance_since_start_of_journey_builder.Finish()); + ARROW_ASSIGN_OR_RAISE(auto day_of_week_col_chunk, day_of_week_builder.Finish()); + ARROW_ASSIGN_OR_RAISE(auto date_col_chunk, date_builder.Finish()); + ARROW_ASSIGN_OR_RAISE(auto local_time_col_chunk, local_time_builder.Finish()); + auto distance_since_start_of_journey_col = + std::make_shared(distance_since_start_of_journey_col_chunk); + auto day_of_week_col = std::make_shared(day_of_week_col_chunk); + auto date_col = std::make_shared(date_col_chunk); + auto local_time_col = std::make_shared(local_time_col_chunk); + + ARROW_ASSIGN_OR_RAISE(table, table->AddColumn( + table->num_columns(), + field_distance_since_start_of_journey, + distance_since_start_of_journey_col)); + ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_day_of_week, day_of_week_col)); + ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_date, date_col)); + ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), 
field_local_time, local_time_col)); + + return table; +} + +arrow::Status processTables(Kv1Records &records, Kv1Index &index) { + std::shared_ptr input; + ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open("oeuf-input.parquet")); + + std::unique_ptr arrow_reader; + ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &arrow_reader)); + + std::shared_ptr table; + ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table)); + + std::cerr << "Input KV6 file has " << table->num_rows() << " rows" << std::endl; + ARROW_ASSIGN_OR_RAISE(BasicJourneyKeySet journeys, basicJourneys(table)); + std::cerr << "Found " << journeys.size() << " distinct journeys" << std::endl; + DistanceMap distance_map = makeDistanceMap(records, index, journeys); + std::cerr << "Distance map has " << distance_map.size() << " keys" << std::endl; + + std::cerr << "Creating augmented table" << std::endl; + ARROW_ASSIGN_OR_RAISE(std::shared_ptr augmented, augment(table, distance_map)); + + std::cerr << "Writing augmented table" << std::endl; + return writeArrowTableAsParquetFile(*augmented, "oeuf-augmented.parquet"); +} + +int main(int argc, char *argv[]) { + Kv1Records records; + if (!parse(records)) { + fputs("Error parsing records, exiting\n", stderr); + return EXIT_FAILURE; + } + printParsedRecords(records); + fputs("Indexing...\n", stderr); + Kv1Index index(&records); + fprintf(stderr, "Indexed %lu records\n", index.size()); + // Only notice assignments are not indexed. If this equality is not valid, + // then this means that we had duplicate keys or that something else went + // wrong. That would really not be great. 
+ assert(index.size() == records.size() - records.notice_assignments.size()); + printIndexSize(index); + fputs("Linking records...\n", stderr); + kv1LinkRecords(index); + fputs("Done linking\n", stderr); + + arrow::Status st = processTables(records, index); + if (!st.ok()) { + std::cerr << "Failed to process tables: " << st << std::endl; + return EXIT_FAILURE; + } +} diff --git a/src/bundleparquet/.envrc b/src/bundleparquet/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/bundleparquet/.envrc @@ -0,0 +1,2 @@ +source_env ../../ +export DEVMODE=1 diff --git a/src/bundleparquet/Makefile b/src/bundleparquet/Makefile new file mode 100644 index 0000000..170304d --- /dev/null +++ b/src/bundleparquet/Makefile @@ -0,0 +1,21 @@ +# Taken from: +# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide +# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01, +# 2023. [Online]. Available: +# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html +CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\ + -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ + -D_GLIBCXX_ASSERTIONS \ + -fstrict-flex-arrays=3 \ + -fstack-clash-protection -fstack-protector-strong +LDFLAGS=-larrow -lcurl -lparquet -lprometheus-cpp-push -lprometheus-cpp-core -lz -ltmi8 -Wl,-z,defs \ + -Wl,-z,nodlopen -Wl,-z,noexecstack \ + -Wl,-z,relro -Wl,-z,now + +bundleparquet: main.cpp spliturl.cpp + $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +.PHONY: clean +clean: + rm bundleparquet diff --git a/src/bundleparquet/main.cpp b/src/bundleparquet/main.cpp new file mode 100644 index 0000000..05fd881 --- /dev/null +++ b/src/bundleparquet/main.cpp @@ -0,0 +1,213 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include 
+#include + +#include + +#include "spliturl.hpp" + +static const int MIN_COMBINED_ROWS = 1000000; // one million +static const int MAX_COMBINED_ROWS = 2000000; // two million + +struct FileMetadata { + int64_t min_timestamp = 0; + int64_t max_timestamp = 0; + int64_t rows_written = 0; +}; + +struct File { + FileMetadata metadata; + std::filesystem::path filename; +}; + +FileMetadata readMetadataOf(std::filesystem::path filename) { + std::string meta_filename = std::string(filename) + ".meta.json"; + std::ifstream meta_file = std::ifstream(meta_filename, std::ifstream::in|std::ifstream::binary); + nlohmann::json meta_json; + meta_file >> meta_json; + FileMetadata meta = { + .min_timestamp = meta_json["min_timestamp"], + .max_timestamp = meta_json["max_timestamp"], + .rows_written = meta_json["rows_written"], + }; + return meta; +} + +arrow::Status processFirstTables(std::deque &files, prometheus::Counter &rows_written) { + if (files.size() == 0) { + std::cerr << "Did not find any files" << std::endl; + return arrow::Status::OK(); + } + + int64_t rows = 0; + + std::vector> tables; + std::vector processed; + int64_t min_timestamp = std::numeric_limits::max(); + int64_t max_timestamp = 0; + + bool over_capacity_risk = false; + auto it = files.begin(); + while (it != files.end()) { + const std::filesystem::path &filename = it->filename; + const FileMetadata &metadata = it->metadata; + + std::shared_ptr input; + ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(filename)); + + std::unique_ptr arrow_reader; + ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &arrow_reader)); + + if (metadata.min_timestamp < min_timestamp) + min_timestamp = metadata.min_timestamp; + if (metadata.max_timestamp > max_timestamp) + max_timestamp = metadata.max_timestamp; + + if (rows + metadata.rows_written > MAX_COMBINED_ROWS) { + over_capacity_risk = true; + break; + } + + std::shared_ptr table; + ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table)); 
+ tables.push_back(table); + processed.push_back(filename); + rows += metadata.rows_written; + it = files.erase(it); + } + + if (rows < MIN_COMBINED_ROWS && !over_capacity_risk) { + std::cerr << "Found files, but not enough to satisfy the minimum amount of rows for the combined file" << std::endl; + std::cerr << "(We have " << rows << "/" << MIN_COMBINED_ROWS << " rows at the moment, so " + << static_cast(rows)/static_cast(MIN_COMBINED_ROWS)*100.f << "%)" << std::endl; + return arrow::Status::OK(); + } else if (rows == 0 && over_capacity_risk) { + const std::filesystem::path &filename = files.front().filename; + std::filesystem::rename(filename, "merged" / filename); + std::filesystem::rename(std::string(filename) + ".meta.json", std::string("merged" / filename) + ".meta.json"); + rows_written.Increment(static_cast(files.front().metadata.rows_written)); + files.pop_front(); + return arrow::Status::OK(); + } + + // Default options specify that the schemas are not unified, which is + // luckily exactly what we want :) + std::shared_ptr merged_table; + ARROW_ASSIGN_OR_RAISE(merged_table, arrow::ConcatenateTables(tables)); + + auto timestamp = std::chrono::round(std::chrono::system_clock::now()); + std::string filename = std::format("merged/oeuf-{:%FT%T%Ez}.parquet", timestamp); + ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*merged_table, filename)); + + std::cerr << "Wrote merged table to " << filename << std::endl; + + std::ofstream metaf(filename + ".meta.json.part", std::ios::binary); + nlohmann::json meta{ + { "min_timestamp", min_timestamp }, + { "max_timestamp", max_timestamp }, + { "rows_written", rows }, + }; + metaf << meta; + metaf.close(); + std::filesystem::rename(filename + ".meta.json.part", filename + ".meta.json"); + + std::cerr << "Wrote merged table metadata" << std::endl; + rows_written.Increment(static_cast(rows)); + + for (const std::filesystem::path &filename : processed) { + std::filesystem::remove(filename); + 
std::filesystem::remove(std::string(filename) + ".meta.json"); + } + + std::cerr << "Successfully wrote merged table, metadata and deleted old files" << std::endl; + + return arrow::Status::OK(); +} + +arrow::Status processTables(std::deque &files, prometheus::Counter &rows_written) { + while (!files.empty()) + ARROW_RETURN_NOT_OK(processFirstTables(files, rows_written)); + return arrow::Status::OK(); +} + +int main(int argc, char *argv[]) { + std::filesystem::path cwd = std::filesystem::current_path(); + std::filesystem::create_directory(cwd / "merged"); + + const char *prom_push_url = getenv("PROMETHEUS_PUSH_URL"); + if (!prom_push_url || strlen(prom_push_url) == 0) { + std::cerr << "Error: no PROMETHEUS_PUSH_URL set!" << std::endl; + return EXIT_FAILURE; + } + + std::string split_err; + auto split_prom_push_url = splitUrl(prom_push_url, &split_err); + if (!split_prom_push_url) { + std::cerr << "Could not process URL in environment variable PROMETHEUS_PUSH_URL: " + << split_err << std::endl; + return EXIT_FAILURE; + } + std::cout << "Prometheus Push URL: " << split_prom_push_url->schemehost << ":" + << split_prom_push_url->portpath << std::endl; + + prometheus::Gateway gateway{split_prom_push_url->schemehost, + split_prom_push_url->portpath, + "oeuf-archiver"}; + + auto registry = std::make_shared(); + prometheus::Gauge &rows_available = prometheus::BuildGauge() + .Name("archiver_rows_available") + .Help("Number of rows available to the archiver") + .Register(*registry) + .Add({}); + prometheus::Counter &rows_written = prometheus::BuildCounter() + .Name("archiver_rows_written") + .Help("Number of rows written by the archiver") + .Register(*registry) + .Add({}); + gateway.RegisterCollectable(registry); + + std::deque files; + for (auto const &dir_entry : std::filesystem::directory_iterator{cwd}) { + if (!dir_entry.is_regular_file()) continue; + std::filesystem::path filename = dir_entry.path().filename(); + const std::string &filename_str = filename; + if 
(filename_str.starts_with("oeuf-") && filename_str.ends_with("+00:00.parquet")) { + try { + FileMetadata meta = readMetadataOf(filename); + File file = { .metadata = meta, .filename = filename }; + files.push_back(file); + + rows_available.Increment(static_cast(meta.rows_written)); + } catch (const std::exception &e) { + std::cerr << "Failed to read metadata of file " << filename << ": " << e.what() << std::endl; + return EXIT_FAILURE; + } + } + } + + std::sort(files.begin(), files.end(), + [](const File &f1, const File &f2) { return f1.filename < f2.filename; }); + arrow::Status st = processTables(files, rows_written); + if (!st.ok()) { + std::cerr << "Failed to process tables: " << st << std::endl; + return EXIT_FAILURE; + } + + gateway.Push(); +} diff --git a/src/bundleparquet/spliturl.cpp b/src/bundleparquet/spliturl.cpp new file mode 100644 index 0000000..90fd821 --- /dev/null +++ b/src/bundleparquet/spliturl.cpp @@ -0,0 +1,203 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include +#include + +#include + +#include "spliturl.hpp" + +// splitUrl takes a URL of the shape '[http[s]://]HOST[:PORT][/PATH]', and +// splits it into two URLs: +// - scheme + host -> '[http[s]://]HOST' +// - port + path -> '[PORT][/PATH]' +// In case an IPv6 address is provided, the host must be enclosed in square +// brackets. The zone ID may also be indicated. Note that in the resulting +// parts, the colon preceding the port number is omitted. This is on purpose. +std::optional splitUrl(const std::string &url, std::string *error) { + std::stringstream errs; + std::optional result; + char *processed = nullptr; + char *scheme = nullptr; + char *user = nullptr; + char *password = nullptr; + char *zoneid = nullptr; + char *query = nullptr; + char *fragment = nullptr; + CURLU *schemehost = nullptr; + char *schemehost_url = nullptr; + char *portpath_url = nullptr; + + // Parse the URL, allowing the user to omit the scheme. 
CURL will use 'https' + // by default if no scheme is specified. + + CURLU *parsed = curl_url(); + CURLUcode rc = curl_url_set(parsed, CURLUPART_URL, url.c_str(), CURLU_DEFAULT_SCHEME); + if (rc != CURLUE_OK) { + errs << "Failed to parse URL: " << curl_url_strerror(rc); + goto Exit; + } + + // As we parse the URL with the option CURLU_DEFAULT_SCHEME, the CURL API + // won't require the user to provide the scheme part of the URL. It will + // automatically default the scheme to https. However, we do not usually want + // it to default to HTTPS, but HTTP instead (as the use case, connecting to a + // PushGateway server, usually is served over a private network via HTTP). + // + // This is why we check if the scheme was put there by CURL and otherwise set + // it to HTTP. We also check for any other schemes that the user may have + // provided, and reject anything that is not http/https. + if (!url.starts_with("http://") && !url.starts_with("https://")) { + rc = curl_url_get(parsed, CURLUPART_SCHEME, &scheme, 0); + if (rc != CURLUE_OK) { + errs << "Could not get scheme from parsed URL: " << curl_url_strerror(rc); + goto Exit; + } + if (strcmp(scheme, "https")) { + errs << "Unexpected scheme" << scheme << "in provided URL (expected http or https)"; + goto Exit; + } + rc = curl_url_set(parsed, CURLUPART_SCHEME, "http", 0); + if (rc != CURLUE_OK) { + errs << "Could not set URL scheme to http: " << curl_url_strerror(rc); + goto Exit; + } + } + + // Turn the parsed URL back into a string. + rc = curl_url_get(parsed, CURLUPART_URL, &processed, 0); + if (rc != CURLUE_OK) { + errs << "Failed to output parsed URL: " << curl_url_strerror(rc); + goto Exit; + } + + // This part of the code checks if no prohibited parts are present in the URL + // (basic auth: (user, password), query, fragment). 
+ + rc = curl_url_get(parsed, CURLUPART_USER, &user, 0); + if (rc == CURLUE_OK && strlen(user) != 0) { + errs << "Provided URL should not contain a user part"; + goto Exit; + } else if (rc != CURLUE_NO_USER && rc != CURLUE_OK) { + errs << "Failed to get check user part existence in provided url: " << curl_url_strerror(rc); + goto Exit; + } + + rc = curl_url_get(parsed, CURLUPART_PASSWORD, &password, 0); + if (rc == CURLUE_OK && strlen(password) != 0) { + errs << "Provided URL should not contain a password part"; + goto Exit; + } else if (rc != CURLUE_NO_PASSWORD && rc != CURLUE_OK) { + errs << "Failed to get check password part existence in provided url: " << curl_url_strerror(rc); + goto Exit; + } + + rc = curl_url_get(parsed, CURLUPART_QUERY, &query, 0); + if (rc == CURLUE_OK && strlen(query) != 0) { + errs << "Provided URL should not contain a query part"; + goto Exit; + } else if (rc != CURLUE_NO_QUERY && rc != CURLUE_OK) { + errs << "Failed to get check query part existence in provided url: " << curl_url_strerror(rc); + goto Exit; + } + + rc = curl_url_get(parsed, CURLUPART_FRAGMENT, &fragment, 0); + if (rc == CURLUE_OK && strlen(fragment) != 0) { + errs << "Provided URL should not contain a fragment part"; + goto Exit; + } else if (rc != CURLUE_NO_FRAGMENT && rc != CURLUE_OK) { + errs << "Failed to get check fragment part existence in provided url: " << curl_url_strerror(rc); + goto Exit; + } + + // Now that we know that the provided URL makes sense, we can start doing + // some arts and crafts. We get started by copying the parsed URL into + // schemehost and simply delete all parts which are not scheme + host. + + schemehost = curl_url_dup(parsed); + + // CURL BUG WORKAROUND: CURLUPART_ZONEID is NOT copied by curl_url_dup! 
+ // ^ fixed in CURL 8.3.0 after https://curl.se/mail/lib-2023-07/0047.html + rc = curl_url_get(parsed, CURLUPART_ZONEID, &zoneid, 0); + if (rc == CURLUE_OK) { + rc = curl_url_set(schemehost, CURLUPART_ZONEID, zoneid, 0); + if (rc != CURLUE_OK) { + errs << "Could not copy zone ID to duplicated URL: " << curl_url_strerror(rc); + goto Exit; + } + } + rc = curl_url_set(schemehost, CURLUPART_PORT, nullptr, 0); + if (rc != CURLUE_OK) { + errs << "Could not unset port in duplicated URL: " << curl_url_strerror(rc); + goto Exit; + } + rc = curl_url_set(schemehost, CURLUPART_PATH, nullptr, 0); + if (rc != CURLUE_OK) { + errs << "Could not unset path in duplicated URL: " << curl_url_strerror(rc); + goto Exit; + } + + // Okay, now we have the schemehost CURLU all ready to go. Note that a URL + // only consisting of a scheme and host is considered valid, so CURL will be + // more than happy to actually turn it into a string for us. Which is exactly + // what we do here :) + + rc = curl_url_get(schemehost, CURLUPART_URL, &schemehost_url, 0); + if (rc != CURLUE_OK) { + errs << "Could not get scheme + host URL: " << curl_url_strerror(rc); + goto Exit; + } + + // Remove any trailing slash after the scheme + host URL that CURL might have + // put there -- we still want to get a valid URL if we paste the port + path + // part behind it. + + if (strlen(schemehost_url) > 0) { + if (schemehost_url[strlen(schemehost_url) - 1] != '/') { + errs << "Scheme + host URL does not end with a slash"; + goto Exit; + } + schemehost_url[strlen(schemehost_url) - 1] = '\0'; + } + + // Look, this is really gross. Because the port + path part of the URL is not + // a valid URL itself, but the scheme + host should be a prefix of the full + // URL containing the port + path, we can simply check if it is indeed a + // prefix, and then strip it from the full URL, giving us the port + path + // (after deleting the colon preceding the port). 
+ + if (!std::string_view(processed).starts_with(schemehost_url)) { + errs << "Scheme + host URL is not a prefix of the processed URL"; + goto Exit; + } + + portpath_url = processed + strlen(schemehost_url); + // We should not have the colon before the port, prometheus-cpp inserts it + if (strlen(portpath_url) > 0 && portpath_url[0] == ':') portpath_url++; + // We do not need a trailing slash + if (strlen(portpath_url) > 0 && portpath_url[strlen(portpath_url)-1] == '/') + portpath_url[strlen(portpath_url)-1] = '\0'; + + // It has been done. BLECH + result = std::make_optional(schemehost_url, portpath_url); + +Exit: + curl_free(processed); + curl_free(scheme); + curl_free(user); + curl_free(password); + curl_free(query); + curl_free(fragment); + curl_free(zoneid); + curl_free(schemehost_url); + curl_url_cleanup(schemehost); + curl_url_cleanup(parsed); + + if (!result && error) + *error = errs.str(); + + return result; +} diff --git a/src/bundleparquet/spliturl.hpp b/src/bundleparquet/spliturl.hpp new file mode 100644 index 0000000..d8150e0 --- /dev/null +++ b/src/bundleparquet/spliturl.hpp @@ -0,0 +1,11 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include + +struct SplitUrl { + std::string schemehost; + std::string portpath; +}; + +std::optional splitUrl(const std::string &url, std::string *error = nullptr); diff --git a/src/filterkv6/.envrc b/src/filterkv6/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/filterkv6/.envrc @@ -0,0 +1,2 @@ +source_env ../../ +export DEVMODE=1 diff --git a/src/filterkv6/Makefile b/src/filterkv6/Makefile new file mode 100644 index 0000000..13bb38e --- /dev/null +++ b/src/filterkv6/Makefile @@ -0,0 +1,21 @@ +# Taken from: +# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide +# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01, +# 2023. [Online]. 
Available: +# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html +CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\ + -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ + -D_GLIBCXX_ASSERTIONS \ + -fstrict-flex-arrays=3 \ + -fstack-clash-protection -fstack-protector-strong +LDFLAGS=-larrow -larrow_dataset -lparquet -ltmi8 -Wl,-z,defs \ + -Wl,-z,nodlopen -Wl,-z,noexecstack \ + -Wl,-z,relro -Wl,-z,now + +filterkv6: main.cpp + $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +.PHONY: clean +clean: + rm filterkv6 diff --git a/src/filterkv6/main.cpp b/src/filterkv6/main.cpp new file mode 100644 index 0000000..a32220a --- /dev/null +++ b/src/filterkv6/main.cpp @@ -0,0 +1,106 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace ds = arrow::dataset; +namespace cp = arrow::compute; +using namespace arrow; + +arrow::Status processTables(std::string lineno) { + auto filesystem = std::make_shared(); + + fs::FileSelector selector; + selector.base_dir = std::filesystem::current_path(); + selector.recursive = false; + + auto format = std::static_pointer_cast(std::make_shared()); + + ARROW_ASSIGN_OR_RAISE(auto factory, + ds::FileSystemDatasetFactory::Make(filesystem, selector, format, + ds::FileSystemFactoryOptions())); + + ARROW_ASSIGN_OR_RAISE(auto dataset, factory->Finish()); + + printf("Scanning dataset for line %s...\n", lineno.c_str()); + // Read specified columns with a row filter + ARROW_ASSIGN_OR_RAISE(auto scan_builder, dataset->NewScan()); + ARROW_RETURN_NOT_OK(scan_builder->Filter(cp::and_({ + cp::equal(cp::field_ref("line_planning_number"), cp::literal(lineno)), + cp::is_valid(cp::field_ref("rd_x")), + cp::is_valid(cp::field_ref("rd_y")), + }))); + + ARROW_ASSIGN_OR_RAISE(auto scanner, scan_builder->Finish()); 
+ ARROW_ASSIGN_OR_RAISE(auto table, scanner->ToTable()); + + puts("Finished loading data, computing stable sort indices..."); + + arrow::Datum sort_indices; + cp::SortOptions sort_options; + sort_options.sort_keys = { cp::SortKey("timestamp" /* ascending by default */) }; + ARROW_ASSIGN_OR_RAISE(sort_indices, cp::CallFunction("sort_indices", { table }, &sort_options)); + puts("Finished computing stable sort indices, creating sorted table..."); + + arrow::Datum sorted; + ARROW_ASSIGN_OR_RAISE(sorted, cp::CallFunction("take", { table, sort_indices })); + + puts("Writing sorted table to disk..."); + ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*sorted.table(), "merged/oeuf-merged.parquet")); + puts("Syncing..."); + sync(); + puts("Done. Have a nice day."); + + return arrow::Status::OK(); +} + +#define NOTICE "Notice: This tool will fail if any non-Parquet files in are present in the\n" \ + " current working directory. It does not load files which are present in\n" \ + " any possible subdirectories." + +const char help[] = + "Usage: %s \n" + "\n" + " LINENO The LinePlanningNumber as in the KV1/KV6 data\n\n" + NOTICE "\n"; + +void exitHelp(const char *progname, int code = 1) { + printf(help, progname); + exit(code); +} + +int main(int argc, char *argv[]) { + const char *progname = argv[0]; + if (argc != 2) { + puts("Error: incorrect number of arguments provided\n"); + exitHelp(progname); + } + char *lineno = argv[1]; + puts(NOTICE "\n"); + + std::filesystem::path cwd = std::filesystem::current_path(); + std::filesystem::create_directory(cwd / "merged"); + + puts("Running this program may take a while, especially on big datasets. If you're\n" + "processing the data of a single bus line over the course of multiple months,\n" + "you may see memory usage of up to 10 GiB. 
Make sure that you have sufficient\n" + "RAM available, to avoid overloading and subsequently freezing your system.\n"); + + arrow::Status st = processTables(std::string(lineno)); + if (!st.ok()) { + std::cerr << "Failed to process tables: " << st << std::endl; + return EXIT_FAILURE; + } +} diff --git a/src/querykv1/.envrc b/src/querykv1/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/querykv1/.envrc @@ -0,0 +1,2 @@ +source_env ../../ +export DEVMODE=1 diff --git a/src/querykv1/.gitignore b/src/querykv1/.gitignore new file mode 100644 index 0000000..5761abc --- /dev/null +++ b/src/querykv1/.gitignore @@ -0,0 +1 @@ +*.o diff --git a/src/querykv1/Makefile b/src/querykv1/Makefile new file mode 100644 index 0000000..a8791f5 --- /dev/null +++ b/src/querykv1/Makefile @@ -0,0 +1,28 @@ +# Taken from: +# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide +# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01, +# 2023. [Online]. 
Available: +# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html +CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\ + -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ + -D_GLIBCXX_ASSERTIONS \ + -fstrict-flex-arrays=3 \ + -fstack-clash-protection -fstack-protector-strong +LDFLAGS=-ltmi8 -Wl,-z,defs \ + -Wl,-z,nodlopen -Wl,-z,noexecstack \ + -Wl,-z,relro -Wl,-z,now + +HDRS=cliopts.hpp daterange.hpp joparoute.hpp journeyinfo.hpp journeyroute.hpp journeys.hpp schedule.hpp +SRCS=main.cpp cliopts.cpp daterange.cpp joparoute.cpp journeyinfo.cpp journeyroute.cpp journeys.cpp schedule.cpp +OBJS=$(patsubst %.cpp,%.o,$(SRCS)) + +%.o: %.cpp $(HDRS) + $(CXX) -c -o $@ $< $(CXXFLAGS) + +querykv1: $(OBJS) + $(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +.PHONY: clean +clean: + rm querykv1 diff --git a/src/querykv1/cliopts.cpp b/src/querykv1/cliopts.cpp new file mode 100644 index 0000000..bef7a98 --- /dev/null +++ b/src/querykv1/cliopts.cpp @@ -0,0 +1,456 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include + +#include + +#include "cliopts.hpp" + +using namespace std::string_view_literals; + +const char *opt_set = ""; +const char *opt_unset = nullptr; + +const char help[] = R"(Usage: %1$s [OPTIONS] + +Global Options: + --kv1 Path to file containing all KV1 data, '-' for stdin + -h, --help Print this help + +Commands: + joparoute Generate CSV for journey pattern route + journeyinfo Print some information on a journey + journeyroute Generate CSV for journey route + journeys List journeys of a specific line going from stop A to B + schedule Generate schedule +)"; + +const char joparoute_help[] = R"(Usage: %1$s joparoute --line --jopa [OPTIONS] + +Options: + --line Line planning number as in schedule + --jopa Journey pattern code as in KV1 data + -o Path of file to write to, '-' for stdout + +Global Options: + --kv1 
Path to file containing all KV1 data, '-' for stdin + -h, --help Print this help +)"; + +const char journeyroute_help[] = R"(Usage: %1$s journeyroute --line [OPTIONS] + +Options: + --line Line planning number as in KV1 data + --journey Journey number as in KV1 data + -o Path of file to write to, '-' for stdout + +Global Options: + --kv1 Path to file containing all KV1 data, '-' for stdin + -h, --help Print this help +)"; + +const char journeys_help[] = R"(Usage: %1$s journeys --line --begin --end [OPTIONS] + +For the --begin and --end arguments, use the following format: + --begin/--end stop: + --begin/--end star: + +Options: + --begin User stop code/area of stop the journey should begin at + --end User stop code/area of stop the journey should end at + --line Line planning number to filter on + -o Path of file to write to, '-' for stdout + +Global Options: + --kv1 Path to file containing all KV1 data, '-' for stdin + -h, --help Print this help +)"; + +const char journeyinfo_help[] = R"(Usage: %1$s journeyinfo --line --journey [OPTIONS] + +Options: + --line Line planning number to filter on + --journey Journey number as in schedule + +Global Options: + --kv1 Path to file containing all KV1 data, '-' for stdin + -h, --help Print this help +)"; + +const char schedule_help[] = R"(Usage: %1$s schedule --line [OPTIONS] + +Options: + --line Line planning number to generate schedule for + -o Path of file to write to, '-' for stdout + +Global Options: + --kv1 Path to file containing all KV1 data, '-' for stdin + -h, --help Print this help +)"; + +void journeyRouteValidateOptions(const char *progname, Options *options) { +#define X(name, argument, long_, short_) \ + if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \ + && #name != "journey_number"sv && #name != "help"sv && #name != "output_file_path"sv) \ + if (options->name) { \ + if (long_) { \ + if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeyroute subcommand\n\n", progname, 
static_cast(long_), short_); \ + else fprintf(stderr, "%s: unexpected flag --%s for journeyroute subcommand\n\n", progname, static_cast(long_)); \ + } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeyroute subcommand\n\n", progname, short_); \ + fprintf(stderr, journeyroute_help, progname); \ + exit(1); \ + } + LONG_OPTIONS + SHORT_OPTIONS +#undef X + + if (options->positional.size() > 0) { + fprintf(stderr, "%s: unexpected positional argument(s) for journeyroute subcommand\n\n", progname); + for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); + fprintf(stderr, journeyroute_help, progname); + exit(1); + } + + if (!options->kv1_file_path) + options->kv1_file_path = "-"; + if (!options->output_file_path) + options->output_file_path = "-"; + if (options->kv1_file_path == ""sv) { + fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); + fprintf(stderr, journeyroute_help, progname); + exit(1); + } + if (options->output_file_path == ""sv) { + fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); + fprintf(stderr, journeyroute_help, progname); + exit(1); + } + if (!options->journey_number || options->journey_number == ""sv) { + fprintf(stderr, "%s: journey number must be provided\n\n", progname); + fprintf(stderr, journeyroute_help, progname); + exit(1); + } + if (!options->line_planning_number || options->line_planning_number == ""sv) { + fprintf(stderr, "%s: line planning number must be provided\n\n", progname); + fprintf(stderr, journeyroute_help, progname); + exit(1); + } +} + +void scheduleValidateOptions(const char *progname, Options *options) { +#define X(name, argument, long_, short_) \ + if (#name != "kv1_file_path"sv && #name != "help"sv \ + && #name != "line_planning_number"sv && #name != "output_file_path"sv) \ + if (options->name) { \ + if (long_) { \ + if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for schedule subcommand\n\n", progname, static_cast(long_), short_); \ + else 
fprintf(stderr, "%s: unexpected flag --%s for schedule subcommand\n\n", progname, static_cast(long_)); \ + } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for schedule subcommand\n\n", progname, short_); \ + fprintf(stderr, schedule_help, progname); \ + exit(1); \ + } + LONG_OPTIONS + SHORT_OPTIONS +#undef X + + if (options->positional.size() > 0) { + fprintf(stderr, "%s: unexpected positional argument(s) for schedule subcommand\n\n", progname); + for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); + fprintf(stderr, schedule_help, progname); + exit(1); + } + + if (!options->kv1_file_path) + options->kv1_file_path = "-"; + if (!options->output_file_path) + options->output_file_path = "-"; + if (options->kv1_file_path == ""sv) { + fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); + fprintf(stderr, schedule_help, progname); + exit(1); + } + if (options->output_file_path == ""sv) { + fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); + fprintf(stderr, schedule_help, progname); + exit(1); + } + if (!options->line_planning_number || options->line_planning_number == ""sv) { + fprintf(stderr, "%s: line planning number must be provided\n\n", progname); + fprintf(stderr, schedule_help, progname); + exit(1); + } +} + +void journeysValidateOptions(const char *progname, Options *options) { +#define X(name, argument, long_, short_) \ + if (#name != "kv1_file_path"sv && #name != "help"sv \ + && #name != "line_planning_number"sv && #name != "output_file_path"sv \ + && #name != "begin_stop_code"sv && #name != "end_stop_code"sv) \ + if (options->name) { \ + if (long_) { \ + if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeys subcommand\n\n", progname, static_cast(long_), short_); \ + else fprintf(stderr, "%s: unexpected flag --%s for journeys subcommand\n\n", progname, static_cast(long_)); \ + } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeys subcommand\n\n", progname, 
short_); \ + fprintf(stderr, journeys_help, progname); \ + exit(1); \ + } + LONG_OPTIONS + SHORT_OPTIONS +#undef X + + if (options->positional.size() > 0) { + fprintf(stderr, "%s: unexpected positional argument(s) for journeys subcommand\n\n", progname); + for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); + fprintf(stderr, journeys_help, progname); + exit(1); + } + + if (!options->kv1_file_path) + options->kv1_file_path = "-"; + if (!options->output_file_path) + options->output_file_path = "-"; + if (options->kv1_file_path == ""sv) { + fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } + if (options->output_file_path == ""sv) { + fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } + if (!options->line_planning_number || options->line_planning_number == ""sv) { + fprintf(stderr, "%s: line planning number must be provided\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } + if (!options->begin_stop_code || options->begin_stop_code == ""sv) { + fprintf(stderr, "%s: start user stop code must be provided\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } + if (!options->end_stop_code || options->end_stop_code == ""sv) { + fprintf(stderr, "%s: end user stop code must be provided\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } + if (!std::string_view(options->begin_stop_code).starts_with("star:") + && !std::string_view(options->begin_stop_code).starts_with("stop:")) { + fprintf(stderr, "%s: begin user stop code must be prefixed with star:/stop:\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } + if (!std::string_view(options->end_stop_code).starts_with("star:") + && !std::string_view(options->end_stop_code).starts_with("stop:")) { + fprintf(stderr, "%s: end user stop code must be prefixed with 
star:/stop:\n\n", progname); + fprintf(stderr, journeys_help, progname); + exit(1); + } +} + +void journeyInfoValidateOptions(const char *progname, Options *options) { +#define X(name, argument, long_, short_) \ + if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \ + && #name != "journey_number"sv && #name != "help"sv) \ + if (options->name) { \ + if (long_) { \ + if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeyinfo subcommand\n\n", progname, static_cast(long_), short_); \ + else fprintf(stderr, "%s: unexpected flag --%s for journeyinfo subcommand\n\n", progname, static_cast(long_)); \ + } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeyinfo subcommand\n\n", progname, short_); \ + fprintf(stderr, journeyinfo_help, progname); \ + exit(1); \ + } + LONG_OPTIONS + SHORT_OPTIONS +#undef X + + if (options->positional.size() > 0) { + fprintf(stderr, "%s: unexpected positional argument(s) for journeyinfo subcommand\n\n", progname); + for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); + fprintf(stderr, journeyinfo_help, progname); + exit(1); + } + + if (!options->kv1_file_path) + options->kv1_file_path = "-"; + if (options->kv1_file_path == ""sv) { + fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); + fprintf(stderr, journeyinfo_help, progname); + exit(1); + } + if (!options->journey_number || options->journey_number == ""sv) { + fprintf(stderr, "%s: journey number must be provided\n\n", progname); + fprintf(stderr, journeyinfo_help, progname); + exit(1); + } + if (!options->line_planning_number || options->line_planning_number == ""sv) { + fprintf(stderr, "%s: line planning number must be provided\n\n", progname); + fprintf(stderr, journeyinfo_help, progname); + exit(1); + } +} + +void jopaRouteValidateOptions(const char *progname, Options *options) { +#define X(name, argument, long_, short_) \ + if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \ + && 
#name != "journey_pattern_code"sv && #name != "help"sv && #name != "output_file_path"sv) \ + if (options->name) { \ + if (long_) { \ + if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for joparoute subcommand\n\n", progname, static_cast(long_), short_); \ + else fprintf(stderr, "%s: unexpected flag --%s for joparoute subcommand\n\n", progname, static_cast(long_)); \ + } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for joparoute subcommand\n\n", progname, short_); \ + fprintf(stderr, joparoute_help, progname); \ + exit(1); \ + } + LONG_OPTIONS + SHORT_OPTIONS +#undef X + + if (options->positional.size() > 0) { + fprintf(stderr, "%s: unexpected positional argument(s) for joparoute subcommand\n\n", progname); + for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); + fprintf(stderr, joparoute_help, progname); + exit(1); + } + + if (!options->kv1_file_path) + options->kv1_file_path = "-"; + if (!options->output_file_path) + options->output_file_path = "-"; + if (options->kv1_file_path == ""sv) { + fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); + fprintf(stderr, joparoute_help, progname); + exit(1); + } + if (options->output_file_path == ""sv) { + fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); + fprintf(stderr, joparoute_help, progname); + exit(1); + } + if (!options->journey_pattern_code || options->journey_pattern_code == ""sv) { + fprintf(stderr, "%s: journey pattern code must be provided\n\n", progname); + fprintf(stderr, joparoute_help, progname); + exit(1); + } + if (!options->line_planning_number || options->line_planning_number == ""sv) { + fprintf(stderr, "%s: line planning number must be provided\n\n", progname); + fprintf(stderr, joparoute_help, progname); + exit(1); + } +} + +struct ShortFlag { + int has_arg; + int c; +}; + +template +const std::string mkargarr = + (std::string() + + ... + + (flags.c == 0 + ? 
"" + : std::string((const char[]){ flags.c, '\0' }) + + (flags.has_arg == required_argument + ? ":" + : flags.has_arg == optional_argument + ? "::" + : ""))); + +#define X(name, has_arg, long_, short_) ShortFlag(has_arg, short_), +const std::string argarr = mkargarr; +#undef X + +Options parseOptions(int argc, char *argv[]) { + const char *progname = argv[0]; + + // Struct with options for augmentkv6. + Options options; + + static option long_options[] = { +#define X(name, argument, long_, short_) { long_, argument, nullptr, short_ }, + LONG_OPTIONS +#undef X + { 0 }, + }; + + int c; + int option_index = 0; + bool error = false; + while ((c = getopt_long(argc, argv, argarr.c_str(), long_options, &option_index)) != -1) { + // If a long option was used, c corresponds with val. We have val = 0 for + // options which have no short alternative, so checking for c = 0 gives us + // whether a long option with no short alternative was used. + // Below, we check for c = 'h', which corresponds with the long option + // '--help', for which val = 'h'. + if (c == 0) { + const char *name = long_options[option_index].name; +#define X(opt_name, opt_has_arg, opt_long, opt_short) \ + if (name == opt_long ## sv) { options.opt_name = optarg; continue; } + LONG_OPTIONS +#undef X + error = true; + } +#define X(opt_name, opt_has_arg, opt_long, opt_short) \ + if (c == opt_short) { options.opt_name = optarg ? 
optarg : opt_set; continue; } + LONG_OPTIONS + SHORT_OPTIONS +#undef X + error = true; + } + + if (optind < argc) + options.subcommand = argv[optind++]; + while (optind < argc) + options.positional.push_back(argv[optind++]); + + if (options.subcommand + && options.subcommand != "schedule"sv + && options.subcommand != "joparoute"sv + && options.subcommand != "journeyinfo"sv + && options.subcommand != "journeyroute"sv + && options.subcommand != "journeys"sv) { + fprintf(stderr, "%s: unknown subcommand '%s'\n\n", progname, options.subcommand); + fprintf(stderr, help, progname); + exit(1); + } + if (options.subcommand && error) { + fputc('\n', stderr); + if (options.subcommand == "joparoute"sv) fprintf(stderr, joparoute_help, progname); + if (options.subcommand == "journeyinfo"sv) fprintf(stderr, journeyinfo_help, progname); + if (options.subcommand == "journeyroute"sv) fprintf(stderr, journeyroute_help, progname); + if (options.subcommand == "journeys"sv) fprintf(stderr, journeys_help, progname); + if (options.subcommand == "schedule"sv) fprintf(stderr, schedule_help, progname); + exit(1); + } + if (error || !options.subcommand) { + if (!options.subcommand) fprintf(stderr, "%s: no subcommand provided\n", progname); + fputc('\n', stderr); + fprintf(stderr, help, progname); + exit(1); + } + if (options.help) { + if (options.subcommand == "joparoute"sv) fprintf(stderr, joparoute_help, progname); + if (options.subcommand == "journeyinfo"sv) fprintf(stderr, journeyinfo_help, progname); + if (options.subcommand == "journeyroute"sv) fprintf(stderr, journeyroute_help, progname); + if (options.subcommand == "journeys"sv) fprintf(stderr, journeys_help, progname); + if (options.subcommand == "schedule"sv) fprintf(stderr, schedule_help, progname); + exit(0); + } + + if (options.subcommand == "joparoute"sv) + jopaRouteValidateOptions(progname, &options); + if (options.subcommand == "journeyinfo"sv) + journeyInfoValidateOptions(progname, &options); + if (options.subcommand == 
"journeyroute"sv) + journeyRouteValidateOptions(progname, &options); + if (options.subcommand == "journeys"sv) + journeysValidateOptions(progname, &options); + if (options.subcommand == "schedule"sv) + scheduleValidateOptions(progname, &options); + + return options; +} diff --git a/src/querykv1/cliopts.hpp b/src/querykv1/cliopts.hpp new file mode 100644 index 0000000..df8630e --- /dev/null +++ b/src/querykv1/cliopts.hpp @@ -0,0 +1,35 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_QUERYKV1_CLIOPTS_HPP +#define OEUF_QUERYKV1_CLIOPTS_HPP + +#include + +#define LONG_OPTIONS \ +/* name req/opt/no arg long short */ + X(kv1_file_path, required_argument, "kv1", 0 ) \ + X(line_planning_number, required_argument, "line", 0 ) \ + X(journey_number, required_argument, "journey", 0 ) \ + X(journey_pattern_code, required_argument, "jopa", 0 ) \ + X(begin_stop_code, required_argument, "begin", 0 ) \ + X(end_stop_code, required_argument, "end", 0 ) \ + X(help, no_argument, "help", 'h') + +#define SHORT_OPTIONS \ + X(output_file_path, required_argument, nullptr, 'o') + +struct Options { + const char *subcommand = nullptr; + std::vector positional; +#define X(name, argument, long_, short_) const char *name = nullptr; + LONG_OPTIONS + SHORT_OPTIONS +#undef X +}; + +extern const char *opt_set; +extern const char *opt_unset; + +Options parseOptions(int argc, char *argv[]); + +#endif // OEUF_QUERYKV1_CLIOPTS_HPP diff --git a/src/querykv1/daterange.cpp b/src/querykv1/daterange.cpp new file mode 100644 index 0000000..5ce42bf --- /dev/null +++ b/src/querykv1/daterange.cpp @@ -0,0 +1,91 @@ +// vim:set sw=2 ts=2 sts et: + +#include "daterange.hpp" + +static std::chrono::year_month_day nextDay(std::chrono::year_month_day ymd) { + return std::chrono::sys_days(ymd) + std::chrono::days(1); +} + +// DateRange expresses the date range [from, thru]. 
+// Advances the iterator to the next calendar day.
+DateRange::Iterator &DateRange::Iterator::operator++() {
+  ymd_ = nextDay(ymd_);
+  return *this;
+}
+
+// Returns the date the iterator currently points at.
+std::chrono::year_month_day DateRange::Iterator::operator*() const {
+  return ymd_;
+}
+
+// Named equivalent of operator*.
+std::chrono::year_month_day DateRange::Iterator::ymd() const {
+  return ymd_;
+}
+
+DateRange::Iterator::Iterator(std::chrono::year_month_day ymd) : ymd_(ymd) {}
+
+// Constructs the inclusive range [from, thru]. Nothing is validated here; use
+// valid() to find out whether from <= thru.
+DateRange::DateRange(std::chrono::year_month_day from, std::chrono::year_month_day thru)
+  : from_(from), thru_(thru)
+{}
+
+// Iterator pointing at the first day of the range.
+DateRange::Iterator DateRange::begin() const {
+  return DateRange::Iterator(from_);
+}
+
+// The end iterator sits one day past thru, so that iterating from begin() up
+// to end() also visits thru itself.
+DateRange::Iterator DateRange::end() const {
+  return DateRange::Iterator(nextDay(thru_));
+}
+
+// A range is valid when it is not reversed, i.e. when from <= thru.
+bool DateRange::valid() const {
+  return from_ <= thru_;
+}
+
+std::chrono::year_month_day DateRange::from() const {
+  return from_;
+}
+
+std::chrono::year_month_day DateRange::thru() const {
+  return thru_;
+}
+
+// Two iterators compare equal when they point at the same date.
+bool operator==(const DateRange::Iterator a, const DateRange::Iterator b) {
+  return *a == *b;
+}
+
+// Builds a sequence from a braced list of ranges by delegating to the
+// iterator-pair constructor.
+DateRangeSeq::DateRangeSeq(std::initializer_list<DateRange> ranges)
+  : DateRangeSeq(ranges.begin(), ranges.end())
+{}
+
+// Returns a copy of this sequence without any date before `from`: ranges that
+// end before `from` are dropped and ranges that straddle `from` are shortened
+// so that they start at `from`.
+DateRangeSeq DateRangeSeq::clampFrom(std::chrono::year_month_day from) const {
+  std::vector<DateRange> new_ranges;
+  new_ranges.reserve(ranges_.size());
+  for (const DateRange &range : ranges_) {
+    if (range.from() < from) {
+      if (range.thru() < from)
+        continue;
+      new_ranges.emplace_back(from, range.thru());
+    } else {
+      // Keep the range itself only when it needed no clamping. The
+      // iterator-pair constructor merges overlapping ranges, so keeping it
+      // next to its clamped copy would undo the clamp.
+      new_ranges.push_back(range);
+    }
+  }
+  return DateRangeSeq(new_ranges.begin(), new_ranges.end());
+}
+
+// Returns a copy of this sequence without any date after `thru`: ranges that
+// start after `thru` are dropped and ranges that straddle `thru` are shortened
+// so that they end at `thru`.
+DateRangeSeq DateRangeSeq::clampThru(std::chrono::year_month_day thru) const {
+  std::vector<DateRange> new_ranges;
+  new_ranges.reserve(ranges_.size());
+  for (const DateRange &range : ranges_) {
+    if (range.thru() > thru) {
+      if (range.from() > thru)
+        continue;
+      new_ranges.emplace_back(range.from(), thru);
+    } else {
+      // See clampFrom: the unclamped range must not be kept as well.
+      new_ranges.push_back(range);
+    }
+  }
+  return DateRangeSeq(new_ranges.begin(), new_ranges.end());
+}
+
+std::vector<DateRange>::const_iterator DateRangeSeq::begin() const {
+  return ranges_.begin();
+}
+
+std::vector<DateRange>::const_iterator DateRangeSeq::end() const {
+  return ranges_.end();
+}
diff --git a/src/querykv1/daterange.hpp b/src/querykv1/daterange.hpp
new file mode 100644
index 0000000..e34c39c
--- /dev/null
+++ b/src/querykv1/daterange.hpp
@@ -0,0 +1,118 @@
+// vim:set sw=2 ts=2 sts et:
+
+#ifndef OEUF_QUERYKV1_DATERANGE_HPP
+#define OEUF_QUERYKV1_DATERANGE_HPP
+
+#include <algorithm>
+#include <cassert>
+#include <chrono>
+#include <concepts>
+#include <utility>
+#include <vector>
+
+// DateRange expresses the date range [from, thru].
+class DateRange {
+ public:
+  // Iterates over the days of a DateRange, one day per increment.
+  class Iterator {
+    friend class DateRange;
+
+   public:
+    Iterator &operator++();
+
+    std::chrono::year_month_day operator*() const;
+    std::chrono::year_month_day ymd() const;
+
+   private:
+    explicit Iterator(std::chrono::year_month_day ymd);
+
+    std::chrono::year_month_day ymd_;
+  };
+
+  explicit DateRange(std::chrono::year_month_day from, std::chrono::year_month_day thru);
+
+  Iterator begin() const;
+  Iterator end() const;
+  // Whether from <= thru; DateRangeSeq skips ranges for which this is false.
+  bool valid() const;
+  std::chrono::year_month_day from() const;
+  std::chrono::year_month_day thru() const;
+
+ private:
+  std::chrono::year_month_day from_;
+  std::chrono::year_month_day thru_;
+};
+
+bool operator==(const DateRange::Iterator a, const DateRange::Iterator b);
+
+// Satisfied when dereferencing a Tp yields something convertible to T.
+template<typename Tp, typename T>
+concept DerefsTo = requires(Tp p) {
+  { *p } -> std::convertible_to<T>;
+};
+
+// DateRangeSeq holds a sorted sequence of non-overlapping date ranges.
+class DateRangeSeq {
+  // The way LE and GE are ordered makes a difference for how the sorting
+  // (insertion based on lower_bound) works. Do not carelessly reorder this.
+  enum LeGe {
+    GE, // >=
+    LE, // <=
+  };
+
+  std::vector<DateRange> ranges_;
+
+ public:
+  // Builds the sequence as the union of all valid ranges in [begin, end).
+  template<typename InputIt>
+  requires DerefsTo<InputIt, DateRange>
+  explicit DateRangeSeq(InputIt begin, InputIt end) {
+    // We convert every inclusive date range [x, y] into (x, >=) and (y, <=)
+    // and put these into a list, using binary search to make sure that these
+    // stay ordered. Because GE sorts before LE on the same date, a range that
+    // starts on the day another one ends is treated as overlapping it.
+    std::vector<std::pair<std::chrono::year_month_day, LeGe>> preds;
+
+    for (auto it = begin; it != end; ++it) {
+      const auto &range = *it;
+      if (!range.valid()) continue;
+
+      auto a = std::make_pair(range.from(), GE);
+      auto b = std::make_pair(range.thru(), LE);
+      preds.insert(std::lower_bound(preds.begin(), preds.end(), a), a);
+      preds.insert(std::lower_bound(preds.begin(), preds.end(), b), b);
+    }
+
+    // Walk the predicates while counting how many input ranges are open. An
+    // output range starts when the count leaves zero and ends when it drops
+    // back to zero. Looking only at neighbouring predicates is not enough:
+    // for [1, 10], [2, 3], [5, 6] that would end the first output range at 3
+    // even though [1, 10] is still open.
+    size_t open = 0;
+    std::chrono::year_month_day begin_ymd;
+    for (const auto &[ymd, kind] : preds) {
+      if (kind == GE) {
+        if (open == 0)
+          begin_ymd = ymd;
+        open++;
+      } else {
+        assert(open > 0);
+        open--;
+        if (open == 0)
+          ranges_.push_back(DateRange(begin_ymd, ymd));
+      }
+    }
+    assert(open == 0);
+  }
+
+  explicit DateRangeSeq(std::initializer_list<DateRange> ranges);
+
+  DateRangeSeq clampFrom(std::chrono::year_month_day from) const;
+  DateRangeSeq clampThru(std::chrono::year_month_day thru) const;
+
+ public:
+  std::vector<DateRange>::const_iterator begin() const;
+  std::vector<DateRange>::const_iterator end() const;
+};
+
+#endif // OEUF_QUERYKV1_DATERANGE_HPP
diff --git a/src/querykv1/grammar.abnf b/src/querykv1/grammar.abnf
new file mode 100644
index 0000000..1c93760
--- /dev/null
+++ b/src/querykv1/grammar.abnf
@@ -0,0 +1,44 @@
+; This grammar does *not* allow fields to contain LF, unless the entire content
+; of the field is quoted. The file is simply rejected otherwise.
+; I took the liberty to take some inspiration from the somewhat similar IETF RFC 4180.
+ +document = [header NEWLINE] (comment / record / empty-line) *(NEWLINE (comment / record / empty-line)) [NEWLINE] / header + +header = OPENBRACK *NOTCRLF +comment = SEMICOLON *NOTCRLF + +empty-line = *WHITESPACE + +record = field *(PIPE field) +field = *WHITESPACE field-data *WHITESPACE +field-data = escaped / unescaped + +; Unescaped fields are also allowed to contain double quotes, +; they are just not interpreted in any special way. +escaped = DQUOTE *(TEXTDATA / WHITESPACE / NEWLINE / PIPE / 2DQUOTE) DQUOTE +unescaped = [TEXTDATA *(*WHITESPACE (TEXTDATA / DQUOTE))] + +HTAB = %x09 ; +LF = %x0A ; +VTAB = %x0B ; +FF = %x0C ;
+CR = %x0D ; +SPACE = %x20 ; +DQUOTE = %x22 ; " +SEMICOLON = %x3B ; ; +OPENBRACK = %x5B ; [ +PIPE = %x7C ; | + +; All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE +; Semicolon is included, as comments are only defined as 'lines starting with a semicolon'. +; So it should be fine if a semicolon is part of a field, the rest of the line would not +; be interpreted as a comment in that case. +TEXTDATA = %x00-08 / %x0E-1F / %x21 / %x23-5A / %x5C-7B / %x7D-10FFFF + +; Not including LF here even though TMI8/KV1 does not officially consider it +; a newline, as newlines are defined as 'CR optionally followed by LF' +WHITESPACE = SPACE / FF / HTAB / VTAB + +; All codepoints excluding CR and LF +NOTCRLF = %x00-09 / %x0B-0C / %x0E-10FFFF +NEWLINE = CR [LF] diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf new file mode 100644 index 0000000..94f8cde --- /dev/null +++ b/src/querykv1/grammar.ebnf @@ -0,0 +1,47 @@ +/* This grammar does allow fields to contain stray LFs, not after any specific + * CR. I took the liberty to take some inspiration from the somewhat similar + * IETF RFC 4180. + */ +document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header + +header ::= OPENBRACK NOTCR* +comment ::= SEMICOLON NOTCR* + +empty-line ::= WHITESPACE* + +record ::= field (PIPE field)* +field ::= WHITESPACE* field-data WHITESPACE* +field-data ::= DQUOTE escaped DQUOTE | unescaped + +/* Unescaped fields are also allowed to contain double quotes, they are just + * not interpreted in any special way. + */ +escaped ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* +unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)? 
+ +HTAB ::= #x09 /* */ +LF ::= #x0A /* */ +VTAB ::= #x0B /* */ +FF ::= #x0C /* */ +CR ::= #x0D /* */ +SPACE ::= #x20 /* */ +DQUOTE ::= #x22 /* " */ +SEMICOLON ::= #x3B /* ; */ +OPENBRACK ::= #x5B /* [ */ +PIPE ::= #x7C /* | */ + +/* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE. + * Semicolon is included, as comments are only defined as 'lines starting with + * a semicolon'. So it should be fine if a semicolon is part of a field, the + * rest of the line would not be interpreted as a comment in that case. + */ +TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF] + +/* Including LF here as TMI8/KV1 does not consider it a newline, + * as newlines are defined as 'CR optionally followed by LF' + */ +WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB + +/* All codepoints excluding CR */ +NOTCR ::= [#x00-#x0C#x0E-#x10FFFF] +NEWLINE ::= CR LF? diff --git a/src/querykv1/grammar.ebnf.bak b/src/querykv1/grammar.ebnf.bak new file mode 100644 index 0000000..b5acbf5 --- /dev/null +++ b/src/querykv1/grammar.ebnf.bak @@ -0,0 +1,23 @@ +document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header +header ::= OPENBRACK NOTCRLF* +comment ::= SEMICOLON NOTCRLF* +empty-line ::= WHITESPACE* +record ::= field (PIPE field)* +field ::= WHITESPACE* field-data WHITESPACE* +field-data ::= escaped | unescaped +escaped ::= DQUOTE (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* DQUOTE +unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)? +HTAB ::= #x09 +LF ::= #x0A +VTAB ::= #x0B +FF ::= #x0C +CR ::= #x0D +SPACE ::= #x20 +DQUOTE ::= #x22 +SEMICOLON ::= #x3B +OPENBRACK ::= #x5B +PIPE ::= #x7C +WHITESPACE ::= SPACE | FF | HTAB | VTAB +NOTCRLF ::= [#x00-#x09#x0B-#x0C#x0E-#x10FFFF] +TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF] +NEWLINE ::= CR LF? 
diff --git a/src/querykv1/joparoute.cpp b/src/querykv1/joparoute.cpp new file mode 100644 index 0000000..94ed359 --- /dev/null +++ b/src/querykv1/joparoute.cpp @@ -0,0 +1,102 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include + +#include "joparoute.hpp" + +using namespace std::string_view_literals; + +void jopaRoute(const Options &options, Kv1Records &records, Kv1Index &index) { + FILE *out = stdout; + if (options.output_file_path != "-"sv) + out = fopen(options.output_file_path, "wb"); + if (!out) { + fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); + exit(EXIT_FAILURE); + } + + const std::string data_owner_code = "CXX"; + Kv1JourneyPattern::Key jopa_key( + // Of course it is bad to hardcode this, but we really have no time to make + // everything nice and dynamic. We're only working with CXX data anyway, + // and provide no support for the 'Schedules and Passing Times' KV1 + // variant. + data_owner_code, + options.line_planning_number, + options.journey_pattern_code); + + const Kv1JourneyPattern *jopa = index.journey_patterns[jopa_key]; + if (!jopa) { + std::cerr << "Journey pattern not found" << std::endl; + return; + } + const Kv1Line *line = jopa->p_line; + + struct Point { + bool is_stop = false; + const Kv1JourneyPatternTimingLink *jopatili = nullptr; + const Kv1Link *link = nullptr; + const Kv1Point *point = nullptr; + double distance_since_start_of_link = 0; + double distance_since_start_of_journey = 0; + }; + std::vector points; + + for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { + const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; + if (jopatili->key.line_planning_number == jopa->key.line_planning_number + && jopatili->key.journey_pattern_code == jopa->key.journey_pattern_code) { + const Kv1Link::Key link_key(data_owner_code, jopatili->user_stop_code_begin, + jopatili->user_stop_code_end, line->transport_type); + const Kv1Link *link = 
index.links[link_key]; + const Kv1UserStopPoint::Key link_begin_key(data_owner_code, jopatili->user_stop_code_begin); + const Kv1UserStopPoint::Key link_end_key(data_owner_code, jopatili->user_stop_code_end); + const Kv1UserStopPoint *link_begin = index.user_stop_points[link_begin_key]; + const Kv1UserStopPoint *link_end = index.user_stop_points[link_end_key]; + + points.emplace_back(true, jopatili, link, link_begin->p_point, 0); + + for (size_t j = 0; j < records.point_on_links.size(); j++) { + Kv1PointOnLink *pool = &records.point_on_links[j]; + if (pool->key.user_stop_code_begin == jopatili->user_stop_code_begin + && pool->key.user_stop_code_end == jopatili->user_stop_code_end + && pool->key.transport_type == jopatili->p_line->transport_type) { + points.emplace_back(false, jopatili, link, pool->p_point, pool->distance_since_start_of_link); + } + } + + points.emplace_back(true, jopatili, link, link_end->p_point, link->distance); + } + } + + std::sort(points.begin(), points.end(), [](Point &a, Point &b) { + if (a.jopatili->key.timing_link_order != b.jopatili->key.timing_link_order) + return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order; + return a.distance_since_start_of_link < b.distance_since_start_of_link; + }); + + double distance_since_start_of_journey = 0; + for (size_t i = 0; i < points.size(); i++) { + Point *p = &points[i]; + if (i > 0) { + Point *prev = &points[i - 1]; + if (p->link != prev->link) { + distance_since_start_of_journey += prev->link->distance; + } + } + p->distance_since_start_of_journey = distance_since_start_of_journey + p->distance_since_start_of_link; + } + + fputs("is_stop,link_usrstop_begin,link_usrstop_end,point_code,rd_x,rd_y,distance_since_start_of_link,distance_since_start_of_journey\n", out); + for (const auto &point : points) { + fprintf(out, "%s,%s,%s,%s,%f,%f,%f,%f\n", + point.is_stop ? 
"true" : "false", + point.jopatili->user_stop_code_begin.c_str(), point.jopatili->user_stop_code_end.c_str(), + point.point->key.point_code.c_str(), point.point->location_x_ew, point.point->location_y_ns, + point.distance_since_start_of_link, point.distance_since_start_of_journey); + } + + if (options.output_file_path != "-"sv) fclose(out); +} diff --git a/src/querykv1/joparoute.hpp b/src/querykv1/joparoute.hpp new file mode 100644 index 0000000..ade94e8 --- /dev/null +++ b/src/querykv1/joparoute.hpp @@ -0,0 +1,13 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_QUERYKV1_JOPAROUTE_HPP +#define OEUF_QUERYKV1_JOPAROUTE_HPP + +#include +#include + +#include "cliopts.hpp" + +void jopaRoute(const Options &options, Kv1Records &records, Kv1Index &index); + +#endif // OEUF_QUERYKV1_JOPAROUTE_HPP diff --git a/src/querykv1/journeyinfo.cpp b/src/querykv1/journeyinfo.cpp new file mode 100644 index 0000000..bd29490 --- /dev/null +++ b/src/querykv1/journeyinfo.cpp @@ -0,0 +1,64 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +#include "journeyinfo.hpp" + +void journeyInfo(const Options &options, Kv1Records &records, Kv1Index &index) { + std::cout << "Info for journey " << options.line_planning_number + << "/" << options.journey_number << std::endl; + + std::unordered_map usrstops; + for (size_t i = 0; i < records.user_stop_points.size(); i++) { + const Kv1UserStopPoint *usrstop = &records.user_stop_points[i]; + usrstops[usrstop->key.user_stop_code] = usrstop; + } + + for (const auto &pujo : records.public_journeys) { + if (pujo.key.line_planning_number != options.line_planning_number + || std::to_string(pujo.key.journey_number) != options.journey_number) + continue; + + std::vector timing_links; + for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { + const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; + if (jopatili->key.line_planning_number != options.line_planning_number + || jopatili->key.journey_pattern_code != 
pujo.journey_pattern_code) + continue; + timing_links.push_back(jopatili); + } + + std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) -> bool { + return a->key.timing_link_order < b->key.timing_link_order; + }); + auto begin_stop = timing_links.front()->user_stop_code_begin; + auto end_stop = timing_links.back()->user_stop_code_end; + + const auto *begin = usrstops[begin_stop]; + const auto *end = usrstops[end_stop]; + + std::cout << " Journey pattern: " << pujo.key.line_planning_number + << "/" << pujo.journey_pattern_code << std::endl + << " Begin stop: " << begin_stop + << "; name: " << std::quoted(begin->name) + << "; town: " << std::quoted(begin->town) << std::endl + << " End stop: " << end_stop + << "; name: " << std::quoted(end->name) + << "; town: " << std::quoted(end->town) << std::endl; + + const auto *begin_star = begin->p_user_stop_area; + const auto *end_star = end->p_user_stop_area; + if (begin_star) + std::cout << " Begin stop area: " << begin_star->key.user_stop_area_code + << "; name: " << std::quoted(begin_star->name) + << ", town: " << std::quoted(begin_star->town) + << std::endl; + if (end_star) + std::cout << " End stop area: " << end_star->key.user_stop_area_code + << "; name: " << std::quoted(end_star->name) + << ", town: " << std::quoted(end_star->town) + << std::endl; + + break; + } +} diff --git a/src/querykv1/journeyinfo.hpp b/src/querykv1/journeyinfo.hpp new file mode 100644 index 0000000..2a2118d --- /dev/null +++ b/src/querykv1/journeyinfo.hpp @@ -0,0 +1,13 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_QUERYKV1_JOURNEYINFO_HPP +#define OEUF_QUERYKV1_JOURNEYINFO_HPP + +#include +#include + +#include "cliopts.hpp" + +void journeyInfo(const Options &options, Kv1Records &records, Kv1Index &index); + +#endif // OEUF_QUERYKV1_JOURNEYINFO_HPP diff --git a/src/querykv1/journeyroute.cpp b/src/querykv1/journeyroute.cpp new file mode 100644 index 0000000..013ea1c --- /dev/null +++ b/src/querykv1/journeyroute.cpp @@ -0,0 
+1,96 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include + +#include "journeyroute.hpp" + +using namespace std::string_view_literals; + +void journeyRoute(const Options &options, Kv1Records &records, Kv1Index &index) { + FILE *out = stdout; + if (options.output_file_path != "-"sv) + out = fopen(options.output_file_path, "wb"); + if (!out) { + fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); + exit(EXIT_FAILURE); + } + + for (auto &pujo : records.public_journeys) { + if (pujo.key.line_planning_number == options.line_planning_number && std::to_string(pujo.key.journey_number) == options.journey_number) { + fprintf(stderr, "Got PUJO %s/%s:\n", options.line_planning_number, options.journey_number); + fprintf(stderr, " Day type: %s\n", pujo.key.day_type.c_str()); + auto &pegr = *pujo.p_period_group; + fprintf(stderr, " PEGR Code: %s\n", pegr.key.period_group_code.c_str()); + fprintf(stderr, " PEGR Description: %s\n", pegr.description.c_str()); + fprintf(stderr, " SPECDAY Code: %s\n", pujo.key.specific_day_code.c_str()); + auto &timdemgrp = *pujo.p_time_demand_group; + + for (auto &pegrval : records.period_group_validities) { + if (pegrval.key.period_group_code == pegr.key.period_group_code) { + fprintf(stderr, "Got PEGRVAL for PEGR %s\n", pegr.key.period_group_code.c_str()); + std::cerr << " Valid from: " << pegrval.key.valid_from << std::endl; + std::cerr << " Valid thru: " << pegrval.valid_thru << std::endl; + } + } + + struct Point { + Kv1JourneyPatternTimingLink *jopatili = nullptr; + Kv1TimeDemandGroupRunTime *timdemrnt = nullptr; + double distance_since_start_of_link = 0; + double rd_x = 0; + double rd_y = 0; + double total_time_s = 0; + }; + std::vector points; + + for (size_t i = 0; i < records.time_demand_group_run_times.size(); i++) { + Kv1TimeDemandGroupRunTime *timdemrnt = &records.time_demand_group_run_times[i]; + if (timdemrnt->key.line_planning_number == timdemgrp.key.line_planning_number + && 
timdemrnt->key.journey_pattern_code == timdemgrp.key.journey_pattern_code + && timdemrnt->key.time_demand_group_code == timdemgrp.key.time_demand_group_code) { + Kv1JourneyPatternTimingLink *jopatili = timdemrnt->p_journey_pattern_timing_link; + for (auto &pool : records.point_on_links) { + if (pool.key.user_stop_code_begin == timdemrnt->user_stop_code_begin + && pool.key.user_stop_code_end == timdemrnt->user_stop_code_end + && pool.key.transport_type == jopatili->p_line->transport_type) { + points.emplace_back( + jopatili, + timdemrnt, + pool.distance_since_start_of_link, + pool.p_point->location_x_ew, + pool.p_point->location_y_ns + ); + } + } + } + } + + std::sort(points.begin(), points.end(), [](Point &a, Point &b) { + if (a.jopatili->key.timing_link_order != b.jopatili->key.timing_link_order) + return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order; + return a.distance_since_start_of_link < b.distance_since_start_of_link; + }); + + double total_time_s = 0; + for (size_t i = 0; i < points.size(); i++) { + Point *p = &points[i]; + p->total_time_s = total_time_s; + if (i > 0) { + Point *prev = &points[i - 1]; + if (p->timdemrnt != prev->timdemrnt) { + total_time_s += prev->timdemrnt->total_drive_time_s; + prev->total_time_s = total_time_s; + } + } + } + + fputs("rd_x,rd_y,total_time_s,is_timing_stop\n", out); + for (const auto &point : points) { + fprintf(out, "%f,%f,%f,%d\n", point.rd_x, point.rd_y, point.total_time_s, point.jopatili->is_timing_stop); + } + } + } + + if (options.output_file_path != "-"sv) fclose(out); +} diff --git a/src/querykv1/journeyroute.hpp b/src/querykv1/journeyroute.hpp new file mode 100644 index 0000000..ccd996c --- /dev/null +++ b/src/querykv1/journeyroute.hpp @@ -0,0 +1,13 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_QUERYKV1_JOURNEYROUTE_HPP +#define OEUF_QUERYKV1_JOURNEYROUTE_HPP + +#include +#include + +#include "cliopts.hpp" + +void journeyRoute(const Options &options, Kv1Records &records, Kv1Index 
&index); + +#endif // OEUF_QUERYKV1_JOURNEYROUTE_HPP diff --git a/src/querykv1/journeys.cpp b/src/querykv1/journeys.cpp new file mode 100644 index 0000000..96566b2 --- /dev/null +++ b/src/querykv1/journeys.cpp @@ -0,0 +1,95 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include + +#include "journeys.hpp" + +using namespace std::string_view_literals; + +void journeys(const Options &options, Kv1Records &records, Kv1Index &index) { + const std::string_view want_begin_stop_code(options.begin_stop_code); + const std::string_view want_end_stop_code(options.end_stop_code); + + FILE *out = stdout; + if (options.output_file_path != "-"sv) + out = fopen(options.output_file_path, "wb"); + if (!out) { + fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); + exit(EXIT_FAILURE); + } + + std::cerr << "Generating journeys for " << options.line_planning_number << ", going from stop " + << options.begin_stop_code << " to " << options.end_stop_code << std::endl; + + std::unordered_map usrstops; + for (size_t i = 0; i < records.user_stop_points.size(); i++) { + const Kv1UserStopPoint *usrstop = &records.user_stop_points[i]; + usrstops[usrstop->key.user_stop_code] = usrstop; + } + + std::unordered_set journey_pattern_codes; + for (const auto &jopa : records.journey_patterns) { + if (jopa.key.line_planning_number != options.line_planning_number) + continue; + journey_pattern_codes.insert(jopa.key.journey_pattern_code); + } + + std::unordered_map> jopatilis; + for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { + const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; + if (jopatili->key.line_planning_number != options.line_planning_number + || !journey_pattern_codes.contains(jopatili->key.journey_pattern_code)) + continue; + jopatilis[jopatili->key.journey_pattern_code].push_back(jopatili); + } + + std::unordered_set valid_jopas; + for (auto &[journey_pattern_code, timing_links] : 
jopatilis) { + std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) -> bool { + return a->key.timing_link_order < b->key.timing_link_order; + }); + auto begin_stop = timing_links.front()->user_stop_code_begin; + auto end_stop = timing_links.back()->user_stop_code_end; + + const auto *begin = usrstops[begin_stop]; + const auto *end = usrstops[end_stop]; + + bool begin_stop_ok = false; + if (want_begin_stop_code.starts_with("stop:")) + begin_stop_ok = want_begin_stop_code.substr(5) == begin_stop; + else if (want_begin_stop_code.starts_with("star:")) + begin_stop_ok = want_begin_stop_code.substr(5) == begin->user_stop_area_code; + + bool end_stop_ok = false; + if (want_end_stop_code.starts_with("stop:")) + end_stop_ok = want_end_stop_code.substr(5) == end_stop; + else if (want_end_stop_code.starts_with("star:")) + end_stop_ok = want_end_stop_code.substr(5) == end->user_stop_area_code; + + if (begin_stop_ok && end_stop_ok) { + valid_jopas.insert(journey_pattern_code); + } + } + + std::map> valid_journeys; + for (const auto &pujo : records.public_journeys) { + if (pujo.key.line_planning_number == options.line_planning_number + && valid_jopas.contains(pujo.journey_pattern_code)) { + valid_journeys[pujo.key.journey_number] = { + pujo.time_demand_group_code, + pujo.journey_pattern_code, + }; + } + } + + fputs("journey_number,time_demand_group_code,journey_pattern_code\n", out); + for (const auto &[journey_number, timdemgrp_jopa] : valid_journeys) { + const auto &[time_demand_group_code, journey_pattern_code] = timdemgrp_jopa; + fprintf(out, "%d,%s,%s\n", journey_number, time_demand_group_code.c_str(), journey_pattern_code.c_str()); + } + + if (options.output_file_path != "-"sv) fclose(out); +} diff --git a/src/querykv1/journeys.hpp b/src/querykv1/journeys.hpp new file mode 100644 index 0000000..cf615c7 --- /dev/null +++ b/src/querykv1/journeys.hpp @@ -0,0 +1,13 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_QUERYKV1_JOURNEYS_HPP +#define 
// Reads the whole KV1 file at `path` into memory and returns its bytes.
// A path of "-" reads from standard input instead. Progress and errors are
// reported on stderr; the process exits with status 1 if the file cannot be
// opened or a read error occurs.
std::string readKv1(const char *path) {
  const bool from_stdin = std::string_view(path) == "-";

  FILE *in = stdin;
  if (from_stdin)
    fputs("Reading KV1 from standard input\n", stderr);
  else
    in = fopen(path, "rb");
  if (!in) {
    fprintf(stderr, "Open %s: %s\n", path, strerrordesc_np(errno));
    exit(1);
  }

  char buf[4096];
  std::string data;
  // fread only returns 0 at end-of-file or on error; ferror() below tells the
  // two apart.
  for (size_t got; (got = fread(buf, sizeof(char), sizeof(buf), in)) > 0;)
    data.append(buf, got);

  if (ferror(in)) {
    if (from_stdin)
      fputs("Error when reading from stdin\n", stderr);
    else
      fprintf(stderr, "Error reading from file \"%s\"\n", path);
    exit(1);
  }
  // %zu is the conversion for size_t; %lu only happens to work where size_t
  // is unsigned long.
  fprintf(stderr, "Read %zu bytes\n", data.size());

  if (!from_stdin)
    fclose(in);

  return data;
}
+ fprintf(stderr, "- %s\n", error.c_str()); + exit(1); + } + + fprintf(stderr, "Got %lu tokens\n", lexer.tokens.size()); + fprintf(stderr, "Duration: %f s\n", elapsed.count()); + fprintf(stderr, "Speed: %f MB/s\n", speed); + + return std::move(lexer.tokens); +} + +bool parse(const char *path, Kv1Records &into) { + std::vector tokens = lex(path); + + Kv1Parser parser(tokens, into); + parser.parse(); + + bool ok = true; + if (!parser.gerrors.empty()) { + ok = false; + fputs("Parser reported errors:\n", stderr); + for (const auto &error : parser.gerrors) + fprintf(stderr, "- %s\n", error.c_str()); + } + if (!parser.warns.empty()) { + fputs("Parser reported warnings:\n", stderr); + for (const auto &warn : parser.warns) + fprintf(stderr, "- %s\n", warn.c_str()); + } + + fprintf(stderr, "Parsed %lu records\n", into.size()); + + return ok; +} + +void printParsedRecords(const Kv1Records &records) { + fputs("Parsed records:\n", stderr); + fprintf(stderr, " organizational_units: %lu\n", records.organizational_units.size()); + fprintf(stderr, " higher_organizational_units: %lu\n", records.higher_organizational_units.size()); + fprintf(stderr, " user_stop_points: %lu\n", records.user_stop_points.size()); + fprintf(stderr, " user_stop_areas: %lu\n", records.user_stop_areas.size()); + fprintf(stderr, " timing_links: %lu\n", records.timing_links.size()); + fprintf(stderr, " links: %lu\n", records.links.size()); + fprintf(stderr, " lines: %lu\n", records.lines.size()); + fprintf(stderr, " destinations: %lu\n", records.destinations.size()); + fprintf(stderr, " journey_patterns: %lu\n", records.journey_patterns.size()); + fprintf(stderr, " concession_financer_relations: %lu\n", records.concession_financer_relations.size()); + fprintf(stderr, " concession_areas: %lu\n", records.concession_areas.size()); + fprintf(stderr, " financers: %lu\n", records.financers.size()); + fprintf(stderr, " journey_pattern_timing_links: %lu\n", records.journey_pattern_timing_links.size()); + 
fprintf(stderr, " points: %lu\n", records.points.size()); + fprintf(stderr, " point_on_links: %lu\n", records.point_on_links.size()); + fprintf(stderr, " icons: %lu\n", records.icons.size()); + fprintf(stderr, " notices: %lu\n", records.notices.size()); + fprintf(stderr, " notice_assignments: %lu\n", records.notice_assignments.size()); + fprintf(stderr, " time_demand_groups: %lu\n", records.time_demand_groups.size()); + fprintf(stderr, " time_demand_group_run_times: %lu\n", records.time_demand_group_run_times.size()); + fprintf(stderr, " period_groups: %lu\n", records.period_groups.size()); + fprintf(stderr, " specific_days: %lu\n", records.specific_days.size()); + fprintf(stderr, " timetable_versions: %lu\n", records.timetable_versions.size()); + fprintf(stderr, " public_journeys: %lu\n", records.public_journeys.size()); + fprintf(stderr, " period_group_validities: %lu\n", records.period_group_validities.size()); + fprintf(stderr, " exceptional_operating_days: %lu\n", records.exceptional_operating_days.size()); + fprintf(stderr, " schedule_versions: %lu\n", records.schedule_versions.size()); + fprintf(stderr, " public_journey_passing_times: %lu\n", records.public_journey_passing_times.size()); + fprintf(stderr, " operating_days: %lu\n", records.operating_days.size()); +} + +void printIndexSize(const Kv1Index &index) { + fputs("Index size:\n", stderr); + fprintf(stderr, " organizational_units: %lu\n", index.organizational_units.size()); + fprintf(stderr, " user_stop_points: %lu\n", index.user_stop_points.size()); + fprintf(stderr, " user_stop_areas: %lu\n", index.user_stop_areas.size()); + fprintf(stderr, " timing_links: %lu\n", index.timing_links.size()); + fprintf(stderr, " links: %lu\n", index.links.size()); + fprintf(stderr, " lines: %lu\n", index.lines.size()); + fprintf(stderr, " destinations: %lu\n", index.destinations.size()); + fprintf(stderr, " journey_patterns: %lu\n", index.journey_patterns.size()); + fprintf(stderr, " concession_financer_relations: 
%lu\n", index.concession_financer_relations.size()); + fprintf(stderr, " concession_areas: %lu\n", index.concession_areas.size()); + fprintf(stderr, " financers: %lu\n", index.financers.size()); + fprintf(stderr, " journey_pattern_timing_links: %lu\n", index.journey_pattern_timing_links.size()); + fprintf(stderr, " points: %lu\n", index.points.size()); + fprintf(stderr, " point_on_links: %lu\n", index.point_on_links.size()); + fprintf(stderr, " icons: %lu\n", index.icons.size()); + fprintf(stderr, " notices: %lu\n", index.notices.size()); + fprintf(stderr, " time_demand_groups: %lu\n", index.time_demand_groups.size()); + fprintf(stderr, " time_demand_group_run_times: %lu\n", index.time_demand_group_run_times.size()); + fprintf(stderr, " period_groups: %lu\n", index.period_groups.size()); + fprintf(stderr, " specific_days: %lu\n", index.specific_days.size()); + fprintf(stderr, " timetable_versions: %lu\n", index.timetable_versions.size()); + fprintf(stderr, " public_journeys: %lu\n", index.public_journeys.size()); + fprintf(stderr, " period_group_validities: %lu\n", index.period_group_validities.size()); + fprintf(stderr, " exceptional_operating_days: %lu\n", index.exceptional_operating_days.size()); + fprintf(stderr, " schedule_versions: %lu\n", index.schedule_versions.size()); + fprintf(stderr, " public_journey_passing_times: %lu\n", index.public_journey_passing_times.size()); + fprintf(stderr, " operating_days: %lu\n", index.operating_days.size()); +} + +int main(int argc, char *argv[]) { + Options options = parseOptions(argc, argv); + + Kv1Records records; + if (!parse(options.kv1_file_path, records)) { + fputs("Error parsing records, exiting\n", stderr); + return EXIT_FAILURE; + } + printParsedRecords(records); + fputs("Indexing...\n", stderr); + Kv1Index index(&records); + fprintf(stderr, "Indexed %lu records\n", index.size()); + // Only notice assignments are not indexed. 
If this equality is not valid, + // then this means that we had duplicate keys or that something else went + // wrong. That would really not be great. + assert(index.size() == records.size() - records.notice_assignments.size()); + printIndexSize(index); + fputs("Linking records...\n", stderr); + kv1LinkRecords(index); + fputs("Done linking\n", stderr); + + if (options.subcommand == "joparoute"sv) jopaRoute(options, records, index); + if (options.subcommand == "journeyroute"sv) journeyRoute(options, records, index); + if (options.subcommand == "journeys"sv) journeys(options, records, index); + if (options.subcommand == "journeyinfo"sv) journeyInfo(options, records, index); + if (options.subcommand == "schedule"sv) schedule(options, records, index); +} diff --git a/src/querykv1/schedule.cpp b/src/querykv1/schedule.cpp new file mode 100644 index 0000000..2bcfe0a --- /dev/null +++ b/src/querykv1/schedule.cpp @@ -0,0 +1,63 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include + +#include "daterange.hpp" +#include "schedule.hpp" + +using namespace std::string_view_literals; + +void schedule(const Options &options, Kv1Records &records, Kv1Index &index) { + FILE *out = stdout; + if (options.output_file_path != "-"sv) + out = fopen(options.output_file_path, "wb"); + if (!out) { + fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); + exit(EXIT_FAILURE); + } + + std::cerr << "Generating schedule for " << options.line_planning_number << std::endl; + + std::unordered_multimap period_group_validities; + for (const auto &pegr : records.period_group_validities) + period_group_validities.insert({ pegr.key.period_group_code, pegr }); + std::unordered_multimap public_journeys; + for (const auto &pujo : records.public_journeys) + public_journeys.insert({ pujo.key.timetable_version_code, pujo }); + + std::cout << "line_planning_number,journey_number,date,departure_time" << std::endl; + for (const auto &tive : 
records.timetable_versions) { + std::vector tive_pegrval_ranges; + + auto pegrval_range = period_group_validities.equal_range(tive.key.period_group_code); + for (auto it = pegrval_range.first; it != pegrval_range.second; it++) { + const auto &[_, pegrval] = *it; + tive_pegrval_ranges.emplace_back(pegrval.key.valid_from, pegrval.valid_thru); + } + + DateRangeSeq seq(tive_pegrval_ranges.begin(), tive_pegrval_ranges.end()); + seq = seq.clampFrom(tive.valid_from); + if (tive.valid_thru) + seq = seq.clampThru(*tive.valid_thru); + + for (const auto &range : seq) for (auto date : range) { + auto weekday = std::chrono::year_month_weekday(std::chrono::sys_days(date)).weekday(); + + auto pujo_range = public_journeys.equal_range(tive.key.timetable_version_code); + for (auto itt = pujo_range.first; itt != pujo_range.second; itt++) { + const auto &[_, pujo] = *itt; + + if (pujo.key.line_planning_number == options.line_planning_number && pujo.key.day_type.size() == 7 + && pujo.key.day_type[weekday.iso_encoding() - 1] == static_cast('0' + weekday.iso_encoding())) { + std::cout << pujo.key.line_planning_number << "," << pujo.key.journey_number << "," + << date << "," << pujo.departure_time << std::endl; + } + } + } + } + + if (options.output_file_path != "-"sv) fclose(out); +} diff --git a/src/querykv1/schedule.hpp b/src/querykv1/schedule.hpp new file mode 100644 index 0000000..100bd4c --- /dev/null +++ b/src/querykv1/schedule.hpp @@ -0,0 +1,13 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_QUERYKV1_SCHEDULE_HPP +#define OEUF_QUERYKV1_SCHEDULE_HPP + +#include +#include + +#include "cliopts.hpp" + +void schedule(const Options &options, Kv1Records &records, Kv1Index &index); + +#endif // OEUF_QUERYKV1_SCHEDULE_HPP diff --git a/src/recvkv6/.envrc b/src/recvkv6/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/recvkv6/.envrc @@ -0,0 +1,2 @@ +source_env ../../ +export DEVMODE=1 diff --git a/src/recvkv6/Makefile b/src/recvkv6/Makefile new file mode 100644 
index 0000000..12ff7fb --- /dev/null +++ b/src/recvkv6/Makefile @@ -0,0 +1,21 @@ +# Taken from: +# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide +# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01, +# 2023. [Online]. Available: +# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html +CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\ + -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ + -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ + -D_GLIBCXX_ASSERTIONS \ + -fstrict-flex-arrays=3 \ + -fstack-clash-protection -fstack-protector-strong +LDFLAGS=-lzmq -larrow -lparquet -lprometheus-cpp-pull -lprometheus-cpp-core -lz -ltmi8 -Wl,-z,defs \ + -Wl,-z,nodlopen -Wl,-z,noexecstack \ + -Wl,-z,relro -Wl,-z,now + +recvkv6: main.cpp + $(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS) + +.PHONY: clean +clean: + rm recvkv6 diff --git a/src/recvkv6/main.cpp b/src/recvkv6/main.cpp new file mode 100644 index 0000000..2ac3669 --- /dev/null +++ b/src/recvkv6/main.cpp @@ -0,0 +1,1300 @@ +// vim:set sw=2 ts=2 sts et: + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +#define CHUNK 16384 + +struct RawMessage { + public: + // Takes ownership of envelope and body + RawMessage(zmq_msg_t envelope, zmq_msg_t body) + : envelope(envelope), body(body) + {} + + // Prevent copying + RawMessage(const RawMessage &) = delete; + RawMessage &operator=(RawMessage const &) = delete; + + std::string_view getEnvelope() { + return static_cast(zmq_msg_data(&envelope)); + } + + char *getBody() { + return static_cast(zmq_msg_data(&body)); + } + + size_t getBodySize() { + return zmq_msg_size(&body); + } + + ~RawMessage() { + zmq_msg_close(&envelope); + zmq_msg_close(&body); + } + + private: 
+ zmq_msg_t envelope; + zmq_msg_t body; +}; + +std::optional recvMsg(void *socket) { + while (true) { + zmq_msg_t envelope, body; + int rc = zmq_msg_init(&envelope); + assert(rc == 0); + rc = zmq_msg_init(&body); + assert(rc == 0); + + rc = zmq_msg_recv(&envelope, socket, 0); + if (rc == -1) return std::nullopt; + + int more; + size_t more_size = sizeof(more); + rc = zmq_getsockopt(socket, ZMQ_RCVMORE, &more, &more_size); + if (!more) { + zmq_msg_close(&envelope); + zmq_msg_close(&body); + continue; + } + + rc = zmq_msg_recv(&body, socket, 0); + if (rc == -1) return std::nullopt; + + rc = zmq_getsockopt(socket, ZMQ_RCVMORE, &more, &more_size); + assert(!more); + + return std::make_optional(envelope, body); + } +} + +// Ensures that [output_size] == 0 +char *decompress(char *raw, unsigned int input_size, unsigned int &output_size) { + assert(input_size <= UINT32_MAX); + + z_stream strm; + strm.next_in = reinterpret_cast(raw); + strm.avail_in = input_size; + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + int rc = inflateInit2(&strm, 32); + assert(rc == Z_OK); + + unsigned int buf_cap = CHUNK; + unsigned int buf_len = 0; + char *buf = static_cast(malloc(CHUNK)); + do { + if (buf_len + CHUNK > buf_cap) { + assert(buf_cap <= UINT32_MAX); + buf_cap *= 2; + buf = static_cast(realloc(buf, buf_cap)); + } + strm.avail_out = buf_cap - buf_len; + strm.next_out = reinterpret_cast(buf + buf_len); + + unsigned long old_total = strm.total_out; + rc = inflate(&strm, Z_FINISH); + unsigned progress = static_cast(strm.total_out - old_total); + buf_len += progress; + assert(progress != 0 || rc == Z_STREAM_END); + } while (strm.total_in < input_size); + + if (buf_len == buf_cap) { + buf = static_cast(realloc(buf, buf_len + 1)); + } + buf[buf_len] = 0; + output_size = buf_len; + + rc = inflateEnd(&strm); + assert(rc == Z_OK); + + return buf; +} + +struct Date { + int16_t year = 0; + uint8_t month = 0; + uint8_t day = 0; + + static bool parse(Date &dest, 
std::string_view src) { + dest.year = 0, dest.month = 0, dest.day = 0; + + int16_t y_mul_fac = 1; + bool extended = false; + + size_t plus = src.find('+'); + if (plus != std::string_view::npos) { + extended = true; + src = src.substr(1); // remove plus sign from the start + } + if (!extended) { + size_t min_or_dash = src.find('-'); + if (min_or_dash == std::string_view::npos) return false; + if (min_or_dash == 0) { + y_mul_fac = -1; // it's a minus sign + src = src.substr(1); // remove minus sign at the start + } + } + + int y_chars = 0; + while (src.size() > 0 && src[0] >= '0' && src[0] <= '9') { + dest.year = static_cast(dest.year * 10 + src[0] - '0'); + src = src.substr(1); + y_chars++; + } + if (src.size() == 0) { dest.year = 0; return false; } + if (src[0] != '-') { dest.year = 0; return false; } + src = src.substr(1); // remove dash + if (y_chars < 4 || (y_chars > 4 && !extended)) { dest.year = 0; return false; } + dest.year *= y_mul_fac; + + bool rest_correct = src.size() == 5 + && src[0] >= '0' && src[0] <= '9' + && src[1] >= '0' && src[1] <= '9' + && src[3] >= '0' && src[3] <= '9' + && src[4] >= '0' && src[4] <= '9'; + if (!rest_correct) { dest.year = 0; return false; } + dest.month = static_cast((src[0] - '0') * 10 + src[1] - '0'); + dest.day = static_cast((src[3] - '0') * 10 + src[4] - '0'); + if (dest.month > 12 || dest.day > 31) { + dest.year = 0, dest.month = 0, dest.day = 0; + return false; + } + return true; + } + + std::string toString() const { + if (year < 0 || year > 9999 || month < 0 || month > 12 || day < 0 || day > 31) + throw std::invalid_argument("one or more date components (year, month, day) out of range"); + char data[11] = "XXXX-XX-XX"; + sprintf(data, "%04u-%02u-%02u", year, month, day); + return data; + } + + std::chrono::days toUnixDays() const { + std::chrono::year_month_day ymd{std::chrono::year(year), std::chrono::month(month), std::chrono::day(day)}; + // This is valid since C++20: as of C++20, the system clock is defined to 
// A wall-clock time of day with one-second resolution.
struct Time {
  uint8_t hour = 0;
  uint8_t minute = 0;
  uint8_t second = 0;

  // Parses a time of the exact form "HH:MM:SS" (hour 00-23, minute and second
  // 00-59) into `dest`. Returns false on any other input, in which case
  // `dest` is reset to 00:00:00.
  static bool parse(Time &dest, std::string_view src) {
    dest = Time{};
    if (src.size() != 8 || src[2] != ':' || src[5] != ':') return false;

    // Reads the two decimal digits at src[pos] and src[pos + 1].
    const auto two_digits = [src](size_t pos, uint8_t &out) {
      const char tens = src[pos];
      const char ones = src[pos + 1];
      if (tens < '0' || tens > '9' || ones < '0' || ones > '9') return false;
      out = static_cast<uint8_t>((tens - '0') * 10 + (ones - '0'));
      return true;
    };

    Time parsed;
    if (!two_digits(0, parsed.hour) || !two_digits(3, parsed.minute) || !two_digits(6, parsed.second))
      return false;
    if (parsed.hour > 23 || parsed.minute > 59 || parsed.second > 59)
      return false;
    dest = parsed;
    return true;
  }

  // Formats the time as "HH:MM:SS".
  // Throws std::invalid_argument if a component is out of range.
  std::string toString() const {
    // The fields are unsigned, so only the upper bounds need checking.
    if (hour > 23 || minute > 59 || second > 59)
      throw std::invalid_argument("one or more time components (hour, minute, second) out of range");
    char data[9];
    // uint8_t promotes to int when passed through varargs, while %u expects
    // unsigned, hence the explicit casts.
    snprintf(data, sizeof(data), "%02u:%02u:%02u", static_cast<unsigned>(hour),
             static_cast<unsigned>(minute), static_cast<unsigned>(second));
    return data;
  }
};
static_cast((src[3] - '0') * 10 + src[4] - '0'); + if (hours > 23 || minutes > 59) return false; + dest.minutes = static_cast(multiplier * (60 * hours + minutes)); + return true; + } + + std::string toString() const { + if (minutes == 0) + return "Z"; + + bool negative = minutes < 0; + int hours_off = abs(minutes / 60); + int mins_off = abs(minutes) - hours_off*60; + if (hours_off > 23 || mins_off > 59) + throw std::invalid_argument("offset out of range"); + char data[7] = "+XX:XX"; + sprintf(data, "%c%02u:%02u", negative ? '-' : '+', hours_off, mins_off); + return data; + } +}; + +struct Timestamp { + Date date; + Tzd off; + Time time; + + static bool parse(Timestamp &dest, std::string_view src) { + size_t t = src.find('T'); + if (t == std::string_view::npos || t + 1 >= src.size()) return false; + + std::string_view date = src.substr(0, t); + std::string_view time_and_tzd = src.substr(t + 1); + if (time_and_tzd.size() < 9) return false; + if (!Date::parse(dest.date, date)) return false; + + std::string_view time = time_and_tzd.substr(0, 8); + std::string_view tzd = time_and_tzd.substr(8); + if (!Time::parse(dest.time, time)) return false; + return Tzd::parse(dest.off, tzd); + } + + std::string toString() const { + return date.toString() + "T" + time.toString() + off.toString(); + } + + std::chrono::seconds toUnixSeconds() const { + std::chrono::year_month_day ymd(std::chrono::year(date.year), + std::chrono::month(date.month), + std::chrono::day(date.day)); + std::chrono::sys_days sys_days(ymd); + std::chrono::time_point utc_days(sys_days.time_since_epoch()); + std::chrono::utc_seconds utc_seconds = std::chrono::time_point_cast(utc_days); + utc_seconds += std::chrono::hours(time.hour) + std::chrono::minutes(time.minute) + + std::chrono::seconds(time.second) - std::chrono::minutes(off.minutes); + std::chrono::sys_seconds sys_seconds = std::chrono::utc_clock::to_sys(utc_seconds); + std::chrono::seconds unix = sys_seconds.time_since_epoch(); + return unix; + } +}; + 
// The kinds of KV6 position records. Values are consecutive and start at
// zero, so they can be used to index per-type tables.
enum Kv6RecordType {
  KV6T_UNKNOWN,    // 0
  KV6T_DELAY,      // 1
  KV6T_INIT,       // 2
  KV6T_ARRIVAL,    // 3
  KV6T_ON_STOP,    // 4
  KV6T_DEPARTURE,  // 5
  KV6T_ON_ROUTE,   // 6
  KV6T_ON_PATH,    // 7
  KV6T_OFF_ROUTE,  // 8
  KV6T_END,        // 9
  // Aliases for the first and last record type. Update these whenever an
  // enumerator is added before KV6T_UNKNOWN or after KV6T_END.
  _KV6T_FIRST_TYPE = KV6T_UNKNOWN,
  _KV6T_LAST_TYPE = KV6T_END,
};

// Bit flags, one per KV6 record field, so that a set of fields fits in a
// single value.
enum Kv6Field {
  KV6F_NONE                          = 0,
  KV6F_DATA_OWNER_CODE               = 1 << 0,
  KV6F_LINE_PLANNING_NUMBER          = 1 << 1,
  KV6F_OPERATING_DAY                 = 1 << 2,
  KV6F_JOURNEY_NUMBER                = 1 << 3,
  KV6F_REINFORCEMENT_NUMBER          = 1 << 4,
  KV6F_TIMESTAMP                     = 1 << 5,
  KV6F_SOURCE                        = 1 << 6,
  KV6F_PUNCTUALITY                   = 1 << 7,
  KV6F_USER_STOP_CODE                = 1 << 8,
  KV6F_PASSAGE_SEQUENCE_NUMBER       = 1 << 9,
  KV6F_VEHICLE_NUMBER                = 1 << 10,
  KV6F_BLOCK_CODE                    = 1 << 11,
  KV6F_WHEELCHAIR_ACCESSIBLE         = 1 << 12,
  KV6F_NUMBER_OF_COACHES             = 1 << 13,
  KV6F_RD_Y                          = 1 << 14,
  KV6F_RD_X                          = 1 << 15,
  KV6F_DISTANCE_SINCE_LAST_USER_STOP = 1 << 16,
};
KV6T_ON_STOP + static_cast( + KV6F_DATA_OWNER_CODE + | KV6F_LINE_PLANNING_NUMBER + | KV6F_OPERATING_DAY + | KV6F_JOURNEY_NUMBER + | KV6F_REINFORCEMENT_NUMBER + | KV6F_USER_STOP_CODE + | KV6F_PASSAGE_SEQUENCE_NUMBER + | KV6F_TIMESTAMP + | KV6F_SOURCE + | KV6F_VEHICLE_NUMBER + | KV6F_PUNCTUALITY), + // KV6T_DEPARTURE + static_cast( + KV6F_DATA_OWNER_CODE + | KV6F_LINE_PLANNING_NUMBER + | KV6F_OPERATING_DAY + | KV6F_JOURNEY_NUMBER + | KV6F_REINFORCEMENT_NUMBER + | KV6F_USER_STOP_CODE + | KV6F_PASSAGE_SEQUENCE_NUMBER + | KV6F_TIMESTAMP + | KV6F_SOURCE + | KV6F_VEHICLE_NUMBER + | KV6F_PUNCTUALITY), + // KV6T_ON_ROUTE + static_cast( + KV6F_DATA_OWNER_CODE + | KV6F_LINE_PLANNING_NUMBER + | KV6F_OPERATING_DAY + | KV6F_JOURNEY_NUMBER + | KV6F_REINFORCEMENT_NUMBER + | KV6F_USER_STOP_CODE + | KV6F_PASSAGE_SEQUENCE_NUMBER + | KV6F_TIMESTAMP + | KV6F_SOURCE + | KV6F_VEHICLE_NUMBER + | KV6F_PUNCTUALITY + | KV6F_RD_X + | KV6F_RD_Y), + // KV6T_ON_PATH + KV6F_NONE, + // KV6T_OFF_ROUTE + static_cast( + KV6F_DATA_OWNER_CODE + | KV6F_LINE_PLANNING_NUMBER + | KV6F_OPERATING_DAY + | KV6F_JOURNEY_NUMBER + | KV6F_REINFORCEMENT_NUMBER + | KV6F_TIMESTAMP + | KV6F_SOURCE + | KV6F_USER_STOP_CODE + | KV6F_PASSAGE_SEQUENCE_NUMBER + | KV6F_VEHICLE_NUMBER + | KV6F_RD_X + | KV6F_RD_Y), + // KV6T_END + static_cast( + KV6F_DATA_OWNER_CODE + | KV6F_LINE_PLANNING_NUMBER + | KV6F_OPERATING_DAY + | KV6F_JOURNEY_NUMBER + | KV6F_REINFORCEMENT_NUMBER + | KV6F_TIMESTAMP + | KV6F_SOURCE + | KV6F_USER_STOP_CODE + | KV6F_PASSAGE_SEQUENCE_NUMBER + | KV6F_VEHICLE_NUMBER), +}; + +static constexpr Kv6Field KV6T_OPTIONAL_FIELDS[_KV6T_LAST_TYPE + 1] = { + // KV6T_UNKNOWN + KV6F_NONE, + // KV6T_DELAY + KV6F_NONE, + // KV6T_INIT + KV6F_NONE, + // KV6T_ARRIVAL + static_cast(KV6F_RD_X | KV6F_RD_Y), + // KV6T_ON_STOP + static_cast(KV6F_RD_X | KV6F_RD_Y), + // KV6T_DEPARTURE + static_cast(KV6F_RD_X | KV6F_RD_Y), + // KV6T_ON_ROUTE + KV6F_DISTANCE_SINCE_LAST_USER_STOP, + // KV6T_ON_PATH + KV6F_NONE, + // KV6T_OFF_ROUTE + 
KV6F_NONE, + // KV6T_END + KV6F_NONE, +}; + +struct Kv6Record { + Kv6RecordType type = KV6T_UNKNOWN; + Kv6Field presence = KV6F_NONE; + Kv6Field next = KV6F_NONE; + std::string data_owner_code; + std::string line_planning_number; + std::string source; + std::string user_stop_code; + std::string wheelchair_accessible; + Date operating_day; + Timestamp timestamp; + uint32_t block_code = 0; + uint32_t journey_number = 0; + uint32_t vehicle_number = 0; + int32_t rd_x = 0; + int32_t rd_y = 0; + // The TMI8 specification is unclear: this field + // might actually be called distancesincelaststop + uint32_t distance_since_last_user_stop = 0; + uint16_t passage_sequence_number = 0; + int16_t punctuality = 0; + uint8_t number_of_coaches = 0; + uint8_t reinforcement_number = 0; + + void markPresent(Kv6Field field) { + presence = static_cast(presence | field); + } + + void removeUnsupportedFields() { + Kv6Field required_fields = KV6T_REQUIRED_FIELDS[type]; + Kv6Field optional_fields = KV6T_OPTIONAL_FIELDS[type]; + Kv6Field supported_fields = static_cast(required_fields | optional_fields); + presence = static_cast(presence & supported_fields); + } + + bool valid() { + Kv6Field required_fields = KV6T_REQUIRED_FIELDS[type]; + Kv6Field optional_fields = KV6T_OPTIONAL_FIELDS[type]; + Kv6Field supported_fields = static_cast(required_fields | optional_fields); + + Kv6Field required_field_presence = static_cast(presence & required_fields); + Kv6Field unsupported_field_presence = static_cast(presence & ~supported_fields); + + return required_field_presence == required_fields && !unsupported_field_presence; + } +}; + +enum Tmi8VvTmPushInfoField { + TMI8F_NONE = 0, + TMI8F_SUBSCRIBER_ID = 1, + TMI8F_VERSION = 2, + TMI8F_DOSSIER_NAME = 4, + TMI8F_TIMESTAMP = 8, +}; + +struct Tmi8VvTmPushInfo { + Tmi8VvTmPushInfoField next = TMI8F_NONE; + Tmi8VvTmPushInfoField presence = TMI8F_NONE; + std::string subscriber_id; + std::string version; + std::string dossier_name; + Timestamp timestamp; + 
std::vector messages; + + void markPresent(Tmi8VvTmPushInfoField field) { + presence = static_cast(presence | field); + } + + bool valid() { + const Tmi8VvTmPushInfoField REQUIRED_FIELDS = + static_cast( + TMI8F_SUBSCRIBER_ID + | TMI8F_VERSION + | TMI8F_DOSSIER_NAME + | TMI8F_TIMESTAMP); + return (presence & REQUIRED_FIELDS) == REQUIRED_FIELDS; + } +}; + +static const std::array KV6_POS_INFO_RECORD_TYPES = { + "UNKNOWN", "DELAY", "INIT", "ARRIVAL", "ONSTOP", "DEPARTURE", "ONROUTE", "ONPATH", "OFFROUTE", "END", +}; + +std::optional findKv6PosInfoRecordTypeName(Kv6RecordType type) { + if (type > _KV6T_LAST_TYPE) + return std::nullopt; + return KV6_POS_INFO_RECORD_TYPES[type]; +} + +const std::array, 17> KV6_POS_INFO_RECORD_FIELDS = {{ + { "dataownercode", KV6F_DATA_OWNER_CODE }, + { "lineplanningnumber", KV6F_LINE_PLANNING_NUMBER }, + { "operatingday", KV6F_OPERATING_DAY }, + { "journeynumber", KV6F_JOURNEY_NUMBER }, + { "reinforcementnumber", KV6F_REINFORCEMENT_NUMBER }, + { "timestamp", KV6F_TIMESTAMP }, + { "source", KV6F_SOURCE }, + { "punctuality", KV6F_PUNCTUALITY }, + { "userstopcode", KV6F_USER_STOP_CODE }, + { "passagesequencenumber", KV6F_PASSAGE_SEQUENCE_NUMBER }, + { "vehiclenumber", KV6F_VEHICLE_NUMBER }, + { "blockcode", KV6F_BLOCK_CODE }, + { "wheelchairaccessible", KV6F_WHEELCHAIR_ACCESSIBLE }, + { "numberofcoaches", KV6F_NUMBER_OF_COACHES }, + { "rd-y", KV6F_RD_Y }, + { "rd-x", KV6F_RD_X }, + { "distancesincelastuserstop", KV6F_DISTANCE_SINCE_LAST_USER_STOP }, +}}; + +// Returns the maximum amount of digits such that it is guaranteed that +// a corresponding amount of repeated 9's can be represented by the type. 
// Returns the maximum amount of digits such that it is guaranteed that
// a corresponding amount of repeated 9's can be represented by the type.
template <typename T>
constexpr size_t maxDigits() {
  size_t digits = 0;
  for (T x = std::numeric_limits<T>::max(); x != 0; x /= 10) digits++;
  return digits - 1;
}

// Shared digit loop of parseUnsigned and parseSigned: accepts between one
// and MaxDigits decimal digits and nothing else. `out` is written only on
// success.
template <size_t MaxDigits, typename T>
constexpr bool parseDecimalDigits(T &out, std::string_view digits) {
  static_assert(MaxDigits <= maxDigits<T>());
  if (digits.empty() || digits.size() > MaxDigits) return false;
  T res = 0;
  for (const char c : digits) {
    if (c < '0' || c > '9') return false;
    res = static_cast<T>(res * 10 + (c - '0'));
  }
  out = res;
  return true;
}

// Parses `src` as an unsigned decimal number of at most MaxDigits digits.
// Returns false, leaving `out` untouched, if `src` is empty, too long or
// contains a non-digit character.
template <size_t MaxDigits, typename T>
constexpr bool parseUnsigned(T &out, std::string_view src) {
  return parseDecimalDigits<MaxDigits>(out, src);
}

// Parses `src` as a decimal number of at most MaxDigits digits with an
// optional leading '-'. Returns false, leaving `out` untouched, if `src` is
// empty, consists of only the sign, has too many digits or contains a
// non-digit character.
template <size_t MaxDigits, typename T>
constexpr bool parseSigned(T &out, std::string_view src) {
  if (src.empty()) return false;
  const bool negative = src[0] == '-';
  if (negative) src = src.substr(1);
  T magnitude = 0;
  if (!parseDecimalDigits<MaxDigits>(magnitude, src)) return false;
  // The magnitude is bounded by MaxDigits, so negating it cannot overflow;
  // the cast undoes the promotion to int.
  out = negative ? static_cast<T>(-magnitude) : magnitude;
  return true;
}
// Copies `val` into `into` provided it is at most `max_len` bytes long.
// Returns false, leaving `into` untouched, when `val` is longer than that.
bool parseStringValue(std::string &into, size_t max_len, std::string_view val) {
  const bool fits = val.size() <= max_len;
  if (fits)
    into.assign(val.data(), val.size());
  return fits;
}
do { if (!(__VA_ARGS__)) { warn(msg); return; } } while (false) + + std::optional parseKv6PosInfoRecord(Kv6RecordType type, const rapidxml::xml_node<> &node, const Xmlns *nss) { + Kv6Record fields = { .type = type }; + for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) { + ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) { + for (const auto &[fname, field] : KV6_POS_INFO_RECORD_FIELDS) { + if (field == KV6F_NONE) + continue; + if (fname == name) { + PWARNASSERT("Expected KV6 record field element to only contain data", + onlyTextElement(*child)); + std::string_view childval = getValue(*child); + switch (field) { + case KV6F_DATA_OWNER_CODE: + PWARNASSERT("Invalid value for dataownercode", + parseStringValue(fields.data_owner_code, 10, childval)); + break; + case KV6F_LINE_PLANNING_NUMBER: + PWARNASSERT("Invalid value for lineplanningnumber", + parseStringValue(fields.line_planning_number, 10, childval)); + break; + case KV6F_OPERATING_DAY: + PWARNASSERT("Invalid value for operatatingday: not a valid date", + Date::parse(fields.operating_day, childval)); + break; + case KV6F_JOURNEY_NUMBER: + PWARNASSERT("Invalid value for journeynumber:" + " not a valid unsigned number with at most six digits", + parseUnsigned<6>(fields.journey_number, childval)); + break; + case KV6F_REINFORCEMENT_NUMBER: + PWARNASSERT("Invalid value for reinforcementnumber:" + " not a valid unsigned number with at most two digits", + parseUnsigned<2>(fields.reinforcement_number, childval)); + break; + case KV6F_TIMESTAMP: + PWARNASSERT("Invalid value for timestamp: not a valid timestamp", + Timestamp::parse(fields.timestamp, childval)); + break; + case KV6F_SOURCE: + PWARNASSERT("Invalid value for source:" + " not a valid string of at most 10 bytes", + parseStringValue(fields.source, 10, childval)); + break; + case KV6F_PUNCTUALITY: + PWARNASSERT("Invalid value for punctuality:" + " not a valid signed number with at most four 
digits", + parseSigned<4>(fields.punctuality, childval)); + break; + case KV6F_USER_STOP_CODE: + PWARNASSERT("Invalid value for userstopcode:" + " not a valid string of at most 10 bytes", + parseStringValue(fields.user_stop_code, 10, childval)); + break; + case KV6F_PASSAGE_SEQUENCE_NUMBER: + PWARNASSERT("Invalid value for passagesequencenumber:" + " not a valid unsigned number with at most four digits", + parseUnsigned<4>(fields.passage_sequence_number, childval)); + break; + case KV6F_VEHICLE_NUMBER: + PWARNASSERT("Invalid value for vehiclenumber:" + " not a valid unsigned number with at most six digits", + parseUnsigned<6>(fields.vehicle_number, childval)); + break; + case KV6F_BLOCK_CODE: + PWARNASSERT("Invalid value for blockcode:" + " not a valid unsigned number with at most eight digits", + parseUnsigned<8>(fields.block_code, childval)); + break; + case KV6F_WHEELCHAIR_ACCESSIBLE: + PWARNASSERT("Invalid value for wheelchairaccessible:" + " not a valid value for wheelchair accessibility", + childval == "ACCESSIBLE" + || childval == "NOTACCESSIBLE" + || childval == "UNKNOWN"); + fields.wheelchair_accessible = childval; + break; + case KV6F_NUMBER_OF_COACHES: + PWARNASSERT("Invalid for numberofcoaches:" + " not a valid unsigned number with at most two digits", + parseUnsigned<2>(fields.number_of_coaches, childval)); + break; + case KV6F_RD_X: + PWARNASSERT("Invalid value for rd-x:" + " not a valid signed number with at most six digits", + parseSigned<6>(fields.rd_x, childval)); + break; + case KV6F_RD_Y: + PWARNASSERT("Invalid value for rd-y:" + " not a valid signed number with at most six digits", + parseSigned<6>(fields.rd_y, childval)); + break; + case KV6F_DISTANCE_SINCE_LAST_USER_STOP: + PWARNASSERT("Invalid value for distancesincelastuserstop:" + " not a valid unsigned number with at most five digits", + parseUnsigned<5>(fields.distance_since_last_user_stop, childval)); + break; + case KV6F_NONE: + error("NONE field type case should be unreachable in 
parseKv6PosInfoRecord"); + return; + } + fields.markPresent(field); + break; + } + } + }); + } + + fields.removeUnsupportedFields(); + + if (!fields.valid()) + return std::nullopt; + return fields; + } + + std::vector parseKv6PosInfo(const rapidxml::xml_node<> &node, const Xmlns *nss) { + std::vector records; + for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) { + ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) { + for (auto type = _KV6T_FIRST_TYPE; + type != _KV6T_LAST_TYPE; + type = static_cast(type + 1)) { + if (type == KV6T_UNKNOWN) + continue; + if (KV6_POS_INFO_RECORD_TYPES[type] == name) { + auto record = parseKv6PosInfoRecord(type, *child, nss); + if (record) { + records.push_back(*record); + } + } + } + }); + } + return records; + } + + std::optional parseVvTmPush(const rapidxml::xml_node<> &node, const Xmlns *nss) { + Tmi8VvTmPushInfo info; + for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) { + ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) { + if (name == "Timestamp") { + PERRASSERT("Invalid value for Timestamp: Bad format", onlyTextElement(*child)); + PERRASSERT("Invalid value for Timestamp: Invalid timestamp", Timestamp::parse(info.timestamp, getValue(*child))); + info.markPresent(TMI8F_TIMESTAMP); + } else if (name == "SubscriberID") { + PERRASSERT("Invalid value for SubscriberID: Bad format", onlyTextElement(*child)); + info.subscriber_id = getValue(*child); + info.markPresent(TMI8F_SUBSCRIBER_ID); + } else if (name == "Version") { + PERRASSERT("Invalid value for Version: Bad format", onlyTextElement(*child)); + info.version = getValue(*child); + info.markPresent(TMI8F_VERSION); + } else if (name == "DossierName") { + PERRASSERT("Invalid value for DossierName: Bad format", onlyTextElement(*child)); + info.dossier_name = getValue(*child); + info.markPresent(TMI8F_DOSSIER_NAME); + } else if (name == 
"KV6posinfo") { + info.messages = parseKv6PosInfo(*child, nss); + } + }); + } + + if (!info.valid()) + return std::nullopt; + return info; + } + + std::optional parse(const rapidxml::xml_document<> &doc) { + std::optional msg; + for (const rapidxml::xml_node<> *node = doc.first_node(); node; node = node->next_sibling()) { + ifTmi8Element(*node, nullptr /* nss */, [&](std::string_view name, const Xmlns *nss) { + if (name == "VV_TM_PUSH") { + if (msg) { + error("Duplicated VV_TM_PUSH"); + return; + } + msg = parseVvTmPush(*node, nss); + if (!msg) { + error("Invalid VV_TM_PUSH"); + } + } + }); + } + if (!msg) + error("Expected to find VV_TM_PUSH"); + return msg; + } +}; + +std::optional parseXml(const rapidxml::xml_document<> &doc, std::stringstream &errs, std::stringstream &warns) { + Kv6Parser parser = { errs, warns }; + return parser.parse(doc); +} + +struct Metrics { + prometheus::Counter &messages_counter_ok; + prometheus::Counter &messages_counter_error; + prometheus::Counter &messages_counter_warning; + prometheus::Counter &rows_written_counter; + prometheus::Histogram &records_hist; + prometheus::Histogram &message_parse_hist; + prometheus::Histogram &payload_size_hist; + + using BucketBoundaries = prometheus::Histogram::BucketBoundaries; + + enum class ParseStatus { + OK, + WARNING, + ERROR, + }; + + Metrics(std::shared_ptr registry) : + Metrics(registry, prometheus::BuildCounter() + .Name("kv6_vv_tm_push_messages_total") + .Help("Number of KV6 VV_TM_PUSH messages received") + .Register(*registry)) + {} + + void addMeasurement(std::chrono::duration took_secs, size_t payload_size, size_t records, ParseStatus parsed) { + double millis = took_secs.count() * 1000.0; + + if (parsed == ParseStatus::OK) messages_counter_ok.Increment(); + else if (parsed == ParseStatus::WARNING) messages_counter_warning.Increment(); + else if (parsed == ParseStatus::ERROR) messages_counter_error.Increment(); + records_hist.Observe(static_cast(records)); + 
message_parse_hist.Observe(millis); + payload_size_hist.Observe(static_cast(payload_size)); + } + + void rowsWritten(int64_t rows) { + rows_written_counter.Increment(static_cast(rows)); + } + + private: + Metrics(std::shared_ptr registry, + prometheus::Family &messages_counter) : + messages_counter_ok(messages_counter + .Add({{ "status", "ok" }})), + messages_counter_error(messages_counter + .Add({{ "status", "error" }})), + messages_counter_warning(messages_counter + .Add({{ "status", "warning" }})), + rows_written_counter(prometheus::BuildCounter() + .Name("kv6_vv_tm_push_records_written") + .Help("Numer of VV_TM_PUSH records written to disk") + .Register(*registry) + .Add({})), + records_hist(prometheus::BuildHistogram() + .Name("kv6_vv_tm_push_records_amount") + .Help("Number of KV6 VV_TM_PUSH records") + .Register(*registry) + .Add({}, BucketBoundaries{ 5.0, 10.0, 20.0, 50.0, 100.0, 250.0, 500.0 })), + message_parse_hist(prometheus::BuildHistogram() + .Name("kv6_vv_tm_push_message_parse_millis") + .Help("Milliseconds taken to parse KV6 VV_TM_PUSH messages") + .Register(*registry) + .Add({}, BucketBoundaries{ 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 100.0, 1000.0, 2000.0 })), + payload_size_hist(prometheus::BuildHistogram() + .Name("kv6_payload_size") + .Help("Sizes of KV6 ZeroMQ message payloads") + .Register(*registry) + .Add({}, BucketBoundaries{ 500.0, 1000.0, 2500.0, 5000.0, 10000.0, 25000.0, 50000.0 })) + {} +}; + +// Note: it *must* hold that decompressed[size] == 0 +std::optional parseMsg(char *decompressed, size_t size, Metrics &metrics, std::stringstream &errs, std::stringstream &warns) { + auto start = std::chrono::steady_clock::now(); + + std::optional info; + + if (decompressed[size] != 0) { + errs << "Not parsing: missing null terminator" << '\n'; + } else { + rapidxml::xml_document<> doc; + constexpr int PARSE_FLAGS = rapidxml::parse_trim_whitespace + | rapidxml::parse_no_string_terminators + | rapidxml::parse_validate_closing_tags; + + try { + 
doc.parse(decompressed); + info = parseXml(doc, errs, warns); + } catch (const rapidxml::parse_error &err) { + errs << "XML parsing failed" << '\n'; + } + } + + auto end = std::chrono::steady_clock::now(); + std::chrono::duration took = end - start; + + if (info) + if (warns.view().empty()) + metrics.addMeasurement(took, size, info->messages.size(), Metrics::ParseStatus::OK); + else + metrics.addMeasurement(took, size, info->messages.size(), Metrics::ParseStatus::WARNING); + else + metrics.addMeasurement(took, size, 0, Metrics::ParseStatus::ERROR); + + return info; +} + +bool terminate = false; + +void onSigIntOrTerm(int /* signum */) { + terminate = true; +} + +arrow::Result> getTable(const std::vector &messages, size_t &rows_written) { + ParquetBuilder builder; + + for (const auto &msg : messages) { + Kv6Field present = msg.presence; + Kv6Field required = KV6T_REQUIRED_FIELDS[msg.type]; + Kv6Field optional = KV6T_OPTIONAL_FIELDS[msg.type]; + if ((~msg.presence & required) != 0) { + std::cout << "Invalid message: not all required fields present; skipping" << std::endl; + continue; + } + Kv6Field used = static_cast(present & (required | optional)); + rows_written++; + + // RD-X and RD-Y fix: some datatypes have these fields marked as required, but still give option + // of not providing these fields by setting them to -1. We want this normalized, where these + // fields are instead simply marked as not present. + if ((used & KV6F_RD_X) && msg.rd_x == -1) + used = static_cast(used & ~KV6F_RD_X); + if ((used & KV6F_RD_Y) && msg.rd_y == -1) + used = static_cast(used & ~KV6F_RD_Y); + + ARROW_RETURN_NOT_OK(builder.types.Append(*findKv6PosInfoRecordTypeName(msg.type))); + ARROW_RETURN_NOT_OK(used & KV6F_DATA_OWNER_CODE + ? builder.data_owner_codes.Append(msg.data_owner_code) + : builder.data_owner_codes.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_LINE_PLANNING_NUMBER + ? 
builder.line_planning_numbers.Append(msg.line_planning_number) + : builder.line_planning_numbers.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_OPERATING_DAY + ? builder.operating_days.Append(static_cast(msg.operating_day.toUnixDays().count())) + : builder.operating_days.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_JOURNEY_NUMBER + ? builder.journey_numbers.Append(msg.journey_number) + : builder.journey_numbers.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_REINFORCEMENT_NUMBER + ? builder.reinforcement_numbers.Append(msg.reinforcement_number) + : builder.reinforcement_numbers.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_TIMESTAMP + ? builder.timestamps.Append(msg.timestamp.toUnixSeconds().count()) + : builder.timestamps.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_SOURCE + ? builder.sources.Append(msg.source) + : builder.sources.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_PUNCTUALITY + ? builder.punctualities.Append(msg.punctuality) + : builder.punctualities.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_USER_STOP_CODE + ? builder.user_stop_codes.Append(msg.user_stop_code) + : builder.user_stop_codes.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_PASSAGE_SEQUENCE_NUMBER + ? builder.passage_sequence_numbers.Append(msg.passage_sequence_number) + : builder.passage_sequence_numbers.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_VEHICLE_NUMBER + ? builder.vehicle_numbers.Append(msg.vehicle_number) + : builder.vehicle_numbers.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_BLOCK_CODE + ? builder.block_codes.Append(msg.block_code) + : builder.block_codes.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_WHEELCHAIR_ACCESSIBLE + ? builder.wheelchair_accessibles.Append(msg.wheelchair_accessible) + : builder.wheelchair_accessibles.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_NUMBER_OF_COACHES + ? builder.number_of_coaches.Append(msg.number_of_coaches) + : builder.number_of_coaches.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_RD_Y + ? 
builder.rd_ys.Append(msg.rd_y) + : builder.rd_ys.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_RD_X + ? builder.rd_xs.Append(msg.rd_x) + : builder.rd_xs.AppendNull()); + ARROW_RETURN_NOT_OK(used & KV6F_DISTANCE_SINCE_LAST_USER_STOP + ? builder.distance_since_last_user_stops.Append(msg.distance_since_last_user_stop) + : builder.distance_since_last_user_stops.AppendNull()); + } + + return builder.getTable(); +} + +std::tuple getMinMaxTimestamp(const std::vector &messages) { + if (messages.size() == 0) + return { 0, 0 }; + int64_t min = std::numeric_limits::max(); + int64_t max = 0; + for (const auto &message : messages) { + if (~message.presence & KV6F_TIMESTAMP) + continue; + int64_t seconds = message.timestamp.toUnixSeconds().count(); + if (seconds < min) + min = seconds; + if (seconds > max) + max = seconds; + } + if (min == std::numeric_limits::max()) + return { 0, 0 }; // this is stupid + return { min, max }; +} + +arrow::Status writeParquet(const std::vector &messages, Metrics &metrics) { + size_t rows_written = 0; + ARROW_ASSIGN_OR_RAISE(std::shared_ptr table, getTable(messages, rows_written)); + + auto timestamp = std::chrono::round(std::chrono::utc_clock::now()); + std::string filename = std::format("oeuf-{:%FT%T%Ez}.parquet", timestamp); + ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*table, filename)); + std::cout << "Wrote Parquet file " << filename << std::endl; + + auto [min_timestamp, max_timestamp] = getMinMaxTimestamp(messages); + std::ofstream metaf(filename + ".meta.json.part", std::ios::binary); + nlohmann::json meta{ + { "min_timestamp", min_timestamp }, + { "max_timestamp", max_timestamp }, + { "rows_written", rows_written }, + }; + metaf << meta; + metaf.close(); + std::filesystem::rename(filename + ".meta.json.part", filename + ".meta.json"); + + metrics.rowsWritten(rows_written); + + return arrow::Status::OK(); +} + +using SteadyTime = std::chrono::steady_clock::time_point; + +std::string dumpFailedMsg(std::string_view txt, 
std::string_view errs, std::string_view warns) { + auto timestamp = std::chrono::round(std::chrono::utc_clock::now()); + std::string filename = std::format("oeuf-error-{:%FT%T%Ez}.txt", timestamp); + std::ofstream dumpf(filename, std::ios::binary); + dumpf << "======= ERROR MESSAGES ========" << std::endl; + dumpf << errs; + dumpf << "======= WARNING MESSAGES ======" << std::endl; + dumpf << warns; + dumpf << "======= RECEIVED MESSAGE ======" << std::endl; + dumpf << txt << std::endl; + dumpf.close(); + return filename; +} + +void handleMsg(RawMessage &msg, Metrics &metrics, SteadyTime &last_output, std::vector &msg_buf) { + unsigned int decompressed_size = 0; + if (msg.getBodySize() > std::numeric_limits::max()) + std::cout << "parseMsg failed due to too large message" << std::endl; + char *decompressed = decompress(msg.getBody(), static_cast(msg.getBodySize()), decompressed_size); + + std::stringstream errs; + std::stringstream warns; + // We know that decompressed[decompressed_size] == 0 because decompress() ensures this. 
+ auto parsed_msg = parseMsg(decompressed, decompressed_size, metrics, errs, warns); + if (parsed_msg) { + const Tmi8VvTmPushInfo &info = *parsed_msg; + auto new_msgs_it = info.messages.begin(); + while (new_msgs_it != info.messages.end()) { + size_t remaining_space = MAX_PARQUET_CHUNK - msg_buf.size(); + size_t new_msgs_left = info.messages.end() - new_msgs_it; + auto new_msgs_start = new_msgs_it; + auto new_msgs_end = new_msgs_start + std::min(remaining_space, new_msgs_left); + new_msgs_it = new_msgs_end; + msg_buf.insert(msg_buf.end(), new_msgs_start, new_msgs_end); + + bool time_expired = std::chrono::steady_clock::now() - last_output > std::chrono::minutes(5); + if (msg_buf.size() >= MAX_PARQUET_CHUNK || (new_msgs_it == info.messages.end() && time_expired)) { + arrow::Status status = writeParquet(msg_buf, metrics); + if (!status.ok()) + std::cout << "Writing Parquet file failed: " << status << std::endl; + msg_buf.clear(); + last_output = std::chrono::steady_clock::now(); + } + } + if (!errs.view().empty() || !warns.view().empty()) { + std::filesystem::path dump_file = dumpFailedMsg(std::string_view(decompressed, decompressed_size), errs.str(), warns.str()); + std::cout << "parseMsg finished with warnings: details dumped to " << dump_file << std::endl; + } + } else { + std::filesystem::path dump_file = dumpFailedMsg(std::string_view(decompressed, decompressed_size), errs.str(), warns.str()); + std::cout << "parseMsg failed: error details dumped to " << dump_file << std::endl; + } + free(decompressed); +} + +int main(int argc, char *argv[]) { + std::cout << "Working directory: " << std::filesystem::current_path() << std::endl; + + const char *metrics_addr = getenv("METRICS_ADDR"); + if (!metrics_addr || strlen(metrics_addr) == 0) { + std::cout << "Error: no METRICS_ADDR set!" 
<< std::endl; + exit(EXIT_FAILURE); + } + prometheus::Exposer exposer{metrics_addr}; + + bool prod = false; + const char *prod_env = getenv("NDOV_PRODUCTION"); + if (prod_env && strcmp(prod_env, "true") == 0) prod = true; + + void *zmq_context = zmq_ctx_new(); + void *zmq_subscriber = zmq_socket(zmq_context, ZMQ_SUB); + int rc = zmq_connect(zmq_subscriber, prod ? "tcp://pubsub.ndovloket.nl:7658" : "tcp://pubsub.besteffort.ndovloket.nl:7658"); + assert(rc == 0); + + const char *topic = "/CXX/KV6posinfo"; + rc = zmq_setsockopt(zmq_subscriber, ZMQ_SUBSCRIBE, topic, strlen(topic)); + assert(rc == 0); + + signal(SIGINT, onSigIntOrTerm); + signal(SIGTERM, onSigIntOrTerm); + + SteadyTime last_output = std::chrono::steady_clock::now(); + + auto registry = std::make_shared(); + Metrics metrics(registry); + exposer.RegisterCollectable(registry); + + std::vector msg_buf; + while (!terminate) { + std::optional msg = recvMsg(zmq_subscriber); + if (!msg) { + if (!terminate) + perror("recvMsg"); + continue; + } + handleMsg(*msg, metrics, last_output, msg_buf); + } + + std::cout << "Terminating" << std::endl; + if (msg_buf.size() > 0) { + arrow::Status status = writeParquet(msg_buf, metrics); + if (!status.ok()) std::cout << "Writing final Parquet file failed: " << status << std::endl; + else std::cout << "Final data written" << std::endl; + msg_buf.clear(); + } + + if (zmq_close(zmq_subscriber)) + perror("zmq_close"); + if (zmq_ctx_destroy(zmq_context)) + perror("zmq_ctx_destroy"); + + std::cout << "Bye" << std::endl; + + return 0; +} -- cgit v1.2.3