diff options
| author | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
|---|---|---|
| committer | Rutger Broekhoff | 2024-05-02 20:27:40 +0200 |
| commit | 17a3ea880402338420699e03bcb24181e4ff3924 (patch) | |
| tree | da666ef91e0b60d20aa0b01529644c136fd1f4ab | |
| download | oeuf-17a3ea880402338420699e03bcb24181e4ff3924.tar.gz oeuf-17a3ea880402338420699e03bcb24181e4ff3924.zip | |
Initial commit
Based on dc4ba6a
64 files changed, 9451 insertions, 0 deletions
| @@ -0,0 +1 @@ | |||
| use flake | |||
diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d92417a --- /dev/null +++ b/.gitattributes | |||
| @@ -0,0 +1 @@ | |||
| *.tif filter=lfs diff=lfs merge=lfs -text | |||
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9d7718b --- /dev/null +++ b/.gitignore | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | .direnv/ | ||
| 2 | src/augmentkv6/augmentkv6 | ||
| 3 | src/bundleparquet/bundleparquet | ||
| 4 | src/filterkv6/filterkv6 | ||
| 5 | src/querykv1/querykv1 | ||
| 6 | src/recvkv6/recvkv6 | ||
| 7 | result* | ||
| @@ -0,0 +1,287 @@ | |||
| 1 | EUROPEAN UNION PUBLIC LICENCE v. 1.2 | ||
| 2 | EUPL © the European Union 2007, 2016 | ||
| 3 | |||
| 4 | This European Union Public Licence (the ‘EUPL’) applies to the Work (as defined | ||
| 5 | below) which is provided under the terms of this Licence. Any use of the Work, | ||
| 6 | other than as authorised under this Licence is prohibited (to the extent such | ||
| 7 | use is covered by a right of the copyright holder of the Work). | ||
| 8 | |||
| 9 | The Work is provided under the terms of this Licence when the Licensor (as | ||
| 10 | defined below) has placed the following notice immediately following the | ||
| 11 | copyright notice for the Work: | ||
| 12 | |||
| 13 | Licensed under the EUPL | ||
| 14 | |||
| 15 | or has expressed by any other means his willingness to license under the EUPL. | ||
| 16 | |||
| 17 | 1. Definitions | ||
| 18 | |||
| 19 | In this Licence, the following terms have the following meaning: | ||
| 20 | |||
| 21 | - ‘The Licence’: this Licence. | ||
| 22 | |||
| 23 | - ‘The Original Work’: the work or software distributed or communicated by the | ||
| 24 | Licensor under this Licence, available as Source Code and also as Executable | ||
| 25 | Code as the case may be. | ||
| 26 | |||
| 27 | - ‘Derivative Works’: the works or software that could be created by the | ||
| 28 | Licensee, based upon the Original Work or modifications thereof. This Licence | ||
| 29 | does not define the extent of modification or dependence on the Original Work | ||
| 30 | required in order to classify a work as a Derivative Work; this extent is | ||
| 31 | determined by copyright law applicable in the country mentioned in Article 15. | ||
| 32 | |||
| 33 | - ‘The Work’: the Original Work or its Derivative Works. | ||
| 34 | |||
| 35 | - ‘The Source Code’: the human-readable form of the Work which is the most | ||
| 36 | convenient for people to study and modify. | ||
| 37 | |||
| 38 | - ‘The Executable Code’: any code which has generally been compiled and which is | ||
| 39 | meant to be interpreted by a computer as a program. | ||
| 40 | |||
| 41 | - ‘The Licensor’: the natural or legal person that distributes or communicates | ||
| 42 | the Work under the Licence. | ||
| 43 | |||
| 44 | - ‘Contributor(s)’: any natural or legal person who modifies the Work under the | ||
| 45 | Licence, or otherwise contributes to the creation of a Derivative Work. | ||
| 46 | |||
| 47 | - ‘The Licensee’ or ‘You’: any natural or legal person who makes any usage of | ||
| 48 | the Work under the terms of the Licence. | ||
| 49 | |||
| 50 | - ‘Distribution’ or ‘Communication’: any act of selling, giving, lending, | ||
| 51 | renting, distributing, communicating, transmitting, or otherwise making | ||
| 52 | available, online or offline, copies of the Work or providing access to its | ||
| 53 | essential functionalities at the disposal of any other natural or legal | ||
| 54 | person. | ||
| 55 | |||
| 56 | 2. Scope of the rights granted by the Licence | ||
| 57 | |||
| 58 | The Licensor hereby grants You a worldwide, royalty-free, non-exclusive, | ||
| 59 | sublicensable licence to do the following, for the duration of copyright vested | ||
| 60 | in the Original Work: | ||
| 61 | |||
| 62 | - use the Work in any circumstance and for all usage, | ||
| 63 | - reproduce the Work, | ||
| 64 | - modify the Work, and make Derivative Works based upon the Work, | ||
| 65 | - communicate to the public, including the right to make available or display | ||
| 66 | the Work or copies thereof to the public and perform publicly, as the case may | ||
| 67 | be, the Work, | ||
| 68 | - distribute the Work or copies thereof, | ||
| 69 | - lend and rent the Work or copies thereof, | ||
| 70 | - sublicense rights in the Work or copies thereof. | ||
| 71 | |||
| 72 | Those rights can be exercised on any media, supports and formats, whether now | ||
| 73 | known or later invented, as far as the applicable law permits so. | ||
| 74 | |||
| 75 | In the countries where moral rights apply, the Licensor waives his right to | ||
| 76 | exercise his moral right to the extent allowed by law in order to make effective | ||
| 77 | the licence of the economic rights here above listed. | ||
| 78 | |||
| 79 | The Licensor grants to the Licensee royalty-free, non-exclusive usage rights to | ||
| 80 | any patents held by the Licensor, to the extent necessary to make use of the | ||
| 81 | rights granted on the Work under this Licence. | ||
| 82 | |||
| 83 | 3. Communication of the Source Code | ||
| 84 | |||
| 85 | The Licensor may provide the Work either in its Source Code form, or as | ||
| 86 | Executable Code. If the Work is provided as Executable Code, the Licensor | ||
| 87 | provides in addition a machine-readable copy of the Source Code of the Work | ||
| 88 | along with each copy of the Work that the Licensor distributes or indicates, in | ||
| 89 | a notice following the copyright notice attached to the Work, a repository where | ||
| 90 | the Source Code is easily and freely accessible for as long as the Licensor | ||
| 91 | continues to distribute or communicate the Work. | ||
| 92 | |||
| 93 | 4. Limitations on copyright | ||
| 94 | |||
| 95 | Nothing in this Licence is intended to deprive the Licensee of the benefits from | ||
| 96 | any exception or limitation to the exclusive rights of the rights owners in the | ||
| 97 | Work, of the exhaustion of those rights or of other applicable limitations | ||
| 98 | thereto. | ||
| 99 | |||
| 100 | 5. Obligations of the Licensee | ||
| 101 | |||
| 102 | The grant of the rights mentioned above is subject to some restrictions and | ||
| 103 | obligations imposed on the Licensee. Those obligations are the following: | ||
| 104 | |||
| 105 | Attribution right: The Licensee shall keep intact all copyright, patent or | ||
| 106 | trademarks notices and all notices that refer to the Licence and to the | ||
| 107 | disclaimer of warranties. The Licensee must include a copy of such notices and a | ||
| 108 | copy of the Licence with every copy of the Work he/she distributes or | ||
| 109 | communicates. The Licensee must cause any Derivative Work to carry prominent | ||
| 110 | notices stating that the Work has been modified and the date of modification. | ||
| 111 | |||
| 112 | Copyleft clause: If the Licensee distributes or communicates copies of the | ||
| 113 | Original Works or Derivative Works, this Distribution or Communication will be | ||
| 114 | done under the terms of this Licence or of a later version of this Licence | ||
| 115 | unless the Original Work is expressly distributed only under this version of the | ||
| 116 | Licence — for example by communicating ‘EUPL v. 1.2 only’. The Licensee | ||
| 117 | (becoming Licensor) cannot offer or impose any additional terms or conditions on | ||
| 118 | the Work or Derivative Work that alter or restrict the terms of the Licence. | ||
| 119 | |||
| 120 | Compatibility clause: If the Licensee Distributes or Communicates Derivative | ||
| 121 | Works or copies thereof based upon both the Work and another work licensed under | ||
| 122 | a Compatible Licence, this Distribution or Communication can be done under the | ||
| 123 | terms of this Compatible Licence. For the sake of this clause, ‘Compatible | ||
| 124 | Licence’ refers to the licences listed in the appendix attached to this Licence. | ||
| 125 | Should the Licensee's obligations under the Compatible Licence conflict with | ||
| 126 | his/her obligations under this Licence, the obligations of the Compatible | ||
| 127 | Licence shall prevail. | ||
| 128 | |||
| 129 | Provision of Source Code: When distributing or communicating copies of the Work, | ||
| 130 | the Licensee will provide a machine-readable copy of the Source Code or indicate | ||
| 131 | a repository where this Source will be easily and freely available for as long | ||
| 132 | as the Licensee continues to distribute or communicate the Work. | ||
| 133 | |||
| 134 | Legal Protection: This Licence does not grant permission to use the trade names, | ||
| 135 | trademarks, service marks, or names of the Licensor, except as required for | ||
| 136 | reasonable and customary use in describing the origin of the Work and | ||
| 137 | reproducing the content of the copyright notice. | ||
| 138 | |||
| 139 | 6. Chain of Authorship | ||
| 140 | |||
| 141 | The original Licensor warrants that the copyright in the Original Work granted | ||
| 142 | hereunder is owned by him/her or licensed to him/her and that he/she has the | ||
| 143 | power and authority to grant the Licence. | ||
| 144 | |||
| 145 | Each Contributor warrants that the copyright in the modifications he/she brings | ||
| 146 | to the Work are owned by him/her or licensed to him/her and that he/she has the | ||
| 147 | power and authority to grant the Licence. | ||
| 148 | |||
| 149 | Each time You accept the Licence, the original Licensor and subsequent | ||
| 150 | Contributors grant You a licence to their contributions to the Work, under the | ||
| 151 | terms of this Licence. | ||
| 152 | |||
| 153 | 7. Disclaimer of Warranty | ||
| 154 | |||
| 155 | The Work is a work in progress, which is continuously improved by numerous | ||
| 156 | Contributors. It is not a finished work and may therefore contain defects or | ||
| 157 | ‘bugs’ inherent to this type of development. | ||
| 158 | |||
| 159 | For the above reason, the Work is provided under the Licence on an ‘as is’ basis | ||
| 160 | and without warranties of any kind concerning the Work, including without | ||
| 161 | limitation merchantability, fitness for a particular purpose, absence of defects | ||
| 162 | or errors, accuracy, non-infringement of intellectual property rights other than | ||
| 163 | copyright as stated in Article 6 of this Licence. | ||
| 164 | |||
| 165 | This disclaimer of warranty is an essential part of the Licence and a condition | ||
| 166 | for the grant of any rights to the Work. | ||
| 167 | |||
| 168 | 8. Disclaimer of Liability | ||
| 169 | |||
| 170 | Except in the cases of wilful misconduct or damages directly caused to natural | ||
| 171 | persons, the Licensor will in no event be liable for any direct or indirect, | ||
| 172 | material or moral, damages of any kind, arising out of the Licence or of the use | ||
| 173 | of the Work, including without limitation, damages for loss of goodwill, work | ||
| 174 | stoppage, computer failure or malfunction, loss of data or any commercial | ||
| 175 | damage, even if the Licensor has been advised of the possibility of such damage. | ||
| 176 | However, the Licensor will be liable under statutory product liability laws as | ||
| 177 | far such laws apply to the Work. | ||
| 178 | |||
| 179 | 9. Additional agreements | ||
| 180 | |||
| 181 | While distributing the Work, You may choose to conclude an additional agreement, | ||
| 182 | defining obligations or services consistent with this Licence. However, if | ||
| 183 | accepting obligations, You may act only on your own behalf and on your sole | ||
| 184 | responsibility, not on behalf of the original Licensor or any other Contributor, | ||
| 185 | and only if You agree to indemnify, defend, and hold each Contributor harmless | ||
| 186 | for any liability incurred by, or claims asserted against such Contributor by | ||
| 187 | the fact You have accepted any warranty or additional liability. | ||
| 188 | |||
| 189 | 10. Acceptance of the Licence | ||
| 190 | |||
| 191 | The provisions of this Licence can be accepted by clicking on an icon ‘I agree’ | ||
| 192 | placed under the bottom of a window displaying the text of this Licence or by | ||
| 193 | affirming consent in any other similar way, in accordance with the rules of | ||
| 194 | applicable law. Clicking on that icon indicates your clear and irrevocable | ||
| 195 | acceptance of this Licence and all of its terms and conditions. | ||
| 196 | |||
| 197 | Similarly, you irrevocably accept this Licence and all of its terms and | ||
| 198 | conditions by exercising any rights granted to You by Article 2 of this Licence, | ||
| 199 | such as the use of the Work, the creation by You of a Derivative Work or the | ||
| 200 | Distribution or Communication by You of the Work or copies thereof. | ||
| 201 | |||
| 202 | 11. Information to the public | ||
| 203 | |||
| 204 | In case of any Distribution or Communication of the Work by means of electronic | ||
| 205 | communication by You (for example, by offering to download the Work from a | ||
| 206 | remote location) the distribution channel or media (for example, a website) must | ||
| 207 | at least provide to the public the information requested by the applicable law | ||
| 208 | regarding the Licensor, the Licence and the way it may be accessible, concluded, | ||
| 209 | stored and reproduced by the Licensee. | ||
| 210 | |||
| 211 | 12. Termination of the Licence | ||
| 212 | |||
| 213 | The Licence and the rights granted hereunder will terminate automatically upon | ||
| 214 | any breach by the Licensee of the terms of the Licence. | ||
| 215 | |||
| 216 | Such a termination will not terminate the licences of any person who has | ||
| 217 | received the Work from the Licensee under the Licence, provided such persons | ||
| 218 | remain in full compliance with the Licence. | ||
| 219 | |||
| 220 | 13. Miscellaneous | ||
| 221 | |||
| 222 | Without prejudice of Article 9 above, the Licence represents the complete | ||
| 223 | agreement between the Parties as to the Work. | ||
| 224 | |||
| 225 | If any provision of the Licence is invalid or unenforceable under applicable | ||
| 226 | law, this will not affect the validity or enforceability of the Licence as a | ||
| 227 | whole. Such provision will be construed or reformed so as necessary to make it | ||
| 228 | valid and enforceable. | ||
| 229 | |||
| 230 | The European Commission may publish other linguistic versions or new versions of | ||
| 231 | this Licence or updated versions of the Appendix, so far this is required and | ||
| 232 | reasonable, without reducing the scope of the rights granted by the Licence. New | ||
| 233 | versions of the Licence will be published with a unique version number. | ||
| 234 | |||
| 235 | All linguistic versions of this Licence, approved by the European Commission, | ||
| 236 | have identical value. Parties can take advantage of the linguistic version of | ||
| 237 | their choice. | ||
| 238 | |||
| 239 | 14. Jurisdiction | ||
| 240 | |||
| 241 | Without prejudice to specific agreement between parties, | ||
| 242 | |||
| 243 | - any litigation resulting from the interpretation of this License, arising | ||
| 244 | between the European Union institutions, bodies, offices or agencies, as a | ||
| 245 | Licensor, and any Licensee, will be subject to the jurisdiction of the Court | ||
| 246 | of Justice of the European Union, as laid down in article 272 of the Treaty on | ||
| 247 | the Functioning of the European Union, | ||
| 248 | |||
| 249 | - any litigation arising between other parties and resulting from the | ||
| 250 | interpretation of this License, will be subject to the exclusive jurisdiction | ||
| 251 | of the competent court where the Licensor resides or conducts its primary | ||
| 252 | business. | ||
| 253 | |||
| 254 | 15. Applicable Law | ||
| 255 | |||
| 256 | Without prejudice to specific agreement between parties, | ||
| 257 | |||
| 258 | - this Licence shall be governed by the law of the European Union Member State | ||
| 259 | where the Licensor has his seat, resides or has his registered office, | ||
| 260 | |||
| 261 | - this licence shall be governed by Belgian law if the Licensor has no seat, | ||
| 262 | residence or registered office inside a European Union Member State. | ||
| 263 | |||
| 264 | Appendix | ||
| 265 | |||
| 266 | ‘Compatible Licences’ according to Article 5 EUPL are: | ||
| 267 | |||
| 268 | - GNU General Public License (GPL) v. 2, v. 3 | ||
| 269 | - GNU Affero General Public License (AGPL) v. 3 | ||
| 270 | - Open Software License (OSL) v. 2.1, v. 3.0 | ||
| 271 | - Eclipse Public License (EPL) v. 1.0 | ||
| 272 | - CeCILL v. 2.0, v. 2.1 | ||
| 273 | - Mozilla Public Licence (MPL) v. 2 | ||
| 274 | - GNU Lesser General Public Licence (LGPL) v. 2.1, v. 3 | ||
| 275 | - Creative Commons Attribution-ShareAlike v. 3.0 Unported (CC BY-SA 3.0) for | ||
| 276 | works other than software | ||
| 277 | - European Union Public Licence (EUPL) v. 1.1, v. 1.2 | ||
| 278 | - Québec Free and Open-Source Licence — Reciprocity (LiLiQ-R) or Strong | ||
| 279 | Reciprocity (LiLiQ-R+). | ||
| 280 | |||
| 281 | The European Commission may update this Appendix to later versions of the above | ||
| 282 | licences without producing a new version of the EUPL, as long as they provide | ||
| 283 | the rights granted in Article 2 of this Licence and protect the covered Source | ||
| 284 | Code from exclusive appropriation. | ||
| 285 | |||
| 286 | All other changes or additions to this Appendix require the production of a new | ||
| 287 | EUPL version. | ||
diff --git a/README.txt b/README.txt new file mode 100644 index 0000000..469f5b4 --- /dev/null +++ b/README.txt | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | ## Copyright Notice and License | ||
| 2 | |||
| 3 | Copyright 2024 Rutger Broekhoff. | ||
| 4 | Licensed under the EUPL. | ||
| 5 | |||
| 6 | An English copy of version 1.2 of the EUPL license can be found in the LICENSE | ||
| 7 | file. If you wish to read the license in another one of the (currently) 23 | ||
| 8 | official languages of the European Union, you can! You may find your version at | ||
| 9 | https://joinup.ec.europa.eu/collection/eupl/eupl-text-eupl-12 | ||
| 10 | |||
| 11 | For now, the code is licensed under the EUPL-1.2 license. If the time comes | ||
| 12 | that I seriously need to think about what license to use, this may change. | ||
| 13 | Nevertheless, I expect that this project will retain a copyleft license. \ No newline at end of file | ||
diff --git a/all-deterministic.sh b/all-deterministic.sh new file mode 100755 index 0000000..5a857ef --- /dev/null +++ b/all-deterministic.sh | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | set -euo pipefail # Rebuild every flake package with --rebuild; abort if flake evaluation or jq fails. | ||
| 3 | nix flake show --json | jq -r '.packages[][].name | values | ".#\(.)"' | xargs nix build --rebuild | ||
diff --git a/build-all.sh b/build-all.sh new file mode 100755 index 0000000..ea45126 --- /dev/null +++ b/build-all.sh | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | #!/usr/bin/env bash | ||
| 2 | set -euo pipefail # Build every package listed by the flake; abort if flake evaluation or jq fails. | ||
| 3 | nix flake show --json | jq -r '.packages[][].name | values | ".#\(.)"' | xargs nix build | ||
diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..3b8b6fe --- /dev/null +++ b/flake.lock | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | { | ||
| 2 | "nodes": { | ||
| 3 | "flake-utils": { | ||
| 4 | "inputs": { | ||
| 5 | "systems": "systems" | ||
| 6 | }, | ||
| 7 | "locked": { | ||
| 8 | "lastModified": 1701680307, | ||
| 9 | "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", | ||
| 10 | "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", | ||
| 11 | "revCount": 88, | ||
| 12 | "type": "tarball", | ||
| 13 | "url": "https://api.flakehub.com/f/pinned/numtide/flake-utils/0.1.88%2Brev-4022d587cbbfd70fe950c1e2083a02621806a725/018c340d-3287-7c66-818b-f2f646a808e3/source.tar.gz" | ||
| 14 | }, | ||
| 15 | "original": { | ||
| 16 | "type": "tarball", | ||
| 17 | "url": "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz" | ||
| 18 | } | ||
| 19 | }, | ||
| 20 | "libtmi8": { | ||
| 21 | "inputs": { | ||
| 22 | "flake-utils": [ | ||
| 23 | "flake-utils" | ||
| 24 | ], | ||
| 25 | "nixpkgs": [ | ||
| 26 | "nixpkgs" | ||
| 27 | ] | ||
| 28 | }, | ||
| 29 | "locked": { | ||
| 30 | "lastModified": 1, | ||
| 31 | "narHash": "sha256-ST9E5LANnA7IV5cY0fbk+MSukaOczxnkXV1/IH7ps4U=", | ||
| 32 | "path": "./lib/libtmi8", | ||
| 33 | "type": "path" | ||
| 34 | }, | ||
| 35 | "original": { | ||
| 36 | "path": "./lib/libtmi8", | ||
| 37 | "type": "path" | ||
| 38 | } | ||
| 39 | }, | ||
| 40 | "nixpkgs": { | ||
| 41 | "locked": { | ||
| 42 | "lastModified": 1702346276, | ||
| 43 | "narHash": "sha256-eAQgwIWApFQ40ipeOjVSoK4TEHVd6nbSd9fApiHIw5A=", | ||
| 44 | "rev": "cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7", | ||
| 45 | "revCount": 553141, | ||
| 46 | "type": "tarball", | ||
| 47 | "url": "https://api.flakehub.com/f/pinned/NixOs/nixpkgs/0.2311.553141%2Brev-cf28ee258fd5f9a52de6b9865cdb93a1f96d09b7/018c652c-2ff2-777b-bade-dae9c2abe1e1/source.tar.gz" | ||
| 48 | }, | ||
| 49 | "original": { | ||
| 50 | "type": "tarball", | ||
| 51 | "url": "https://flakehub.com/f/NixOs/nixpkgs/%2A.tar.gz" | ||
| 52 | } | ||
| 53 | }, | ||
| 54 | "root": { | ||
| 55 | "inputs": { | ||
| 56 | "flake-utils": "flake-utils", | ||
| 57 | "libtmi8": "libtmi8", | ||
| 58 | "nixpkgs": "nixpkgs" | ||
| 59 | } | ||
| 60 | }, | ||
| 61 | "systems": { | ||
| 62 | "locked": { | ||
| 63 | "lastModified": 1681028828, | ||
| 64 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", | ||
| 65 | "owner": "nix-systems", | ||
| 66 | "repo": "default", | ||
| 67 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", | ||
| 68 | "type": "github" | ||
| 69 | }, | ||
| 70 | "original": { | ||
| 71 | "owner": "nix-systems", | ||
| 72 | "repo": "default", | ||
| 73 | "type": "github" | ||
| 74 | } | ||
| 75 | } | ||
| 76 | }, | ||
| 77 | "root": "root", | ||
| 78 | "version": 7 | ||
| 79 | } | ||
diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..df5fffb --- /dev/null +++ b/flake.nix | |||
| @@ -0,0 +1,137 @@ | |||
| 1 | { | ||
| 2 | inputs = { | ||
| 3 | nixpkgs.url = "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz"; | ||
| 4 | flake-utils.url = "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz"; | ||
| 5 | libtmi8 = { | ||
| 6 | url = "path:./lib/libtmi8"; | ||
| 7 | inputs.nixpkgs.follows = "nixpkgs"; | ||
| 8 | inputs.flake-utils.follows = "flake-utils"; | ||
| 9 | }; | ||
| 10 | }; | ||
| 11 | |||
| 12 | outputs = { self, nixpkgs, flake-utils, libtmi8, ... }@inputs: | ||
| 13 | { | ||
| 14 | nixosModules = rec { | ||
| 15 | oeuf = import ./module self; | ||
| 16 | default = oeuf; | ||
| 17 | }; | ||
| 18 | } // flake-utils.lib.eachDefaultSystem | ||
| 19 | (system: | ||
| 20 | let | ||
| 21 | libtmi8Overlay = final: prev: { oeuf-libtmi8 = libtmi8.packages.${system}.oeuf-libtmi8; }; | ||
| 22 | |||
| 23 | pkgs = import nixpkgs { | ||
| 24 | inherit system; | ||
| 25 | overlays = [ libtmi8Overlay ]; | ||
| 26 | }; | ||
| 27 | boostPkg = pkgs.boost182; | ||
| 28 | |||
| 29 | inherit (pkgs.gcc13) stdenv; | ||
| 30 | |||
| 31 | oeuf-augmentkv6 = stdenv.mkDerivation { | ||
| 32 | name = "oeuf-augmentkv6"; | ||
| 33 | src = ./.; | ||
| 34 | # Builds src/augmentkv6 via its Makefile. Boost is a library dependency, so it is listed in buildInputs as in oeuf-querykv1. | ||
| 35 | nativeBuildInputs = with pkgs; [ gcc13 ]; | ||
| 36 | buildInputs = with pkgs; [ arrow-cpp oeuf-libtmi8 boostPkg ]; | ||
| 37 | buildPhase = '' | ||
| 38 | cd src/augmentkv6 | ||
| 39 | make augmentkv6 | ||
| 40 | ''; | ||
| 41 | |||
| 42 | installPhase = '' | ||
| 43 | mkdir -p $out/bin | ||
| 44 | cp augmentkv6 $out/bin/oeuf-augmentkv6 | ||
| 45 | ''; | ||
| 46 | }; | ||
| 47 | |||
| 48 | oeuf-filterkv6 = stdenv.mkDerivation { | ||
| 49 | name = "oeuf-filterkv6"; | ||
| 50 | src = ./.; | ||
| 51 | # Runs `make filterkv6` inside src/filterkv6 and installs the binary as $out/bin/oeuf-filterkv6. | ||
| 52 | nativeBuildInputs = with pkgs; [ gcc13 ]; | ||
| 53 | buildInputs = with pkgs; [ arrow-cpp oeuf-libtmi8 ]; | ||
| 54 | buildPhase = '' | ||
| 55 | cd src/filterkv6 | ||
| 56 | make filterkv6 | ||
| 57 | ''; | ||
| 58 | |||
| 59 | installPhase = '' | ||
| 60 | mkdir -p $out/bin | ||
| 61 | cp filterkv6 $out/bin/oeuf-filterkv6 | ||
| 62 | ''; | ||
| 63 | }; | ||
| 64 | |||
| 65 | oeuf-bundleparquet = stdenv.mkDerivation { | ||
| 66 | name = "oeuf-bundleparquet"; | ||
| 67 | src = ./.; | ||
| 68 | # Runs `make bundleparquet` inside src/bundleparquet and installs the binary as $out/bin/oeuf-bundleparquet. | ||
| 69 | nativeBuildInputs = with pkgs; [ gcc13 ]; | ||
| 70 | buildInputs = with pkgs; [ arrow-cpp curl nlohmann_json prometheus-cpp zlib oeuf-libtmi8 ]; | ||
| 71 | buildPhase = '' | ||
| 72 | cd src/bundleparquet | ||
| 73 | make bundleparquet | ||
| 74 | ''; | ||
| 75 | |||
| 76 | installPhase = '' | ||
| 77 | mkdir -p $out/bin | ||
| 78 | cp bundleparquet $out/bin/oeuf-bundleparquet | ||
| 79 | ''; | ||
| 80 | }; | ||
| 81 | |||
| 82 | oeuf-querykv1 = stdenv.mkDerivation { | ||
| 83 | name = "oeuf-querykv1"; | ||
| 84 | src = ./.; | ||
| 85 | # Runs `make querykv1` inside src/querykv1 and installs the binary as $out/bin/oeuf-querykv1. | ||
| 86 | nativeBuildInputs = with pkgs; [ gcc13 ]; | ||
| 87 | buildInputs = with pkgs; [ oeuf-libtmi8 boostPkg ]; | ||
| 88 | buildPhase = '' | ||
| 89 | cd src/querykv1 | ||
| 90 | make querykv1 | ||
| 91 | ''; | ||
| 92 | |||
| 93 | installPhase = '' | ||
| 94 | mkdir -p $out/bin | ||
| 95 | cp querykv1 $out/bin/oeuf-querykv1 | ||
| 96 | ''; | ||
| 97 | }; | ||
| 98 | |||
| 99 | oeuf-recvkv6 = stdenv.mkDerivation { | ||
| 100 | name = "oeuf-recvkv6"; | ||
| 101 | src = ./.; | ||
| 102 | # Runs `make recvkv6` inside src/recvkv6 and installs the binary as $out/bin/oeuf-recvkv6. | ||
| 103 | nativeBuildInputs = with pkgs; [ gcc13 ]; | ||
| 104 | buildInputs = with pkgs; [ zeromq zlib arrow-cpp nlohmann_json prometheus-cpp rapidxml oeuf-libtmi8 ]; | ||
| 105 | buildPhase = '' | ||
| 106 | cd src/recvkv6 | ||
| 107 | make recvkv6 | ||
| 108 | ''; | ||
| 109 | |||
| 110 | installPhase = '' | ||
| 111 | mkdir -p $out/bin | ||
| 112 | cp recvkv6 $out/bin/oeuf-recvkv6 | ||
| 113 | ''; | ||
| 114 | }; | ||
| 115 | |||
| 116 | oeuf-archiver = import ./script/archiver { | ||
| 117 | pkgs = pkgs // { inherit oeuf-bundleparquet; }; | ||
| 118 | }; | ||
| 119 | |||
| 120 | oeuf-synckv6 = import ./script/synckv6 { inherit pkgs; }; | ||
| 121 | in | ||
| 122 | { | ||
| 123 | packages.oeuf-archiver = oeuf-archiver; | ||
| 124 | packages.oeuf-augmentkv6 = oeuf-augmentkv6; | ||
| 125 | packages.oeuf-synckv6 = oeuf-synckv6; | ||
| 126 | packages.oeuf-filterkv6 = oeuf-filterkv6; | ||
| 127 | packages.oeuf-bundleparquet = oeuf-bundleparquet; | ||
| 128 | packages.oeuf-querykv1 = oeuf-querykv1; | ||
| 129 | packages.oeuf-recvkv6 = oeuf-recvkv6; | ||
| 130 | |||
| 131 | devShells.default = pkgs.mkShell { | ||
| 132 | inputsFrom = [ oeuf-bundleparquet oeuf-querykv1 oeuf-recvkv6 ]; | ||
| 133 | }; | ||
| 134 | |||
| 135 | formatter = pkgs.nixpkgs-fmt; | ||
| 136 | }); | ||
| 137 | } | ||
diff --git a/lib/libtmi8/.envrc b/lib/libtmi8/.envrc new file mode 100644 index 0000000..4e0d702 --- /dev/null +++ b/lib/libtmi8/.envrc | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | use flake | ||
| 2 | |||
| 3 | export DEVMODE=1 | ||
diff --git a/lib/libtmi8/.gitignore b/lib/libtmi8/.gitignore new file mode 100644 index 0000000..f6b8cf6 --- /dev/null +++ b/lib/libtmi8/.gitignore | |||
| @@ -0,0 +1,3 @@ | |||
| 1 | src/*.o | ||
| 2 | libtmi8.a | ||
| 3 | libtmi8.so | ||
diff --git a/lib/libtmi8/Makefile b/lib/libtmi8/Makefile new file mode 100644 index 0000000..52a9807 --- /dev/null +++ b/lib/libtmi8/Makefile | |||
| @@ -0,0 +1,41 @@ | |||
| 1 | # Taken from: | ||
| 2 | # Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide | ||
| 3 | # for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01, | ||
| 4 | # 2023. [Online]. Available: | ||
| 5 | # https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html | ||
| 6 | CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer -fPIC -Iinclude $(if $(DEVMODE),-Werror,)\ | ||
| 7 | -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \ | ||
| 8 | -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \ | ||
| 9 | -D_GLIBCXX_ASSERTIONS \ | ||
| 10 | -fstrict-flex-arrays=3 \ | ||
| 11 | -fstack-clash-protection -fstack-protector-strong | ||
| 12 | LDFLAGS=-larrow -lparquet -Wl,-z,defs \ | ||
| 13 | -Wl,-z,nodlopen -Wl,-z,noexecstack \ | ||
| 14 | -Wl,-z,relro -Wl,-z,now | ||
| 15 | DESTDIR=/usr/local | ||
| 16 | # Every public header is listed so that editing any of them rebuilds all objects (see the src/%.o rule). | ||
| 17 | LIBHDRS=include/tmi8/kv1_index.hpp include/tmi8/kv1_lexer.hpp include/tmi8/kv1_parser.hpp include/tmi8/kv1_types.hpp include/tmi8/kv6_parquet.hpp | ||
| 18 | LIBSRCS=src/kv1_index.cpp src/kv1_lexer.cpp src/kv1_parser.cpp src/kv1_types.cpp src/kv6_parquet.cpp | ||
| 19 | LIBOBJS=$(patsubst %.cpp,%.o,$(LIBSRCS)) | ||
| 20 | |||
| 21 | .PHONY: all install libtmi8 clean | ||
| 22 | all: libtmi8 | ||
| 23 | |||
| 24 | libtmi8: libtmi8.a libtmi8.so | ||
| 25 | # -f keeps `make clean` from failing when some artefacts were never built. | ||
| 26 | clean: | ||
| 27 | rm -f libtmi8.a libtmi8.so $(LIBOBJS) | ||
| 28 | # The recipe copies libtmi8.so as well, so it has to be a prerequisite alongside libtmi8.a. | ||
| 29 | install: libtmi8.a libtmi8.so $(LIBHDRS) | ||
| 30 | install -D -m644 include/tmi8/* -t $(DESTDIR)/include/tmi8 | ||
| 31 | install -D -m644 libtmi8.a -t $(DESTDIR)/lib | ||
| 32 | install -D -m644 libtmi8.so -t $(DESTDIR)/lib | ||
| 33 | # Objects are compiled with -fPIC (from CXXFLAGS) because the same objects are linked into libtmi8.so. | ||
| 34 | src/%.o: src/%.cpp $(LIBHDRS) | ||
| 35 | $(CXX) -c -o $@ $< $(CXXFLAGS) | ||
| 36 | |||
| 37 | libtmi8.a: $(LIBOBJS) | ||
| 38 | $(AR) rcs $@ $^ | ||
| 39 | |||
| 40 | libtmi8.so: $(LIBOBJS) | ||
| 41 | $(CXX) -shared -fPIC -o $@ $^ $(CXXFLAGS) $(LDFLAGS) | ||
diff --git a/lib/libtmi8/flake.lock b/lib/libtmi8/flake.lock new file mode 100644 index 0000000..5ff7d5d --- /dev/null +++ b/lib/libtmi8/flake.lock | |||
| @@ -0,0 +1,58 @@ | |||
| 1 | { | ||
| 2 | "nodes": { | ||
| 3 | "flake-utils": { | ||
| 4 | "inputs": { | ||
| 5 | "systems": "systems" | ||
| 6 | }, | ||
| 7 | "locked": { | ||
| 8 | "lastModified": 1701680307, | ||
| 9 | "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", | ||
| 10 | "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", | ||
| 11 | "revCount": 88, | ||
| 12 | "type": "tarball", | ||
| 13 | "url": "https://api.flakehub.com/f/pinned/numtide/flake-utils/0.1.88+rev-4022d587cbbfd70fe950c1e2083a02621806a725/018c340d-3287-7c66-818b-f2f646a808e3/source.tar.gz" | ||
| 14 | }, | ||
| 15 | "original": { | ||
| 16 | "type": "tarball", | ||
| 17 | "url": "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz" | ||
| 18 | } | ||
| 19 | }, | ||
| 20 | "nixpkgs": { | ||
| 21 | "locked": { | ||
| 22 | "lastModified": 1701539137, | ||
| 23 | "narHash": "sha256-nVO/5QYpf1GwjvtpXhyxx5M3U/WN0MwBro4Lsk+9mL0=", | ||
| 24 | "rev": "933d7dc155096e7575d207be6fb7792bc9f34f6d", | ||
| 25 | "revCount": 552571, | ||
| 26 | "type": "tarball", | ||
| 27 | "url": "https://api.flakehub.com/f/pinned/NixOs/nixpkgs/0.2311.552571+rev-933d7dc155096e7575d207be6fb7792bc9f34f6d/018c3242-a93c-7779-8d13-ddba0a38d24a/source.tar.gz" | ||
| 28 | }, | ||
| 29 | "original": { | ||
| 30 | "type": "tarball", | ||
| 31 | "url": "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz" | ||
| 32 | } | ||
| 33 | }, | ||
| 34 | "root": { | ||
| 35 | "inputs": { | ||
| 36 | "flake-utils": "flake-utils", | ||
| 37 | "nixpkgs": "nixpkgs" | ||
| 38 | } | ||
| 39 | }, | ||
| 40 | "systems": { | ||
| 41 | "locked": { | ||
| 42 | "lastModified": 1681028828, | ||
| 43 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", | ||
| 44 | "owner": "nix-systems", | ||
| 45 | "repo": "default", | ||
| 46 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", | ||
| 47 | "type": "github" | ||
| 48 | }, | ||
| 49 | "original": { | ||
| 50 | "owner": "nix-systems", | ||
| 51 | "repo": "default", | ||
| 52 | "type": "github" | ||
| 53 | } | ||
| 54 | } | ||
| 55 | }, | ||
| 56 | "root": "root", | ||
| 57 | "version": 7 | ||
| 58 | } | ||
diff --git a/lib/libtmi8/flake.nix b/lib/libtmi8/flake.nix new file mode 100644 index 0000000..2ae7fc9 --- /dev/null +++ b/lib/libtmi8/flake.nix | |||
| @@ -0,0 +1,42 @@ | |||
# Flake for the oeuf-libtmi8 library. For every default system it exposes the
# library as a package, a development shell derived from that package, and a
# formatter.
{
  inputs = {
    nixpkgs.url = "https://flakehub.com/f/NixOs/nixpkgs/*.tar.gz";
    flake-utils.url = "https://flakehub.com/f/numtide/flake-utils/0.1.88.tar.gz";
  };

  outputs = { self, nixpkgs, flake-utils, ... }@inputs:
    # Evaluate the same output set once per system that flake-utils treats as
    # a default system.
    flake-utils.lib.eachDefaultSystem
      (system:
        let
          pkgs = import nixpkgs {
            inherit system;
            overlays = [ ];
          };

          # Use the stdenv attribute of the gcc13 package instead of pkgs.stdenv.
          inherit (pkgs.gcc13) stdenv;

          # The library itself, built and installed through its Makefile.
          oeuf-libtmi8 = stdenv.mkDerivation {
            name = "oeuf-libtmi8";
            src = pkgs.lib.cleanSource ./.;

            nativeBuildInputs = with pkgs; [ gcc13 ];
            buildInputs = with pkgs; [ arrow-cpp boost182 ];
            buildPhase = ''
              make libtmi8
            '';

            # NOTE(review): the store path is passed as DESTDIR; confirm that
            # the Makefile's install target does not add a further prefix.
            installPhase = ''
              make install DESTDIR="$out"
            '';
          };
        in
        {
          packages.oeuf-libtmi8 = oeuf-libtmi8;

          # Shell that receives the build inputs of the package above.
          devShells.default = pkgs.mkShell {
            inputsFrom = [ oeuf-libtmi8 ];
          };

          formatter = pkgs.nixpkgs-fmt;
        });
}
diff --git a/lib/libtmi8/include/tmi8/kv1_index.hpp b/lib/libtmi8/include/tmi8/kv1_index.hpp new file mode 100644 index 0000000..621acf6 --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_index.hpp | |||
| @@ -0,0 +1,135 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_LIBTMI8_KV1_INDEX_HPP | ||
| 4 | #define OEUF_LIBTMI8_KV1_INDEX_HPP | ||
| 5 | |||
| 6 | #include <unordered_map> | ||
| 7 | |||
| 8 | #include <boost/container_hash/hash.hpp> | ||
| 9 | |||
| 10 | #include <tmi8/kv1_types.hpp> | ||
| 11 | |||
| 12 | struct Kv1Index { | ||
| 13 | Kv1Records *records; | ||
| 14 | |||
| 15 | explicit Kv1Index(Kv1Records *records); | ||
| 16 | |||
| 17 | std::unordered_map< | ||
| 18 | Kv1OrganizationalUnit::Key, | ||
| 19 | Kv1OrganizationalUnit *, | ||
| 20 | boost::hash<Kv1OrganizationalUnit::Key>> organizational_units; | ||
| 21 | std::unordered_map< | ||
| 22 | Kv1HigherOrganizationalUnit::Key, | ||
| 23 | Kv1HigherOrganizationalUnit *, | ||
| 24 | boost::hash<Kv1HigherOrganizationalUnit::Key>> higher_organizational_units; | ||
| 25 | std::unordered_map< | ||
| 26 | Kv1UserStopPoint::Key, | ||
| 27 | Kv1UserStopPoint *, | ||
| 28 | boost::hash<Kv1UserStopPoint::Key>> user_stop_points; | ||
| 29 | std::unordered_map< | ||
| 30 | Kv1UserStopArea::Key, | ||
| 31 | Kv1UserStopArea *, | ||
| 32 | boost::hash<Kv1UserStopArea::Key>> user_stop_areas; | ||
| 33 | std::unordered_map< | ||
| 34 | Kv1TimingLink::Key, | ||
| 35 | Kv1TimingLink *, | ||
| 36 | boost::hash<Kv1TimingLink::Key>> timing_links; | ||
| 37 | std::unordered_map< | ||
| 38 | Kv1Link::Key, | ||
| 39 | Kv1Link *, | ||
| 40 | boost::hash<Kv1Link::Key>> links; | ||
| 41 | std::unordered_map< | ||
| 42 | Kv1Line::Key, | ||
| 43 | Kv1Line *, | ||
| 44 | boost::hash<Kv1Line::Key>> lines; | ||
| 45 | std::unordered_map< | ||
| 46 | Kv1Destination::Key, | ||
| 47 | Kv1Destination *, | ||
| 48 | boost::hash<Kv1Destination::Key>> destinations; | ||
| 49 | std::unordered_map< | ||
| 50 | Kv1JourneyPattern::Key, | ||
| 51 | Kv1JourneyPattern *, | ||
| 52 | boost::hash<Kv1JourneyPattern::Key>> journey_patterns; | ||
| 53 | std::unordered_map< | ||
| 54 | Kv1ConcessionFinancerRelation::Key, | ||
| 55 | Kv1ConcessionFinancerRelation *, | ||
| 56 | boost::hash<Kv1ConcessionFinancerRelation::Key>> concession_financer_relations; | ||
| 57 | std::unordered_map< | ||
| 58 | Kv1ConcessionArea::Key, | ||
| 59 | Kv1ConcessionArea *, | ||
| 60 | boost::hash<Kv1ConcessionArea::Key>> concession_areas; | ||
| 61 | std::unordered_map< | ||
| 62 | Kv1Financer::Key, | ||
| 63 | Kv1Financer *, | ||
| 64 | boost::hash<Kv1Financer::Key>> financers; | ||
| 65 | std::unordered_map< | ||
| 66 | Kv1JourneyPatternTimingLink::Key, | ||
| 67 | Kv1JourneyPatternTimingLink *, | ||
| 68 | boost::hash<Kv1JourneyPatternTimingLink::Key>> journey_pattern_timing_links; | ||
| 69 | std::unordered_map< | ||
| 70 | Kv1Point::Key, | ||
| 71 | Kv1Point *, | ||
| 72 | boost::hash<Kv1Point::Key>> points; | ||
| 73 | std::unordered_map< | ||
| 74 | Kv1PointOnLink::Key, | ||
| 75 | Kv1PointOnLink *, | ||
| 76 | boost::hash<Kv1PointOnLink::Key>> point_on_links; | ||
| 77 | std::unordered_map< | ||
| 78 | Kv1Icon::Key, | ||
| 79 | Kv1Icon *, | ||
| 80 | boost::hash<Kv1Icon::Key>> icons; | ||
| 81 | std::unordered_map< | ||
| 82 | Kv1Notice::Key, | ||
| 83 | Kv1Notice *, | ||
| 84 | boost::hash<Kv1Notice::Key>> notices; | ||
| 85 | std::unordered_map< | ||
| 86 | Kv1TimeDemandGroup::Key, | ||
| 87 | Kv1TimeDemandGroup *, | ||
| 88 | boost::hash<Kv1TimeDemandGroup::Key>> time_demand_groups; | ||
| 89 | std::unordered_map< | ||
| 90 | Kv1TimeDemandGroupRunTime::Key, | ||
| 91 | Kv1TimeDemandGroupRunTime *, | ||
| 92 | boost::hash<Kv1TimeDemandGroupRunTime::Key>> time_demand_group_run_times; | ||
| 93 | std::unordered_map< | ||
| 94 | Kv1PeriodGroup::Key, | ||
| 95 | Kv1PeriodGroup *, | ||
| 96 | boost::hash<Kv1PeriodGroup::Key>> period_groups; | ||
| 97 | std::unordered_map< | ||
| 98 | Kv1SpecificDay::Key, | ||
| 99 | Kv1SpecificDay *, | ||
| 100 | boost::hash<Kv1SpecificDay::Key>> specific_days; | ||
| 101 | std::unordered_map< | ||
| 102 | Kv1TimetableVersion::Key, | ||
| 103 | Kv1TimetableVersion *, | ||
| 104 | boost::hash<Kv1TimetableVersion::Key>> timetable_versions; | ||
| 105 | std::unordered_map< | ||
| 106 | Kv1PublicJourney::Key, | ||
| 107 | Kv1PublicJourney *, | ||
| 108 | boost::hash<Kv1PublicJourney::Key>> public_journeys; | ||
| 109 | std::unordered_map< | ||
| 110 | Kv1PeriodGroupValidity::Key, | ||
| 111 | Kv1PeriodGroupValidity *, | ||
| 112 | boost::hash<Kv1PeriodGroupValidity::Key>> period_group_validities; | ||
| 113 | std::unordered_map< | ||
| 114 | Kv1ExceptionalOperatingDay::Key, | ||
| 115 | Kv1ExceptionalOperatingDay *, | ||
| 116 | boost::hash<Kv1ExceptionalOperatingDay::Key>> exceptional_operating_days; | ||
| 117 | std::unordered_map< | ||
| 118 | Kv1ScheduleVersion::Key, | ||
| 119 | Kv1ScheduleVersion *, | ||
| 120 | boost::hash<Kv1ScheduleVersion::Key>> schedule_versions; | ||
| 121 | std::unordered_map< | ||
| 122 | Kv1PublicJourneyPassingTimes::Key, | ||
| 123 | Kv1PublicJourneyPassingTimes *, | ||
| 124 | boost::hash<Kv1PublicJourneyPassingTimes::Key>> public_journey_passing_times; | ||
| 125 | std::unordered_map< | ||
| 126 | Kv1OperatingDay::Key, | ||
| 127 | Kv1OperatingDay *, | ||
| 128 | boost::hash<Kv1OperatingDay::Key>> operating_days; | ||
| 129 | |||
| 130 | size_t size() const; | ||
| 131 | }; | ||
| 132 | |||
// Takes the index by mutable reference. NOTE(review): presumably resolves the
// cross-reference pointers (the p_* members) of the indexed records via the
// lookup tables in `index` -- confirm against the definition.
void kv1LinkRecords(Kv1Index &index);
| 134 | |||
| 135 | #endif // OEUF_LIBTMI8_KV1_INDEX_HPP | ||
diff --git a/lib/libtmi8/include/tmi8/kv1_lexer.hpp b/lib/libtmi8/include/tmi8/kv1_lexer.hpp new file mode 100644 index 0000000..df6a57c --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_lexer.hpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_LIBTMI8_KV1_LEXER_HPP | ||
| 4 | #define OEUF_LIBTMI8_KV1_LEXER_HPP | ||
| 5 | |||
| 6 | #include <cstdint> | ||
| 7 | #include <cstring> | ||
| 8 | #include <iostream> | ||
| 9 | #include <string> | ||
| 10 | #include <vector> | ||
| 11 | #include <variant> | ||
| 12 | |||
// Kinds of token produced by the KV1 lexer. Judging by the names: a single
// cell of a row, and the marker that ends a row.
enum Kv1TokenType {
  KV1_TOKEN_CELL,
  KV1_TOKEN_ROW_END,
};

// One lexed token: its kind together with the text that belongs to it.
struct Kv1Token {
  Kv1TokenType type;
  std::string data;
};
| 18 | |||
| 19 | struct Kv1Lexer { | ||
| 20 | std::vector<std::string> errors; | ||
| 21 | std::vector<Kv1Token> tokens; | ||
| 22 | |||
| 23 | explicit Kv1Lexer(std::string_view input); | ||
| 24 | |||
| 25 | void lex(); | ||
| 26 | |||
| 27 | private: | ||
| 28 | // Does not eat newline character. | ||
| 29 | void eatRestOfLine(); | ||
| 30 | void lexOptionalHeader(); | ||
| 31 | void lexOptionalComment(); | ||
| 32 | |||
| 33 | static bool isWhitespace(int c); | ||
| 34 | |||
| 35 | void readQuotedColumn(); | ||
| 36 | void readUnquotedColumn(); | ||
| 37 | void lexRow(); | ||
| 38 | // Returns true when a line ending was consumed. | ||
| 39 | bool eatWhitespace(); | ||
| 40 | |||
| 41 | std::string_view input; | ||
| 42 | std::string_view slice; | ||
| 43 | std::string colbuf; | ||
| 44 | }; | ||
| 45 | |||
| 46 | #endif // OEUF_LIBTMI8_KV1_LEXER_HPP | ||
diff --git a/lib/libtmi8/include/tmi8/kv1_parser.hpp b/lib/libtmi8/include/tmi8/kv1_parser.hpp new file mode 100644 index 0000000..ccd8ec6 --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_parser.hpp | |||
| @@ -0,0 +1,87 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_LIBTMI8_KV1_PARSER_HPP | ||
| 4 | #define OEUF_LIBTMI8_KV1_PARSER_HPP | ||
| 5 | |||
| 6 | #include <optional> | ||
| 7 | #include <string> | ||
| 8 | #include <string_view> | ||
| 9 | #include <unordered_map> | ||
| 10 | #include <vector> | ||
| 11 | |||
| 12 | #include <tmi8/kv1_lexer.hpp> | ||
| 13 | #include <tmi8/kv1_types.hpp> | ||
| 14 | |||
| 15 | struct Kv1Parser { | ||
| 16 | explicit Kv1Parser(std::vector<Kv1Token> tokens, Kv1Records &parse_into); | ||
| 17 | |||
| 18 | void parse(); | ||
| 19 | |||
| 20 | private: | ||
| 21 | // Method pointer to a method of Kv1Parser (i.e. a function that takes | ||
| 22 | // 'this'; is not static) that takes no arguments and also does not return | ||
| 23 | // anything. | ||
| 24 | using ParseFunc = void (Kv1Parser::*)(); | ||
| 25 | static const std::unordered_map<std::string_view, ParseFunc> type_parsers; | ||
| 26 | |||
| 27 | bool atEnd() const; | ||
| 28 | void eatRowEnds(); | ||
| 29 | const Kv1Token *cur() const; | ||
| 30 | const std::string *eatCell(std::string_view parsing_what); | ||
| 31 | std::string parseHeader(); | ||
| 32 | void eatRestOfRow(); | ||
| 33 | |||
| 34 | void requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value); | ||
| 35 | std::optional<bool> requireBoolean(std::string_view field, bool mandatory, std::string_view value); | ||
| 36 | std::optional<double> requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value); | ||
| 37 | std::optional<RgbColor> requireRgbColor(std::string_view field, bool mandatory, std::string_view value); | ||
| 38 | std::optional<double> requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value); | ||
| 39 | |||
| 40 | std::string eatString(std::string_view field, bool mandatory, size_t max_length); | ||
| 41 | std::optional<bool> eatBoolean(std::string_view field, bool mandatory); | ||
| 42 | std::optional<double> eatNumber(std::string_view field, bool mandatory, size_t max_digits); | ||
| 43 | std::optional<RgbColor> eatRgbColor(std::string_view field, bool mandatory); | ||
| 44 | std::optional<double> eatRdCoord(std::string_view field, bool mandatory, size_t min_digits); | ||
| 45 | |||
| 46 | void parseOrganizationalUnit(); | ||
| 47 | void parseHigherOrganizationalUnit(); | ||
| 48 | void parseUserStopPoint(); | ||
| 49 | void parseUserStopArea(); | ||
| 50 | void parseTimingLink(); | ||
| 51 | void parseLink(); | ||
| 52 | void parseLine(); | ||
| 53 | void parseDestination(); | ||
| 54 | void parseJourneyPattern(); | ||
| 55 | void parseConcessionFinancerRelation(); | ||
| 56 | void parseConcessionArea(); | ||
| 57 | void parseFinancer(); | ||
| 58 | void parseJourneyPatternTimingLink(); | ||
| 59 | void parsePoint(); | ||
| 60 | void parsePointOnLink(); | ||
| 61 | void parseIcon(); | ||
| 62 | void parseNotice(); | ||
| 63 | void parseNoticeAssignment(); | ||
| 64 | void parseTimeDemandGroup(); | ||
| 65 | void parseTimeDemandGroupRunTime(); | ||
| 66 | void parsePeriodGroup(); | ||
| 67 | void parseSpecificDay(); | ||
| 68 | void parseTimetableVersion(); | ||
| 69 | void parsePublicJourney(); | ||
| 70 | void parsePeriodGroupValidity(); | ||
| 71 | void parseExceptionalOperatingDay(); | ||
| 72 | void parseScheduleVersion(); | ||
| 73 | void parsePublicJourneyPassingTimes(); | ||
| 74 | void parseOperatingDay(); | ||
| 75 | |||
| 76 | size_t pos = 0; | ||
| 77 | std::vector<Kv1Token> tokens; | ||
| 78 | const std::chrono::time_zone *amsterdam = std::chrono::locate_zone("Europe/Amsterdam"); | ||
| 79 | |||
| 80 | public: | ||
| 81 | std::vector<std::string> warns; | ||
| 82 | std::vector<std::string> global_errors; | ||
| 83 | std::vector<std::string> record_errors; | ||
| 84 | Kv1Records &records; | ||
| 85 | }; | ||
| 86 | |||
| 87 | #endif // OEUF_LIBTMI8_KV1_PARSER_HPP | ||
diff --git a/lib/libtmi8/include/tmi8/kv1_types.hpp b/lib/libtmi8/include/tmi8/kv1_types.hpp new file mode 100644 index 0000000..d4a0760 --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv1_types.hpp | |||
| @@ -0,0 +1,1528 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_LIBTMI8_KV1_TYPES_HPP | ||
| 4 | #define OEUF_LIBTMI8_KV1_TYPES_HPP | ||
| 5 | |||
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <variant>
#include <vector>
| 11 | |||
// Forward declarations of every KV1 record type, so that code further down can
// name these types (as vector elements and as pointer targets) before their
// definitions appear.
struct Kv1OrganizationalUnit;
struct Kv1HigherOrganizationalUnit;
struct Kv1UserStopPoint;
struct Kv1UserStopArea;
struct Kv1TimingLink;
struct Kv1Link;
struct Kv1Line;
struct Kv1Destination;
struct Kv1JourneyPattern;
struct Kv1ConcessionFinancerRelation;
struct Kv1ConcessionArea;
struct Kv1Financer;
struct Kv1JourneyPatternTimingLink;
struct Kv1Point;
struct Kv1PointOnLink;
struct Kv1Icon;
struct Kv1Notice;
struct Kv1NoticeAssignment;
struct Kv1TimeDemandGroup;
struct Kv1TimeDemandGroupRunTime;
struct Kv1PeriodGroup;
struct Kv1SpecificDay;
struct Kv1TimetableVersion;
struct Kv1PublicJourney;
struct Kv1PeriodGroupValidity;
struct Kv1ExceptionalOperatingDay;
struct Kv1ScheduleVersion;
struct Kv1PublicJourneyPassingTimes;
struct Kv1OperatingDay;
| 41 | |||
// Storage for KV1 records: one vector per KV1 record type.
//
// The element types are only forward-declared at this point. std::vector
// accepts an incomplete element type (since C++17) as long as the type is
// complete before any member of the vector is used.
struct Kv1Records {
  std::vector<Kv1OrganizationalUnit> organizational_units;
  std::vector<Kv1HigherOrganizationalUnit> higher_organizational_units;
  std::vector<Kv1UserStopPoint> user_stop_points;
  std::vector<Kv1UserStopArea> user_stop_areas;
  std::vector<Kv1TimingLink> timing_links;
  std::vector<Kv1Link> links;
  std::vector<Kv1Line> lines;
  std::vector<Kv1Destination> destinations;
  std::vector<Kv1JourneyPattern> journey_patterns;
  std::vector<Kv1ConcessionFinancerRelation> concession_financer_relations;
  std::vector<Kv1ConcessionArea> concession_areas;
  std::vector<Kv1Financer> financers;
  std::vector<Kv1JourneyPatternTimingLink> journey_pattern_timing_links;
  std::vector<Kv1Point> points;
  std::vector<Kv1PointOnLink> point_on_links;
  std::vector<Kv1Icon> icons;
  std::vector<Kv1Notice> notices;
  std::vector<Kv1NoticeAssignment> notice_assignments;
  std::vector<Kv1TimeDemandGroup> time_demand_groups;
  std::vector<Kv1TimeDemandGroupRunTime> time_demand_group_run_times;
  std::vector<Kv1PeriodGroup> period_groups;
  std::vector<Kv1SpecificDay> specific_days;
  std::vector<Kv1TimetableVersion> timetable_versions;
  std::vector<Kv1PublicJourney> public_journeys;
  std::vector<Kv1PeriodGroupValidity> period_group_validities;
  std::vector<Kv1ExceptionalOperatingDay> exceptional_operating_days;
  std::vector<Kv1ScheduleVersion> schedule_versions;
  std::vector<Kv1PublicJourneyPassingTimes> public_journey_passing_times;
  std::vector<Kv1OperatingDay> operating_days;

  // Declared here, defined elsewhere. Presumably the total number of records
  // across all vectors -- confirm in the implementation.
  size_t size() const;
};
| 75 | |||
| 76 | // These definitions implement TMI8, KV1 Dienstregeling (Timetable) version | ||
| 77 | // 8.3.0.2 (release), published by BISON on January 8, 2020. | ||
| 78 | // (Filename: tmi8 dienstregeling (kv 1) v8.3.0.2, release.docx) | ||
| 79 | // | ||
| 80 | // This specification and other BISON specifications, as well as other | ||
| 81 | // supplementary information, can be found on BISON's website: | ||
| 82 | // https://bison.dova.nu/ | ||
| 83 | // | ||
| 84 | // The specification that was used to create these definitions was downloaded | ||
| 85 | // from the following address: | ||
| 86 | // https://bison.dova.nu/sites/default/files/bestanden/tmi8_dienstregeling_kv_1_v8.3.0.2_release.pdf | ||
| 87 | // | ||
| 88 | // The KV1 table structure and the corresponding documentation describing the | ||
| 89 | // relevant tables and fields, as presented here, is derived from the original | ||
| 90 | // specification. Most documentation is a manually translated version of the | ||
| 91 | // documentation as present in the specification. The specification is licensed | ||
| 92 | // under CC BY-ND 3.0. The exact text of this license can be found on | ||
| 93 | // https://creativecommons.org/licenses/by-nd/3.0/nl/. | ||
| 94 | |||
// KV1 Table 1: Organizational Unit [ORUN] (MANDATORY)
//
// A collection of trips with the same validity features. An organizational
// unit can be part of a 'higher' unit.
//
// An organizational unit is defined as a unit for which the planning of trips
// is compiled. When defining the organizational units, it is important that
// all trips within the package have a homogeneous validity (school holidays,
// shopping Sundays, foreign bank holidays).
//
// This table is part of the core data tables, which are common for all KV1
// variants.
struct Kv1OrganizationalUnit {
  struct Key {
    // Mandatory (key), at most 10 characters. Transport operator (from list as
    // defined in BISON enumeration E1).
    std::string data_owner_code;
    // Mandatory (key), at most 10 characters.
    std::string organizational_unit_code;

    explicit Key(std::string data_owner_code,
                 std::string organizational_unit_code);
  };

  Key key;
  // Mandatory, at most 50 characters.
  std::string name;
  // Mandatory, at most 10 characters.
  std::string organizational_unit_type;
  // Optional, at most 255 characters.
  std::string description;
};
| 127 | |||
| 128 | // KV1 Table 2: Higher Organizational Unit [ORUNORUN] (OPTIONAL) | ||
| 129 | // | ||
| 130 | // An in the hierarchy higher-ordered organizational unit for the purpose of | ||
| 131 | // (among others) recording of (deviating) validities on the high level. | ||
| 132 | // | ||
| 133 | // This table is part of the core data tables, which are common for all KV1 | ||
| 134 | // variants. | ||
| 135 | struct Kv1HigherOrganizationalUnit { | ||
| 136 | struct Key { | ||
| 137 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 138 | // defined in BISON enumeration E1). | ||
| 139 | std::string data_owner_code; | ||
| 140 | // Mandatory (key), at most 10 characters. Parent, higher organizational unit | ||
| 141 | // that is referred to. | ||
| 142 | std::string organizational_unit_code_parent; | ||
| 143 | // Mandatory (key), at most 10 characters. Child, lower organizational unit. | ||
| 144 | std::string organizational_unit_code_child; | ||
| 145 | // Mandatory (key), at most 10 characters. [YYYY-MM-DD] Starting date of the | ||
| 146 | // hierarchical relation (can be a fixed value, e.g. 2006-12-31). | ||
| 147 | std::chrono::year_month_day valid_from; | ||
| 148 | |||
| 149 | explicit Key(std::string data_owner_code, | ||
| 150 | std::string organizational_unit_code_parent, | ||
| 151 | std::string organizational_unit_code_child, | ||
| 152 | std::chrono::year_month_day valid_from); | ||
| 153 | }; | ||
| 154 | |||
| 155 | Key key; | ||
| 156 | |||
| 157 | Kv1OrganizationalUnit *p_organizational_unit_parent = nullptr; | ||
| 158 | Kv1OrganizationalUnit *p_organizational_unit_child = nullptr; | ||
| 159 | }; | ||
| 160 | |||
| 161 | // KV1 Table 3: User Stop Point [USRSTOP] | ||
| 162 | // | ||
| 163 | // Stop or other point (e.g. Bridge, functioning as info for the bridge keeper) | ||
| 164 | // for which times are recorded in the planning system of the transit operator. | ||
| 165 | // | ||
| 166 | // Coordinates of a UserStopPoint are recorded as Point. When defining | ||
| 167 | // UserStopPoints, it is important that the coordinates can be unambiguously | ||
| 168 | // and verifiably recorded. For a stop, the coordinates of the stop sign are | ||
| 169 | // recorded. If there is no stop sign, the end of the bus stop (where the bus | ||
| 170 | // normally halts) is recorded as the coordinate of the stop. | ||
| 171 | // | ||
| 172 | // This table is part of the core data tables, which are common for all KV1 | ||
| 173 | // variants. | ||
| 174 | struct Kv1UserStopPoint { | ||
| 175 | struct Key { | ||
| 176 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 177 | // defined in BISON enumeration E1). | ||
| 178 | std::string data_owner_code; | ||
| 179 | // Mandatory (key), at most 10 characters. Stop number in domain of operator. | ||
| 180 | std::string user_stop_code; | ||
| 181 | |||
| 182 | explicit Key(std::string data_owner_code, | ||
| 183 | std::string user_stop_code); | ||
| 184 | }; | ||
| 185 | |||
| 186 | Key key; | ||
| 187 | // Optional, at most 10 characters. Stop number in domain of integrator, | ||
| 188 | // (initially) equal to UserStopCode. | ||
| 189 | std::string timing_point_code; | ||
| 190 | // Mandatory, at most 5 characters. Boolean indicator whether USRSTOP is used | ||
| 191 | // as boarding stop, true by default. False for e.g. dummy stop for bridge | ||
| 192 | // keeper. | ||
| 193 | bool get_in = true; | ||
| 194 | // Mandatory, at most 5 characters. Boolean indicator whether USRSTOP is used | ||
| 195 | // as alighting stop. | ||
| 196 | bool get_out = false; | ||
| 197 | // Mandatory, at most 50 characters. Stop name. | ||
| 198 | std::string name; | ||
| 199 | // Mandatory, at most 50 characters. Town name. | ||
| 200 | std::string town; | ||
| 201 | // Optional, at most 10 characters. Reference to StopArea of which the | ||
| 202 | // UserStop is part. | ||
| 203 | std::string user_stop_area_code; | ||
| 204 | // Mandatory, at most 10 characters. Platform indication/letter. The '-' | ||
| 205 | // value is used to indication that this is not applicable. | ||
| 206 | std::string stop_side_code; | ||
| 207 | // Mandatory, at most 5 digits. Minimal stop duration for boarding and | ||
| 208 | // alighting, zero by default. In seconds. | ||
| 209 | double minimal_stop_time_s = 0; | ||
| 210 | // Optional, at most 3 digits. Length of stop platform. | ||
| 211 | std::optional<double> stop_side_length; | ||
| 212 | // Optional, at most 255 characters. | ||
| 213 | std::string description; | ||
| 214 | // Mandatory, at most 10 characters. USRSTOPTYPE. Indicates the stop kind. | ||
| 215 | std::string user_stop_type; | ||
| 216 | // Optional, at most 30 characters. Nationally unique stop number. | ||
| 217 | std::string quay_code; | ||
| 218 | |||
| 219 | Kv1UserStopArea *p_user_stop_area = nullptr; | ||
| 220 | Kv1Point *p_point = nullptr; | ||
| 221 | }; | ||
| 222 | |||
// KV1 Table 4: User Stop Area [USRSTAR]
//
// A StopArea is a collection of stops, which have the same name for passengers
// and logically belong together. (E.g. a bus station or transfer point.) Stops
// lying opposite each other can also form a StopArea.
//
// Used for display of all stops in a stop area on an overview display and for
// announcement of stop names (stops on both sides of the street share the same
// name).
//
// This table is part of the core data tables, which are common for all KV1
// variants.
struct Kv1UserStopArea {
  struct Key {
    // Mandatory (key), at most 10 characters. Transport operator (from list as
    // defined in BISON enumeration E1).
    std::string data_owner_code;
    // Mandatory (key), at most 10 characters. Code of StopArea following coding
    // of operator, e.g. PlaceCode.
    std::string user_stop_area_code;

    explicit Key(std::string data_owner_code,
                 std::string user_stop_area_code);
  };

  Key key;
  // Mandatory, at most 50 characters.
  std::string name;
  // Mandatory, at most 50 characters.
  std::string town;
  // Mandatory, at most 255 characters.
  std::string description;
};
| 256 | |||
| 257 | // KV1 Table 5: Timing Link [TILI] | ||
| 258 | // | ||
| 259 | // Link between two points which have the feature 'stop' or 'timing point'. A | ||
| 260 | // Timing Link is set between all stops and other timing points (e.g. for the | ||
| 261 | // bridge) which make part of a journey pattern. | ||
| 262 | // | ||
| 263 | // This table is part of the core data tables, which are common for all KV1 | ||
| 264 | // variants. | ||
| 265 | struct Kv1TimingLink { | ||
| 266 | struct Key { | ||
| 267 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 268 | // defined in BISON enumeration E1). | ||
| 269 | std::string data_owner_code; | ||
| 270 | // Mandatory (key), at most 10 characters. Stop number in the domain of | ||
| 271 | // DataOwner (here: the operator). | ||
| 272 | std::string user_stop_code_begin; | ||
| 273 | // Mandatory (key), at most 10 characters. Stop number in the domain of | ||
| 274 | // DataOwner (here: the operator). | ||
| 275 | std::string user_stop_code_end; | ||
| 276 | |||
| 277 | explicit Key(std::string data_owner_code, | ||
| 278 | std::string user_stop_code_begin, | ||
| 279 | std::string user_stop_code_end); | ||
| 280 | }; | ||
| 281 | |||
| 282 | Key key; | ||
| 283 | // Optional, at most 5 digits. Minimal trip time (in seconds). | ||
| 284 | std::optional<double> minimal_drive_time_s; | ||
| 285 | // Optional, at most 255 characters. | ||
| 286 | std::string description; | ||
| 287 | |||
| 288 | Kv1UserStopPoint *p_user_stop_begin = nullptr; | ||
| 289 | Kv1UserStopPoint *p_user_stop_end = nullptr; | ||
| 290 | }; | ||
| 291 | |||
| 292 | // KV1 Table 6: Link [LINK] | ||
| 293 | // | ||
| 294 | // A route link describes the connection between to points on the physical path | ||
| 295 | // of a route. | ||
| 296 | // | ||
| 297 | // This table is part of the core data tables, which are common for all KV1 | ||
| 298 | // variants. | ||
| 299 | struct Kv1Link { | ||
| 300 | struct Key { | ||
| 301 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 302 | // defined in BISON enumeration E1). | ||
| 303 | std::string data_owner_code; | ||
| 304 | // Mandatory (key), at most 10 characters. Stop code in the domain of | ||
| 305 | // DataOwner (here: the operator). | ||
| 306 | std::string user_stop_code_begin; | ||
| 307 | // Mandatory (key), at most 10 characters. Stop code in the domain of | ||
| 308 | // DataOwner (here: the operator). | ||
| 309 | std::string user_stop_code_end; | ||
| 310 | // Mandatory (key), at most 5 characters. Modality for which the distance | ||
| 311 | // applies, see BISON enumeration E9. | ||
| 312 | // TODO: Check if BISON enumeration E9 can be put into an enum. | ||
| 313 | std::string transport_type; | ||
| 314 | |||
| 315 | explicit Key(std::string data_owner_code, | ||
| 316 | std::string user_stop_code_begin, | ||
| 317 | std::string user_stop_code_end, | ||
| 318 | std::string transport_type); | ||
| 319 | }; | ||
| 320 | |||
| 321 | Key key; | ||
| 322 | // Mandatory, at most 6 digits. Length of the link (in meters). | ||
| 323 | double distance = 0; | ||
| 324 | // Optional, at most 255 characters. | ||
| 325 | std::string description; | ||
| 326 | |||
| 327 | Kv1UserStopPoint *p_user_stop_begin = nullptr; | ||
| 328 | Kv1UserStopPoint *p_user_stop_end = nullptr; | ||
| 329 | }; | ||
| 330 | |||
// 24-bit RGB color with one byte per channel.
//
// Every channel carries its own default member initializer. In a declarator
// list such as `uint8_t r, g, b = 0;` the initializer belongs to `b` alone,
// which leaves `r` and `g` indeterminate in a default-initialized object.
// The type remains an aggregate, so `RgbColor{r, g, b}` keeps working.
struct RgbColor {
  uint8_t r = 0;
  uint8_t g = 0;
  uint8_t b = 0;
};
| 334 | |||
| 335 | // KV1 Table 7: Line [LINE] | ||
| 336 | // | ||
| 337 | // A line is a collection of routes/journey patterns which is publically known | ||
| 338 | // under a shared number. | ||
| 339 | // | ||
| 340 | // This table is part of the core data tables, which are common for all KV1 | ||
| 341 | // variants. | ||
| 342 | struct Kv1Line { | ||
| 343 | struct Key { | ||
| 344 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 345 | // defined in BISON enumeration E1). | ||
| 346 | std::string data_owner_code; | ||
| 347 | // Mandatory (key), at most 10 characters. Unique system line number in the | ||
| 348 | // domain of DataOwner. | ||
| 349 | std::string line_planning_number; | ||
| 350 | |||
| 351 | explicit Key(std::string data_owner_code, | ||
| 352 | std::string line_planning_number); | ||
| 353 | }; | ||
| 354 | |||
| 355 | Key key; | ||
| 356 | // Mandatory, at most 4 characters. Line number for the public, incl. S/N | ||
| 357 | // indications. | ||
| 358 | std::string line_public_number; | ||
| 359 | // Mandatory, at most 50 characters. | ||
| 360 | std::string line_name; | ||
| 361 | // Mandatory, at most three digits. Should be in the range [0, 400). | ||
| 362 | // Only processing Connexxion's KV1 export, however, shows us that this range | ||
| 363 | // constrained is not honored in practice. That is why we also don't care. | ||
| 364 | short line_ve_tag_number = 0; | ||
| 365 | // Optional, at most 255 characters. | ||
| 366 | std::string description; | ||
| 367 | // Mandatory, at most 5 characters. Modality, see BISON enumeration E9. | ||
| 368 | // TODO: Check if BISON enumeration E9 can be put into an enum. | ||
| 369 | std::string transport_type; | ||
| 370 | // Optional, at most 4 digits. Symbol / image for the line. Reference to ICON | ||
| 371 | // table. | ||
| 372 | std::optional<short> line_icon; | ||
| 373 | // Optional, at most four characters. Background color for the line. | ||
| 374 | // Hexadecimal representation following RGB coding. Always six characters | ||
| 375 | // (RRGGBB), only numbers and/or capital letters. | ||
| 376 | std::optional<RgbColor> line_color; | ||
| 377 | // Optional, at most four characters. Foreground color for the line. | ||
| 378 | // Hexadecimal representation following RGB coding. Always six characters | ||
| 379 | // (RRGGBB), only numbers and/or capital letters. | ||
| 380 | std::optional<RgbColor> line_text_color; | ||
| 381 | |||
| 382 | Kv1Icon *p_line_icon = nullptr; | ||
| 383 | }; | ||
| 384 | |||
| 385 | // KV1 Table 8: Destination [DEST] | ||
| 386 | // | ||
| 387 | // A destination shows the place/district/description of the route for the | ||
| 388 | // passenger. Intermediate and detail destinations of a journey pattern are | ||
| 389 | // shown under a single destination code, together with the primary destination. | ||
| 390 | // | ||
| 391 | // This table is part of the core data tables, which are common for all KV1 | ||
| 392 | // variants. | ||
| 393 | struct Kv1Destination { | ||
| 394 | struct Key { | ||
| 395 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 396 | // defined in BISON enumeration E1). | ||
| 397 | std::string data_owner_code; | ||
| 398 | // Mandatory (key), at most 10 characters. JOPATILI records refer to it. | ||
| 399 | std::string dest_code; | ||
| 400 | |||
| 401 | explicit Key(std::string data_owner_code, | ||
| 402 | std::string dest_code); | ||
| 403 | }; | ||
| 404 | |||
| 405 | Key key; | ||
| 406 | // Mandatory, at most 50 characters. Full destination (e.g. compiled from | ||
| 407 | // primary, detail or intermediate destination). | ||
| 408 | std::string dest_name_full; | ||
| 409 | // Mandatory, at most 24 characters. Primary / intermediate destination in | ||
| 410 | // enumeration / final destination if 1 line is used. | ||
| 411 | std::string dest_name_main; | ||
| 412 | // Optional, at most 24 characters. Detail/secondary or intermediate | ||
| 413 | // destination for primary destination, final destination (for intermediate | ||
| 414 | // destination on line 1). | ||
| 415 | std::string dest_name_detail; | ||
| 416 | // Mandatory, at most 5 characters. Boolean which indicates whether | ||
| 417 | // DestNameDetail must always be shown (e.g. because this contains an | ||
| 418 | // important intermediate destination). | ||
| 419 | bool relevant_dest_name_detail = false; | ||
| 420 | // Mandatory, at most 21 characters. Primary destination in 21 characters. | ||
| 421 | std::string dest_name_main_21; | ||
| 422 | // Optional, at most 21 characters. Detail/secondary/intermediate destination | ||
| 423 | // in 21 characters. | ||
| 424 | std::string dest_name_detail_21; | ||
| 425 | // Mandatory, at most 19 characters. Primary destination in 19 characters. | ||
| 426 | std::string dest_name_main_19; | ||
| 427 | // Optional, at most 19 characters. Detail/secondary/intermediate destination | ||
| 428 | // in 19 characters. | ||
| 429 | std::string dest_name_detail_19; | ||
| 430 | // Mandatory, at most 16 characters. Primary destination in 16 characters. | ||
| 431 | std::string dest_name_main_16; | ||
| 432 | // Optional, at most 16 characters. Detail/secondary/intermediate destination | ||
| 433 | // in 16 characters. | ||
| 434 | std::string dest_name_detail_16; | ||
| 435 | // Optional, at most 4 digits. Symbol/image for the destination. Reference to | ||
| 436 | // the ICON table. | ||
| 437 | std::optional<short> dest_icon; | ||
| 438 | // Optional, at most 6 characters. Background color for the destination. | ||
| 439 | // Hexadecimal representation following RGB coding. Always six characters | ||
| 440 | // (RRGGBB), only digits and/or capital letters. | ||
| 441 | std::optional<RgbColor> dest_color; | ||
| 442 | // Optional, at most 30 characters (sic). Foreground color for the | ||
| 443 | // destination. Hexadecimal representation following RGB coding. Always six | ||
| 444 | // characters (RRGGBB), only digits and/or capital letters. | ||
| 445 | std::optional<RgbColor> dest_text_color; | ||
| 446 | }; | ||
| 447 | |||
| 448 | // KV1 Table 9: Journey Pattern [JOPA] | ||
| 449 | // | ||
| 450 | // The journey pattern describes the route from start to end point as an ordered | ||
| 451 | // list of stops and links between stops/timing points. | ||
| 452 | // | ||
| 453 | // This table is part of the core data tables, which are common for all KV1 | ||
| 454 | // variants. | ||
| 455 | struct Kv1JourneyPattern { | ||
| 456 | struct Key { | ||
| 457 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 458 | // defined in BISON enumeration E1). | ||
| 459 | std::string data_owner_code; | ||
| 460 | // Mandatory (key), at most 10 characters. Identifies the line (see p_line). | ||
| 461 | std::string line_planning_number; | ||
| 462 | // Mandatory (key), at most 10 characters. JOPATILI records refer to it. | ||
| 463 | std::string journey_pattern_code; | ||
| 464 | |||
| 465 | explicit Key(std::string data_owner_code, | ||
| 466 | std::string line_planning_number, | ||
| 467 | std::string journey_pattern_code); | ||
| 468 | }; | ||
| 469 | |||
| 470 | Key key; | ||
| 471 | // Mandatory, at most 10 characters. Refers to a journey pattern type | ||
| 472 | // (JOPATYPE). | ||
| 473 | std::string journey_pattern_type; | ||
| 474 | // Mandatory, at most 1 character. One of [1, 2, A, B]. Defaults to 0 (NUL), | ||
| 475 | // which is not one of the valid values. | ||
| 476 | char direction = 0; | ||
| 477 | // Optional, at most 255 characters. | ||
| 478 | std::string description; | ||
| 479 | |||
| 480 | Kv1Line *p_line = nullptr; | ||
| 481 | }; | ||
| 481 | |||
| 482 | // KV1 Table 10: Concession Financer Relation [CONFINREL] | ||
| 483 | // | ||
| 484 | // Concession financer relation (mainly parcel). Smallest unit for which data | ||
| 485 | // about a concession can be captured in relation to a financer and/or | ||
| 486 | // concession. | ||
| 487 | // | ||
| 488 | // This table is part of the core data tables, which are common for all KV1 | ||
| 489 | // variants. | ||
| 490 | struct Kv1ConcessionFinancerRelation { | ||
| 491 | struct Key { | ||
| 492 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 493 | // defined in BISON enumeration E1). | ||
| 494 | std::string data_owner_code; | ||
| 495 | // Mandatory (key), at most 10 characters. Parcel code. JOPATILI records refer to it. | ||
| 496 | std::string con_fin_rel_code; | ||
| 497 | |||
| 498 | explicit Key(std::string data_owner_code, | ||
| 499 | std::string con_fin_rel_code); | ||
| 500 | }; | ||
| 501 | |||
| 502 | Key key; | ||
| 503 | // Mandatory, at most 10 characters. Concession code. Refers to the CONAREA table (see p_concession_area). | ||
| 504 | std::string concession_area_code; | ||
| 505 | // Optional, at most 10 characters. Code of financer/client of the parcel. Refers to the FINANCER table (see p_financer). | ||
| 506 | std::string financer_code; | ||
| 507 | |||
| 508 | Kv1ConcessionArea *p_concession_area = nullptr; | ||
| 509 | Kv1Financer *p_financer = nullptr; | ||
| 510 | }; | ||
| 511 | |||
| 512 | // KV1 Table 11: Concession Area [CONAREA] | ||
| 513 | // | ||
| 514 | // Concession (area). Referred to from CONFINREL through its concession area code. | ||
| 515 | // | ||
| 516 | // This table is part of the core data tables, which are common for all KV1 | ||
| 517 | // variants. | ||
| 518 | struct Kv1ConcessionArea { | ||
| 519 | struct Key { | ||
| 520 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 521 | // defined in BISON enumeration E1). | ||
| 522 | std::string data_owner_code; | ||
| 523 | // Mandatory (key), at most 10 characters. Code of the concession. | ||
| 524 | std::string concession_area_code; | ||
| 525 | |||
| 526 | explicit Key(std::string data_owner_code, | ||
| 527 | std::string concession_area_code); | ||
| 528 | }; | ||
| 529 | |||
| 530 | Key key; | ||
| 531 | // Mandatory, at most 255 characters. | ||
| 532 | std::string description; | ||
| 533 | }; | ||
| 534 | |||
| 535 | // KV1 Table 12: Financer [FINANCER] (OPTIONAL) | ||
| 536 | // | ||
| 537 | // Financer of a parcel. Referred to from CONFINREL through its financer code. | ||
| 538 | // | ||
| 539 | // This table is part of the core data tables, which are common for all KV1 | ||
| 540 | // variants. | ||
| 541 | struct Kv1Financer { | ||
| 542 | struct Key { | ||
| 543 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 544 | // defined in BISON enumeration E1). | ||
| 545 | std::string data_owner_code; | ||
| 546 | // Mandatory (key), at most 10 characters. Code of the financer. | ||
| 547 | std::string financer_code; | ||
| 548 | |||
| 549 | explicit Key(std::string data_owner_code, | ||
| 550 | std::string financer_code); | ||
| 551 | }; | ||
| 552 | |||
| 553 | Key key; | ||
| 554 | // Mandatory, at most 255 characters. | ||
| 555 | std::string description; | ||
| 556 | }; | ||
| 557 | |||
| 558 | // KV1 Table 13: Journey Pattern Timing Link [JOPATILI] | ||
| 559 | // | ||
| 560 | // Compilation of journey pattern from logical links (between pairs of | ||
| 561 | // stops/timing points). Features such as the destination code, the public line | ||
| 562 | // number, the concession financer relation (parcel) and product formula are | ||
| 563 | // set per connection. Moreover, a color and/or image linked to the line | ||
| 564 | // destination and the use of the (first) stop as boarding/alighting stop can | ||
| 565 | // be set per link. | ||
| 566 | // | ||
| 567 | // Timing Link: A timing link is a stop, set by the transit operator, where a | ||
| 568 | // bus / public transit vehicle may never depart earlier than set in the | ||
| 569 | // timetable. | ||
| 570 | // | ||
| 571 | // A logical link may never occur more than once in a journey pattern. | ||
| 572 | // Therefore, the combination of LinePlanningNumber, JourneyPatternCode, | ||
| 573 | // UserStopCodeBegin and UserStopCodeEnd must be unique in JOPATILI. | ||
| 574 | // | ||
| 575 | // The value of GetIn and GetOut are normally copied from the corresponding | ||
| 576 | // stop in the USRSTOP table, but can be overruled per journey pattern if so | ||
| 577 | // desired. | ||
| 578 | // | ||
| 579 | // An Icon or (Text)Color set here overrules the general value of the | ||
| 580 | // corresponding line (Line) or destination (Destination). | ||
| 581 | // | ||
| 582 | // A value of ShowFlexibleTrip or ProductFormulaType in PUJO or PUJOPASS | ||
| 583 | // overrules the value in JOPATILI. | ||
| 584 | // | ||
| 585 | // This table is part of the core data tables, which are common for all KV1 | ||
| 586 | // variants. | ||
| 587 | struct Kv1JourneyPatternTimingLink { | ||
| 588 | struct Key { | ||
| 589 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 590 | // defined in BISON enumeration E1). | ||
| 591 | std::string data_owner_code; | ||
| 592 | // Mandatory (key), at most 10 characters. | ||
| 593 | std::string line_planning_number; | ||
| 594 | // Mandatory (key), at most 10 characters. | ||
| 595 | std::string journey_pattern_code; | ||
| 596 | // Mandatory (key), at most 3 digits. | ||
| 597 | short timing_link_order = 0; | ||
| 598 | |||
| 599 | explicit Key(std::string data_owner_code, | ||
| 600 | std::string line_planning_number, | ||
| 601 | std::string journey_pattern_code, | ||
| 602 | short timing_link_order); | ||
| 603 | }; | ||
| 604 | |||
| 605 | Key key; | ||
| 606 | // Mandatory, at most 10 characters. Stop number in the domain of the | ||
| 607 | // DataOwner (here: the transit operator). | ||
| 608 | std::string user_stop_code_begin; | ||
| 609 | // Mandatory, at most 10 characters. Stop number in the domain of the | ||
| 610 | // DataOwner (here: the transit operator). | ||
| 611 | std::string user_stop_code_end; | ||
| 612 | // Mandatory, at most 10 characters. Concession financer relation / parcel | ||
| 613 | // (smallest unit). | ||
| 614 | std::string con_fin_rel_code; | ||
| 615 | // Mandatory, at most 10 characters. The destination (incl. intermediate | ||
| 616 | // destinations) as these are shown at the first stop of the journey pattern | ||
| 617 | // link. | ||
| 618 | std::string dest_code; | ||
| 619 | // Mandatory, at most 5 characters. Boolean which indicates whether the first | ||
| 620 | // stop of the connection is a timing stop. Indicator is at least "true" at | ||
| 621 | // first stop of a line and at waiting stops. | ||
| 622 | bool is_timing_stop = false; | ||
| 623 | // Optional, at most 4 characters. Public line number which must be shown on | ||
| 624 | // displays from the first stop of the journey pattern link (e.g. Line number | ||
| 625 | // + S). This is important when a deviating public line number applies from a | ||
| 626 | // certain point onward. Normally, the public line number of the | ||
| 627 | // corresponding line is shown. | ||
| 628 | std::string display_public_line; | ||
| 629 | // Optional, at most 4 digits. Enumeration E10 (see section 2.5). A public | ||
| 630 | // transit service which distinguishes itself by a set of unique features, | ||
| 631 | // that is offered to the passenger as distinct (a marketing aspect). | ||
| 632 | // TODO: Check if we can turn BISON enumeration E10 into an enum | ||
| 633 | std::optional<short> product_formula_type; | ||
| 634 | // Mandatory, at most 5 characters. Boolean indicator whether UserStopBegin | ||
| 635 | // is used as a boarding stop in this journey pattern. Usually equal to the | ||
| 636 | // value of the corresponding USRSTOP. | ||
| 637 | bool get_in = false; | ||
| 638 | // Mandatory, at most 5 characters. Boolean indicator whether UserStopBegin | ||
| 639 | // is used as an alighting stop in this journey pattern. Usually equal to the | ||
| 640 | // value of the corresponding USRSTOP. | ||
| 641 | bool get_out = false; | ||
| 642 | // Optional, at most 8 characters. Indicates whether the transit operator | ||
| 643 | // wants a not explicitly planned trip (i.e. a trip that only operates after | ||
| 644 | // reservation such as a 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to | ||
| 645 | // be shown on displays. Values according to enumeration E21: TRUE (always), | ||
| 646 | // FALSE (never), REALTIME (only when tracking trip). | ||
| 647 | // TODO: Check if we can turn BISON enumeration E21 into an enum | ||
| 648 | std::string show_flexible_trip; | ||
| 649 | // Optional, at most 4 digits. Symbol / image for display of the line | ||
| 650 | // destination at the journey stop passing. Reference to the ICON table. | ||
| 651 | std::optional<short> line_dest_icon; | ||
| 652 | // Optional, at most 6 characters. Background color for display of the line | ||
| 653 | // destination at a journey stop passing. Hexadecimal representation | ||
| 654 | // following RGB coding. Always six characters (RRGGBB), only numbers and/or | ||
| 655 | // capital letters. | ||
| 656 | std::optional<RgbColor> line_dest_color; | ||
| 657 | // Optional, at most 6 characters. Foreground color for display of the line | ||
| 658 | // destination at a journey stop passing. Hexadecimal representation | ||
| 659 | // following RGB coding. Always six characters (RRGGBB), only numbers and/or | ||
| 660 | // capital letters. | ||
| 661 | std::optional<RgbColor> line_dest_text_color; | ||
| 662 | |||
| 663 | Kv1Line *p_line = nullptr; | ||
| 664 | Kv1JourneyPattern *p_journey_pattern = nullptr; | ||
| 665 | Kv1UserStopPoint *p_user_stop_begin = nullptr; | ||
| 666 | Kv1UserStopPoint *p_user_stop_end = nullptr; | ||
| 667 | Kv1ConcessionFinancerRelation *p_con_fin_rel = nullptr; | ||
| 668 | Kv1Destination *p_dest = nullptr; | ||
| 669 | Kv1Icon *p_line_dest_icon = nullptr; | ||
| 670 | }; | ||
| 671 | |||
| 672 | // KV1 Table 14: Point [POINT] | ||
| 673 | // | ||
| 674 | // A point is the smallest location which can be referred to within the public | ||
| 675 | // transit network. Every stop (USRSTOP) is a point. | ||
| 676 | // | ||
| 677 | // This table is part of the core data tables, which are common for all KV1 | ||
| 678 | // variants. | ||
| 679 | struct Kv1Point { | ||
| 680 | struct Key { | ||
| 681 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 682 | // defined in BISON enumeration E1). | ||
| 683 | std::string data_owner_code; | ||
| 684 | // Mandatory (key), at most 10 characters. | ||
| 685 | std::string point_code; | ||
| 686 | |||
| 687 | explicit Key(std::string data_owner_code, | ||
| 688 | std::string point_code); | ||
| 689 | }; | ||
| 690 | |||
| 691 | Key key; | ||
| 692 | // Mandatory, at most 10 characters. Refers to the POINTTYPE table. | ||
| 693 | std::string point_type; | ||
| 694 | // Mandatory, at most 10 characters. Refers to the GEOSYSTYPE table. Only | ||
| 695 | // allowed to have the value "RD" (rijksdriehoekstelsel; the national Dutch | ||
| 696 | // coordinate system). | ||
| 697 | std::string coordinate_system_type; | ||
| 698 | // Mandatory, at most 15 characters. X position in the RD coordinate system, | ||
| 699 | // in meters (at least 6 digits). | ||
| 700 | double location_x_ew = 0; | ||
| 701 | // Mandatory, at most 15 characters. Y position in the RD coordinate system, | ||
| 702 | // in meters (at least 6 digits). | ||
| 703 | double location_y_ns = 0; | ||
| 704 | // Optional, at most 15 characters. | ||
| 705 | // NOTE: the standard (presumably wrongly) indicates this field as having | ||
| 706 | // alphanumeric contents. | ||
| 707 | std::optional<double> location_z; | ||
| 708 | // Optional, at most 255 characters. | ||
| 709 | std::string description; | ||
| 710 | }; | ||
| 711 | |||
| 712 | // KV1 Table 15: Point on Link [POOL] | ||
| 713 | // | ||
| 714 | // A point that is used to geographically describe the trajectory between two | ||
| 715 | // stops. | ||
| 716 | // | ||
| 717 | // This table is part of the core data tables, which are common for all KV1 | ||
| 718 | // variants. | ||
| 719 | struct Kv1PointOnLink { | ||
| 720 | struct Key { | ||
| 721 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 722 | // defined in BISON enumeration E1). | ||
| 723 | std::string data_owner_code; | ||
| 724 | // Mandatory (key), at most 10 characters. Stop number in the domain of the | ||
| 725 | // DataOwner (here: transit operator). | ||
| 726 | std::string user_stop_code_begin; | ||
| 727 | // Mandatory (key), at most 10 characters. Stop number in the domain of the | ||
| 728 | // DataOwner (here: transit operator). | ||
| 729 | std::string user_stop_code_end; | ||
| 730 | // Mandatory (key), at most 10 characters. Code from the road manager for KAR | ||
| 731 | // points. For curve points of the DataOwner (often the transit operator). | ||
| 732 | std::string point_data_owner_code; | ||
| 733 | // Mandatory (key), at most 10 characters. | ||
| 734 | std::string point_code; | ||
| 735 | // Mandatory (key), at most 5 characters. Modality for which the distance | ||
| 736 | // applies, see BISON enumeration E9. | ||
| 737 | std::string transport_type; | ||
| 738 | |||
| 739 | explicit Key(std::string data_owner_code, | ||
| 740 | std::string user_stop_code_begin, | ||
| 741 | std::string user_stop_code_end, | ||
| 742 | std::string point_data_owner_code, | ||
| 743 | std::string point_code, | ||
| 744 | std::string transport_type); | ||
| 745 | }; | ||
| 746 | |||
| 747 | Key key; | ||
| 748 | // Mandatory, at most 5 digits. Distance in meters relative to the start of | ||
| 749 | // the link. | ||
| 750 | double distance_since_start_of_link = 0; | ||
| 751 | // Optional, at most 4 digits. Crossing speed for a public transit vehicle | ||
| 752 | // from the previous point (on a link) in m/s. Empty (not 0) when absent. | ||
| 753 | std::optional<double> segment_speed_mps; | ||
| 754 | // Optional, at most 4 digits. Comfort speed for a public transit vehicle on | ||
| 755 | // the curve point. Empty (not 0) when absent. | ||
| 756 | std::optional<double> local_point_speed_mps; | ||
| 757 | // Optional, at most 255 characters. | ||
| 758 | std::string description; | ||
| 759 | |||
| 760 | Kv1UserStopPoint *p_user_stop_begin = nullptr; | ||
| 761 | Kv1UserStopPoint *p_user_stop_end = nullptr; | ||
| 762 | Kv1Point *p_point = nullptr; | ||
| 763 | }; | ||
| 764 | |||
| 765 | // KV1 Table 16: Icon [ICON] | ||
| 766 | // | ||
| 767 | // Table with images which can be referred to from DEST.DestIcon, LINE.LineIcon | ||
| 768 | // and JOPATILI.LineDestIcon to load the correct image. | ||
| 769 | // | ||
| 770 | // This table is part of the core data tables, which are common for all KV1 | ||
| 771 | // variants. | ||
| 772 | struct Kv1Icon { | ||
| 773 | struct Key { | ||
| 774 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 775 | // defined in BISON enumeration E1). | ||
| 776 | std::string data_owner_code; | ||
| 777 | // Mandatory (key), at most 4 digits. Reference from other tables for the | ||
| 778 | // requested image. | ||
| 779 | short icon_number = 0; | ||
| 780 | |||
| 781 | explicit Key(std::string data_owner_code, | ||
| 782 | short icon_number); | ||
| 783 | }; | ||
| 784 | |||
| 785 | Key key; | ||
| 786 | // Mandatory, at most 1024 characters. Absolute URI to a publicly available | ||
| 787 | // location from which the image can be loaded. The extension of the file | ||
| 788 | // indicates the image type. | ||
| 789 | // Supported file types are: GIF (.gif), JPEG (.jpg, .jpeg), | ||
| 790 | // PNG (.png), SVG (.svg) | ||
| 791 | // Supported protocols are: HTTP, HTTPS, FTP | ||
| 792 | // Prefer to not use any capital letters. Examples: | ||
| 793 | // - http://bison.dova.nu/images/logo.png | ||
| 794 | // - https://bison.dova.nu/images/logo.png | ||
| 795 | // - ftp://ftp.dova.nu/images/logo.png | ||
| 796 | std::string icon_uri; | ||
| 797 | }; | ||
| 798 | |||
| 799 | // KV1 Table 17: Notice [NOTICE] (OPTIONAL) | ||
| 800 | // | ||
| 801 | // A (reusable) text with supplementary information about exceptions / | ||
| 802 | // clarifications for a line, journey pattern etc. | ||
| 803 | // | ||
| 804 | // Usage is optional; when there are no clarifying texts, the NOTICE table does | ||
| 805 | // not need to be provided in a KV1 set. | ||
| 806 | // | ||
| 807 | // This table is part of the core data tables, which are common for all KV1 | ||
| 808 | // variants. | ||
| 809 | struct Kv1Notice { | ||
| 810 | struct Key { | ||
| 811 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 812 | // defined in BISON enumeration E1). | ||
| 813 | std::string data_owner_code; | ||
| 814 | // Mandatory (key), at most 20 characters. Identification of the notice (remark, | ||
| 815 | // clarifying text). NTCASSGNM records refer to it. | ||
| 816 | std::string notice_code; | ||
| 817 | |||
| 818 | explicit Key(std::string data_owner_code, | ||
| 819 | std::string notice_code); | ||
| 820 | }; | ||
| 821 | |||
| 822 | Key key; | ||
| 823 | // Mandatory, at most 1024 characters. Content, text. Contains contact | ||
| 824 | // information such as telephone number, web address and reservation time for | ||
| 825 | // 'call buses' (belbussen) and other demand-based transit. | ||
| 826 | std::string notice_content; | ||
| 827 | }; | ||
| 828 | |||
| 829 | // KV1 Table 18: Notice Assignment [NTCASSGNM] (OPTIONAL) | ||
| 830 | // | ||
| 831 | // Linking table in which Notice (remark, clarifying text) is assigned to a | ||
| 832 | // line, journey pattern, stops within a journey pattern, journey etc. Notice | ||
| 833 | // Assignment contains all logical key elements of the corresponding objects to | ||
| 834 | // which a Notice can be assigned. | ||
| 835 | // | ||
| 836 | // Different attributes are required for the Notice Assignment, depending on | ||
| 837 | // the type of object to which the Notice is assigned. In the following table | ||
| 838 | // structure, this is indicated as 'Only relevant for ...'. This means that | ||
| 839 | // fields for other object types in the Notice Assignment can be ignored. | ||
| 840 | // | ||
| 841 | // Moreover, it can also occur that not all key fields of the linked table are | ||
| 842 | // of interest (content-wise) for recording the Notice. | ||
| 843 | // | ||
| 844 | // Both matters are summarised in this overview: | ||
| 845 | // | ||
| 846 | // -------------------------------------------------------- | ||
| 847 | // AssignedObject PUJO PUJOPASS LINE JOPATILI | ||
| 848 | // -------------------------------------------------------- | ||
| 849 | // DataOwnerCode........... x ...... x ...... x ..... x ... | ||
| 850 | // TimetableVersionCode ... o ............................. | ||
| 851 | // OrganizationalUnitCode . o ...... o .................... | ||
| 852 | // ScheduleCode .................... o .................... | ||
| 853 | // ScheduleTypeCode ................ o .................... | ||
| 854 | // PeriodGroupCode ........ o ............................. | ||
| 855 | // SpecificDayCode ........ o ............................. | ||
| 856 | // DayType ................ o ............................. | ||
| 857 | // LinePlanningNumber ..... x ...... x ...... x ..... x ... | ||
| 858 | // JourneyNumber .......... x ...... x .................... | ||
| 859 | // StopOrder ....................... o .............. o ... | ||
| 860 | // JourneyPatternCode ............................... x ... | ||
| 861 | // TimingLinkOrder .................................. o ... | ||
| 862 | // UserStopCode .................... o .............. o ... | ||
| 863 | // -------------------------------------------------------- | ||
| 864 | // | ||
| 865 | // Legend: | ||
| 866 | // x - Mandatory. The Notice for this object type is always dependent on the | ||
| 867 | // value of the attribute. | ||
| 868 | // o - Optional. The Notice can be independent of the value of this | ||
| 869 | // attribute for this object type. | ||
| 870 | // <empty> - Attribute is no key field for this object type and can be | ||
| 871 | // ignored when processed. | ||
| 872 | // | ||
| 873 | // Usage of Notice Assignment is optional in KV1. If there are no clarifying | ||
| 874 | // texts, then the Notice Assignment table is not required to be present in the | ||
| 875 | // provided KV1 set. | ||
| 876 | // | ||
| 877 | // This table is part of the core data tables, which are common for all KV1 | ||
| 878 | // variants. | ||
| 879 | struct Kv1NoticeAssignment { | ||
| 880 | // Mandatory, at most 10 characters. Transport operator (from list as | ||
| 881 | // defined in BISON enumeration E1). | ||
| 882 | std::string data_owner_code; | ||
| 883 | // Mandatory, at most 20 characters. Notice that is assigned. | ||
| 884 | std::string notice_code; | ||
| 885 | // Mandatory, at most 8 characters. Object type to which Notice is assigned. | ||
| 886 | std::string assigned_object; | ||
| 887 | // Optional, at most 10 characters. Only relevant for PUJO. | ||
| 888 | std::string timetable_version_code; | ||
| 889 | // Optional, at most 10 characters. Only relevant for PUJO and PUJOPASS. | ||
| 890 | std::string organizational_unit_code; | ||
| 891 | // Optional, at most 10 characters. Only relevant for PUJOPASS. | ||
| 892 | std::string schedule_code; | ||
| 893 | // Optional, at most 10 characters. Only relevant for PUJOPASS. | ||
| 894 | std::string schedule_type_code; | ||
| 895 | // Optional, at most 10 characters. Only relevant for PUJO. | ||
| 896 | std::string period_group_code; | ||
| 897 | // Optional, at most 10 characters. Only relevant for PUJO. | ||
| 898 | std::string specific_day_code; | ||
| 899 | // Optional, at most 10 characters. Only relevant for PUJO. | ||
| 900 | // [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, Thu, Fri, Sat, Sun. | ||
| 901 | // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun. | ||
| 902 | std::string day_type; | ||
| 903 | // Mandatory, at most 10 characters. Mandatory for all object types. | ||
| 904 | std::string line_planning_number; | ||
| 905 | // Optional (for all object types except PUJO and PUJOPASS), at most 6 | ||
| 906 | // digits. Only relevant for PUJO and PUJOPASS. Must be in the range | ||
| 907 | // [0, 1000000). | ||
| 908 | std::optional<int> journey_number; | ||
| 909 | // Optional, at most 4 digits. Only relevant for PUJOPASS and JOPATILI. | ||
| 910 | std::optional<int> stop_order; | ||
| 911 | // Optional (for all object types except JOPATILI), at most 10 characters. Only | ||
| 912 | // relevant for JOPATILI. | ||
| 913 | std::string journey_pattern_code; | ||
| 914 | // Optional (at most 3 digits). Only relevant for JOPATILI. | ||
| 915 | std::optional<short> timing_link_order; | ||
| 916 | // Optional (at most 10 characters). Only relevant for PUJOPASS and JOPATILI. | ||
| 917 | // For JOPATILI, this corresponds to the first stop of the link. | ||
| 918 | std::string user_stop_code; | ||
| 919 | |||
| 920 | Kv1Notice *p_notice = nullptr; | ||
| 921 | }; | ||
| 922 | |||
| 923 | // KV1 Table 19: Time Demand Group [TIMDEMGRP] | ||
| 924 | // | ||
| 925 | // A time demand group is a grouping of the run time distribution from stop to | ||
| 926 | // stop, for a journey pattern (from start to end point). | ||
| 927 | // | ||
| 928 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 929 | struct Kv1TimeDemandGroup { | ||
| 930 | struct Key { | ||
| 931 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 932 | // defined in BISON enumeration E1). | ||
| 933 | std::string data_owner_code; | ||
| 934 | // Mandatory (key), at most 10 characters. | ||
| 935 | std::string line_planning_number; | ||
| 936 | // Mandatory (key), at most 10 characters. Refers to the JOPATILI table. | ||
| 937 | std::string journey_pattern_code; | ||
| 938 | // Mandatory (key), at most 10 characters. Defines the code for the time | ||
| 939 | // demand group; TIMDEMRNT records refer to it. | ||
| 940 | // TODO: the specification is not entirely clear on this; verify the claim. | ||
| 941 | std::string time_demand_group_code; | ||
| 942 | |||
| 943 | explicit Key(std::string data_owner_code, | ||
| 944 | std::string line_planning_number, | ||
| 945 | std::string journey_pattern_code, | ||
| 946 | std::string time_demand_group_code); | ||
| 947 | }; | ||
| 948 | |||
| 949 | Key key; | ||
| 950 | |||
| 951 | Kv1Line *p_line = nullptr; | ||
| 952 | Kv1JourneyPattern *p_journey_pattern = nullptr; | ||
| 953 | }; | ||
| 954 | |||
| 955 | // KV1 Table 20: Time Demand Group Run Time [TIMDEMRNT] | ||
| 956 | // | ||
| 957 | // The run time structure/distribution for all timing links of a journey | ||
| 958 | // pattern or a time demand group. | ||
| 959 | // | ||
| 960 | // Optional run time elements are, when these are present, used to more | ||
| 961 | // accurately calculate expected departure times based on punctuality | ||
| 962 | // deviations. | ||
| 963 | // | ||
| 964 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 965 | struct Kv1TimeDemandGroupRunTime { | ||
| 966 | struct Key { | ||
| 967 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 968 | // defined in BISON enumeration E1). | ||
| 969 | std::string data_owner_code; | ||
| 970 | // Mandatory (key), at most 10 characters. | ||
| 971 | std::string line_planning_number; | ||
| 972 | // Mandatory (key), at most 10 characters. Refers to the JOPATILI table. | ||
| 973 | std::string journey_pattern_code; | ||
| 974 | // Mandatory (key), at most 10 characters. Refers to the TIMDEMGRP table. | ||
| 975 | std::string time_demand_group_code; | ||
| 976 | // Mandatory (key), at most 3 digits. Reference number of a link within the | ||
| 977 | // journey pattern (a link can occur more than once within a journey | ||
| 978 | // pattern). | ||
| 979 | short timing_link_order = 0; | ||
| 980 | |||
| 981 | explicit Key(std::string data_owner_code, | ||
| 982 | std::string line_planning_number, | ||
| 983 | std::string journey_pattern_code, | ||
| 984 | std::string time_demand_group_code, | ||
| 985 | short timing_link_order); | ||
| 986 | }; | ||
| 987 | |||
| 988 | Key key; | ||
| 989 | // Mandatory, at most 10 characters. Refers to the first stop of the link. | ||
| 990 | std::string user_stop_code_begin; | ||
| 991 | // Mandatory, at most 10 characters. Refers to the last stop of the link. | ||
| 992 | std::string user_stop_code_end; | ||
| 993 | // Mandatory, at most 5 digits. Planned total run time on link for time | ||
| 994 | // demand group: (Departure time end stop - departure time begin stop) | ||
| 995 | // corresponding to the time demand group. In seconds. | ||
| 996 | double total_drive_time_s = 0; | ||
| 997 | // Mandatory, at most 5 digits. Planned minimal run time on link for time | ||
| 998 | // demand group. Often calculated as: (Arrival time end stop - arrival time | ||
| 999 | // begin stop) corresponding to the time demand group. In seconds. | ||
| 1000 | double drive_time_s = 0; | ||
| 1001 | // Optional, at most 5 digits. Expected/planned delay/congestion on link for | ||
| 1002 | // time demand group. In seconds. | ||
| 1003 | std::optional<double> expected_delay_s; | ||
| 1004 | // Optional, at most 5 digits. Layover/catch-up time. Provides slack in the | ||
| 1005 | // timetable. In seconds (the field name carries no unit suffix). | ||
| 1006 | // LayOverTime = TotDriveTime - DriveTime + ExpectedDelay - StopWaitTime. | ||
| 1007 | std::optional<double> layover_time; | ||
| 1008 | // Mandatory, at most 5 digits. Planned stop waiting time at the final stop | ||
| 1009 | // of the link for the time demand group. Determined based on the difference | ||
| 1010 | // between the departure time and arrival time at this stop. Is zero when no | ||
| 1011 | // waiting time is planned for this stop. In seconds. | ||
| 1012 | double stop_wait_time = 0; | ||
| 1013 | // Optional, at most 5 digits. Planned minimal stop time for | ||
| 1014 | // boarding/alighting of passengers at the final stop of the link for the | ||
| 1015 | // time demand group. Application: at hub stops with a planned waiting time, | ||
| 1016 | // the difference between the planned waiting time and the minimum stop time | ||
| 1017 | // is the layover/catch-up time. In seconds. | ||
| 1018 | std::optional<double> minimum_stop_time; | ||
| 1019 | |||
| 1020 | Kv1Line *p_line = nullptr; | ||
| 1021 | Kv1UserStopPoint *p_user_stop_begin = nullptr; | ||
| 1022 | Kv1UserStopPoint *p_user_stop_end = nullptr; | ||
| 1023 | Kv1JourneyPattern *p_journey_pattern = nullptr; | ||
| 1024 | Kv1TimeDemandGroup *p_time_demand_group = nullptr; | ||
| 1025 | Kv1JourneyPatternTimingLink *p_journey_pattern_timing_link = nullptr; | ||
| 1026 | }; | ||
| 1027 | |||
| 1028 | // KV1 Table 21: Period Group [PEGR] | ||
| 1029 | // | ||
| 1030 | // Period group is an indication of a 'homogeneous period' during the year, | ||
| 1031 | // i.e. a period in which the schedule has the same composition w.r.t. | ||
| 1032 | // frequencies and run times. | ||
| 1033 | // | ||
| 1034 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 1035 | struct Kv1PeriodGroup { | ||
| 1036 | struct Key { | ||
| 1037 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1038 | // defined in BISON enumeration E1). | ||
| 1039 | std::string data_owner_code; | ||
| 1040 | // Mandatory (key), at most 10 characters. | ||
| 1041 | std::string period_group_code; | ||
| 1042 | |||
| 1043 | explicit Key(std::string data_owner_code, | ||
| 1044 | std::string period_group_code); | ||
| 1045 | }; | ||
| 1046 | |||
| 1047 | Key key; | ||
| 1048 | // Optional, at most 255 characters. | ||
| 1049 | std::string description; | ||
| 1050 | }; | ||
| 1051 | |||
| 1052 | // KV1 Table 22: Specific Day [SPECDAY] | ||
| 1053 | // | ||
| 1054 | // A specific day is a feature of a day for which a deviating service level is | ||
| 1055 | // provided, relative to a normal day of the week. | ||
| 1056 | // | ||
| 1057 | // E.g. shopping Sundays (koopzondagen, if not every Sunday), New Year's Eve | ||
| 1058 | // (oudejaarsdag), foreign bank holidays (as applicable). | ||
| 1059 | // | ||
| 1060 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 1061 | struct Kv1SpecificDay { | ||
| 1062 | struct Key { | ||
| 1063 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1064 | // defined in BISON enumeration E1). | ||
| 1065 | std::string data_owner_code; | ||
| 1066 | // Mandatory (key), at most 10 characters. Default: "NORMAL". | ||
| 1067 | std::string specific_day_code; | ||
| 1068 | |||
| 1069 | explicit Key(std::string data_owner_code, | ||
| 1070 | std::string specific_day_code); | ||
| 1071 | }; | ||
| 1072 | |||
| 1073 | Key key; | ||
| 1074 | // Mandatory, at most 50 characters. | ||
| 1075 | std::string name; | ||
| 1076 | // Optional, at most 255 characters. | ||
| 1077 | std::string description; | ||
| 1078 | }; | ||
| 1079 | |||
| 1080 | // KV1 Table 23: Timetable Version [TIVE] | ||
| 1081 | // | ||
| 1082 | // A timetable version bundles all planned activities for an organizational | ||
| 1083 | // unit. For the public schedule, these are trips, routes, run times etc. | ||
| 1084 | // | ||
| 1085 | // When processing a new Timetable Version, it is checked if another TIVE with | ||
| 1086 | // the same key has already been processed. If this is the case, ValidFrom must | ||
| 1087 | // be equal to the starting date of the previously provided set. The new set | ||
| 1088 | // replaces the older one. A package with a new starting date is only processed | ||
| 1089 | // if another TimetableVersionCode is used. | ||
| 1090 | // | ||
| 1091 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 1092 | struct Kv1TimetableVersion { | ||
| 1093 | struct Key { | ||
| 1094 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1095 | // defined in BISON enumeration E1). | ||
| 1096 | std::string data_owner_code; | ||
| 1097 | // Mandatory (key), at most 10 characters. | ||
| 1098 | std::string organizational_unit_code; | ||
| 1099 | // Mandatory (key), at most 10 characters. | ||
| 1100 | std::string timetable_version_code; | ||
| 1101 | // Mandatory (key), at most 10 characters. | ||
| 1102 | std::string period_group_code; | ||
| 1103 | // Mandatory (key), at most 10 characters. Default: "NORMAL". | ||
| 1104 | std::string specific_day_code; | ||
| 1105 | |||
| 1106 | explicit Key(std::string data_owner_code, | ||
| 1107 | std::string organizational_unit_code, | ||
| 1108 | std::string timetable_version_code, | ||
| 1109 | std::string period_group_code, | ||
| 1110 | std::string specific_day_code); | ||
| 1111 | }; | ||
| 1112 | |||
| 1113 | Key key; | ||
| 1114 | // Mandatory, at most 10 characters. Date on which the timetable goes into | ||
| 1115 | // effect, following the YYYY-MM-DD format. | ||
| 1116 | std::chrono::year_month_day valid_from; | ||
| 1117 | // Mandatory, at most 10 characters. Value: "PUBT". | ||
| 1118 | std::string timetable_version_type; | ||
| 1119 | // Optional, at most 10 characters. Date on which the timetable goes out of | ||
| 1120 | // effect, following the YYYY-MM-DD format. | ||
| 1121 | std::optional<std::chrono::year_month_day> valid_thru; | ||
| 1122 | // Optional, at most 255 characters. Should be null/empty. | ||
| 1123 | std::string description; | ||
| 1124 | |||
| 1125 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1126 | Kv1PeriodGroup *p_period_group = nullptr; | ||
| 1127 | Kv1SpecificDay *p_specific_day = nullptr; | ||
| 1128 | }; | ||
| 1129 | |||
| 1130 | // KV1 Table 24: Public Journey [PUJO] | ||
| 1131 | // | ||
| 1132 | // Public journeys are journeys that are operated by a public transit | ||
| 1133 | // organization and are accessible to the passenger. | ||
| 1134 | // | ||
| 1135 | // Business rules: | ||
| 1136 | // - If ShowFlexibleTrip or ProductFormulaType is set in a record of this | ||
| 1137 | // table, this takes precedence over the value as in the corresponding | ||
| 1138 | // JOPATILI entry. | ||
| 1139 | // | ||
| 1140 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 1141 | struct Kv1PublicJourney { | ||
| 1142 | struct Key { | ||
| 1143 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1144 | // defined in BISON enumeration E1). | ||
| 1145 | std::string data_owner_code; | ||
| 1146 | // Mandatory (key), at most 10 characters. | ||
| 1147 | std::string timetable_version_code; | ||
| 1148 | // Mandatory (key), at most 10 characters. | ||
| 1149 | std::string organizational_unit_code; | ||
| 1150 | // Mandatory (key), at most 10 characters. | ||
| 1151 | std::string period_group_code; | ||
| 1152 | // Mandatory (key), at most 10 characters. | ||
| 1153 | std::string specific_day_code; | ||
| 1154 | // Mandatory (key), at most 7 characters. | ||
| 1155 | // [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, Thu, Fri, Sat, Sun. | ||
| 1156 | // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun. | ||
| 1157 | // TODO: See if we can make this into a more concrete type | ||
| 1158 | std::string day_type; | ||
| 1159 | // Mandatory (key), at most 10 characters. | ||
| 1160 | std::string line_planning_number; | ||
| 1161 | // Mandatory (key), at most 6 digits. Must be in the range [0-1000000). | ||
| 1162 | int journey_number = 0; | ||
| 1163 | |||
| 1164 | explicit Key(std::string data_owner_code, | ||
| 1165 | std::string timetable_version_code, | ||
| 1166 | std::string organizational_unit_code, | ||
| 1167 | std::string period_group_code, | ||
| 1168 | std::string specific_day_code, | ||
| 1169 | std::string day_type, | ||
| 1170 | std::string line_planning_number, | ||
| 1171 | int journey_number); | ||
| 1172 | }; | ||
| 1173 | |||
| 1174 | Key key; | ||
| 1175 | // Mandatory, at most 10 characters. | ||
| 1176 | std::string time_demand_group_code; | ||
| 1177 | // Mandatory, at most 10 characters. | ||
| 1178 | std::string journey_pattern_code; | ||
| 1179 | // Mandatory, at most 8 characters. Format: "HH:MM:SS". | ||
| 1180 | std::chrono::hh_mm_ss<std::chrono::seconds> departure_time; | ||
| 1181 | // Mandatory, at most 13 characters. Values as in BISON enumeration E3. | ||
| 1182 | // Allowed are: "ACCESSIBLE", "NOTACCESSIBLE" and "UNKNOWN". | ||
| 1183 | // TODO: See if we can fit BISON enumeration E3 into an enum | ||
| 1184 | std::string wheelchair_accessible; | ||
| 1185 | // Mandatory, at most 5 characters. Boolean. Value "true": journey is | ||
| 1186 | // operated by DataOwner. Value "false": journey is operated by a different | ||
| 1187 | // DataOwner. Indicator is meant for a line that is operated jointly by | ||
| 1188 | // multiple transit operators. The indicator is used to be able to match the | ||
| 1189 | // journey operation (KV6, KV19 etc.); only journeys for which the indicator | ||
| 1190 | // is "true" can be expected to have corresponding current/real-time | ||
| 1191 | // information, although "true" doesn't necessarily mean that this | ||
| 1192 | // current/real-time information will (always) become available. | ||
| 1193 | bool data_owner_is_operator = false; | ||
| 1194 | // Mandatory, at most 5 characters. Boolean. Indicates whether | ||
| 1195 | // current/real-time journey information may be expected for the | ||
| 1196 | // corresponding journey ("true" or "false"). | ||
| 1197 | bool planned_monitored = false; | ||
| 1198 | // Optional, at most 4 digits. BISON enumeration E10. Intended to allow | ||
| 1199 | // capturing transit mode features at the journey level. | ||
| 1200 | // TODO: See if we can make BISON enumeration E10 into an enum | ||
| 1201 | std::optional<short> product_formula_type; | ||
| 1202 | // Optional, at most 8 characters. Indicates whether the transit operator | ||
| 1203 | // wants a not-explicitly planned trip (i.e. a journey that only runs on | ||
| 1204 | // reservation, e.g. 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to be | ||
| 1205 | // shown on displays. Values following BISON enumeration E21: TRUE (always), | ||
| 1206 | // FALSE (never), REALTIME (only when journey is tracked). | ||
| 1207 | // TODO: See if we can make BISON enumeration E21 into an enum | ||
| 1208 | std::string show_flexible_trip; | ||
| 1209 | |||
| 1210 | Kv1TimetableVersion *p_timetable_version = nullptr; | ||
| 1211 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1212 | Kv1PeriodGroup *p_period_group = nullptr; | ||
| 1213 | Kv1SpecificDay *p_specific_day = nullptr; | ||
| 1214 | Kv1Line *p_line = nullptr; | ||
| 1215 | Kv1TimeDemandGroup *p_time_demand_group = nullptr; | ||
| 1216 | Kv1JourneyPattern *p_journey_pattern = nullptr; | ||
| 1217 | }; | ||
| 1218 | |||
| 1219 | // KV1 Table 25: Period Group Validity [PEGRVAL] | ||
| 1220 | // | ||
| 1221 | // Validities (multiple from-thru dates) of a period group. | ||
| 1222 | // | ||
| 1223 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 1224 | struct Kv1PeriodGroupValidity { | ||
| 1225 | struct Key { | ||
| 1226 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1227 | // defined in BISON enumeration E1). | ||
| 1228 | std::string data_owner_code; | ||
| 1229 | // Mandatory (key), at most 10 characters. | ||
| 1230 | std::string organizational_unit_code; | ||
| 1231 | // Mandatory (key), at most 10 characters. | ||
| 1232 | std::string period_group_code; | ||
| 1233 | // Mandatory (key), at most 10 characters. Date of the start of the validity | ||
| 1234 | // period. Format: "YYYY-MM-DD". | ||
| 1235 | std::chrono::year_month_day valid_from; | ||
| 1236 | |||
| 1237 | explicit Key(std::string data_owner_code, | ||
| 1238 | std::string organizational_unit_code, | ||
| 1239 | std::string period_group_code, | ||
| 1240 | std::chrono::year_month_day valid_from); | ||
| 1241 | }; | ||
| 1242 | |||
| 1243 | Key key; | ||
| 1244 | // Mandatory, at most 10 characters. Date of the end of the validity period. | ||
| 1245 | // Format: "YYYY-MM-DD". | ||
| 1246 | std::chrono::year_month_day valid_thru; | ||
| 1247 | |||
| 1248 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1249 | Kv1PeriodGroup *p_period_group = nullptr; | ||
| 1250 | }; | ||
| 1251 | |||
| 1252 | // KV1 Table 26: Exceptional Operating Day [EXCOPDAY] | ||
| 1253 | // | ||
| 1254 | // Contains exceptional validity dates, for which the service runs following a | ||
| 1255 | // different day type (such as another day of the week or a different period). | ||
| 1256 | // | ||
| 1257 | // This table is part of the KV1 variant "validities and time demand groups". | ||
| 1258 | struct Kv1ExceptionalOperatingDay { | ||
| 1259 | struct Key { | ||
| 1260 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1261 | // defined in BISON enumeration E1). | ||
| 1262 | std::string data_owner_code; | ||
| 1263 | // Mandatory (key), at most 10 characters. Organization unit for which an | ||
| 1264 | // exceptional day validity applies. | ||
| 1265 | std::string organizational_unit_code; | ||
| 1266 | // Mandatory (key), at most 23 characters. Date (+ time) for which the | ||
| 1267 | // exceptional validity applies. Format: "YYYYMMDDThh:mm:ssTZD". | ||
| 1268 | std::chrono::sys_seconds valid_date; | ||
| 1269 | |||
| 1270 | explicit Key(std::string data_owner_code, | ||
| 1271 | std::string organizational_unit_code, | ||
| 1272 | std::chrono::sys_seconds valid_date); | ||
| 1273 | }; | ||
| 1274 | |||
| 1275 | Key key; | ||
| 1276 | // Mandatory, at most 7 characters. The exceptional day type that applies on | ||
| 1277 | // a calendar day: [0|1][0|2][0|3][0|4][0|5][0|6][0|7] for Mon, Tue, Wed, | ||
| 1278 | // Thu, Fri, Sat, Sun. | ||
| 1279 | // E.g. 1234500 means Mon, Tue, Wed, Thu, Fri but not Sat, Sun. | ||
| 1280 | // TODO: See if we can make this into a more concrete type | ||
| 1281 | std::string day_type_as_on; | ||
| 1282 | // Mandatory, at most 10 characters. Specific day service level to which the | ||
| 1283 | // exceptional day validity refers. | ||
| 1284 | std::string specific_day_code; | ||
| 1285 | // Optional, at most 10 characters. An exceptional day validity can be | ||
| 1286 | // related to the service level of another period (e.g. the school holiday | ||
| 1287 | // schedule). This exceptional period reference is set here. | ||
| 1288 | // | ||
| 1289 | // E.g. on Good Friday or the day after Ascension day, transit runs according | ||
| 1290 | // to the holiday season schedule, while transit runs following the winter | ||
| 1291 | // package in the surrounding days. | ||
| 1292 | std::string period_group_code; | ||
| 1293 | // Optional, at most 255 characters. | ||
| 1294 | std::string description; | ||
| 1295 | |||
| 1296 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1297 | Kv1SpecificDay *p_specific_day = nullptr; | ||
| 1298 | Kv1PeriodGroup *p_period_group = nullptr; | ||
| 1299 | }; | ||
| 1300 | |||
| 1301 | // KV1 Table 27: Schedule Version [SCHEDVERS] | ||
| 1302 | // | ||
| 1303 | // A schedule version bundles the planned activities for an organisation unit | ||
| 1304 | // per day type. The journeys with passing times and corresponding routes are | ||
| 1305 | // for the public timetable. | ||
| 1306 | // | ||
| 1307 | // When processing a new Schedule Version, it is checked if another SCHEDVERS | ||
| 1308 | // with the same key has already been processed. If this is the case, ValidFrom | ||
| 1309 | // must be equal to the starting date of the previously provided set. The new | ||
| 1310 | // set replaces the older one. A package with a new starting date is only | ||
| 1311 | // processed if another Schedule Code is used. | ||
| 1312 | // | ||
| 1313 | // This table is part of the KV1 variant "schedules and passing times". | ||
| 1314 | struct Kv1ScheduleVersion { | ||
| 1315 | struct Key { | ||
| 1316 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1317 | // defined in BISON enumeration E1). | ||
| 1318 | std::string data_owner_code; | ||
| 1319 | // Mandatory (key), at most 10 characters. | ||
| 1320 | std::string organizational_unit_code; | ||
| 1321 | // Mandatory (key), at most 10 characters. A unique code in combination with | ||
| 1322 | // the ScheduleTypeCode of the package within the ORUN. | ||
| 1323 | std::string schedule_code; | ||
| 1324 | // Mandatory (key), at most 10 characters. Code for the Schedule Type (Day Type). | ||
| 1325 | std::string schedule_type_code; | ||
| 1326 | |||
| 1327 | explicit Key(std::string data_owner_code, | ||
| 1328 | std::string organizational_unit_code, | ||
| 1329 | std::string schedule_code, | ||
| 1330 | std::string schedule_type_code); | ||
| 1331 | }; | ||
| 1332 | |||
| 1333 | Key key; | ||
| 1334 | // Mandatory, at most 10 characters. Date on which the schedule goes into | ||
| 1335 | // effect. Format: "YYYY-MM-DD". | ||
| 1336 | std::chrono::year_month_day valid_from; | ||
| 1337 | // Optional, at most 10 characters. Date on which the schedule goes out of | ||
| 1338 | // effect. Format: "YYYY-MM-DD". | ||
| 1339 | std::optional<std::chrono::year_month_day> valid_thru; | ||
| 1340 | // Optional, at most 255 characters. Should be empty/null. | ||
| 1341 | std::string description; | ||
| 1342 | |||
| 1343 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1344 | }; | ||
| 1345 | |||
| 1346 | // KV1 Table 28: Public Journey Passing Times [PUJOPASS] | ||
| 1347 | // | ||
| 1348 | // Public journey with arrival and departure times at all stops (and other | ||
| 1349 | // timing points). | ||
| 1350 | // | ||
| 1351 | // Business rules: | ||
| 1352 | // - If ShowFlexibleTrip or ProductFormulaType is set here, then this takes | ||
| 1353 | // precedence over the value in the corresponding JOPATILI record. | ||
| 1354 | // - All stop passings of a public journey refer to the same journey pattern | ||
| 1355 | // (JOPA)! | ||
| 1356 | // | ||
| 1357 | // This table is part of the KV1 variant "schedules and passing times". | ||
| 1358 | struct Kv1PublicJourneyPassingTimes { | ||
| 1359 | struct Key { | ||
| 1360 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1361 | // defined in BISON enumeration E1). | ||
| 1362 | std::string data_owner_code; | ||
| 1363 | // Mandatory (key), at most 10 characters. | ||
| 1364 | std::string organizational_unit_code; | ||
| 1365 | // Mandatory (key), at most 10 characters. A unique code in combination with | ||
| 1366 | // the ScheduleTypeCode of the package within the ORUN. | ||
| 1367 | std::string schedule_code; | ||
| 1368 | // Mandatory (key), at most 10 characters. Code for the Schedule Type (e.g. | ||
| 1369 | // Day Type). | ||
| 1370 | std::string schedule_type_code; | ||
| 1371 | // Mandatory (key), at most 10 characters. | ||
| 1372 | std::string line_planning_number; | ||
| 1373 | // Mandatory (key), at most 6 digits. Must be in the range [0-1000000). | ||
| 1374 | int journey_number = 0; | ||
| 1375 | // Mandatory (key), at most 4 digits. | ||
| 1376 | short stop_order = 0; | ||
| 1377 | |||
| 1378 | explicit Key(std::string data_owner_code, | ||
| 1379 | std::string organizational_unit_code, | ||
| 1380 | std::string schedule_code, | ||
| 1381 | std::string schedule_type_code, | ||
| 1382 | std::string line_planning_number, | ||
| 1383 | int journey_number, | ||
| 1384 | short stop_order); | ||
| 1385 | }; | ||
| 1386 | |||
| 1387 | Key key; | ||
| 1388 | // Mandatory, at most 10 characters. | ||
| 1389 | std::string journey_pattern_code; | ||
| 1390 | // Mandatory, at most 10 characters. | ||
| 1391 | std::string user_stop_code; | ||
| 1392 | // Mandatory (except for the first stop of a journey), at most 8 digits. Not | ||
| 1393 | // compulsory for the first stop of a journey. Format: "HH:MM:SS". | ||
| 1394 | std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_arrival_time; | ||
| 1395 | // Mandatory (except for the last stop of a journey), at most 8 digits. Not | ||
| 1396 | // compulsory for the last stop of a journey. Format: "HH:MM:SS". | ||
| 1397 | std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_departure_time; | ||
| 1398 | // Mandatory, at most 13 characters. Values as in BISON enumeration E3. | ||
| 1399 | // Allowed are: "ACCESSIBLE", "NOTACCESSIBLE" and "UNKNOWN". | ||
| 1400 | // TODO: See if we can fit BISON enumeration E3 into an enum | ||
| 1401 | std::string wheelchair_accessible; | ||
| 1402 | // Mandatory, at most 5 characters. Boolean. Value "true": journey is | ||
| 1403 | // operated by DataOwner. Value "false": journey is operated by a different | ||
| 1404 | // DataOwner. Indicator is meant for a line that is operated jointly by | ||
| 1405 | // multiple transit operators. The indicator is used to be able to match the | ||
| 1406 | // journey operation (KV6, KV19 etc.); only journeys for which the indicator | ||
| 1407 | // is "true" can be expected to have corresponding current/real-time | ||
| 1408 | // information, although "true" doesn't necessarily mean that this | ||
| 1409 | // current/real-time information will (always) become available. | ||
| 1410 | bool data_owner_is_operator = false; | ||
| 1411 | // Mandatory, at most 5 characters. Boolean. Indicates whether | ||
| 1412 | // current/real-time journey information may be expected for the | ||
| 1413 | // corresponding journey ("true" or "false"). | ||
| 1414 | bool planned_monitored = false; | ||
| 1415 | // Optional, at most 4 digits. BISON enumeration E10. Intended to allow | ||
| 1416 | // capturing transit mode features at the journey level. | ||
| 1417 | // TODO: See if we can make BISON enumeration E10 into an enum | ||
| 1418 | std::optional<short> product_formula_type; | ||
| 1419 | // Optional, at most 8 characters. Indicates whether the transit operator | ||
| 1420 | // wants a not-explicitly planned trip (i.e. a journey that only runs on | ||
| 1421 | // reservation, e.g. 'call bus' (belbus), 'line taxi' (lijntaxi) etc.) to be | ||
| 1422 | // shown on displays. Values following BISON enumeration E21: TRUE (always), | ||
| 1423 | // FALSE (never), REALTIME (only when journey is tracked). | ||
| 1424 | // TODO: See if we can make BISON enumeration E21 into an enum | ||
| 1425 | std::string show_flexible_trip; | ||
| 1426 | |||
| 1427 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1428 | Kv1ScheduleVersion *p_schedule_version = nullptr; | ||
| 1429 | Kv1Line *p_line = nullptr; | ||
| 1430 | Kv1JourneyPattern *p_journey_pattern = nullptr; | ||
| 1431 | Kv1UserStopPoint *p_user_stop = nullptr; | ||
| 1432 | }; | ||
| 1433 | |||
| 1434 | // KV1 Table 29: Operating Day [OPERDAY] | ||
| 1435 | // | ||
| 1436 | // Contains the operational calendar. Which package (schedule version) applies | ||
| 1437 | // is specified per day, per organisation unit. | ||
| 1438 | // | ||
| 1439 | // This table is part of the KV1 variant "schedules and passing times". | ||
| 1440 | struct Kv1OperatingDay { | ||
| 1441 | struct Key { | ||
| 1442 | // Mandatory (key), at most 10 characters. Transport operator (from list as | ||
| 1443 | // defined in BISON enumeration E1). | ||
| 1444 | std::string data_owner_code; | ||
| 1445 | // Mandatory (key), at most 10 characters. | ||
| 1446 | std::string organizational_unit_code; | ||
| 1447 | // Mandatory (key), at most 10 characters. | ||
| 1448 | std::string schedule_code; | ||
| 1449 | // Mandatory (key), at most 10 characters. | ||
| 1450 | std::string schedule_type_code; | ||
| 1451 | // Mandatory (key), at most 10 characters. Date on which the package | ||
| 1452 | // (schedule version) applies. Format: "YYYY-MM-DD". | ||
| 1453 | std::chrono::year_month_day valid_date; | ||
| 1454 | |||
| 1455 | explicit Key(std::string data_owner_code, | ||
| 1456 | std::string organizational_unit_code, | ||
| 1457 | std::string schedule_code, | ||
| 1458 | std::string schedule_type_code, | ||
| 1459 | std::chrono::year_month_day valid_date); | ||
| 1460 | }; | ||
| 1461 | |||
| 1462 | Key key; | ||
| 1463 | // Optional, at most 255 characters. | ||
| 1464 | std::string description; | ||
| 1465 | |||
| 1466 | Kv1OrganizationalUnit *p_organizational_unit = nullptr; | ||
| 1467 | Kv1ScheduleVersion *p_schedule_version = nullptr; | ||
| 1468 | }; | ||
| 1469 | |||
| 1470 | bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b); | ||
| 1471 | bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b); | ||
| 1472 | bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b); | ||
| 1473 | bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b); | ||
| 1474 | bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b); | ||
| 1475 | bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b); | ||
| 1476 | bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b); | ||
| 1477 | bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b); | ||
| 1478 | bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b); | ||
| 1479 | bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b); | ||
| 1480 | bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b); | ||
| 1481 | bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b); | ||
| 1482 | bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b); | ||
| 1483 | bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b); | ||
| 1484 | bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b); | ||
| 1485 | bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b); | ||
| 1486 | bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b); | ||
| 1487 | bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b); | ||
| 1488 | bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const Kv1TimeDemandGroupRunTime::Key &b); | ||
| 1489 | bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b); | ||
| 1490 | bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b); | ||
| 1491 | bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b); | ||
| 1492 | bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b); | ||
| 1493 | bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b); | ||
| 1494 | bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b); | ||
| 1495 | bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b); | ||
| 1496 | bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b); | ||
| 1497 | bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b); | ||
| 1498 | |||
| 1499 | size_t hash_value(const Kv1OrganizationalUnit::Key &k); | ||
| 1500 | size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k); | ||
| 1501 | size_t hash_value(const Kv1UserStopPoint::Key &k); | ||
| 1502 | size_t hash_value(const Kv1UserStopArea::Key &k); | ||
| 1503 | size_t hash_value(const Kv1TimingLink::Key &k); | ||
| 1504 | size_t hash_value(const Kv1Link::Key &k); | ||
| 1505 | size_t hash_value(const Kv1Line::Key &k); | ||
| 1506 | size_t hash_value(const Kv1Destination::Key &k); | ||
| 1507 | size_t hash_value(const Kv1JourneyPattern::Key &k); | ||
| 1508 | size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k); | ||
| 1509 | size_t hash_value(const Kv1ConcessionArea::Key &k); | ||
| 1510 | size_t hash_value(const Kv1Financer::Key &k); | ||
| 1511 | size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k); | ||
| 1512 | size_t hash_value(const Kv1Point::Key &k); | ||
| 1513 | size_t hash_value(const Kv1PointOnLink::Key &k); | ||
| 1514 | size_t hash_value(const Kv1Icon::Key &k); | ||
| 1515 | size_t hash_value(const Kv1Notice::Key &k); | ||
| 1516 | size_t hash_value(const Kv1TimeDemandGroup::Key &k); | ||
| 1517 | size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k); | ||
| 1518 | size_t hash_value(const Kv1PeriodGroup::Key &k); | ||
| 1519 | size_t hash_value(const Kv1SpecificDay::Key &k); | ||
| 1520 | size_t hash_value(const Kv1TimetableVersion::Key &k); | ||
| 1521 | size_t hash_value(const Kv1PublicJourney::Key &k); | ||
| 1522 | size_t hash_value(const Kv1PeriodGroupValidity::Key &k); | ||
| 1523 | size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k); | ||
| 1524 | size_t hash_value(const Kv1ScheduleVersion::Key &k); | ||
| 1525 | size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k); | ||
| 1526 | size_t hash_value(const Kv1OperatingDay::Key &k); | ||
| 1527 | |||
| 1528 | #endif // OEUF_LIBTMI8_KV1_TYPES_HPP | ||
diff --git a/lib/libtmi8/include/tmi8/kv6_parquet.hpp b/lib/libtmi8/include/tmi8/kv6_parquet.hpp new file mode 100644 index 0000000..33b57ca --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv6_parquet.hpp | |||
| @@ -0,0 +1,46 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_LIBTMI8_KV6_PARQUET_HPP | ||
| 4 | #define OEUF_LIBTMI8_KV6_PARQUET_HPP | ||
| 5 | |||
| 6 | #include <filesystem> | ||
| 7 | |||
| 8 | #include <arrow/api.h> | ||
| 9 | #include <arrow/io/api.h> | ||
| 10 | #include <parquet/arrow/writer.h> | ||
| 11 | |||
| 12 | static const size_t MAX_PARQUET_CHUNK = 10000; | ||
| 13 | |||
| 14 | struct ParquetBuilder { | ||
| 15 | ParquetBuilder(); | ||
| 16 | arrow::Result<std::shared_ptr<arrow::Table>> getTable(); | ||
| 17 | |||
| 18 | std::shared_ptr<arrow::Schema> schema; | ||
| 19 | |||
| 20 | arrow::StringBuilder types; | ||
| 21 | arrow::StringBuilder data_owner_codes; | ||
| 22 | arrow::StringBuilder line_planning_numbers; | ||
| 23 | arrow::Date32Builder operating_days; | ||
| 24 | arrow::UInt32Builder journey_numbers; | ||
| 25 | arrow::UInt8Builder reinforcement_numbers; | ||
| 26 | arrow::TimestampBuilder timestamps{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::default_memory_pool()}; | ||
| 27 | arrow::StringBuilder sources; | ||
| 28 | arrow::Int16Builder punctualities; | ||
| 29 | arrow::StringBuilder user_stop_codes; | ||
| 30 | arrow::UInt16Builder passage_sequence_numbers; | ||
| 31 | arrow::UInt32Builder vehicle_numbers; | ||
| 32 | arrow::UInt32Builder block_codes; | ||
| 33 | arrow::StringBuilder wheelchair_accessibles; | ||
| 34 | arrow::UInt8Builder number_of_coaches; | ||
| 35 | arrow::Int32Builder rd_ys; | ||
| 36 | arrow::Int32Builder rd_xs; | ||
| 37 | arrow::UInt32Builder distance_since_last_user_stops; | ||
| 38 | }; | ||
| 39 | |||
| 40 | [[nodiscard]] | ||
| 41 | arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename); | ||
| 42 | |||
| 43 | [[nodiscard]] | ||
| 44 | arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename); | ||
| 45 | |||
| 46 | #endif // OEUF_LIBTMI8_KV6_PARQUET_HPP | ||
diff --git a/lib/libtmi8/src/kv1_index.cpp b/lib/libtmi8/src/kv1_index.cpp new file mode 100644 index 0000000..23e9596 --- /dev/null +++ b/lib/libtmi8/src/kv1_index.cpp | |||
| @@ -0,0 +1,461 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <tmi8/kv1_index.hpp> | ||
| 4 | |||
| 5 | Kv1Index::Kv1Index(Kv1Records *records) : records(records) { | ||
| 6 | organizational_units.reserve(records->organizational_units.size()); | ||
| 7 | for (size_t i = 0; i < records->organizational_units.size(); i++) { | ||
| 8 | auto *it = &records->organizational_units[i]; | ||
| 9 | organizational_units[it->key] = it; | ||
| 10 | } | ||
| 11 | higher_organizational_units.reserve(records->higher_organizational_units.size()); | ||
| 12 | for (size_t i = 0; i < records->higher_organizational_units.size(); i++) { | ||
| 13 | auto *it = &records->higher_organizational_units[i]; | ||
| 14 | higher_organizational_units[it->key] = it; | ||
| 15 | } | ||
| 16 | user_stop_points.reserve(records->user_stop_points.size()); | ||
| 17 | for (size_t i = 0; i < records->user_stop_points.size(); i++) { | ||
| 18 | auto *it = &records->user_stop_points[i]; | ||
| 19 | user_stop_points[it->key] = it; | ||
| 20 | } | ||
| 21 | user_stop_areas.reserve(records->user_stop_areas.size()); | ||
| 22 | for (size_t i = 0; i < records->user_stop_areas.size(); i++) { | ||
| 23 | auto *it = &records->user_stop_areas[i]; | ||
| 24 | user_stop_areas[it->key] = it; | ||
| 25 | } | ||
| 26 | timing_links.reserve(records->timing_links.size()); | ||
| 27 | for (size_t i = 0; i < records->timing_links.size(); i++) { | ||
| 28 | auto *it = &records->timing_links[i]; | ||
| 29 | timing_links[it->key] = it; | ||
| 30 | } | ||
| 31 | links.reserve(records->links.size()); | ||
| 32 | for (size_t i = 0; i < records->links.size(); i++) { | ||
| 33 | auto *it = &records->links[i]; | ||
| 34 | links[it->key] = it; | ||
| 35 | } | ||
| 36 | lines.reserve(records->lines.size()); | ||
| 37 | for (size_t i = 0; i < records->lines.size(); i++) { | ||
| 38 | auto *it = &records->lines[i]; | ||
| 39 | lines[it->key] = it; | ||
| 40 | } | ||
| 41 | destinations.reserve(records->destinations.size()); | ||
| 42 | for (size_t i = 0; i < records->destinations.size(); i++) { | ||
| 43 | auto *it = &records->destinations[i]; | ||
| 44 | destinations[it->key] = it; | ||
| 45 | } | ||
| 46 | journey_patterns.reserve(records->journey_patterns.size()); | ||
| 47 | for (size_t i = 0; i < records->journey_patterns.size(); i++) { | ||
| 48 | auto *it = &records->journey_patterns[i]; | ||
| 49 | journey_patterns[it->key] = it; | ||
| 50 | } | ||
| 51 | concession_financer_relations.reserve(records->concession_financer_relations.size()); | ||
| 52 | for (size_t i = 0; i < records->concession_financer_relations.size(); i++) { | ||
| 53 | auto *it = &records->concession_financer_relations[i]; | ||
| 54 | concession_financer_relations[it->key] = it; | ||
| 55 | } | ||
| 56 | concession_areas.reserve(records->concession_areas.size()); | ||
| 57 | for (size_t i = 0; i < records->concession_areas.size(); i++) { | ||
| 58 | auto *it = &records->concession_areas[i]; | ||
| 59 | concession_areas[it->key] = it; | ||
| 60 | } | ||
| 61 | financers.reserve(records->financers.size()); | ||
| 62 | for (size_t i = 0; i < records->financers.size(); i++) { | ||
| 63 | auto *it = &records->financers[i]; | ||
| 64 | financers[it->key] = it; | ||
| 65 | } | ||
| 66 | journey_pattern_timing_links.reserve(records->journey_pattern_timing_links.size()); | ||
| 67 | for (size_t i = 0; i < records->journey_pattern_timing_links.size(); i++) { | ||
| 68 | auto *it = &records->journey_pattern_timing_links[i]; | ||
| 69 | journey_pattern_timing_links[it->key] = it; | ||
| 70 | } | ||
| 71 | points.reserve(records->points.size()); | ||
| 72 | for (size_t i = 0; i < records->points.size(); i++) { | ||
| 73 | auto *it = &records->points[i]; | ||
| 74 | points[it->key] = it; | ||
| 75 | } | ||
| 76 | point_on_links.reserve(records->point_on_links.size()); | ||
| 77 | for (size_t i = 0; i < records->point_on_links.size(); i++) { | ||
| 78 | auto *it = &records->point_on_links[i]; | ||
| 79 | point_on_links[it->key] = it; | ||
| 80 | } | ||
| 81 | icons.reserve(records->icons.size()); | ||
| 82 | for (size_t i = 0; i < records->icons.size(); i++) { | ||
| 83 | auto *it = &records->icons[i]; | ||
| 84 | icons[it->key] = it; | ||
| 85 | } | ||
| 86 | notices.reserve(records->notices.size()); | ||
| 87 | for (size_t i = 0; i < records->notices.size(); i++) { | ||
| 88 | auto *it = &records->notices[i]; | ||
| 89 | notices[it->key] = it; | ||
| 90 | } | ||
| 91 | time_demand_groups.reserve(records->time_demand_groups.size()); | ||
| 92 | for (size_t i = 0; i < records->time_demand_groups.size(); i++) { | ||
| 93 | auto *it = &records->time_demand_groups[i]; | ||
| 94 | time_demand_groups[it->key] = it; | ||
| 95 | } | ||
| 96 | time_demand_group_run_times.reserve(records->time_demand_group_run_times.size()); | ||
| 97 | for (size_t i = 0; i < records->time_demand_group_run_times.size(); i++) { | ||
| 98 | auto *it = &records->time_demand_group_run_times[i]; | ||
| 99 | time_demand_group_run_times[it->key] = it; | ||
| 100 | } | ||
| 101 | period_groups.reserve(records->period_groups.size()); | ||
| 102 | for (size_t i = 0; i < records->period_groups.size(); i++) { | ||
| 103 | auto *it = &records->period_groups[i]; | ||
| 104 | period_groups[it->key] = it; | ||
| 105 | } | ||
| 106 | specific_days.reserve(records->specific_days.size()); | ||
| 107 | for (size_t i = 0; i < records->specific_days.size(); i++) { | ||
| 108 | auto *it = &records->specific_days[i]; | ||
| 109 | specific_days[it->key] = it; | ||
| 110 | } | ||
| 111 | timetable_versions.reserve(records->timetable_versions.size()); | ||
| 112 | for (size_t i = 0; i < records->timetable_versions.size(); i++) { | ||
| 113 | auto *it = &records->timetable_versions[i]; | ||
| 114 | timetable_versions[it->key] = it; | ||
| 115 | } | ||
| 116 | public_journeys.reserve(records->public_journeys.size()); | ||
| 117 | for (size_t i = 0; i < records->public_journeys.size(); i++) { | ||
| 118 | auto *it = &records->public_journeys[i]; | ||
| 119 | public_journeys[it->key] = it; | ||
| 120 | } | ||
| 121 | period_group_validities.reserve(records->period_group_validities.size()); | ||
| 122 | for (size_t i = 0; i < records->period_group_validities.size(); i++) { | ||
| 123 | auto *it = &records->period_group_validities[i]; | ||
| 124 | period_group_validities[it->key] = it; | ||
| 125 | } | ||
| 126 | exceptional_operating_days.reserve(records->exceptional_operating_days.size()); | ||
| 127 | for (size_t i = 0; i < records->exceptional_operating_days.size(); i++) { | ||
| 128 | auto *it = &records->exceptional_operating_days[i]; | ||
| 129 | exceptional_operating_days[it->key] = it; | ||
| 130 | } | ||
| 131 | schedule_versions.reserve(records->schedule_versions.size()); | ||
| 132 | for (size_t i = 0; i < records->schedule_versions.size(); i++) { | ||
| 133 | auto *it = &records->schedule_versions[i]; | ||
| 134 | schedule_versions[it->key] = it; | ||
| 135 | } | ||
| 136 | public_journey_passing_times.reserve(records->public_journey_passing_times.size()); | ||
| 137 | for (size_t i = 0; i < records->public_journey_passing_times.size(); i++) { | ||
| 138 | auto *it = &records->public_journey_passing_times[i]; | ||
| 139 | public_journey_passing_times[it->key] = it; | ||
| 140 | } | ||
| 141 | operating_days.reserve(records->operating_days.size()); | ||
| 142 | for (size_t i = 0; i < records->operating_days.size(); i++) { | ||
| 143 | auto *it = &records->operating_days[i]; | ||
| 144 | operating_days[it->key] = it; | ||
| 145 | } | ||
| 146 | } | ||
| 147 | |||
| 148 | size_t Kv1Index::size() const { | ||
| 149 | return organizational_units.size() | ||
| 150 | + higher_organizational_units.size() | ||
| 151 | + user_stop_points.size() | ||
| 152 | + user_stop_areas.size() | ||
| 153 | + timing_links.size() | ||
| 154 | + links.size() | ||
| 155 | + lines.size() | ||
| 156 | + destinations.size() | ||
| 157 | + journey_patterns.size() | ||
| 158 | + concession_financer_relations.size() | ||
| 159 | + concession_areas.size() | ||
| 160 | + financers.size() | ||
| 161 | + journey_pattern_timing_links.size() | ||
| 162 | + points.size() | ||
| 163 | + point_on_links.size() | ||
| 164 | + icons.size() | ||
| 165 | + notices.size() | ||
| 166 | + time_demand_groups.size() | ||
| 167 | + time_demand_group_run_times.size() | ||
| 168 | + period_groups.size() | ||
| 169 | + specific_days.size() | ||
| 170 | + timetable_versions.size() | ||
| 171 | + public_journeys.size() | ||
| 172 | + period_group_validities.size() | ||
| 173 | + exceptional_operating_days.size() | ||
| 174 | + schedule_versions.size() | ||
| 175 | + public_journey_passing_times.size() | ||
| 176 | + operating_days.size(); | ||
| 177 | } | ||
| 178 | |||
| 179 | void kv1LinkRecords(Kv1Index &index) { | ||
| 180 | for (auto &orunorun : index.records->higher_organizational_units) { | ||
| 181 | Kv1OrganizationalUnit::Key orun_parent_key( | ||
| 182 | orunorun.key.data_owner_code, | ||
| 183 | orunorun.key.organizational_unit_code_parent); | ||
| 184 | Kv1OrganizationalUnit::Key orun_child_key( | ||
| 185 | orunorun.key.data_owner_code, | ||
| 186 | orunorun.key.organizational_unit_code_child); | ||
| 187 | orunorun.p_organizational_unit_parent = index.organizational_units[orun_parent_key]; | ||
| 188 | orunorun.p_organizational_unit_child = index.organizational_units[orun_child_key]; | ||
| 189 | } | ||
| 190 | for (auto &usrstop : index.records->user_stop_points) { | ||
| 191 | Kv1Point::Key point_key( | ||
| 192 | usrstop.key.data_owner_code, | ||
| 193 | usrstop.key.user_stop_code); | ||
| 194 | usrstop.p_point = index.points[point_key]; | ||
| 195 | if (!usrstop.user_stop_area_code.empty()) { | ||
| 196 | Kv1UserStopArea::Key usrstar_key( | ||
| 197 | usrstop.key.data_owner_code, | ||
| 198 | usrstop.user_stop_area_code); | ||
| 199 | usrstop.p_user_stop_area = index.user_stop_areas[usrstar_key]; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | for (auto &tili : index.records->timing_links) { | ||
| 203 | Kv1UserStopPoint::Key usrstop_begin_key( | ||
| 204 | tili.key.data_owner_code, | ||
| 205 | tili.key.user_stop_code_begin); | ||
| 206 | Kv1UserStopPoint::Key usrstop_end_key( | ||
| 207 | tili.key.data_owner_code, | ||
| 208 | tili.key.user_stop_code_end); | ||
| 209 | tili.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; | ||
| 210 | tili.p_user_stop_end = index.user_stop_points[usrstop_end_key]; | ||
| 211 | } | ||
| 212 | for (auto &link : index.records->links) { | ||
| 213 | Kv1UserStopPoint::Key usrstop_begin_key( | ||
| 214 | link.key.data_owner_code, | ||
| 215 | link.key.user_stop_code_begin); | ||
| 216 | Kv1UserStopPoint::Key usrstop_end_key( | ||
| 217 | link.key.data_owner_code, | ||
| 218 | link.key.user_stop_code_end); | ||
| 219 | link.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; | ||
| 220 | link.p_user_stop_end = index.user_stop_points[usrstop_end_key]; | ||
| 221 | } | ||
| 222 | for (auto &line : index.records->lines) { | ||
| 223 | if (!line.line_icon) | ||
| 224 | continue; | ||
| 225 | Kv1Icon::Key icon_key( | ||
| 226 | line.key.data_owner_code, | ||
| 227 | *line.line_icon); | ||
| 228 | line.p_line_icon = index.icons[icon_key]; | ||
| 229 | } | ||
| 230 | for (auto &jopa : index.records->journey_patterns) { | ||
| 231 | Kv1Line::Key line_key( | ||
| 232 | jopa.key.data_owner_code, | ||
| 233 | jopa.key.line_planning_number); | ||
| 234 | jopa.p_line = index.lines[line_key]; | ||
| 235 | } | ||
| 236 | for (auto &confinrel : index.records->concession_financer_relations) { | ||
| 237 | Kv1ConcessionArea::Key conarea_key( | ||
| 238 | confinrel.key.data_owner_code, | ||
| 239 | confinrel.concession_area_code); | ||
| 240 | confinrel.p_concession_area = index.concession_areas[conarea_key]; | ||
| 241 | if (!confinrel.financer_code.empty()) { | ||
| 242 | Kv1Financer::Key financer_key( | ||
| 243 | confinrel.key.data_owner_code, | ||
| 244 | confinrel.financer_code); | ||
| 245 | confinrel.p_financer = index.financers[financer_key]; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | for (auto &jopatili : index.records->journey_pattern_timing_links) { | ||
| 249 | Kv1Line::Key line_key( | ||
| 250 | jopatili.key.data_owner_code, | ||
| 251 | jopatili.key.line_planning_number); | ||
| 252 | Kv1JourneyPattern::Key jopa_key( | ||
| 253 | jopatili.key.data_owner_code, | ||
| 254 | jopatili.key.line_planning_number, | ||
| 255 | jopatili.key.journey_pattern_code); | ||
| 256 | Kv1UserStopPoint::Key usrstop_begin_key( | ||
| 257 | jopatili.key.data_owner_code, | ||
| 258 | jopatili.user_stop_code_begin); | ||
| 259 | Kv1UserStopPoint::Key usrstop_end_key( | ||
| 260 | jopatili.key.data_owner_code, | ||
| 261 | jopatili.user_stop_code_end); | ||
| 262 | Kv1ConcessionFinancerRelation::Key confinrel_key( | ||
| 263 | jopatili.key.data_owner_code, | ||
| 264 | jopatili.con_fin_rel_code); | ||
| 265 | Kv1Destination::Key dest_key( | ||
| 266 | jopatili.key.data_owner_code, | ||
| 267 | jopatili.dest_code); | ||
| 268 | jopatili.p_line = index.lines[line_key]; | ||
| 269 | jopatili.p_journey_pattern = index.journey_patterns[jopa_key]; | ||
| 270 | jopatili.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; | ||
| 271 | jopatili.p_user_stop_end = index.user_stop_points[usrstop_end_key]; | ||
| 272 | jopatili.p_con_fin_rel = index.concession_financer_relations[confinrel_key]; | ||
| 273 | jopatili.p_dest = index.destinations[dest_key]; | ||
| 274 | if (jopatili.line_dest_icon) { | ||
| 275 | Kv1Icon::Key icon_key{ | ||
| 276 | jopatili.key.data_owner_code, | ||
| 277 | *jopatili.line_dest_icon, | ||
| 278 | }; | ||
| 279 | jopatili.p_line_dest_icon = index.icons[icon_key]; | ||
| 280 | } | ||
| 281 | } | ||
| 282 | for (auto &pool : index.records->point_on_links) { | ||
| 283 | Kv1UserStopPoint::Key usrstop_begin_key( | ||
| 284 | pool.key.data_owner_code, | ||
| 285 | pool.key.user_stop_code_begin); | ||
| 286 | Kv1UserStopPoint::Key usrstop_end_key( | ||
| 287 | pool.key.data_owner_code, | ||
| 288 | pool.key.user_stop_code_end); | ||
| 289 | Kv1Point::Key point_key( | ||
| 290 | pool.key.point_data_owner_code, | ||
| 291 | pool.key.point_code); | ||
| 292 | pool.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; | ||
| 293 | pool.p_user_stop_end = index.user_stop_points[usrstop_end_key]; | ||
| 294 | pool.p_point = index.points[point_key]; | ||
| 295 | } | ||
| 296 | for (auto &ntcassgnm : index.records->notice_assignments) { | ||
| 297 | Kv1Notice::Key notice_key( | ||
| 298 | ntcassgnm.data_owner_code, | ||
| 299 | ntcassgnm.notice_code); | ||
| 300 | ntcassgnm.p_notice = index.notices[notice_key]; | ||
| 301 | } | ||
| 302 | for (auto &timdemgrp : index.records->time_demand_groups) { | ||
| 303 | Kv1Line::Key line_key( | ||
| 304 | timdemgrp.key.data_owner_code, | ||
| 305 | timdemgrp.key.line_planning_number); | ||
| 306 | Kv1JourneyPattern::Key jopa_key( | ||
| 307 | timdemgrp.key.data_owner_code, | ||
| 308 | timdemgrp.key.line_planning_number, | ||
| 309 | timdemgrp.key.journey_pattern_code); | ||
| 310 | timdemgrp.p_line = index.lines[line_key]; | ||
| 311 | timdemgrp.p_journey_pattern = index.journey_patterns[jopa_key]; | ||
| 312 | } | ||
| 313 | for (auto &timdemrnt : index.records->time_demand_group_run_times) { | ||
| 314 | Kv1Line::Key line_key( | ||
| 315 | timdemrnt.key.data_owner_code, | ||
| 316 | timdemrnt.key.line_planning_number); | ||
| 317 | Kv1JourneyPattern::Key jopa_key( | ||
| 318 | timdemrnt.key.data_owner_code, | ||
| 319 | timdemrnt.key.line_planning_number, | ||
| 320 | timdemrnt.key.journey_pattern_code); | ||
| 321 | Kv1TimeDemandGroup::Key timdemgrp_key( | ||
| 322 | timdemrnt.key.data_owner_code, | ||
| 323 | timdemrnt.key.line_planning_number, | ||
| 324 | timdemrnt.key.journey_pattern_code, | ||
| 325 | timdemrnt.key.time_demand_group_code); | ||
| 326 | Kv1UserStopPoint::Key usrstop_begin_key( | ||
| 327 | timdemrnt.key.data_owner_code, | ||
| 328 | timdemrnt.user_stop_code_begin); | ||
| 329 | Kv1UserStopPoint::Key usrstop_end_key( | ||
| 330 | timdemrnt.key.data_owner_code, | ||
| 331 | timdemrnt.user_stop_code_end); | ||
| 332 | Kv1JourneyPatternTimingLink::Key jopatili_key( | ||
| 333 | timdemrnt.key.data_owner_code, | ||
| 334 | timdemrnt.key.line_planning_number, | ||
| 335 | timdemrnt.key.journey_pattern_code, | ||
| 336 | timdemrnt.key.timing_link_order); | ||
| 337 | timdemrnt.p_line = index.lines[line_key]; | ||
| 338 | timdemrnt.p_user_stop_end = index.user_stop_points[usrstop_end_key]; | ||
| 339 | timdemrnt.p_user_stop_begin = index.user_stop_points[usrstop_begin_key]; | ||
| 340 | timdemrnt.p_journey_pattern = index.journey_patterns[jopa_key]; | ||
| 341 | timdemrnt.p_time_demand_group = index.time_demand_groups[timdemgrp_key]; | ||
| 342 | timdemrnt.p_journey_pattern_timing_link = index.journey_pattern_timing_links[jopatili_key]; | ||
| 343 | } | ||
| 344 | for (auto &tive : index.records->timetable_versions) { | ||
| 345 | Kv1OrganizationalUnit::Key orun_key( | ||
| 346 | tive.key.data_owner_code, | ||
| 347 | tive.key.organizational_unit_code); | ||
| 348 | Kv1PeriodGroup::Key pegr_key( | ||
| 349 | tive.key.data_owner_code, | ||
| 350 | tive.key.period_group_code); | ||
| 351 | Kv1SpecificDay::Key specday_key( | ||
| 352 | tive.key.data_owner_code, | ||
| 353 | tive.key.specific_day_code); | ||
| 354 | tive.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 355 | tive.p_period_group = index.period_groups[pegr_key]; | ||
| 356 | tive.p_specific_day = index.specific_days[specday_key]; | ||
| 357 | } | ||
| 358 | for (auto &pujo : index.records->public_journeys) { | ||
| 359 | Kv1TimetableVersion::Key tive_key( | ||
| 360 | pujo.key.data_owner_code, | ||
| 361 | pujo.key.organizational_unit_code, | ||
| 362 | pujo.key.timetable_version_code, | ||
| 363 | pujo.key.period_group_code, | ||
| 364 | pujo.key.specific_day_code); | ||
| 365 | Kv1OrganizationalUnit::Key orun_key( | ||
| 366 | pujo.key.data_owner_code, | ||
| 367 | pujo.key.organizational_unit_code); | ||
| 368 | Kv1PeriodGroup::Key pegr_key( | ||
| 369 | pujo.key.data_owner_code, | ||
| 370 | pujo.key.period_group_code); | ||
| 371 | Kv1SpecificDay::Key specday_key( | ||
| 372 | pujo.key.data_owner_code, | ||
| 373 | pujo.key.specific_day_code); | ||
| 374 | Kv1Line::Key line_key( | ||
| 375 | pujo.key.data_owner_code, | ||
| 376 | pujo.key.line_planning_number); | ||
| 377 | Kv1TimeDemandGroup::Key timdemgrp_key( | ||
| 378 | pujo.key.data_owner_code, | ||
| 379 | pujo.key.line_planning_number, | ||
| 380 | pujo.journey_pattern_code, | ||
| 381 | pujo.time_demand_group_code); | ||
| 382 | Kv1JourneyPattern::Key jopa_key( | ||
| 383 | pujo.key.data_owner_code, | ||
| 384 | pujo.key.line_planning_number, | ||
| 385 | pujo.journey_pattern_code); | ||
| 386 | pujo.p_timetable_version = index.timetable_versions[tive_key]; | ||
| 387 | pujo.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 388 | pujo.p_period_group = index.period_groups[pegr_key]; | ||
| 389 | pujo.p_specific_day = index.specific_days[specday_key]; | ||
| 390 | pujo.p_line = index.lines[line_key]; | ||
| 391 | pujo.p_time_demand_group = index.time_demand_groups[timdemgrp_key]; | ||
| 392 | pujo.p_journey_pattern = index.journey_patterns[jopa_key]; | ||
| 393 | } | ||
| 394 | for (auto &pegrval : index.records->period_group_validities) { | ||
| 395 | Kv1OrganizationalUnit::Key orun_key( | ||
| 396 | pegrval.key.data_owner_code, | ||
| 397 | pegrval.key.organizational_unit_code); | ||
| 398 | Kv1PeriodGroup::Key pegr_key( | ||
| 399 | pegrval.key.data_owner_code, | ||
| 400 | pegrval.key.period_group_code); | ||
| 401 | pegrval.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 402 | pegrval.p_period_group = index.period_groups[pegr_key]; | ||
| 403 | } | ||
| 404 | for (auto &excopday : index.records->exceptional_operating_days) { | ||
| 405 | Kv1OrganizationalUnit::Key orun_key( | ||
| 406 | excopday.key.data_owner_code, | ||
| 407 | excopday.key.organizational_unit_code); | ||
| 408 | Kv1SpecificDay::Key specday_key( | ||
| 409 | excopday.key.data_owner_code, | ||
| 410 | excopday.specific_day_code); | ||
| 411 | Kv1PeriodGroup::Key pegr_key( | ||
| 412 | excopday.key.data_owner_code, | ||
| 413 | excopday.period_group_code); | ||
| 414 | excopday.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 415 | excopday.p_specific_day = index.specific_days[specday_key]; | ||
| 416 | excopday.p_period_group = index.period_groups[pegr_key]; | ||
| 417 | } | ||
| 418 | for (auto &schedvers : index.records->schedule_versions) { | ||
| 419 | Kv1OrganizationalUnit::Key orun_key( | ||
| 420 | schedvers.key.data_owner_code, | ||
| 421 | schedvers.key.organizational_unit_code); | ||
| 422 | schedvers.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 423 | } | ||
| 424 | for (auto &pujopass : index.records->public_journey_passing_times) { | ||
| 425 | Kv1OrganizationalUnit::Key orun_key( | ||
| 426 | pujopass.key.data_owner_code, | ||
| 427 | pujopass.key.organizational_unit_code); | ||
| 428 | Kv1ScheduleVersion::Key schedvers_key( | ||
| 429 | pujopass.key.data_owner_code, | ||
| 430 | pujopass.key.organizational_unit_code, | ||
| 431 | pujopass.key.schedule_code, | ||
| 432 | pujopass.key.schedule_type_code); | ||
| 433 | Kv1Line::Key line_key( | ||
| 434 | pujopass.key.data_owner_code, | ||
| 435 | pujopass.key.line_planning_number); | ||
| 436 | Kv1JourneyPattern::Key jopa_key( | ||
| 437 | pujopass.key.data_owner_code, | ||
| 438 | pujopass.key.line_planning_number, | ||
| 439 | pujopass.journey_pattern_code); | ||
| 440 | Kv1UserStopPoint::Key usrstop_key( | ||
| 441 | pujopass.key.data_owner_code, | ||
| 442 | pujopass.user_stop_code); | ||
| 443 | pujopass.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 444 | pujopass.p_schedule_version = index.schedule_versions[schedvers_key]; | ||
| 445 | pujopass.p_line = index.lines[line_key]; | ||
| 446 | pujopass.p_journey_pattern = index.journey_patterns[jopa_key]; | ||
| 447 | pujopass.p_user_stop = index.user_stop_points[usrstop_key]; | ||
| 448 | } | ||
| 449 | for (auto &operday : index.records->operating_days) { | ||
| 450 | Kv1OrganizationalUnit::Key orun_key( | ||
| 451 | operday.key.data_owner_code, | ||
| 452 | operday.key.organizational_unit_code); | ||
| 453 | Kv1ScheduleVersion::Key schedvers_key( | ||
| 454 | operday.key.data_owner_code, | ||
| 455 | operday.key.organizational_unit_code, | ||
| 456 | operday.key.schedule_code, | ||
| 457 | operday.key.schedule_type_code); | ||
| 458 | operday.p_organizational_unit = index.organizational_units[orun_key]; | ||
| 459 | operday.p_schedule_version = index.schedule_versions[schedvers_key]; | ||
| 460 | } | ||
| 461 | } | ||
diff --git a/lib/libtmi8/src/kv1_lexer.cpp b/lib/libtmi8/src/kv1_lexer.cpp new file mode 100644 index 0000000..028127b --- /dev/null +++ b/lib/libtmi8/src/kv1_lexer.cpp | |||
| @@ -0,0 +1,152 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <tmi8/kv1_lexer.hpp> | ||
| 4 | |||
| 5 | Kv1Lexer::Kv1Lexer(std::string_view input) | ||
| 6 | : input(input), slice(input) | ||
| 7 | {} | ||
| 8 | |||
| 9 | // Does not eat newline character. | ||
| 10 | void Kv1Lexer::eatRestOfLine() { | ||
| 11 | size_t end = slice.size(); | ||
| 12 | for (size_t i = 0; i < slice.size(); i++) { | ||
| 13 | if (slice[i] == '\r' || slice[i] == '\n') { | ||
| 14 | end = i; | ||
| 15 | break; | ||
| 16 | } | ||
| 17 | } | ||
| 18 | slice = slice.substr(end); | ||
| 19 | } | ||
| 20 | |||
| 21 | void Kv1Lexer::lexOptionalHeader() { | ||
| 22 | if (slice.starts_with('[')) eatRestOfLine(); | ||
| 23 | } | ||
| 24 | |||
| 25 | void Kv1Lexer::lexOptionalComment() { | ||
| 26 | if (slice.starts_with(';')) eatRestOfLine(); | ||
| 27 | } | ||
| 28 | |||
| 29 | inline bool Kv1Lexer::isWhitespace(int c) { | ||
| 30 | return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; | ||
| 31 | } | ||
| 32 | |||
| 33 | void Kv1Lexer::readQuotedColumn() { | ||
| 34 | Kv1Token token{ .type = KV1_TOKEN_CELL }; | ||
| 35 | |||
| 36 | if (slice.size() == 0 || slice[0] != '"') { | ||
| 37 | errors.push_back("(internal error) readQuotedColumn: slice[0] != '\"'"); | ||
| 38 | return; | ||
| 39 | } | ||
| 40 | slice = slice.substr(1); | ||
| 41 | while (true) { | ||
| 42 | size_t quote = slice.find('"'); | ||
| 43 | if (quote == std::string_view::npos) { | ||
| 44 | errors.push_back("readQuotedColumn: no matching closing quote found"); | ||
| 45 | return; | ||
| 46 | } | ||
| 47 | if (quote+1 == slice.size() || slice[quote + 1] != '"') { | ||
| 48 | token.data.append(slice.substr(0, quote)); | ||
| 49 | break; | ||
| 50 | } | ||
| 51 | token.data.append(slice.substr(0, quote + 1)); | ||
| 52 | slice = slice.substr(quote + 2); | ||
| 53 | } | ||
| 54 | |||
| 55 | size_t end = slice.size(); | ||
| 56 | for (size_t i = 0; i < slice.size(); i++) { | ||
| 57 | if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') { | ||
| 58 | end = i; | ||
| 59 | break; | ||
| 60 | } | ||
| 61 | if (!isWhitespace(slice[i])) { | ||
| 62 | errors.push_back("readQuotedColumn: encountered non-whitespace character after closing quote"); | ||
| 63 | return; | ||
| 64 | } | ||
| 65 | } | ||
| 66 | if (end != std::string_view::npos) slice = slice.substr(end); | ||
| 67 | else slice = slice.substr(slice.size()); | ||
| 68 | |||
| 69 | tokens.push_back(std::move(token)); | ||
| 70 | } | ||
| 71 | |||
| 72 | void Kv1Lexer::readUnquotedColumn() { | ||
| 73 | size_t end = slice.size(); | ||
| 74 | size_t content_end = 0; | ||
| 75 | for (size_t i = 0; i < slice.size(); i++) { | ||
| 76 | if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') { | ||
| 77 | end = i; | ||
| 78 | break; | ||
| 79 | } else if (!isWhitespace(slice[i])) { | ||
| 80 | content_end = i + 1; | ||
| 81 | } | ||
| 82 | } | ||
| 83 | tokens.emplace_back(KV1_TOKEN_CELL, std::string(slice.substr(0, content_end))); | ||
| 84 | if (end != std::string_view::npos) slice = slice.substr(end); | ||
| 85 | else slice = slice.substr(slice.size()); | ||
| 86 | } | ||
| 87 | |||
| 88 | void Kv1Lexer::lexRow() { | ||
| 89 | size_t cols = 0; | ||
| 90 | while (slice.size() > 0 && slice[0] != '\r' && slice[0] != '\n') { | ||
| 91 | if (slice[0] == '"') readQuotedColumn(); | ||
| 92 | else readUnquotedColumn(); | ||
| 93 | if (!errors.empty()) return; | ||
| 94 | cols++; | ||
| 95 | if (slice.size() != 0) { | ||
| 96 | if (slice[0] == '|') { | ||
| 97 | slice = slice.substr(1); | ||
| 98 | // A newline/eof right after pipe? That means an empty field at the end | ||
| 99 | // of the record, we also want to emit that as a token. | ||
| 100 | if (slice.size() == 0 || slice[0] == '\r' || slice[0] == '\n') { | ||
| 101 | tokens.push_back({ .type = KV1_TOKEN_CELL }); | ||
| 102 | } | ||
| 103 | } else if (slice[0] == '\r') { | ||
| 104 | if (slice.size() > 1 && slice[1] == '\n') slice = slice.substr(2); | ||
| 105 | else slice = slice.substr(1); | ||
| 106 | break; | ||
| 107 | } else if (slice[0] == '\n') { | ||
| 108 | slice = slice.substr(1); | ||
| 109 | break; | ||
| 110 | } else { | ||
| 111 | errors.push_back("lexRow: expected CR, LF or |"); | ||
| 112 | return; | ||
| 113 | } | ||
| 114 | } | ||
| 115 | } | ||
| 116 | tokens.push_back({ .type = KV1_TOKEN_ROW_END }); | ||
| 117 | } | ||
| 118 | |||
| 119 | // Returns true when a line ending was consumed. | ||
| 120 | bool Kv1Lexer::eatWhitespace() { | ||
| 121 | for (size_t i = 0; i < slice.size(); i++) { | ||
| 122 | if (slice[i] == '\r') { | ||
| 123 | slice = slice.substr(i + 1); | ||
| 124 | if (slice.size() > 1 && slice[i + 1] == '\n') | ||
| 125 | slice = slice.substr(i + 2); | ||
| 126 | return true; | ||
| 127 | } | ||
| 128 | if (slice[i] == '\n') { | ||
| 129 | slice = slice.substr(i + 1); | ||
| 130 | return true; | ||
| 131 | } | ||
| 132 | |||
| 133 | if (slice[i] != ' ' && slice[i] != '\f' && slice[i] != '\t' && slice[i] != '\v') { | ||
| 134 | slice = slice.substr(i); | ||
| 135 | return false; | ||
| 136 | } | ||
| 137 | } | ||
| 138 | return false; | ||
| 139 | } | ||
| 140 | |||
| 141 | void Kv1Lexer::lex() { | ||
| 142 | lexOptionalHeader(); | ||
| 143 | eatWhitespace(); | ||
| 144 | |||
| 145 | while (errors.empty() && !slice.empty()) { | ||
| 146 | lexOptionalComment(); | ||
| 147 | bool newline = eatWhitespace(); | ||
| 148 | if (newline) continue; | ||
| 149 | // We are now either (1) at the end of the file or (2) at the start of some column data | ||
| 150 | if (errors.empty()) lexRow(); | ||
| 151 | } | ||
| 152 | } | ||
diff --git a/lib/libtmi8/src/kv1_parser.cpp b/lib/libtmi8/src/kv1_parser.cpp new file mode 100644 index 0000000..ac0c6bf --- /dev/null +++ b/lib/libtmi8/src/kv1_parser.cpp | |||
| @@ -0,0 +1,1258 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <tmi8/kv1_parser.hpp> | ||
| 4 | |||
using rune = uint32_t;

// Decodes the UTF-8 sequence at the start of `s`.
//
// Returns the number of bytes consumed: 0 when `s` is empty, otherwise 1 to 4.
// The returned length never exceeds s.size(). When `dest` is non-null and `s`
// is non-empty, the decoded code point is stored in `*dest`.
//
// A malformed sequence (unexpected lead byte, missing or invalid continuation
// byte, input ending in the middle of a sequence) decodes to U+FFFD and
// consumes exactly one byte, so that decoding resumes at the next byte.
// Overlong encodings and surrogate code points are not rejected.
static size_t decodeUtf8Cp(std::string_view s, rune *dest = nullptr) {
  if (s.empty())
    return 0;

  const uint8_t *b = reinterpret_cast<const uint8_t *>(s.data());
  // True when byte `i` exists and has the form 10xxxxxx.
  const auto isContinuation = [&](size_t i) {
    return i < s.size() && (b[i] & 0xC0) == 0x80;
  };

  rune res = 0xFFFD;
  size_t length = 1;

  if (!(b[0] & 0x80)) {
    // 0xxxxxxx: ASCII.
    res = static_cast<rune>(b[0]);
  } else if ((b[0] & 0xE0) == 0xC0) {
    // 110xxxxx 10xxxxxx
    if (isContinuation(1)) {
      length = 2;
      res = static_cast<rune>(b[0] & 0x1F) << 6;
      res |= static_cast<rune>(b[1] & 0x3F);
    }
  } else if ((b[0] & 0xF0) == 0xE0) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    if (isContinuation(1) && isContinuation(2)) {
      length = 3;
      res = static_cast<rune>(b[0] & 0x0F) << 12;
      res |= static_cast<rune>(b[1] & 0x3F) << 6;
      res |= static_cast<rune>(b[2] & 0x3F);
    }
  } else if ((b[0] & 0xF8) == 0xF0) {
    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. The lead byte is matched with a
    // mask; comparing against 0xF0 alone would miss the leads 0xF1..0xF4.
    if (isContinuation(1) && isContinuation(2) && isContinuation(3)) {
      length = 4;
      res = static_cast<rune>(b[0] & 0x07) << 18;
      res |= static_cast<rune>(b[1] & 0x3F) << 12;
      res |= static_cast<rune>(b[2] & 0x3F) << 6;
      res |= static_cast<rune>(b[3] & 0x3F);
    }
  }

  if (dest)
    *dest = res;
  return length;
}
| 43 | |||
| 44 | // Counts the number of codepoints in a valid UTF-8 string. Returns SIZE_MAX if | ||
| 45 | // the string contains invalid UTF-8 codepoints. | ||
| 46 | static size_t stringViewLengthUtf8(std::string_view sv) { | ||
| 47 | size_t codepoints = 0; | ||
| 48 | while (sv.size() > 0) { | ||
| 49 | size_t codepoint_size = decodeUtf8Cp(sv); | ||
| 50 | if (codepoint_size == 0) return SIZE_MAX; | ||
| 51 | codepoints++; | ||
| 52 | sv = sv.substr(codepoint_size); | ||
| 53 | } | ||
| 54 | return codepoints; | ||
| 55 | } | ||
| 56 | |||
| 57 | Kv1Parser::Kv1Parser(std::vector<Kv1Token> tokens, Kv1Records &parse_into) | ||
| 58 | : tokens(std::move(tokens)), | ||
| 59 | records(parse_into) | ||
| 60 | {} | ||
| 61 | |||
| 62 | bool Kv1Parser::atEnd() const { | ||
| 63 | return pos >= tokens.size(); | ||
| 64 | } | ||
| 65 | |||
| 66 | void Kv1Parser::eatRowEnds() { | ||
| 67 | while (!atEnd() && tokens[pos].type == KV1_TOKEN_ROW_END) pos++; | ||
| 68 | } | ||
| 69 | |||
| 70 | const Kv1Token *Kv1Parser::cur() const { | ||
| 71 | if (atEnd()) return nullptr; | ||
| 72 | return &tokens[pos]; | ||
| 73 | } | ||
| 74 | |||
// Consumes the current token when it is a cell and returns a pointer to its
// contents. When the input or the row has ended instead, nothing is consumed,
// an error mentioning `parsing_what` is added to record_errors and nullptr is
// returned.
const std::string *Kv1Parser::eatCell(std::string_view parsing_what) {
  const Kv1Token *token = cur();
  if (token && token->type != KV1_TOKEN_ROW_END) {
    pos++;
    return &token->data;
  }

  if (token)
    record_errors.push_back(std::format("Expected cell but got end of row when parsing {}", parsing_what));
  else
    record_errors.push_back(std::format("Expected cell but got end of file when parsing {}", parsing_what));
  return nullptr;
}
| 88 | |||
// Validates a string field: a mandatory field may not be empty, the value
// must be valid UTF-8 and it may contain at most `max_length` code points.
// Problems are reported as error messages that start with `field`.
void Kv1Parser::requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value) {
  if (value.empty() && mandatory) {
    record_errors.push_back(std::format("{} has length zero but is required", field));
    return;
  }
  size_t codepoints = stringViewLengthUtf8(value);
  if (codepoints == SIZE_MAX) {
    // NOTE(review): this lands in global_errors, not record_errors, so the
    // eat* helpers (which only look at record_errors) keep going after
    // invalid UTF-8. Confirm that this is intended.
    global_errors.push_back(std::format("{} contains invalid UTF-8 code points", field));
    return;
  }
  if (codepoints > max_length) {
    // Report the length in code points, the same unit the limit is checked
    // in. The byte size would overstate the length of any non-ASCII value.
    record_errors.push_back(std::format("{} has length ({}) that is greater than maximum length ({})",
                                        field, codepoints, max_length));
  }
}
| 104 | |||
// Maps the exact spellings "1"/"true" to true and "0"/"false" to false. Any
// other input (including the empty string) yields std::nullopt.
static inline std::optional<bool> parseBoolean(std::string_view src) {
  if (src == "1" || src == "true") return true;
  if (src == "0" || src == "false") return false;
  return std::nullopt;
}
| 112 | |||
// Interprets `value` as a boolean field. An empty value yields std::nullopt
// and is only an error when the field is mandatory; a non-empty value that is
// not a recognised boolean spelling is reported in record_errors.
std::optional<bool> Kv1Parser::requireBoolean(std::string_view field, bool mandatory, std::string_view value) {
  if (!value.empty()) {
    std::optional<bool> result = parseBoolean(value);
    if (!result)
      record_errors.push_back(std::format("{} should have value \"1\", \"0\", \"true\" or \"false\"", field));
    return result;
  }

  if (mandatory)
    record_errors.push_back(std::format("{} is required, but has no value", field));
  return std::nullopt;
}
| 124 | |||
// Number of decimal digits in x, ignoring the sign. Zero counts as having no
// digits at all.
static inline size_t countDigits(long x) {
  size_t count = 0;
  for (long rest = x; rest != 0; rest /= 10)
    count++;
  return count;
}
| 130 | |||
// Parses `value` as a plain decimal number (fixed notation, no exponent).
//
// An empty value yields std::nullopt and is only an error when the field is
// mandatory. A value that is not entirely a number, or whose integral part
// has more than `max_digits` significant digits, is reported in record_errors
// and yields std::nullopt as well.
std::optional<double> Kv1Parser::requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} has no value but is required", field));
    return std::nullopt;
  }

  double parsed;
  auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed);
  if (ec != std::errc()) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }
  if (ptr != value.data() + value.size()) {
    record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field));
    return std::nullopt;
  }

  // std::from_chars also accepts spellings of infinity and NaN. Those are not
  // numbers for our purposes, so only accept text that starts (after an
  // optional minus sign) with a digit or a decimal point.
  std::string_view integral = value;
  if (integral.front() == '-') integral.remove_prefix(1);
  bool is_plain_number = !integral.empty()
    && (integral.front() == '.' || (integral.front() >= '0' && integral.front() <= '9'));
  if (!is_plain_number) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }

  // Count the significant digits of the integral part from the text itself.
  // Converting the double to an integer type first would be undefined
  // behaviour for values that do not fit in that type.
  integral = integral.substr(0, integral.find('.'));
  while (!integral.empty() && integral.front() == '0') integral.remove_prefix(1);
  size_t digits = integral.size();
  if (digits > max_digits) {
    record_errors.push_back(std::format("{} contains more digits (in the integral part) ({}) than allowed ({})",
                                        field, digits, max_digits));
    return std::nullopt;
  }

  return parsed;
}
| 158 | |||
// True for '0'-'9' and for upper-case 'A'-'F'. Lower-case letters are not
// accepted.
static inline bool isHexDigit(char c) {
  if ('0' <= c && c <= '9') return true;
  return 'A' <= c && c <= 'F';
}
| 162 | |||
// Value (0-15) of an upper-case hexadecimal digit. Any character that is not
// '0'-'9' or 'A'-'F' maps to 0.
static inline uint8_t fromHex(char c) {
  const bool is_decimal = c >= '0' && c <= '9';
  const bool is_letter = c >= 'A' && c <= 'F';
  if (is_decimal) return static_cast<uint8_t>(c - '0');
  if (is_letter) return static_cast<uint8_t>(c - 'A' + 10);
  return 0;
}
| 168 | |||
| 169 | static std::optional<RgbColor> parseRgbColor(std::string_view src) { | ||
| 170 | bool valid = src.size() == 6 | ||
| 171 | && isHexDigit(src[0]) && isHexDigit(src[1]) | ||
| 172 | && isHexDigit(src[2]) && isHexDigit(src[3]) | ||
| 173 | && isHexDigit(src[4]) && isHexDigit(src[5]); | ||
| 174 | if (!valid) return std::nullopt; | ||
| 175 | uint8_t r = static_cast<uint8_t>(fromHex(src[0]) << 4) + fromHex(src[1]); | ||
| 176 | uint8_t g = static_cast<uint8_t>(fromHex(src[2]) << 4) + fromHex(src[3]); | ||
| 177 | uint8_t b = static_cast<uint8_t>(fromHex(src[4]) << 4) + fromHex(src[5]); | ||
| 178 | return RgbColor{ r, g, b }; | ||
| 179 | } | ||
| 180 | |||
// Interprets `value` as an RGB colour field. An empty value yields
// std::nullopt and is only an error when the field is mandatory; a non-empty
// value that does not parse as a colour is reported in record_errors.
std::optional<RgbColor> Kv1Parser::requireRgbColor(std::string_view field, bool mandatory, std::string_view value) {
  if (!value.empty()) {
    std::optional<RgbColor> color = parseRgbColor(value);
    if (!color)
      record_errors.push_back(std::format("{} should be an RGB color, i.e. a sequence of six hexadecimally represented nibbles", field));
    return color;
  }

  if (mandatory)
    record_errors.push_back(std::format("{} is required, but has no value", field));
  return std::nullopt;
}
| 192 | |||
// Parses `value` as a coordinate: a plain decimal number (fixed notation, no
// exponent) of at most 15 characters whose integral part has at least
// `min_digits` significant digits.
//
// An empty value yields std::nullopt and is only an error when the field is
// mandatory. Every other violation is reported in record_errors and yields
// std::nullopt as well.
std::optional<double> Kv1Parser::requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} is required, but has no value", field));
    return std::nullopt;
  }
  if (value.size() > 15) {
    record_errors.push_back(std::format("{} may not have more than 15 characters", field));
    return std::nullopt;
  }

  double parsed;
  auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed);
  if (ec != std::errc()) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }
  if (ptr != value.data() + value.size()) {
    record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field));
    return std::nullopt;
  }

  // std::from_chars also accepts spellings of infinity and NaN. Those are not
  // usable coordinates, so only accept text that starts (after an optional
  // minus sign) with a digit or a decimal point.
  std::string_view integral = value;
  if (integral.front() == '-') integral.remove_prefix(1);
  bool is_plain_number = !integral.empty()
    && (integral.front() == '.' || (integral.front() >= '0' && integral.front() <= '9'));
  if (!is_plain_number) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }

  // Count the significant digits of the integral part from the text itself,
  // which avoids converting the double to an integer type.
  integral = integral.substr(0, integral.find('.'));
  while (!integral.empty() && integral.front() == '0') integral.remove_prefix(1);
  size_t digits = integral.size();
  if (digits < min_digits) {
    record_errors.push_back(std::format("{} contains less digits (in the integral part) ({}) than required ({}) [value: {}]",
                                        field, digits, min_digits, value));
    return std::nullopt;
  }

  return parsed;
}
| 224 | |||
| 225 | std::string Kv1Parser::eatString(std::string_view field, bool mandatory, size_t max_length) { | ||
| 226 | auto value = eatCell(field); | ||
| 227 | if (!record_errors.empty()) return {}; | ||
| 228 | requireString(field, mandatory, max_length, *value); | ||
| 229 | return std::move(*value); | ||
| 230 | } | ||
| 231 | |||
| 232 | std::optional<bool> Kv1Parser::eatBoolean(std::string_view field, bool mandatory) { | ||
| 233 | auto value = eatCell(field); | ||
| 234 | if (!record_errors.empty()) return {}; | ||
| 235 | return requireBoolean(field, mandatory, *value); | ||
| 236 | } | ||
| 237 | |||
| 238 | std::optional<double> Kv1Parser::eatNumber(std::string_view field, bool mandatory, size_t max_digits) { | ||
| 239 | auto value = eatCell(field); | ||
| 240 | if (!record_errors.empty()) return {}; | ||
| 241 | return requireNumber(field, mandatory, max_digits, *value); | ||
| 242 | } | ||
| 243 | |||
| 244 | std::optional<RgbColor> Kv1Parser::eatRgbColor(std::string_view field, bool mandatory) { | ||
| 245 | auto value = eatCell(field); | ||
| 246 | if (!record_errors.empty()) return {}; | ||
| 247 | return requireRgbColor(field, mandatory, *value); | ||
| 248 | } | ||
| 249 | |||
| 250 | std::optional<double> Kv1Parser::eatRdCoord(std::string_view field, bool mandatory, size_t min_digits) { | ||
| 251 | auto value = eatCell(field); | ||
| 252 | if (!record_errors.empty()) return {}; | ||
| 253 | return requireRdCoord(field, mandatory, min_digits, *value); | ||
| 254 | } | ||
| 255 | |||
| 256 | std::string Kv1Parser::parseHeader() { | ||
| 257 | auto record_type = eatString("<header>.Recordtype", true, 10); | ||
| 258 | auto version_number = eatString("<header>.VersionNumber", true, 2); | ||
| 259 | auto implicit_explicit = eatString("<header>.Implicit/Explicit", true, 1); | ||
| 260 | if (!record_errors.empty()) return {}; | ||
| 261 | |||
| 262 | if (version_number != "1") { | ||
| 263 | record_errors.push_back("<header>.VersionNumber should be 1"); | ||
| 264 | return ""; | ||
| 265 | } | ||
| 266 | if (implicit_explicit != "I") { | ||
| 267 | record_errors.push_back("<header>.Implicit/Explicit should be 'I'"); | ||
| 268 | return ""; | ||
| 269 | } | ||
| 270 | |||
| 271 | return record_type; | ||
| 272 | } | ||
| 273 | |||
| 274 | void Kv1Parser::eatRestOfRow() { | ||
| 275 | while (!atEnd() && cur()->type != KV1_TOKEN_ROW_END) pos++; | ||
| 276 | } | ||
| 277 | |||
// Parses all rows in the token stream. For each row the header is read, the
// parse function registered for its record type is run, and any errors
// collected for that record are moved to global_errors. Rows with an unknown
// record type are skipped with a warning.
void Kv1Parser::parse() {
  while (!atEnd()) {
    eatRowEnds();
    if (atEnd()) break;

    const std::string record_type = parseHeader();
    // NOTE(review): a bad header stops parsing altogether and leaves its
    // messages in record_errors (they are not moved to global_errors).
    // Presumably callers inspect record_errors afterwards; confirm.
    if (!record_errors.empty()) return;

    const auto parser = type_parsers.find(record_type);
    if (parser == type_parsers.end()) {
      warns.push_back(std::format("Recordtype ({}) is bad or names a record type that this program cannot process",
                                  record_type));
      eatRestOfRow();
      continue;
    }

    ParseFunc parse_record = parser->second;
    (this->*parse_record)();

    // A record parser is expected to consume its whole row.
    const Kv1Token *next = cur();
    if (next && next->type != KV1_TOKEN_ROW_END) {
      record_errors.push_back(std::format("Parser function for Recordtype ({}) did not eat all record fields",
                                          record_type));
      eatRestOfRow();
    }

    if (!record_errors.empty()) {
      global_errors.insert(global_errors.end(), record_errors.begin(), record_errors.end());
      record_errors.clear();
    }
  }
}
| 305 | |||
| 306 | void Kv1Parser::parseOrganizationalUnit() { | ||
| 307 | auto data_owner_code = eatString("ORUN.DataOwnerCode", true, 10); | ||
| 308 | auto organizational_unit_code = eatString("ORUN.OrganizationalUnitCode", true, 10); | ||
| 309 | auto name = eatString("ORUN.Name", true, 50); | ||
| 310 | auto organizational_unit_type = eatString("ORUN.OrganizationalUnitType", true, 10); | ||
| 311 | auto description = eatString("ORUN.Description", false, 255); | ||
| 312 | if (!record_errors.empty()) return; | ||
| 313 | |||
| 314 | records.organizational_units.emplace_back( | ||
| 315 | Kv1OrganizationalUnit::Key( | ||
| 316 | data_owner_code, | ||
| 317 | organizational_unit_code), | ||
| 318 | name, | ||
| 319 | organizational_unit_type, | ||
| 320 | description); | ||
| 321 | } | ||
| 322 | |||
// True for the ASCII decimal digits '0' through '9'.
static inline bool isDigit(char c) {
  return '0' <= c && c <= '9';
}
| 326 | |||
| 327 | // Parse a string of the format YYYY-MM-DD. | ||
| 328 | static std::optional<std::chrono::year_month_day> parseYyyymmdd(std::string_view src) { | ||
| 329 | bool valid = src.size() == 10 | ||
| 330 | && isDigit(src[0]) && isDigit(src[1]) | ||
| 331 | && isDigit(src[2]) && isDigit(src[3]) && src[4] == '-' | ||
| 332 | && isDigit(src[5]) && isDigit(src[6]) && src[7] == '-' | ||
| 333 | && isDigit(src[8]) && isDigit(src[9]); | ||
| 334 | if (!valid) return std::nullopt; | ||
| 335 | int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0'; | ||
| 336 | int month = (src[5] - '0') * 10 + src[6] - '0'; | ||
| 337 | int day = (src[8] - '0') * 10 + src[9] - '0'; | ||
| 338 | return std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day); | ||
| 339 | } | ||
| 340 | |||
| 341 | // Parse a string of the format HH:MM:SS. | ||
| 342 | static std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> parseHhmmss(std::string_view src) { | ||
| 343 | bool valid = src.size() == 8 | ||
| 344 | && isDigit(src[0]) && isDigit(src[1]) && src[2] == ':' | ||
| 345 | && isDigit(src[3]) && isDigit(src[4]) && src[5] == ':' | ||
| 346 | && isDigit(src[6]) && isDigit(src[7]); | ||
| 347 | if (!valid) return std::nullopt; | ||
| 348 | int hh = (src[0] - '0') * 10 + src[1] - '0'; | ||
| 349 | int mm = (src[3] - '0') * 10 + src[4] - '0'; | ||
| 350 | int ss = (src[6] - '0') * 10 + src[7] - '0'; | ||
| 351 | // The check for the hour not being greater than 32 comes from the fact the | ||
| 352 | // specification explicitly allows hours greater than 23, noting that the | ||
| 353 | // period 24:00-32:00 is equivalent to 00:00-08:00 in the next day, for | ||
| 354 | // exploitation of two days. | ||
| 355 | if (hh > 32 || mm > 59 || ss > 59) return std::nullopt; | ||
| 356 | return std::chrono::hh_mm_ss(std::chrono::hours(hh) + std::chrono::minutes(mm) + std::chrono::seconds(ss)); | ||
| 357 | } | ||
| 358 | |||
| 359 | static std::optional<std::chrono::sys_seconds> parseDateTime(std::string_view src, const std::chrono::time_zone *amsterdam, std::string_view *error = nullptr) { | ||
| 360 | #define ERROR(err) do { if (error) *error = err; return std::nullopt; } while (0) | ||
| 361 | if (src.size() > 23) ERROR("timestamp string is too big"); | ||
| 362 | if (src.size() < 17) ERROR("timestamp string is too small"); | ||
| 363 | |||
| 364 | bool valid_year = isDigit(src[0]) && isDigit(src[1]) && isDigit(src[2]) && isDigit(src[3]); | ||
| 365 | if (!valid_year) ERROR("year has bad format"); | ||
| 366 | |||
| 367 | size_t month_off = src[4] == '-' ? 5 : 4; | ||
| 368 | size_t day_off = src[month_off + 2] == '-' ? month_off + 3 : month_off + 2; | ||
| 369 | size_t time_off = day_off + 2; | ||
| 370 | if (src[time_off] != 'T' && src[time_off] != ' ') | ||
| 371 | ERROR("missing date/time separator"); | ||
| 372 | size_t tzd_off = time_off + 9; | ||
| 373 | // For clarity, TZD stands for Time Zone Designator. It often takes the form | ||
| 374 | // of Z (Zulu, UTC+00:00) or as an offset from UTC in hours and minutes, | ||
| 375 | // formatted as +|-HH:MM (e.g. +01:00, -12:00). | ||
| 376 | |||
| 377 | if (time_off + 8 >= src.size()) ERROR("bad format, not enough space for hh:mm:ss"); | ||
| 378 | |||
| 379 | int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0'; | ||
| 380 | int month = (src[month_off] - '0') * 10 + src[month_off + 1] - '0'; | ||
| 381 | int day = (src[day_off] - '0') * 10 + src[day_off + 1] - '0'; | ||
| 382 | int hour = (src[time_off + 1] - '0') * 10 + src[time_off + 2] - '0'; | ||
| 383 | int minute = (src[time_off + 4] - '0') * 10 + src[time_off + 5] - '0'; | ||
| 384 | int second = (src[time_off + 7] - '0') * 10 + src[time_off + 8] - '0'; | ||
| 385 | |||
| 386 | auto date = std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day); | ||
| 387 | auto time = std::chrono::hours(hour) + std::chrono::minutes(minute) + std::chrono::seconds(second); | ||
| 388 | |||
| 389 | std::chrono::sys_seconds unix_start_of_day; | ||
| 390 | if (tzd_off < src.size()) { | ||
| 391 | unix_start_of_day = std::chrono::sys_days(date); | ||
| 392 | } else { | ||
| 393 | auto local_days = std::chrono::local_days(date); | ||
| 394 | std::chrono::zoned_seconds zoned_start_of_day = std::chrono::zoned_time(amsterdam, local_days); | ||
| 395 | unix_start_of_day = std::chrono::sys_seconds(zoned_start_of_day); | ||
| 396 | } | ||
| 397 | |||
| 398 | std::chrono::minutes offset(0); | ||
| 399 | if (tzd_off + 1 == src.size() && src[tzd_off] != 'Z') { | ||
| 400 | ERROR("bad TZD (missing Zulu indicator)"); | ||
| 401 | } else if (tzd_off + 6 == src.size()) { | ||
| 402 | bool valid_tzd = (src[tzd_off] == '+' || src[tzd_off] == '-') | ||
| 403 | && isDigit(src[tzd_off + 1]) && isDigit(src[tzd_off + 2]) && src[tzd_off + 3] == ':' | ||
| 404 | && isDigit(src[tzd_off + 4]) && isDigit(src[tzd_off + 5]); | ||
| 405 | if (!valid_tzd) ERROR("bad offset TZD format (expected +|-hh:mm)"); | ||
| 406 | int sign = src[tzd_off] == '-' ? -1 : 1; | ||
| 407 | int tzd_hh = (src[tzd_off + 1] - '0') * 10 + src[tzd_off + 2] - '0'; | ||
| 408 | int tzd_mm = (src[tzd_off + 3] - '0') * 10 + src[tzd_off + 4] - '0'; | ||
| 409 | offset = sign * std::chrono::minutes(tzd_hh * 60 + tzd_mm); | ||
| 410 | } else if (tzd_off < src.size()) { | ||
| 411 | // There is a TZD but we literally have no clue how to parse it :/ | ||
| 412 | ERROR("cannot parse TZD of unexpected length"); | ||
| 413 | } | ||
| 414 | |||
| 415 | return unix_start_of_day + time - offset; | ||
| 416 | #undef ERROR | ||
| 417 | } | ||
| 418 | |||
| 419 | void Kv1Parser::parseHigherOrganizationalUnit() { | ||
| 420 | auto data_owner_code = eatString("ORUNORUN.DataOwnerCode", true, 10); | ||
| 421 | auto organizational_unit_code_parent = eatString("ORUNORUN.OrganizationalUnitCodeParent", true, 10); | ||
| 422 | auto organizational_unit_code_child = eatString("ORUNORUN.OrganizationalUnitCodeChild", true, 10); | ||
| 423 | auto valid_from_raw = eatString("ORUNORUN.ValidFrom", true, 10); | ||
| 424 | if (!record_errors.empty()) return; | ||
| 425 | |||
| 426 | auto valid_from = parseYyyymmdd(valid_from_raw); | ||
| 427 | if (!valid_from) { | ||
| 428 | record_errors.push_back("ORUNORUN.ValidFrom has invalid format, should be YYYY-MM-DD"); | ||
| 429 | return; | ||
| 430 | } | ||
| 431 | |||
| 432 | records.higher_organizational_units.emplace_back( | ||
| 433 | Kv1HigherOrganizationalUnit::Key( | ||
| 434 | data_owner_code, | ||
| 435 | organizational_unit_code_parent, | ||
| 436 | organizational_unit_code_child, | ||
| 437 | *valid_from)); | ||
| 438 | } | ||
| 439 | |||
| 440 | void Kv1Parser::parseUserStopPoint() { | ||
| 441 | auto data_owner_code = eatString ("USRSTOP.DataOwnerCode", true, 10); | ||
| 442 | auto user_stop_code = eatString ("USRSTOP.UserStopCode", true, 10); | ||
| 443 | auto timing_point_code = eatString ("USRSTOP.TimingPointCode", false, 10); | ||
| 444 | auto get_in = eatBoolean("USRSTOP.GetIn", true ); | ||
| 445 | auto get_out = eatBoolean("USRSTOP.GetOut", true ); | ||
| 446 | eatCell ("USRSTOP.<deprecated field #1>" ); | ||
| 447 | auto name = eatString ("USRSTOP.Name", true, 50); | ||
| 448 | auto town = eatString ("USRSTOP.Town", true, 50); | ||
| 449 | auto user_stop_area_code = eatString ("USRSTOP.UserStopAreaCode", false, 10); | ||
| 450 | auto stop_side_code = eatString ("USRSTOP.StopSideCode", true, 10); | ||
| 451 | eatCell ("USRSTOP.<deprecated field #2>" ); | ||
| 452 | eatCell ("USRSTOP.<deprecated field #3>" ); | ||
| 453 | auto minimal_stop_time = eatNumber ("USRSTOP.MinimalStopTime", true, 5); | ||
| 454 | auto stop_side_length = eatNumber ("USRSTOP.StopSideLength", false, 3); | ||
| 455 | auto description = eatString ("USRSTOP.Description", false, 255); | ||
| 456 | auto user_stop_type = eatString ("USRSTOP.UserStopType", true, 10); | ||
| 457 | auto quay_code = eatString ("USRSTOP.QuayCode", false, 30); | ||
| 458 | if (!record_errors.empty()) return; | ||
| 459 | |||
| 460 | records.user_stop_points.emplace_back( | ||
| 461 | Kv1UserStopPoint::Key( | ||
| 462 | data_owner_code, | ||
| 463 | user_stop_code), | ||
| 464 | timing_point_code, | ||
| 465 | *get_in, | ||
| 466 | *get_out, | ||
| 467 | name, | ||
| 468 | town, | ||
| 469 | user_stop_area_code, | ||
| 470 | stop_side_code, | ||
| 471 | *minimal_stop_time, | ||
| 472 | stop_side_length, | ||
| 473 | description, | ||
| 474 | user_stop_type, | ||
| 475 | quay_code); | ||
| 476 | } | ||
| 477 | |||
| 478 | void Kv1Parser::parseUserStopArea() { | ||
| 479 | auto data_owner_code = eatString("USRSTAR.DataOwnerCode", true, 10); | ||
| 480 | auto user_stop_area_code = eatString("USRSTAR.UserStopAreaCode", true, 10); | ||
| 481 | auto name = eatString("USRSTAR.Name", true, 50); | ||
| 482 | auto town = eatString("USRSTAR.Town", true, 50); | ||
| 483 | eatCell ("USRSTAR.<deprecated field #1>" ); | ||
| 484 | eatCell ("USRSTAR.<deprecated field #2>" ); | ||
| 485 | auto description = eatString("USRSTAR.Description", false, 255); | ||
| 486 | if (!record_errors.empty()) return; | ||
| 487 | |||
| 488 | records.user_stop_areas.emplace_back( | ||
| 489 | Kv1UserStopArea::Key( | ||
| 490 | data_owner_code, | ||
| 491 | user_stop_area_code), | ||
| 492 | name, | ||
| 493 | town, | ||
| 494 | description); | ||
| 495 | } | ||
| 496 | |||
| 497 | void Kv1Parser::parseTimingLink() { | ||
| 498 | auto data_owner_code = eatString("TILI.DataOwnerCode", true, 10); | ||
| 499 | auto user_stop_code_begin = eatString("TILI.UserStopCodeBegin", true, 10); | ||
| 500 | auto user_stop_code_end = eatString("TILI.UserStopCodeEnd", true, 10); | ||
| 501 | auto minimal_drive_time = eatNumber("TILI.MinimalDriveTime", false, 5); | ||
| 502 | auto description = eatString("TILI.Description", false, 255); | ||
| 503 | if (!record_errors.empty()) return; | ||
| 504 | |||
| 505 | records.timing_links.emplace_back( | ||
| 506 | Kv1TimingLink::Key( | ||
| 507 | data_owner_code, | ||
| 508 | user_stop_code_begin, | ||
| 509 | user_stop_code_end), | ||
| 510 | minimal_drive_time, | ||
| 511 | description); | ||
| 512 | } | ||
| 513 | |||
| 514 | void Kv1Parser::parseLink() { | ||
| 515 | auto data_owner_code = eatString("LINK.DataOwnerCode", true, 10); | ||
| 516 | auto user_stop_code_begin = eatString("LINK.UserStopCodeBegin", true, 10); | ||
| 517 | auto user_stop_code_end = eatString("LINK.UserStopCodeEnd", true, 10); | ||
| 518 | eatCell("LINK.<deprecated field #1>" ); | ||
| 519 | auto distance = eatNumber("LINK.Distance", true, 6); | ||
| 520 | auto description = eatString("LINK.Description", false, 255); | ||
| 521 | auto transport_type = eatString("LINK.TransportType", true, 5); | ||
| 522 | if (!record_errors.empty()) return; | ||
| 523 | |||
| 524 | records.links.emplace_back( | ||
| 525 | Kv1Link::Key( | ||
| 526 | data_owner_code, | ||
| 527 | user_stop_code_begin, | ||
| 528 | user_stop_code_end, | ||
| 529 | transport_type), | ||
| 530 | *distance, | ||
| 531 | description); | ||
| 532 | } | ||
| 533 | |||
| 534 | void Kv1Parser::parseLine() { | ||
| 535 | auto data_owner_code = eatString ("LINE.DataOwnerCode", true, 10); | ||
| 536 | auto line_planning_number = eatString ("LINE.LinePlanningNumber", true, 10); | ||
| 537 | auto line_public_number = eatString ("LINE.LinePublicNumber", true, 4); | ||
| 538 | auto line_name = eatString ("LINE.LineName", true, 50); | ||
| 539 | auto line_ve_tag_number = eatNumber ("LINE.LineVeTagNumber", true, 3); | ||
| 540 | auto description = eatString ("LINE.Description", false, 255); | ||
| 541 | auto transport_type = eatString ("LINE.TransportType", true, 5); | ||
| 542 | auto line_icon = eatNumber ("LINE.LineIcon", false, 4); | ||
| 543 | auto line_color = eatRgbColor("LINE.LineColor", false ); | ||
| 544 | auto line_text_color = eatRgbColor("LINE.LineTextColor", false ); | ||
| 545 | if (!record_errors.empty()) return; | ||
| 546 | |||
| 547 | // NOTE: This check, although it should be performed to comply with the | ||
| 548 | // specification, is not actually honored by transit operators (such as | ||
| 549 | // Connexxion) :/ That's enough reason to keep it disabled here for now. | ||
| 550 | // if (*line_ve_tag_number < 0 || *line_ve_tag_number > 399) { | ||
| 551 | // record_errors.push_back(std::format("LINE.LineVeTagNumber is out of range [0-399] with value {}", *line_ve_tag_number)); | ||
| 552 | // return; | ||
| 553 | // } | ||
| 554 | if (*line_ve_tag_number != static_cast<short>(*line_ve_tag_number)) | ||
| 555 | record_errors.push_back("LINE.LineVeTagNumber should be an integer"); | ||
| 556 | if (line_icon && *line_icon != static_cast<short>(*line_icon)) | ||
| 557 | record_errors.push_back("LINE.LineIcon should be an integer"); | ||
| 558 | if (!record_errors.empty()) return; | ||
| 559 | |||
| 560 | records.lines.emplace_back( | ||
| 561 | Kv1Line::Key( | ||
| 562 | data_owner_code, | ||
| 563 | line_planning_number), | ||
| 564 | line_public_number, | ||
| 565 | line_name, | ||
| 566 | static_cast<short>(*line_ve_tag_number), | ||
| 567 | description, | ||
| 568 | transport_type, | ||
| 569 | static_cast<std::optional<short>>(line_icon), | ||
| 570 | line_color, | ||
| 571 | line_text_color); | ||
| 572 | } | ||
| 573 | |||
| 574 | void Kv1Parser::parseDestination() { | ||
| 575 | auto data_owner_code = eatString ("DEST.DataOwnerCode", true, 10); | ||
| 576 | auto dest_code = eatString ("DEST.DestCode", true, 10); | ||
| 577 | auto dest_name_full = eatString ("DEST.DestNameFull", true, 50); | ||
| 578 | auto dest_name_main = eatString ("DEST.DestNameMain", true, 24); | ||
| 579 | auto dest_name_detail = eatString ("DEST.DestNameDetail", false, 24); | ||
| 580 | auto relevant_dest_name_detail = eatBoolean ("DEST.RelevantDestNameDetail", true ); | ||
| 581 | auto dest_name_main_21 = eatString ("DEST.DestNameMain21", true, 21); | ||
| 582 | auto dest_name_detail_21 = eatString ("DEST.DestNameDetail21", false, 21); | ||
| 583 | auto dest_name_main_19 = eatString ("DEST.DestNameMain19", true, 19); | ||
| 584 | auto dest_name_detail_19 = eatString ("DEST.DestNameDetail19", false, 19); | ||
| 585 | auto dest_name_main_16 = eatString ("DEST.DestNameMain16", true, 16); | ||
| 586 | auto dest_name_detail_16 = eatString ("DEST.DestNameDetail16", false, 16); | ||
| 587 | auto dest_icon = eatNumber ("DEST.DestIcon", false, 4); | ||
| 588 | auto dest_color = eatRgbColor("DEST.DestColor", false ); | ||
| 589 | // NOTE: Deviating from the offical KV1 specification here. It specifies that | ||
| 590 | // the maximum length for this field should be 30, but then proceeds to | ||
| 591 | // specify that it should contain a RGB value comprising of three | ||
| 592 | // hexadecimally encoded octets, i.e. six characters. We assume that the | ||
| 593 | // latter is correct and the intended interpretation. | ||
| 594 | auto dest_text_color = eatRgbColor("DEST.DestTextColor", false ); | ||
| 595 | if (!record_errors.empty()) return; | ||
| 596 | |||
| 597 | if (dest_icon && *dest_icon != static_cast<short>(*dest_icon)) { | ||
| 598 | record_errors.push_back("DEST.DestIcon should be an integer"); | ||
| 599 | return; | ||
| 600 | } | ||
| 601 | |||
| 602 | records.destinations.emplace_back( | ||
| 603 | Kv1Destination::Key( | ||
| 604 | data_owner_code, | ||
| 605 | dest_code), | ||
| 606 | dest_name_full, | ||
| 607 | dest_name_main, | ||
| 608 | dest_name_detail, | ||
| 609 | *relevant_dest_name_detail, | ||
| 610 | dest_name_main_21, | ||
| 611 | dest_name_detail_21, | ||
| 612 | dest_name_main_19, | ||
| 613 | dest_name_detail_19, | ||
| 614 | dest_name_main_16, | ||
| 615 | dest_name_detail_16, | ||
| 616 | dest_icon, | ||
| 617 | dest_color, | ||
| 618 | dest_text_color); | ||
| 619 | } | ||
| 620 | |||
| 621 | void Kv1Parser::parseJourneyPattern() { | ||
| 622 | auto data_owner_code = eatString("JOPA.DataOwnerCode", true, 10); | ||
| 623 | auto line_planning_number = eatString("JOPA.LinePlanningNumber", true, 10); | ||
| 624 | auto journey_pattern_code = eatString("JOPA.JourneyPatternCode", true, 10); | ||
| 625 | auto journey_pattern_type = eatString("JOPA.JourneyPatternType", true, 10); | ||
| 626 | auto direction = eatString("JOPA.Direction", true, 1); | ||
| 627 | auto description = eatString("JOPA.Description", false, 255); | ||
| 628 | if (!record_errors.empty()) return; | ||
| 629 | |||
| 630 | if (direction != "1" && direction != "2" && direction != "A" && direction != "B") { | ||
| 631 | record_errors.push_back("JOPA.Direction should be in [1, 2, A, B]"); | ||
| 632 | return; | ||
| 633 | } | ||
| 634 | |||
| 635 | records.journey_patterns.emplace_back( | ||
| 636 | Kv1JourneyPattern::Key( | ||
| 637 | data_owner_code, | ||
| 638 | line_planning_number, | ||
| 639 | journey_pattern_code), | ||
| 640 | journey_pattern_type, | ||
| 641 | direction[0], | ||
| 642 | description); | ||
| 643 | } | ||
| 644 | |||
| 645 | void Kv1Parser::parseConcessionFinancerRelation() { | ||
| 646 | auto data_owner_code = eatString("CONFINREL.DataOwnerCode", true, 10); | ||
| 647 | auto con_fin_rel_code = eatString("CONFINREL.ConFinRelCode", true, 10); | ||
| 648 | auto concession_area_code = eatString("CONFINREL.ConcessionAreaCode", true, 10); | ||
| 649 | auto financer_code = eatString("CONFINREL.FinancerCode", false, 10); | ||
| 650 | if (!record_errors.empty()) return; | ||
| 651 | |||
| 652 | records.concession_financer_relations.emplace_back( | ||
| 653 | Kv1ConcessionFinancerRelation::Key( | ||
| 654 | data_owner_code, | ||
| 655 | con_fin_rel_code), | ||
| 656 | concession_area_code, | ||
| 657 | financer_code); | ||
| 658 | } | ||
| 659 | |||
| 660 | void Kv1Parser::parseConcessionArea() { | ||
| 661 | auto data_owner_code = eatString("CONAREA.DataOwnerCode", true, 10); | ||
| 662 | auto concession_area_code = eatString("CONAREA.ConcessionAreaCode", true, 10); | ||
| 663 | auto description = eatString("CONAREA.Description", true, 255); | ||
| 664 | if (!record_errors.empty()) return; | ||
| 665 | |||
| 666 | records.concession_areas.emplace_back( | ||
| 667 | Kv1ConcessionArea::Key( | ||
| 668 | data_owner_code, | ||
| 669 | concession_area_code), | ||
| 670 | description); | ||
| 671 | } | ||
| 672 | |||
| 673 | void Kv1Parser::parseFinancer() { | ||
| 674 | auto data_owner_code = eatString("FINANCER.DataOwnerCode", true, 10); | ||
| 675 | auto financer_code = eatString("FINANCER.FinancerCode", true, 10); | ||
| 676 | auto description = eatString("FINANCER.Description", true, 255); | ||
| 677 | if (!record_errors.empty()) return; | ||
| 678 | |||
| 679 | records.financers.emplace_back( | ||
| 680 | Kv1Financer::Key( | ||
| 681 | data_owner_code, | ||
| 682 | financer_code), | ||
| 683 | description); | ||
| 684 | } | ||
| 685 | |||
| 686 | void Kv1Parser::parseJourneyPatternTimingLink() { | ||
| 687 | auto data_owner_code = eatString ("JOPATILI.DataOwnerCode", true, 10); | ||
| 688 | auto line_planning_number = eatString ("JOPATILI.LinePlanningNumber", true, 10); | ||
| 689 | auto journey_pattern_code = eatString ("JOPATILI.JourneyPatternCode", true, 10); | ||
| 690 | auto timing_link_order = eatNumber ("JOPATILI.TimingLinkOrder", true, 3); | ||
| 691 | auto user_stop_code_begin = eatString ("JOPATILI.UserStopCodeBegin", true, 10); | ||
| 692 | auto user_stop_code_end = eatString ("JOPATILI.UserStopCodeEnd", true, 10); | ||
| 693 | auto con_fin_rel_code = eatString ("JOPATILI.ConFinRelCode", true, 10); | ||
| 694 | auto dest_code = eatString ("JOPATILI.DestCode", true, 10); | ||
| 695 | eatCell ("JOPATILI.<deprecated field #1>" ); | ||
| 696 | auto is_timing_stop = eatBoolean ("JOPATILI.IsTimingStop", true ); | ||
| 697 | auto display_public_line = eatString ("JOPATILI.DisplayPublicLine", false, 4); | ||
| 698 | auto product_formula_type = eatNumber ("JOPATILI.ProductFormulaType", false, 4); | ||
| 699 | auto get_in = eatBoolean ("JOPATILI.GetIn", true ); | ||
| 700 | auto get_out = eatBoolean ("JOPATILI.GetOut", true ); | ||
| 701 | auto show_flexible_trip = eatString ("JOPATILI.ShowFlexibleTrip", false, 8); | ||
| 702 | auto line_dest_icon = eatNumber ("JOPATILI.LineDestIcon", false, 4); | ||
| 703 | auto line_dest_color = eatRgbColor("JOPATILI.LineDestColor", false ); | ||
| 704 | auto line_dest_text_color = eatRgbColor("JOPATILI.LineDestTextColor", false ); | ||
| 705 | if (!record_errors.empty()) return; | ||
| 706 | |||
| 707 | if (line_dest_icon && *line_dest_icon != static_cast<short>(*line_dest_icon)) | ||
| 708 | record_errors.push_back("JOPATILI.LineDestIcon should be an integer"); | ||
| 709 | if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && | ||
| 710 | show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") | ||
| 711 | record_errors.push_back("JOPATILI.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); | ||
| 712 | if (!record_errors.empty()) return; | ||
| 713 | |||
| 714 | records.journey_pattern_timing_links.emplace_back( | ||
| 715 | Kv1JourneyPatternTimingLink::Key( | ||
| 716 | data_owner_code, | ||
| 717 | line_planning_number, | ||
| 718 | journey_pattern_code, | ||
| 719 | static_cast<short>(*timing_link_order)), | ||
| 720 | user_stop_code_begin, | ||
| 721 | user_stop_code_end, | ||
| 722 | con_fin_rel_code, | ||
| 723 | dest_code, | ||
| 724 | *is_timing_stop, | ||
| 725 | display_public_line, | ||
| 726 | product_formula_type, | ||
| 727 | *get_in, | ||
| 728 | *get_out, | ||
| 729 | show_flexible_trip, | ||
| 730 | line_dest_icon, | ||
| 731 | line_dest_color, | ||
| 732 | line_dest_text_color); | ||
| 733 | } | ||
| 734 | |||
| 735 | void Kv1Parser::parsePoint() { | ||
| 736 | auto data_owner_code = eatString("POINT.DataOwnerCode", true, 10); | ||
| 737 | auto point_code = eatString("POINT.PointCode", true, 10); | ||
| 738 | eatCell ("POINT.<deprecated field #1>" ); | ||
| 739 | auto point_type = eatString("POINT.PointType", true, 10); | ||
| 740 | auto coordinate_system_type = eatString("POINT.CoordinateSystemType", true, 10); | ||
| 741 | // NOTE: We deviate from the specification here once again. The specification | ||
| 742 | // notes that LocationX_EW should contain 'at least 6 positions'. Assuming | ||
| 743 | // that this is referring to the amount of digits, we have to lower this to | ||
| 744 | // 4. Otherwise, some positions in the Netherlands and Belgium are | ||
| 745 | // unrepresentable. | ||
| 746 | auto location_x_ew = eatRdCoord("POINT.LocationX_EW", true, 4); | ||
| 747 | auto location_y_ew = eatRdCoord("POINT.LocationX_EW", true, 6); | ||
| 748 | auto location_z = eatRdCoord("POINT.LocationZ", false, 0); | ||
| 749 | auto description = eatString ("POINT.Description", false, 255); | ||
| 750 | if (!record_errors.empty()) return; | ||
| 751 | |||
| 752 | records.points.emplace_back( | ||
| 753 | Kv1Point::Key( | ||
| 754 | std::move(data_owner_code), | ||
| 755 | std::move(point_code)), | ||
| 756 | std::move(point_type), | ||
| 757 | std::move(coordinate_system_type), | ||
| 758 | *location_x_ew, | ||
| 759 | *location_y_ew, | ||
| 760 | location_z, | ||
| 761 | std::move(description)); | ||
| 762 | } | ||
| 763 | |||
| 764 | void Kv1Parser::parsePointOnLink() { | ||
| 765 | auto data_owner_code = eatString("POOL.DataOwnerCode", true, 10); | ||
| 766 | auto user_stop_code_begin = eatString("POOL.UserStopCodeBegin", true, 10); | ||
| 767 | auto user_stop_code_end = eatString("POOL.UserStopCodeEnd", true, 10); | ||
| 768 | eatCell ("POOL.<deprecated field #1>" ); | ||
| 769 | auto point_data_owner_code = eatString("POOL.PointDataOwnerCode", true, 10); | ||
| 770 | auto point_code = eatString("POOL.PointCode", true, 10); | ||
| 771 | auto distance_since_start_of_link = eatNumber("POOL.DistanceSinceStartOfLink", true, 5); | ||
| 772 | auto segment_speed = eatNumber("POOL.SegmentSpeed", false, 4); | ||
| 773 | auto local_point_speed = eatNumber("POOL.LocalPointSpeed", false, 4); | ||
| 774 | auto description = eatString("POOL.Description", false, 255); | ||
| 775 | auto transport_type = eatString("POOL.TransportType", true, 5); | ||
| 776 | if (!record_errors.empty()) return; | ||
| 777 | |||
| 778 | records.point_on_links.emplace_back( | ||
| 779 | Kv1PointOnLink::Key( | ||
| 780 | data_owner_code, | ||
| 781 | user_stop_code_begin, | ||
| 782 | user_stop_code_end, | ||
| 783 | point_data_owner_code, | ||
| 784 | point_code, | ||
| 785 | transport_type), | ||
| 786 | *distance_since_start_of_link, | ||
| 787 | segment_speed, | ||
| 788 | local_point_speed, | ||
| 789 | std::move(description)); | ||
| 790 | } | ||
| 791 | |||
| 792 | void Kv1Parser::parseIcon() { | ||
| 793 | auto data_owner_code = eatString("ICON.DataOwnerCode", true, 10); | ||
| 794 | auto icon_number = eatNumber("ICON.IconNumber", true, 4); | ||
| 795 | auto icon_uri = eatString("ICON.IconURI", true, 1024); | ||
| 796 | if (!record_errors.empty()) return; | ||
| 797 | |||
| 798 | if (*icon_number != static_cast<short>(*icon_number)) { | ||
| 799 | record_errors.push_back("ICON.IconNumber should be an integer"); | ||
| 800 | return; | ||
| 801 | } | ||
| 802 | |||
| 803 | records.icons.emplace_back( | ||
| 804 | Kv1Icon::Key( | ||
| 805 | data_owner_code, | ||
| 806 | static_cast<short>(*icon_number)), | ||
| 807 | icon_uri); | ||
| 808 | } | ||
| 809 | |||
| 810 | void Kv1Parser::parseNotice() { | ||
| 811 | auto data_owner_code = eatString("NOTICE.DataOwnerCode", true, 10); | ||
| 812 | auto notice_code = eatString("NOTICE.NoticeCode", true, 20); | ||
| 813 | auto notice_content = eatString("NOTICE.NoticeContent", true, 1024); | ||
| 814 | if (!record_errors.empty()) return; | ||
| 815 | |||
| 816 | records.notices.emplace_back( | ||
| 817 | Kv1Notice::Key( | ||
| 818 | data_owner_code, | ||
| 819 | notice_code), | ||
| 820 | notice_content); | ||
| 821 | } | ||
| 822 | |||
| 823 | void Kv1Parser::parseNoticeAssignment() { | ||
| 824 | auto data_owner_code = eatString("NTCASSGNM.DataOwnerCode", true, 10); | ||
| 825 | auto notice_code = eatString("NTCASSGNM.NoticeCode", true, 20); | ||
| 826 | auto assigned_object = eatString("NTCASSGNM.AssignedObject", true, 8); | ||
| 827 | auto timetable_version_code = eatString("NTCASSGNM.TimetableVersionCode", false, 10); | ||
| 828 | auto organizational_unit_code = eatString("NTCASSGNM.OrganizationalUnitCode", false, 10); | ||
| 829 | auto schedule_code = eatString("NTCASSGNM.ScheduleCode", false, 10); | ||
| 830 | auto schedule_type_code = eatString("NTCASSGNM.ScheduleTypeCode", false, 10); | ||
| 831 | auto period_group_code = eatString("NTCASSGNM.PeriodGroupCode", false, 10); | ||
| 832 | auto specific_day_code = eatString("NTCASSGNM.SpecificDayCode", false, 10); | ||
| 833 | auto day_type = eatString("NTCASSGNM.DayType", false, 7); | ||
| 834 | auto line_planning_number = eatString("NTCASSGNM.LinePlanningNumber", true, 10); | ||
| 835 | auto journey_number = eatNumber("NTCASSGNM.JourneyNumber", false, 6); | ||
| 836 | auto stop_order = eatNumber("NTCASSGNM.StopOrder", false, 4); | ||
| 837 | auto journey_pattern_code = eatString("NTCASSGNM.JourneyPatternCode", false, 10); | ||
| 838 | auto timing_link_order = eatNumber("NTCASSGNM.TimingLinkOrder", false, 3); | ||
| 839 | auto user_stop_code = eatString("NTCASSGNM.UserStopCode", false, 10); | ||
| 840 | if (!record_errors.empty()) return; | ||
| 841 | |||
| 842 | if (journey_number && *journey_number != static_cast<short>(*journey_number)) | ||
| 843 | record_errors.push_back("NTCASSGNM.JourneyNumber should be an integer"); | ||
| 844 | if (journey_number && (*journey_number < 0 || *journey_number > 999'999)) | ||
| 845 | record_errors.push_back("NTCASSGNM.JourneyNumber should be within the range [0-999999]"); | ||
| 846 | if (stop_order && *stop_order != static_cast<short>(*stop_order)) | ||
| 847 | record_errors.push_back("NTCASSGNM.StopOrder should be an integer"); | ||
| 848 | if (!journey_number && (assigned_object == "PUJO" || assigned_object == "PUJOPASS")) | ||
| 849 | record_errors.push_back("NTCASSGNM.JourneyNumber is required for AssignedObject PUJO/PUJOPASS"); | ||
| 850 | if (journey_pattern_code.empty() && assigned_object == "JOPATILI") | ||
| 851 | record_errors.push_back("NTCASSGNM.JourneyPatternCode is required for AssignedObject JOPATILI"); | ||
| 852 | if (!record_errors.empty()) return; | ||
| 853 | |||
| 854 | records.notice_assignments.emplace_back( | ||
| 855 | data_owner_code, | ||
| 856 | notice_code, | ||
| 857 | assigned_object, | ||
| 858 | timetable_version_code, | ||
| 859 | organizational_unit_code, | ||
| 860 | schedule_code, | ||
| 861 | schedule_type_code, | ||
| 862 | period_group_code, | ||
| 863 | specific_day_code, | ||
| 864 | day_type, | ||
| 865 | line_planning_number, | ||
| 866 | static_cast<std::optional<int>>(journey_number), | ||
| 867 | static_cast<std::optional<short>>(stop_order), | ||
| 868 | journey_pattern_code, | ||
| 869 | timing_link_order, | ||
| 870 | user_stop_code); | ||
| 871 | } | ||
| 872 | |||
| 873 | void Kv1Parser::parseTimeDemandGroup() { | ||
| 874 | auto data_owner_code = eatString("TIMDEMGRP.DataOwnerCode", true, 10); | ||
| 875 | auto line_planning_number = eatString("TIMDEMGRP.LinePlanningNumber", true, 10); | ||
| 876 | auto journey_pattern_code = eatString("TIMDEMGRP.JourneyPatternCode", true, 10); | ||
| 877 | auto time_demand_group_code = eatString("TIMDEMGRP.TimeDemandGroupCode", true, 10); | ||
| 878 | if (!record_errors.empty()) return; | ||
| 879 | |||
| 880 | records.time_demand_groups.emplace_back( | ||
| 881 | Kv1TimeDemandGroup::Key( | ||
| 882 | data_owner_code, | ||
| 883 | line_planning_number, | ||
| 884 | journey_pattern_code, | ||
| 885 | time_demand_group_code)); | ||
| 886 | } | ||
| 887 | |||
| 888 | void Kv1Parser::parseTimeDemandGroupRunTime() { | ||
| 889 | auto data_owner_code = eatString("TIMDEMRNT.DataOwnerCode", true, 10); | ||
| 890 | auto line_planning_number = eatString("TIMDEMRNT.LinePlanningNumber", true, 10); | ||
| 891 | auto journey_pattern_code = eatString("TIMDEMRNT.JourneyPatternCode", true, 10); | ||
| 892 | auto time_demand_group_code = eatString("TIMDEMRNT.TimeDemandGroupCode", true, 10); | ||
| 893 | auto timing_link_order = eatNumber("TIMDEMRNT.TimingLinkOrder", true, 3); | ||
| 894 | auto user_stop_code_begin = eatString("TIMDEMRNT.UserStopCodeBegin", true, 10); | ||
| 895 | auto user_stop_code_end = eatString("TIMDEMRNT.UserStopCodeEnd", true, 10); | ||
| 896 | auto total_drive_time = eatNumber("TIMDEMRNT.TotalDriveTime", true, 5); | ||
| 897 | auto drive_time = eatNumber("TIMDEMRNT.DriveTime", true, 5); | ||
| 898 | auto expected_delay = eatNumber("TIMDEMRNT.ExpectedDelay", false, 5); | ||
| 899 | auto layover_time = eatNumber("TIMDEMRNT.LayOverTime", false, 5); | ||
| 900 | auto stop_wait_time = eatNumber("TIMDEMRNT.StopWaitTime", true, 5); | ||
| 901 | auto minimum_stop_time = eatNumber("TIMDEMRNT.MinimumStopTime", false, 5); | ||
| 902 | if (!record_errors.empty()) return; | ||
| 903 | |||
| 904 | if (timing_link_order && *timing_link_order != static_cast<short>(*timing_link_order)) { | ||
| 905 | record_errors.push_back("TIMDEMRNT.TimingLinkOrder should be an integer"); | ||
| 906 | return; | ||
| 907 | } | ||
| 908 | |||
| 909 | records.time_demand_group_run_times.emplace_back( | ||
| 910 | Kv1TimeDemandGroupRunTime::Key( | ||
| 911 | data_owner_code, | ||
| 912 | line_planning_number, | ||
| 913 | journey_pattern_code, | ||
| 914 | time_demand_group_code, | ||
| 915 | static_cast<short>(*timing_link_order)), | ||
| 916 | user_stop_code_begin, | ||
| 917 | user_stop_code_end, | ||
| 918 | *total_drive_time, | ||
| 919 | *drive_time, | ||
| 920 | expected_delay, | ||
| 921 | layover_time, | ||
| 922 | *stop_wait_time, | ||
| 923 | minimum_stop_time); | ||
| 924 | } | ||
| 925 | |||
| 926 | void Kv1Parser::parsePeriodGroup() { | ||
| 927 | auto data_owner_code = eatString("PEGR.DataOwnerCode", true, 10); | ||
| 928 | auto period_group_code = eatString("PEGR.PeriodGroupCode", true, 10); | ||
| 929 | auto description = eatString("PEGR.Description", false, 255); | ||
| 930 | if (!record_errors.empty()) return; | ||
| 931 | |||
| 932 | records.period_groups.emplace_back( | ||
| 933 | Kv1PeriodGroup::Key( | ||
| 934 | data_owner_code, | ||
| 935 | period_group_code), | ||
| 936 | description); | ||
| 937 | } | ||
| 938 | |||
| 939 | void Kv1Parser::parseSpecificDay() { | ||
| 940 | auto data_owner_code = eatString("SPECDAY.DataOwnerCode", true, 10); | ||
| 941 | auto specific_day_code = eatString("SPECDAY.SpecificDayCode", true, 10); | ||
| 942 | auto name = eatString("SPECDAY.Name", true, 50); | ||
| 943 | auto description = eatString("SPECDAY.Description", false, 255); | ||
| 944 | if (!record_errors.empty()) return; | ||
| 945 | |||
| 946 | records.specific_days.emplace_back( | ||
| 947 | Kv1SpecificDay::Key( | ||
| 948 | data_owner_code, | ||
| 949 | specific_day_code), | ||
| 950 | name, | ||
| 951 | description); | ||
| 952 | } | ||
| 953 | |||
| 954 | void Kv1Parser::parseTimetableVersion() { | ||
| 955 | auto data_owner_code = eatString("TIVE.DataOwnerCode", true, 10); | ||
| 956 | auto organizational_unit_code = eatString("TIVE.OrganizationalUnitCode", true, 10); | ||
| 957 | auto timetable_version_code = eatString("TIVE.TimetableVersionCode", true, 10); | ||
| 958 | auto period_group_code = eatString("TIVE.PeriodGroupCode", true, 10); | ||
| 959 | auto specific_day_code = eatString("TIVE.SpecificDayCode", true, 10); | ||
| 960 | auto valid_from_raw = eatString("TIVE.ValidFrom", true, 10); | ||
| 961 | auto timetable_version_type = eatString("TIVE.TimetableVersionType", true, 10); | ||
| 962 | auto valid_thru_raw = eatString("TIVE.ValidThru", false, 10); | ||
| 963 | auto description = eatString("TIVE.Description", false, 255); | ||
| 964 | if (!record_errors.empty()) return; | ||
| 965 | |||
| 966 | auto valid_from = parseYyyymmdd(valid_from_raw); | ||
| 967 | if (!valid_from) | ||
| 968 | record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD"); | ||
| 969 | std::optional<std::chrono::year_month_day> valid_thru; | ||
| 970 | if (!valid_thru_raw.empty()) { | ||
| 971 | valid_thru = parseYyyymmdd(valid_thru_raw); | ||
| 972 | if (!valid_thru) { | ||
| 973 | record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD"); | ||
| 974 | } | ||
| 975 | } | ||
| 976 | if (!description.empty()) | ||
| 977 | record_errors.push_back("TIVE.Description should be empty"); | ||
| 978 | if (!record_errors.empty()) return; | ||
| 979 | |||
| 980 | records.timetable_versions.emplace_back( | ||
| 981 | Kv1TimetableVersion::Key( | ||
| 982 | data_owner_code, | ||
| 983 | organizational_unit_code, | ||
| 984 | timetable_version_code, | ||
| 985 | period_group_code, | ||
| 986 | specific_day_code), | ||
| 987 | *valid_from, | ||
| 988 | timetable_version_type, | ||
| 989 | valid_thru, | ||
| 990 | description); | ||
| 991 | } | ||
| 992 | |||
| 993 | void Kv1Parser::parsePublicJourney() { | ||
| 994 | auto data_owner_code = eatString ("PUJO.DataOwnerCode", true, 10); | ||
| 995 | auto timetable_version_code = eatString ("PUJO.TimetableVersionCode", true, 10); | ||
| 996 | auto organizational_unit_code = eatString ("PUJO.OrganizationalUnitCode", true, 10); | ||
| 997 | auto period_group_code = eatString ("PUJO.PeriodGroupCode", true, 10); | ||
| 998 | auto specific_day_code = eatString ("PUJO.SpecificDayCode", true, 10); | ||
| 999 | auto day_type = eatString ("PUJO.DayType", true, 7); | ||
| 1000 | auto line_planning_number = eatString ("PUJO.LinePlanningNumber", true, 10); | ||
| 1001 | auto journey_number = eatNumber ("PUJO.JourneyNumber", true, 6); | ||
| 1002 | auto time_demand_group_code = eatString ("PUJO.TimeDemandGroupCode", true, 10); | ||
| 1003 | auto journey_pattern_code = eatString ("PUJO.JourneyPatternCode", true, 10); | ||
| 1004 | auto departure_time_raw = eatString ("PUJO.DepartureTime", true, 8); | ||
| 1005 | auto wheelchair_accessible = eatString ("PUJO.WheelChairAccessible", true, 13); | ||
| 1006 | auto data_owner_is_operator = eatBoolean("PUJO.DataOwnerIsOperator", true ); | ||
| 1007 | auto planned_monitored = eatBoolean("PUJO.PlannedMonitored", true ); | ||
| 1008 | auto product_formula_type = eatNumber ("PUJO.ProductFormulaType", false, 4); | ||
| 1009 | auto show_flexible_trip = eatString ("PUJO.ShowFlexibleTrip", false, 8); | ||
| 1010 | if (!record_errors.empty()) return; | ||
| 1011 | |||
| 1012 | auto departure_time = parseHhmmss(departure_time_raw); | ||
| 1013 | if (!departure_time) | ||
| 1014 | record_errors.push_back("PUJO.DepartureTime has a bad format"); | ||
| 1015 | if (*journey_number < 0 || *journey_number > 999'999) | ||
| 1016 | record_errors.push_back("PUJO.JourneyNumber should be within the range [0-999999]"); | ||
| 1017 | if (*journey_number != static_cast<int>(*journey_number)) | ||
| 1018 | record_errors.push_back("PUJO.JourneyNumber should be an integer"); | ||
| 1019 | if (product_formula_type && *product_formula_type != static_cast<short>(*product_formula_type)) | ||
| 1020 | record_errors.push_back("PUJO.ProductFormulaType should be an integer"); | ||
| 1021 | if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN") | ||
| 1022 | record_errors.push_back("PUJO.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]"); | ||
| 1023 | if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && | ||
| 1024 | show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") | ||
| 1025 | record_errors.push_back("PUJO.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); | ||
| 1026 | if (!record_errors.empty()) return; | ||
| 1027 | |||
| 1028 | records.public_journeys.emplace_back( | ||
| 1029 | Kv1PublicJourney::Key( | ||
| 1030 | data_owner_code, | ||
| 1031 | timetable_version_code, | ||
| 1032 | organizational_unit_code, | ||
| 1033 | period_group_code, | ||
| 1034 | specific_day_code, | ||
| 1035 | day_type, | ||
| 1036 | line_planning_number, | ||
| 1037 | static_cast<int>(*journey_number)), | ||
| 1038 | time_demand_group_code, | ||
| 1039 | journey_pattern_code, | ||
| 1040 | *departure_time, | ||
| 1041 | wheelchair_accessible, | ||
| 1042 | *data_owner_is_operator, | ||
| 1043 | *planned_monitored, | ||
| 1044 | product_formula_type, | ||
| 1045 | show_flexible_trip); | ||
| 1046 | } | ||
| 1047 | |||
| 1048 | void Kv1Parser::parsePeriodGroupValidity() { | ||
| 1049 | auto data_owner_code = eatString("PEGRVAL.DataOwnerCode", true, 10); | ||
| 1050 | auto organizational_unit_code = eatString("PEGRVAL.OrganizationalUnitCode", true, 10); | ||
| 1051 | auto period_group_code = eatString("PEGRVAL.PeriodGroupCode", true, 10); | ||
| 1052 | auto valid_from_raw = eatString("PEGRVAL.ValidFrom", true, 10); | ||
| 1053 | auto valid_thru_raw = eatString("PEGRVAL.ValidThru", true, 10); | ||
| 1054 | if (!record_errors.empty()) return; | ||
| 1055 | |||
| 1056 | auto valid_from = parseYyyymmdd(valid_from_raw); | ||
| 1057 | auto valid_thru = parseYyyymmdd(valid_thru_raw); | ||
| 1058 | if (!valid_from) | ||
| 1059 | record_errors.push_back("PEGRVAL.ValidFrom has invalid format, should be YYYY-MM-DD"); | ||
| 1060 | if (!valid_thru) | ||
| 1061 | record_errors.push_back("PEGRVAL.ValidThru has invalid format, should be YYYY-MM-DD"); | ||
| 1062 | if (!record_errors.empty()) return; | ||
| 1063 | |||
| 1064 | records.period_group_validities.emplace_back( | ||
| 1065 | Kv1PeriodGroupValidity::Key( | ||
| 1066 | data_owner_code, | ||
| 1067 | organizational_unit_code, | ||
| 1068 | period_group_code, | ||
| 1069 | *valid_from), | ||
| 1070 | *valid_thru); | ||
| 1071 | } | ||
| 1072 | |||
// Reads one EXCOPDAY (exceptional operating day) row and appends it to
// records.exceptional_operating_days. ValidDate is converted with
// parseDateTime using the `amsterdam` object declared elsewhere in this file
// (presumably a time zone -- confirm at its definition).
void Kv1Parser::parseExceptionalOperatingDay() {
  auto owner         = eatString("EXCOPDAY.DataOwnerCode",          true,  10);
  auto org_unit      = eatString("EXCOPDAY.OrganizationalUnitCode", true,  10);
  auto date_text     = eatString("EXCOPDAY.ValidDate",              true,  23);
  auto day_type_as   = eatString("EXCOPDAY.DayTypeAsOn",            true,   7);
  auto specific_day  = eatString("EXCOPDAY.SpecificDayCode",        true,  10);
  auto period_group  = eatString("EXCOPDAY.PeriodGroupCode",        false, 10);
  auto descr         = eatString("EXCOPDAY.Description",            false, 255);

  if (!record_errors.empty()) {
    return;
  }

  // parseDateTime receives a pointer to this view; it is included in the
  // error message when parsing fails.
  std::string_view reason;
  auto date = parseDateTime(date_text, amsterdam, &reason);
  if (!date) {
    record_errors.push_back(std::format("EXCOPDAY.ValidDate has an bad format (value: {}): {}", date_text, reason));
    return;
  }

  Kv1ExceptionalOperatingDay::Key key(owner, org_unit, *date);
  records.exceptional_operating_days.emplace_back(
    std::move(key),
    day_type_as,
    specific_day,
    period_group,
    descr);
}
| 1100 | |||
| 1101 | void Kv1Parser::parseScheduleVersion() { | ||
| 1102 | auto data_owner_code = eatString("SCHEDVERS.DataOwnerCode", true, 10); | ||
| 1103 | auto organizational_unit_code = eatString("SCHEDVERS.OrganizationalUnitCode", true, 10); | ||
| 1104 | auto schedule_code = eatString("SCHEDVERS.ScheduleCode", true, 10); | ||
| 1105 | auto schedule_type_code = eatString("SCHEDVERS.ScheduleTypeCode", true, 10); | ||
| 1106 | auto valid_from_raw = eatString("SCHEDVERS.ValidFrom", true, 10); | ||
| 1107 | auto valid_thru_raw = eatString("SCHEDVERS.ValidThru", false, 10); | ||
| 1108 | auto description = eatString("SCHEDVERS.Description", false, 255); | ||
| 1109 | if (!record_errors.empty()) return; | ||
| 1110 | |||
| 1111 | auto valid_from = parseYyyymmdd(valid_from_raw); | ||
| 1112 | if (!valid_from) | ||
| 1113 | record_errors.push_back("SCHEDVERS.ValidFrom has invalid format, should be YYYY-MM-DD"); | ||
| 1114 | std::optional<std::chrono::year_month_day> valid_thru; | ||
| 1115 | if (!valid_thru_raw.empty()) { | ||
| 1116 | valid_thru = parseYyyymmdd(valid_thru_raw); | ||
| 1117 | if (!valid_thru) { | ||
| 1118 | record_errors.push_back("SCHEDVERS.ValidFrom has invalid format, should be YYYY-MM-DD"); | ||
| 1119 | } | ||
| 1120 | } | ||
| 1121 | if (!description.empty()) | ||
| 1122 | record_errors.push_back("SCHEDVERS.Description should be empty"); | ||
| 1123 | if (!record_errors.empty()) return; | ||
| 1124 | |||
| 1125 | records.schedule_versions.emplace_back( | ||
| 1126 | Kv1ScheduleVersion::Key( | ||
| 1127 | data_owner_code, | ||
| 1128 | organizational_unit_code, | ||
| 1129 | schedule_code, | ||
| 1130 | schedule_type_code), | ||
| 1131 | *valid_from, | ||
| 1132 | valid_thru, | ||
| 1133 | description); | ||
| 1134 | } | ||
| 1135 | |||
| 1136 | void Kv1Parser::parsePublicJourneyPassingTimes() { | ||
| 1137 | auto data_owner_code = eatString ("PUJOPASS.DataOwnerCode", true, 10); | ||
| 1138 | auto organizational_unit_code = eatString ("PUJOPASS.OrganizationalUnitCode", true, 10); | ||
| 1139 | auto schedule_code = eatString ("PUJOPASS.ScheduleCode", true, 10); | ||
| 1140 | auto schedule_type_code = eatString ("PUJOPASS.ScheduleTypeCode", true, 10); | ||
| 1141 | auto line_planning_number = eatString ("PUJOPASS.LinePlanningNumber", true, 10); | ||
| 1142 | auto journey_number = eatNumber ("PUJOPASS.JourneyNumber", true, 6); | ||
| 1143 | auto stop_order = eatNumber ("PUJOPASS.StopOrder", true, 4); | ||
| 1144 | auto journey_pattern_code = eatString ("PUJOPASS.JourneyPatternCode", true, 10); | ||
| 1145 | auto user_stop_code = eatString ("PUJOPASS.UserStopCode", true, 10); | ||
| 1146 | auto target_arrival_time_raw = eatString ("PUJOPASS.TargetArrivalTime", false, 8); | ||
| 1147 | auto target_departure_time_raw = eatString ("PUJOPASS.TargetDepartureTime", false, 8); | ||
| 1148 | auto wheelchair_accessible = eatString ("PUJOPASS.WheelChairAccessible", true, 13); | ||
| 1149 | auto data_owner_is_operator = eatBoolean("PUJOPASS.DataOwnerIsOperator", true ); | ||
| 1150 | auto planned_monitored = eatBoolean("PUJOPASS.PlannedMonitored", true ); | ||
| 1151 | auto product_formula_type = eatNumber ("PUJOPASS.ProductFormulaType", false, 4); | ||
| 1152 | auto show_flexible_trip = eatString ("PUJOPASS.ShowFlexibleTrip", false, 8); | ||
| 1153 | if (!record_errors.empty()) return; | ||
| 1154 | |||
| 1155 | if (*journey_number < 0 || *journey_number > 999'999) | ||
| 1156 | record_errors.push_back("PUJOPASS.JourneyNumber should be within the range [0-999999]"); | ||
| 1157 | if (*journey_number != static_cast<int>(*journey_number)) | ||
| 1158 | record_errors.push_back("PUJOPASS.JourneyNumber should be an integer"); | ||
| 1159 | if (*stop_order != static_cast<short>(*stop_order)) | ||
| 1160 | record_errors.push_back("PUJOPASS.StopOrder should be an integer"); | ||
| 1161 | if (product_formula_type && *product_formula_type != static_cast<short>(*product_formula_type)) | ||
| 1162 | record_errors.push_back("PUJOPASS.ProductFormulaType should be an integer"); | ||
| 1163 | if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN") | ||
| 1164 | record_errors.push_back("PUJOPASS.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]"); | ||
| 1165 | if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && | ||
| 1166 | show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") | ||
| 1167 | record_errors.push_back("PUJOPASS.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); | ||
| 1168 | std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_arrival_time; | ||
| 1169 | if (!target_arrival_time_raw.empty()) { | ||
| 1170 | target_arrival_time = parseHhmmss(target_arrival_time_raw); | ||
| 1171 | if (!target_arrival_time) { | ||
| 1172 | record_errors.push_back("PUJOPASS.TargetArrivalTime has invalid format, should be HH:MM:SS"); | ||
| 1173 | } | ||
| 1174 | } | ||
| 1175 | std::optional<std::chrono::hh_mm_ss<std::chrono::seconds>> target_departure_time; | ||
| 1176 | if (!target_departure_time_raw.empty()) { | ||
| 1177 | target_departure_time = parseHhmmss(target_departure_time_raw); | ||
| 1178 | if (!target_departure_time) { | ||
| 1179 | record_errors.push_back("PUJOPASS.TargetDepartureTime has invalid format, should be HH:MM:SS"); | ||
| 1180 | } | ||
| 1181 | } | ||
| 1182 | if (!record_errors.empty()) return; | ||
| 1183 | |||
| 1184 | records.public_journey_passing_times.emplace_back( | ||
| 1185 | Kv1PublicJourneyPassingTimes::Key( | ||
| 1186 | data_owner_code, | ||
| 1187 | organizational_unit_code, | ||
| 1188 | schedule_code, | ||
| 1189 | schedule_type_code, | ||
| 1190 | line_planning_number, | ||
| 1191 | static_cast<int>(*journey_number), | ||
| 1192 | static_cast<short>(*stop_order)), | ||
| 1193 | journey_pattern_code, | ||
| 1194 | user_stop_code, | ||
| 1195 | target_arrival_time, | ||
| 1196 | target_departure_time, | ||
| 1197 | wheelchair_accessible, | ||
| 1198 | *data_owner_is_operator, | ||
| 1199 | *planned_monitored, | ||
| 1200 | product_formula_type, | ||
| 1201 | show_flexible_trip); | ||
| 1202 | } | ||
| 1203 | |||
| 1204 | void Kv1Parser::parseOperatingDay() { | ||
| 1205 | auto data_owner_code = eatString("OPERDAY.DataOwnerCode", true, 10); | ||
| 1206 | auto organizational_unit_code = eatString("OPERDAY.OrganizationalUnitCode", true, 10); | ||
| 1207 | auto schedule_code = eatString("OPERDAY.ScheduleCode", true, 10); | ||
| 1208 | auto schedule_type_code = eatString("OPERDAY.ScheduleTypeCode", true, 10); | ||
| 1209 | auto valid_date_raw = eatString("OPERDAY.ValidDate", true, 10); | ||
| 1210 | auto description = eatString("OPERDAY.Description", false, 255); | ||
| 1211 | if (!record_errors.empty()) return; | ||
| 1212 | |||
| 1213 | auto valid_date = parseYyyymmdd(valid_date_raw); | ||
| 1214 | if (!valid_date) | ||
| 1215 | record_errors.push_back("OPERDAY.ValidDate has invalid format, should be YYYY-MM-DD"); | ||
| 1216 | if (!record_errors.empty()) return; | ||
| 1217 | |||
| 1218 | records.operating_days.emplace_back( | ||
| 1219 | Kv1OperatingDay::Key( | ||
| 1220 | data_owner_code, | ||
| 1221 | organizational_unit_code, | ||
| 1222 | schedule_code, | ||
| 1223 | schedule_type_code, | ||
| 1224 | *valid_date), | ||
| 1225 | description); | ||
| 1226 | } | ||
| 1227 | |||
| 1228 | const std::unordered_map<std::string_view, Kv1Parser::ParseFunc> Kv1Parser::type_parsers{ | ||
| 1229 | { "ORUN", &Kv1Parser::parseOrganizationalUnit }, | ||
| 1230 | { "ORUNORUN", &Kv1Parser::parseHigherOrganizationalUnit }, | ||
| 1231 | { "USRSTOP", &Kv1Parser::parseUserStopPoint }, | ||
| 1232 | { "USRSTAR", &Kv1Parser::parseUserStopArea }, | ||
| 1233 | { "TILI", &Kv1Parser::parseTimingLink }, | ||
| 1234 | { "LINK", &Kv1Parser::parseLink }, | ||
| 1235 | { "LINE", &Kv1Parser::parseLine }, | ||
| 1236 | { "DEST", &Kv1Parser::parseDestination }, | ||
| 1237 | { "JOPA", &Kv1Parser::parseJourneyPattern }, | ||
| 1238 | { "CONFINREL", &Kv1Parser::parseConcessionFinancerRelation }, | ||
| 1239 | { "CONAREA", &Kv1Parser::parseConcessionArea }, | ||
| 1240 | { "FINANCER", &Kv1Parser::parseFinancer }, | ||
| 1241 | { "JOPATILI", &Kv1Parser::parseJourneyPatternTimingLink }, | ||
| 1242 | { "POINT", &Kv1Parser::parsePoint }, | ||
| 1243 | { "POOL", &Kv1Parser::parsePointOnLink }, | ||
| 1244 | { "ICON", &Kv1Parser::parseIcon }, | ||
| 1245 | { "NOTICE", &Kv1Parser::parseNotice }, | ||
| 1246 | { "NTCASSGNM", &Kv1Parser::parseNoticeAssignment }, | ||
| 1247 | { "TIMDEMGRP", &Kv1Parser::parseTimeDemandGroup }, | ||
| 1248 | { "TIMDEMRNT", &Kv1Parser::parseTimeDemandGroupRunTime }, | ||
| 1249 | { "PEGR", &Kv1Parser::parsePeriodGroup }, | ||
| 1250 | { "SPECDAY", &Kv1Parser::parseSpecificDay }, | ||
| 1251 | { "TIVE", &Kv1Parser::parseTimetableVersion }, | ||
| 1252 | { "PUJO", &Kv1Parser::parsePublicJourney }, | ||
| 1253 | { "PEGRVAL", &Kv1Parser::parsePeriodGroupValidity }, | ||
| 1254 | { "EXCOPDAY", &Kv1Parser::parseExceptionalOperatingDay }, | ||
| 1255 | { "SCHEDVERS", &Kv1Parser::parseScheduleVersion }, | ||
| 1256 | { "PUJOPASS", &Kv1Parser::parsePublicJourneyPassingTimes }, | ||
| 1257 | { "OPERDAY", &Kv1Parser::parseOperatingDay }, | ||
| 1258 | }; | ||
diff --git a/lib/libtmi8/src/kv1_types.cpp b/lib/libtmi8/src/kv1_types.cpp new file mode 100644 index 0000000..49e306e --- /dev/null +++ b/lib/libtmi8/src/kv1_types.cpp | |||
| @@ -0,0 +1,773 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <boost/container_hash/hash.hpp> | ||
| 4 | |||
| 5 | #include <tmi8/kv1_types.hpp> | ||
| 6 | |||
| 7 | size_t Kv1Records::size() const { | ||
| 8 | return organizational_units.size() | ||
| 9 | + higher_organizational_units.size() | ||
| 10 | + user_stop_points.size() | ||
| 11 | + user_stop_areas.size() | ||
| 12 | + timing_links.size() | ||
| 13 | + links.size() | ||
| 14 | + lines.size() | ||
| 15 | + destinations.size() | ||
| 16 | + journey_patterns.size() | ||
| 17 | + concession_financer_relations.size() | ||
| 18 | + concession_areas.size() | ||
| 19 | + financers.size() | ||
| 20 | + journey_pattern_timing_links.size() | ||
| 21 | + points.size() | ||
| 22 | + point_on_links.size() | ||
| 23 | + icons.size() | ||
| 24 | + notices.size() | ||
| 25 | + notice_assignments.size() | ||
| 26 | + time_demand_groups.size() | ||
| 27 | + time_demand_group_run_times.size() | ||
| 28 | + period_groups.size() | ||
| 29 | + specific_days.size() | ||
| 30 | + timetable_versions.size() | ||
| 31 | + public_journeys.size() | ||
| 32 | + period_group_validities.size() | ||
| 33 | + exceptional_operating_days.size() | ||
| 34 | + schedule_versions.size() | ||
| 35 | + public_journey_passing_times.size() | ||
| 36 | + operating_days.size(); | ||
| 37 | } | ||
| 38 | |||
| 39 | Kv1OrganizationalUnit::Key::Key( | ||
| 40 | std::string data_owner_code, | ||
| 41 | std::string organizational_unit_code) | ||
| 42 | : data_owner_code(std::move(data_owner_code)), | ||
| 43 | organizational_unit_code(std::move(organizational_unit_code)) | ||
| 44 | {} | ||
| 45 | |||
| 46 | Kv1HigherOrganizationalUnit::Key::Key( | ||
| 47 | std::string data_owner_code, | ||
| 48 | std::string organizational_unit_code_parent, | ||
| 49 | std::string organizational_unit_code_child, | ||
| 50 | std::chrono::year_month_day valid_from) | ||
| 51 | : data_owner_code(std::move(data_owner_code)), | ||
| 52 | organizational_unit_code_parent(std::move(organizational_unit_code_parent)), | ||
| 53 | organizational_unit_code_child(std::move(organizational_unit_code_child)), | ||
| 54 | valid_from(valid_from) | ||
| 55 | {} | ||
| 56 | |||
| 57 | Kv1UserStopPoint::Key::Key( | ||
| 58 | std::string data_owner_code, | ||
| 59 | std::string user_stop_code) | ||
| 60 | : data_owner_code(std::move(data_owner_code)), | ||
| 61 | user_stop_code(std::move(user_stop_code)) | ||
| 62 | {} | ||
| 63 | |||
| 64 | Kv1UserStopArea::Key::Key( | ||
| 65 | std::string data_owner_code, | ||
| 66 | std::string user_stop_area_code) | ||
| 67 | : data_owner_code(std::move(data_owner_code)), | ||
| 68 | user_stop_area_code(std::move(user_stop_area_code)) | ||
| 69 | {} | ||
| 70 | |||
| 71 | Kv1TimingLink::Key::Key( | ||
| 72 | std::string data_owner_code, | ||
| 73 | std::string user_stop_code_begin, | ||
| 74 | std::string user_stop_code_end) | ||
| 75 | : data_owner_code(std::move(data_owner_code)), | ||
| 76 | user_stop_code_begin(std::move(user_stop_code_begin)), | ||
| 77 | user_stop_code_end(std::move(user_stop_code_end)) | ||
| 78 | {} | ||
| 79 | |||
| 80 | Kv1Link::Key::Key(std::string data_owner_code, | ||
| 81 | std::string user_stop_code_begin, | ||
| 82 | std::string user_stop_code_end, | ||
| 83 | std::string transport_type) | ||
| 84 | : data_owner_code(std::move(data_owner_code)), | ||
| 85 | user_stop_code_begin(std::move(user_stop_code_begin)), | ||
| 86 | user_stop_code_end(std::move(user_stop_code_end)), | ||
| 87 | transport_type(std::move(transport_type)) | ||
| 88 | {} | ||
| 89 | |||
| 90 | Kv1Line::Key::Key(std::string data_owner_code, | ||
| 91 | std::string line_planning_number) | ||
| 92 | : data_owner_code(std::move(data_owner_code)), | ||
| 93 | line_planning_number(std::move(line_planning_number)) | ||
| 94 | {} | ||
| 95 | |||
| 96 | Kv1Destination::Key::Key(std::string data_owner_code, | ||
| 97 | std::string dest_code) | ||
| 98 | : data_owner_code(std::move(data_owner_code)), | ||
| 99 | dest_code(std::move(dest_code)) | ||
| 100 | {} | ||
| 101 | |||
| 102 | Kv1JourneyPattern::Key::Key(std::string data_owner_code, | ||
| 103 | std::string line_planning_number, | ||
| 104 | std::string journey_pattern_code) | ||
| 105 | : data_owner_code(std::move(data_owner_code)), | ||
| 106 | line_planning_number(std::move(line_planning_number)), | ||
| 107 | journey_pattern_code(std::move(journey_pattern_code)) | ||
| 108 | {} | ||
| 109 | |||
| 110 | Kv1ConcessionFinancerRelation::Key::Key(std::string data_owner_code, | ||
| 111 | std::string con_fin_rel_code) | ||
| 112 | : data_owner_code(std::move(data_owner_code)), | ||
| 113 | con_fin_rel_code(std::move(con_fin_rel_code)) | ||
| 114 | {} | ||
| 115 | |||
| 116 | Kv1ConcessionArea::Key::Key(std::string data_owner_code, | ||
| 117 | std::string concession_area_code) | ||
| 118 | : data_owner_code(std::move(data_owner_code)), | ||
| 119 | concession_area_code(std::move(concession_area_code)) | ||
| 120 | {} | ||
| 121 | |||
| 122 | Kv1Financer::Key::Key(std::string data_owner_code, | ||
| 123 | std::string financer_code) | ||
| 124 | : data_owner_code(std::move(data_owner_code)), | ||
| 125 | financer_code(std::move(financer_code)) | ||
| 126 | {} | ||
| 127 | |||
| 128 | Kv1JourneyPatternTimingLink::Key::Key(std::string data_owner_code, | ||
| 129 | std::string line_planning_number, | ||
| 130 | std::string journey_pattern_code, | ||
| 131 | short timing_link_order) | ||
| 132 | : data_owner_code(std::move(data_owner_code)), | ||
| 133 | line_planning_number(std::move(line_planning_number)), | ||
| 134 | journey_pattern_code(journey_pattern_code), | ||
| 135 | timing_link_order(timing_link_order) | ||
| 136 | {} | ||
| 137 | |||
| 138 | Kv1Point::Key::Key(std::string data_owner_code, | ||
| 139 | std::string point_code) | ||
| 140 | : data_owner_code(std::move(data_owner_code)), | ||
| 141 | point_code(std::move(point_code)) | ||
| 142 | {} | ||
| 143 | |||
| 144 | Kv1PointOnLink::Key::Key(std::string data_owner_code, | ||
| 145 | std::string user_stop_code_begin, | ||
| 146 | std::string user_stop_code_end, | ||
| 147 | std::string point_data_owner_code, | ||
| 148 | std::string point_code, | ||
| 149 | std::string transport_type) | ||
| 150 | : data_owner_code(std::move(data_owner_code)), | ||
| 151 | user_stop_code_begin(std::move(user_stop_code_begin)), | ||
| 152 | user_stop_code_end(std::move(user_stop_code_end)), | ||
| 153 | point_data_owner_code(std::move(point_data_owner_code)), | ||
| 154 | point_code(std::move(point_code)), | ||
| 155 | transport_type(std::move(transport_type)) | ||
| 156 | {} | ||
| 157 | |||
| 158 | Kv1Icon::Key::Key(std::string data_owner_code, | ||
| 159 | short icon_number) | ||
| 160 | : data_owner_code(std::move(data_owner_code)), | ||
| 161 | icon_number(icon_number) | ||
| 162 | {} | ||
| 163 | |||
| 164 | Kv1Notice::Key::Key(std::string data_owner_code, | ||
| 165 | std::string notice_code) | ||
| 166 | : data_owner_code(std::move(data_owner_code)), | ||
| 167 | notice_code(std::move(notice_code)) | ||
| 168 | {} | ||
| 169 | |||
| 170 | Kv1TimeDemandGroup::Key::Key(std::string data_owner_code, | ||
| 171 | std::string line_planning_number, | ||
| 172 | std::string journey_pattern_code, | ||
| 173 | std::string time_demand_group_code) | ||
| 174 | : data_owner_code(std::move(data_owner_code)), | ||
| 175 | line_planning_number(std::move(line_planning_number)), | ||
| 176 | journey_pattern_code(std::move(journey_pattern_code)), | ||
| 177 | time_demand_group_code(std::move(time_demand_group_code)) | ||
| 178 | {} | ||
| 179 | |||
| 180 | Kv1TimeDemandGroupRunTime::Key::Key(std::string data_owner_code, | ||
| 181 | std::string line_planning_number, | ||
| 182 | std::string journey_pattern_code, | ||
| 183 | std::string time_demand_group_code, | ||
| 184 | short timing_link_order) | ||
| 185 | : data_owner_code(std::move(data_owner_code)), | ||
| 186 | line_planning_number(std::move(line_planning_number)), | ||
| 187 | journey_pattern_code(std::move(journey_pattern_code)), | ||
| 188 | time_demand_group_code(std::move(time_demand_group_code)), | ||
| 189 | timing_link_order(std::move(timing_link_order)) | ||
| 190 | {} | ||
| 191 | |||
| 192 | Kv1PeriodGroup::Key::Key(std::string data_owner_code, | ||
| 193 | std::string period_group_code) | ||
| 194 | : data_owner_code(std::move(data_owner_code)), | ||
| 195 | period_group_code(std::move(period_group_code)) | ||
| 196 | {} | ||
| 197 | |||
| 198 | Kv1SpecificDay::Key::Key(std::string data_owner_code, | ||
| 199 | std::string specific_day_code) | ||
| 200 | : data_owner_code(std::move(data_owner_code)), | ||
| 201 | specific_day_code(std::move(specific_day_code)) | ||
| 202 | {} | ||
| 203 | |||
| 204 | Kv1TimetableVersion::Key::Key(std::string data_owner_code, | ||
| 205 | std::string organizational_unit_code, | ||
| 206 | std::string timetable_version_code, | ||
| 207 | std::string period_group_code, | ||
| 208 | std::string specific_day_code) | ||
| 209 | : data_owner_code(std::move(data_owner_code)), | ||
| 210 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 211 | timetable_version_code(std::move(timetable_version_code)), | ||
| 212 | period_group_code(std::move(period_group_code)), | ||
| 213 | specific_day_code(std::move(specific_day_code)) | ||
| 214 | {} | ||
| 215 | |||
| 216 | Kv1PublicJourney::Key::Key(std::string data_owner_code, | ||
| 217 | std::string timetable_version_code, | ||
| 218 | std::string organizational_unit_code, | ||
| 219 | std::string period_group_code, | ||
| 220 | std::string specific_day_code, | ||
| 221 | std::string day_type, | ||
| 222 | std::string line_planning_number, | ||
| 223 | int journey_number) | ||
| 224 | : data_owner_code(std::move(data_owner_code)), | ||
| 225 | timetable_version_code(std::move(timetable_version_code)), | ||
| 226 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 227 | period_group_code(std::move(period_group_code)), | ||
| 228 | specific_day_code(std::move(specific_day_code)), | ||
| 229 | day_type(std::move(day_type)), | ||
| 230 | line_planning_number(std::move(line_planning_number)), | ||
| 231 | journey_number(journey_number) | ||
| 232 | {} | ||
| 233 | |||
| 234 | Kv1PeriodGroupValidity::Key::Key(std::string data_owner_code, | ||
| 235 | std::string organizational_unit_code, | ||
| 236 | std::string period_group_code, | ||
| 237 | std::chrono::year_month_day valid_from) | ||
| 238 | : data_owner_code(std::move(data_owner_code)), | ||
| 239 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 240 | period_group_code(std::move(period_group_code)), | ||
| 241 | valid_from(valid_from) | ||
| 242 | {} | ||
| 243 | |||
| 244 | Kv1ExceptionalOperatingDay::Key::Key(std::string data_owner_code, | ||
| 245 | std::string organizational_unit_code, | ||
| 246 | std::chrono::sys_seconds valid_date) | ||
| 247 | : data_owner_code(std::move(data_owner_code)), | ||
| 248 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 249 | valid_date(valid_date) | ||
| 250 | {} | ||
| 251 | |||
| 252 | Kv1ScheduleVersion::Key::Key(std::string data_owner_code, | ||
| 253 | std::string organizational_unit_code, | ||
| 254 | std::string schedule_code, | ||
| 255 | std::string schedule_type_code) | ||
| 256 | : data_owner_code(std::move(data_owner_code)), | ||
| 257 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 258 | schedule_code(std::move(schedule_code)), | ||
| 259 | schedule_type_code(std::move(schedule_type_code)) | ||
| 260 | {} | ||
| 261 | |||
| 262 | Kv1PublicJourneyPassingTimes::Key::Key(std::string data_owner_code, | ||
| 263 | std::string organizational_unit_code, | ||
| 264 | std::string schedule_code, | ||
| 265 | std::string schedule_type_code, | ||
| 266 | std::string line_planning_number, | ||
| 267 | int journey_number, | ||
| 268 | short stop_order) | ||
| 269 | : data_owner_code(std::move(data_owner_code)), | ||
| 270 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 271 | schedule_code(std::move(schedule_code)), | ||
| 272 | schedule_type_code(std::move(schedule_type_code)), | ||
| 273 | line_planning_number(std::move(line_planning_number)), | ||
| 274 | journey_number(journey_number), | ||
| 275 | stop_order(stop_order) | ||
| 276 | {} | ||
| 277 | |||
| 278 | Kv1OperatingDay::Key::Key(std::string data_owner_code, | ||
| 279 | std::string organizational_unit_code, | ||
| 280 | std::string schedule_code, | ||
| 281 | std::string schedule_type_code, | ||
| 282 | std::chrono::year_month_day valid_date) | ||
| 283 | : data_owner_code(std::move(data_owner_code)), | ||
| 284 | organizational_unit_code(std::move(organizational_unit_code)), | ||
| 285 | schedule_code(std::move(schedule_code)), | ||
| 286 | schedule_type_code(std::move(schedule_type_code)), | ||
| 287 | valid_date(valid_date) | ||
| 288 | {} | ||
| 289 | |||
| 290 | bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b) { | ||
| 291 | return a.data_owner_code == b.data_owner_code | ||
| 292 | && a.organizational_unit_code == b.organizational_unit_code; | ||
| 293 | } | ||
| 294 | |||
| 295 | bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b) { | ||
| 296 | return a.data_owner_code == b.data_owner_code | ||
| 297 | && a.organizational_unit_code_parent == b.organizational_unit_code_parent | ||
| 298 | && a.organizational_unit_code_child == b.organizational_unit_code_child; | ||
| 299 | } | ||
| 300 | |||
| 301 | bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b) { | ||
| 302 | return a.data_owner_code == b.data_owner_code | ||
| 303 | && a.user_stop_code == b.user_stop_code; | ||
| 304 | } | ||
| 305 | |||
| 306 | bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b) { | ||
| 307 | return a.data_owner_code == b.data_owner_code | ||
| 308 | && a.user_stop_area_code == b.user_stop_area_code; | ||
| 309 | } | ||
| 310 | |||
| 311 | bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b) { | ||
| 312 | return a.data_owner_code == b.data_owner_code | ||
| 313 | && a.user_stop_code_begin == b.user_stop_code_begin | ||
| 314 | && a.user_stop_code_end == b.user_stop_code_end; | ||
| 315 | } | ||
| 316 | |||
| 317 | bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b) { | ||
| 318 | return a.data_owner_code == b.data_owner_code | ||
| 319 | && a.user_stop_code_begin == b.user_stop_code_begin | ||
| 320 | && a.user_stop_code_end == b.user_stop_code_end | ||
| 321 | && a.transport_type == b.transport_type; | ||
| 322 | } | ||
| 323 | |||
| 324 | bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b) { | ||
| 325 | return a.data_owner_code == b.data_owner_code | ||
| 326 | && a.line_planning_number == b.line_planning_number; | ||
| 327 | } | ||
| 328 | |||
| 329 | bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b) { | ||
| 330 | return a.data_owner_code == b.data_owner_code | ||
| 331 | && a.dest_code == b.dest_code; | ||
| 332 | } | ||
| 333 | |||
| 334 | bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b) { | ||
| 335 | return a.data_owner_code == b.data_owner_code | ||
| 336 | && a.line_planning_number == b.line_planning_number | ||
| 337 | && a.journey_pattern_code == b.journey_pattern_code; | ||
| 338 | } | ||
| 339 | |||
| 340 | bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b) { | ||
| 341 | return a.data_owner_code == b.data_owner_code | ||
| 342 | && a.con_fin_rel_code == b.con_fin_rel_code; | ||
| 343 | } | ||
| 344 | |||
| 345 | bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b) { | ||
| 346 | return a.data_owner_code == b.data_owner_code | ||
| 347 | && a.concession_area_code == b.concession_area_code; | ||
| 348 | } | ||
| 349 | |||
| 350 | bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b) { | ||
| 351 | return a.data_owner_code == b.data_owner_code | ||
| 352 | && a.financer_code == b.financer_code; | ||
| 353 | } | ||
| 354 | |||
| 355 | bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b) { | ||
| 356 | return a.data_owner_code == b.data_owner_code | ||
| 357 | && a.line_planning_number == b.line_planning_number | ||
| 358 | && a.journey_pattern_code == b.journey_pattern_code | ||
| 359 | && a.timing_link_order == b.timing_link_order; | ||
| 360 | } | ||
| 361 | |||
| 362 | bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b) { | ||
| 363 | return a.data_owner_code == b.data_owner_code | ||
| 364 | && a.point_code == b.point_code; | ||
| 365 | } | ||
| 366 | |||
| 367 | bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b) { | ||
| 368 | return a.data_owner_code == b.data_owner_code | ||
| 369 | && a.user_stop_code_begin == b.user_stop_code_begin | ||
| 370 | && a.user_stop_code_end == b.user_stop_code_end | ||
| 371 | && a.point_data_owner_code == b.point_data_owner_code | ||
| 372 | && a.point_code == b.point_code | ||
| 373 | && a.transport_type == b.transport_type; | ||
| 374 | } | ||
| 375 | |||
| 376 | bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b) { | ||
| 377 | return a.data_owner_code == b.data_owner_code | ||
| 378 | && a.icon_number == b.icon_number; | ||
| 379 | } | ||
| 380 | |||
| 381 | bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b) { | ||
| 382 | return a.data_owner_code == b.data_owner_code | ||
| 383 | && a.notice_code == b.notice_code; | ||
| 384 | } | ||
| 385 | |||
| 386 | bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b) { | ||
| 387 | return a.data_owner_code == b.data_owner_code | ||
| 388 | && a.line_planning_number == b.line_planning_number | ||
| 389 | && a.journey_pattern_code == b.journey_pattern_code | ||
| 390 | && a.time_demand_group_code == b.time_demand_group_code; | ||
| 391 | } | ||
| 392 | |||
| 393 | bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const Kv1TimeDemandGroupRunTime::Key &b) { | ||
| 394 | return a.data_owner_code == b.data_owner_code | ||
| 395 | && a.line_planning_number == b.line_planning_number | ||
| 396 | && a.journey_pattern_code == b.journey_pattern_code | ||
| 397 | && a.time_demand_group_code == b.time_demand_group_code | ||
| 398 | && a.timing_link_order == b.timing_link_order; | ||
| 399 | } | ||
| 400 | |||
| 401 | bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b) { | ||
| 402 | return a.data_owner_code == b.data_owner_code | ||
| 403 | && a.period_group_code == b.period_group_code; | ||
| 404 | } | ||
| 405 | |||
| 406 | bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b) { | ||
| 407 | return a.data_owner_code == b.data_owner_code | ||
| 408 | && a.specific_day_code == b.specific_day_code; | ||
| 409 | } | ||
| 410 | |||
| 411 | bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b) { | ||
| 412 | return a.data_owner_code == b.data_owner_code | ||
| 413 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 414 | && a.timetable_version_code == b.timetable_version_code | ||
| 415 | && a.period_group_code == b.period_group_code | ||
| 416 | && a.specific_day_code == b.specific_day_code; | ||
| 417 | } | ||
| 418 | |||
| 419 | bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b) { | ||
| 420 | return a.data_owner_code == b.data_owner_code | ||
| 421 | && a.timetable_version_code == b.timetable_version_code | ||
| 422 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 423 | && a.period_group_code == b.period_group_code | ||
| 424 | && a.specific_day_code == b.specific_day_code | ||
| 425 | && a.day_type == b.day_type | ||
| 426 | && a.line_planning_number == b.line_planning_number | ||
| 427 | && a.journey_number == b.journey_number; | ||
| 428 | } | ||
| 429 | |||
| 430 | bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b) { | ||
| 431 | return a.data_owner_code == b.data_owner_code | ||
| 432 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 433 | && a.period_group_code == b.period_group_code | ||
| 434 | && a.valid_from == b.valid_from; | ||
| 435 | } | ||
| 436 | |||
| 437 | bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b) { | ||
| 438 | return a.data_owner_code == b.data_owner_code | ||
| 439 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 440 | && a.valid_date == b.valid_date; | ||
| 441 | } | ||
| 442 | |||
| 443 | bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b) { | ||
| 444 | return a.data_owner_code == b.data_owner_code | ||
| 445 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 446 | && a.schedule_code == b.schedule_code | ||
| 447 | && a.schedule_type_code == b.schedule_type_code; | ||
| 448 | } | ||
| 449 | |||
| 450 | bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b) { | ||
| 451 | return a.data_owner_code == b.data_owner_code | ||
| 452 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 453 | && a.schedule_code == b.schedule_code | ||
| 454 | && a.schedule_type_code == b.schedule_type_code | ||
| 455 | && a.line_planning_number == b.line_planning_number | ||
| 456 | && a.journey_number == b.journey_number | ||
| 457 | && a.stop_order == b.stop_order; | ||
| 458 | } | ||
| 459 | |||
| 460 | bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b) { | ||
| 461 | return a.data_owner_code == b.data_owner_code | ||
| 462 | && a.organizational_unit_code == b.organizational_unit_code | ||
| 463 | && a.schedule_code == b.schedule_code | ||
| 464 | && a.schedule_type_code == b.schedule_type_code | ||
| 465 | && a.valid_date == b.valid_date; | ||
| 466 | } | ||
| 467 | |||
| 468 | namespace std::chrono { | ||
| 469 | static size_t hash_value(const year_month_day &ymd) { | ||
| 470 | size_t seed = 0; | ||
| 471 | |||
| 472 | boost::hash_combine(seed, int(ymd.year())); | ||
| 473 | boost::hash_combine(seed, unsigned(ymd.month())); | ||
| 474 | boost::hash_combine(seed, unsigned(ymd.day())); | ||
| 475 | |||
| 476 | return seed; | ||
| 477 | } | ||
| 478 | |||
| 479 | static size_t hash_value(const sys_seconds &s) { | ||
| 480 | return boost::hash<seconds::rep>()(s.time_since_epoch().count()); | ||
| 481 | } | ||
| 482 | } | ||
| 483 | |||
| 484 | size_t hash_value(const Kv1OrganizationalUnit::Key &k) { | ||
| 485 | size_t seed = 0; | ||
| 486 | |||
| 487 | boost::hash_combine(seed, k.data_owner_code); | ||
| 488 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 489 | |||
| 490 | return seed; | ||
| 491 | } | ||
| 492 | |||
| 493 | size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k) { | ||
| 494 | size_t seed = 0; | ||
| 495 | |||
| 496 | boost::hash_combine(seed, k.data_owner_code); | ||
| 497 | boost::hash_combine(seed, k.organizational_unit_code_parent); | ||
| 498 | boost::hash_combine(seed, k.organizational_unit_code_child); | ||
| 499 | boost::hash_combine(seed, k.valid_from); | ||
| 500 | |||
| 501 | return seed; | ||
| 502 | } | ||
| 503 | |||
| 504 | size_t hash_value(const Kv1UserStopPoint::Key &k) { | ||
| 505 | size_t seed = 0; | ||
| 506 | |||
| 507 | boost::hash_combine(seed, k.data_owner_code); | ||
| 508 | boost::hash_combine(seed, k.user_stop_code); | ||
| 509 | |||
| 510 | return seed; | ||
| 511 | } | ||
| 512 | |||
| 513 | size_t hash_value(const Kv1UserStopArea::Key &k) { | ||
| 514 | size_t seed = 0; | ||
| 515 | |||
| 516 | boost::hash_combine(seed, k.data_owner_code); | ||
| 517 | boost::hash_combine(seed, k.user_stop_area_code); | ||
| 518 | |||
| 519 | return seed; | ||
| 520 | } | ||
| 521 | |||
| 522 | size_t hash_value(const Kv1TimingLink::Key &k) { | ||
| 523 | size_t seed = 0; | ||
| 524 | |||
| 525 | boost::hash_combine(seed, k.data_owner_code); | ||
| 526 | boost::hash_combine(seed, k.user_stop_code_begin); | ||
| 527 | boost::hash_combine(seed, k.user_stop_code_end); | ||
| 528 | |||
| 529 | return seed; | ||
| 530 | } | ||
| 531 | |||
| 532 | size_t hash_value(const Kv1Link::Key &k) { | ||
| 533 | size_t seed = 0; | ||
| 534 | |||
| 535 | boost::hash_combine(seed, k.data_owner_code); | ||
| 536 | boost::hash_combine(seed, k.user_stop_code_begin); | ||
| 537 | boost::hash_combine(seed, k.user_stop_code_end); | ||
| 538 | boost::hash_combine(seed, k.transport_type); | ||
| 539 | |||
| 540 | return seed; | ||
| 541 | } | ||
| 542 | |||
| 543 | size_t hash_value(const Kv1Line::Key &k) { | ||
| 544 | size_t seed = 0; | ||
| 545 | |||
| 546 | boost::hash_combine(seed, k.data_owner_code); | ||
| 547 | boost::hash_combine(seed, k.line_planning_number); | ||
| 548 | |||
| 549 | return seed; | ||
| 550 | } | ||
| 551 | |||
| 552 | size_t hash_value(const Kv1Destination::Key &k) { | ||
| 553 | size_t seed = 0; | ||
| 554 | |||
| 555 | boost::hash_combine(seed, k.data_owner_code); | ||
| 556 | boost::hash_combine(seed, k.dest_code); | ||
| 557 | |||
| 558 | return seed; | ||
| 559 | } | ||
| 560 | |||
| 561 | size_t hash_value(const Kv1JourneyPattern::Key &k) { | ||
| 562 | size_t seed = 0; | ||
| 563 | |||
| 564 | boost::hash_combine(seed, k.data_owner_code); | ||
| 565 | boost::hash_combine(seed, k.line_planning_number); | ||
| 566 | boost::hash_combine(seed, k.journey_pattern_code); | ||
| 567 | |||
| 568 | return seed; | ||
| 569 | } | ||
| 570 | |||
| 571 | size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k) { | ||
| 572 | size_t seed = 0; | ||
| 573 | |||
| 574 | boost::hash_combine(seed, k.data_owner_code); | ||
| 575 | boost::hash_combine(seed, k.con_fin_rel_code); | ||
| 576 | |||
| 577 | return seed; | ||
| 578 | } | ||
| 579 | |||
| 580 | size_t hash_value(const Kv1ConcessionArea::Key &k) { | ||
| 581 | size_t seed = 0; | ||
| 582 | |||
| 583 | boost::hash_combine(seed, k.data_owner_code); | ||
| 584 | boost::hash_combine(seed, k.concession_area_code); | ||
| 585 | |||
| 586 | return seed; | ||
| 587 | } | ||
| 588 | |||
| 589 | size_t hash_value(const Kv1Financer::Key &k) { | ||
| 590 | size_t seed = 0; | ||
| 591 | |||
| 592 | boost::hash_combine(seed, k.data_owner_code); | ||
| 593 | boost::hash_combine(seed, k.financer_code); | ||
| 594 | |||
| 595 | return seed; | ||
| 596 | } | ||
| 597 | |||
| 598 | size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k) { | ||
| 599 | size_t seed = 0; | ||
| 600 | |||
| 601 | boost::hash_combine(seed, k.data_owner_code); | ||
| 602 | boost::hash_combine(seed, k.line_planning_number); | ||
| 603 | boost::hash_combine(seed, k.journey_pattern_code); | ||
| 604 | boost::hash_combine(seed, k.timing_link_order); | ||
| 605 | |||
| 606 | return seed; | ||
| 607 | } | ||
| 608 | |||
| 609 | size_t hash_value(const Kv1Point::Key &k) { | ||
| 610 | size_t seed = 0; | ||
| 611 | |||
| 612 | boost::hash_combine(seed, k.data_owner_code); | ||
| 613 | boost::hash_combine(seed, k.point_code); | ||
| 614 | |||
| 615 | return seed; | ||
| 616 | } | ||
| 617 | |||
| 618 | size_t hash_value(const Kv1PointOnLink::Key &k) { | ||
| 619 | size_t seed = 0; | ||
| 620 | |||
| 621 | boost::hash_combine(seed, k.data_owner_code); | ||
| 622 | boost::hash_combine(seed, k.user_stop_code_begin); | ||
| 623 | boost::hash_combine(seed, k.user_stop_code_end); | ||
| 624 | boost::hash_combine(seed, k.point_data_owner_code); | ||
| 625 | boost::hash_combine(seed, k.point_code); | ||
| 626 | boost::hash_combine(seed, k.transport_type); | ||
| 627 | |||
| 628 | return seed; | ||
| 629 | } | ||
| 630 | |||
| 631 | size_t hash_value(const Kv1Icon::Key &k) { | ||
| 632 | size_t seed = 0; | ||
| 633 | |||
| 634 | boost::hash_combine(seed, k.data_owner_code); | ||
| 635 | boost::hash_combine(seed, k.icon_number); | ||
| 636 | |||
| 637 | return seed; | ||
| 638 | } | ||
| 639 | |||
| 640 | size_t hash_value(const Kv1Notice::Key &k) { | ||
| 641 | size_t seed = 0; | ||
| 642 | |||
| 643 | boost::hash_combine(seed, k.data_owner_code); | ||
| 644 | boost::hash_combine(seed, k.notice_code); | ||
| 645 | |||
| 646 | return seed; | ||
| 647 | } | ||
| 648 | |||
| 649 | size_t hash_value(const Kv1TimeDemandGroup::Key &k) { | ||
| 650 | size_t seed = 0; | ||
| 651 | |||
| 652 | boost::hash_combine(seed, k.data_owner_code); | ||
| 653 | boost::hash_combine(seed, k.line_planning_number); | ||
| 654 | boost::hash_combine(seed, k.journey_pattern_code); | ||
| 655 | boost::hash_combine(seed, k.time_demand_group_code); | ||
| 656 | |||
| 657 | return seed; | ||
| 658 | } | ||
| 659 | |||
| 660 | size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k) { | ||
| 661 | size_t seed = 0; | ||
| 662 | |||
| 663 | boost::hash_combine(seed, k.data_owner_code); | ||
| 664 | boost::hash_combine(seed, k.line_planning_number); | ||
| 665 | boost::hash_combine(seed, k.journey_pattern_code); | ||
| 666 | boost::hash_combine(seed, k.time_demand_group_code); | ||
| 667 | boost::hash_combine(seed, k.timing_link_order); | ||
| 668 | |||
| 669 | return seed; | ||
| 670 | } | ||
| 671 | |||
| 672 | size_t hash_value(const Kv1PeriodGroup::Key &k) { | ||
| 673 | size_t seed = 0; | ||
| 674 | |||
| 675 | boost::hash_combine(seed, k.data_owner_code); | ||
| 676 | boost::hash_combine(seed, k.period_group_code); | ||
| 677 | |||
| 678 | return seed; | ||
| 679 | } | ||
| 680 | |||
| 681 | size_t hash_value(const Kv1SpecificDay::Key &k) { | ||
| 682 | size_t seed = 0; | ||
| 683 | |||
| 684 | boost::hash_combine(seed, k.data_owner_code); | ||
| 685 | boost::hash_combine(seed, k.specific_day_code); | ||
| 686 | |||
| 687 | return seed; | ||
| 688 | } | ||
| 689 | |||
| 690 | size_t hash_value(const Kv1TimetableVersion::Key &k) { | ||
| 691 | size_t seed = 0; | ||
| 692 | |||
| 693 | boost::hash_combine(seed, k.data_owner_code); | ||
| 694 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 695 | boost::hash_combine(seed, k.timetable_version_code); | ||
| 696 | boost::hash_combine(seed, k.period_group_code); | ||
| 697 | boost::hash_combine(seed, k.specific_day_code); | ||
| 698 | |||
| 699 | return seed; | ||
| 700 | } | ||
| 701 | |||
| 702 | size_t hash_value(const Kv1PublicJourney::Key &k) { | ||
| 703 | size_t seed = 0; | ||
| 704 | |||
| 705 | boost::hash_combine(seed, k.data_owner_code); | ||
| 706 | boost::hash_combine(seed, k.timetable_version_code); | ||
| 707 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 708 | boost::hash_combine(seed, k.period_group_code); | ||
| 709 | boost::hash_combine(seed, k.specific_day_code); | ||
| 710 | boost::hash_combine(seed, k.day_type); | ||
| 711 | boost::hash_combine(seed, k.line_planning_number); | ||
| 712 | boost::hash_combine(seed, k.journey_number); | ||
| 713 | |||
| 714 | return seed; | ||
| 715 | } | ||
| 716 | |||
| 717 | size_t hash_value(const Kv1PeriodGroupValidity::Key &k) { | ||
| 718 | size_t seed = 0; | ||
| 719 | |||
| 720 | boost::hash_combine(seed, k.data_owner_code); | ||
| 721 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 722 | boost::hash_combine(seed, k.period_group_code); | ||
| 723 | boost::hash_combine(seed, k.valid_from); | ||
| 724 | |||
| 725 | return seed; | ||
| 726 | } | ||
| 727 | |||
| 728 | size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k) { | ||
| 729 | size_t seed = 0; | ||
| 730 | |||
| 731 | boost::hash_combine(seed, k.data_owner_code); | ||
| 732 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 733 | boost::hash_combine(seed, k.valid_date); | ||
| 734 | |||
| 735 | return seed; | ||
| 736 | } | ||
| 737 | |||
| 738 | size_t hash_value(const Kv1ScheduleVersion::Key &k) { | ||
| 739 | size_t seed = 0; | ||
| 740 | |||
| 741 | boost::hash_combine(seed, k.data_owner_code); | ||
| 742 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 743 | boost::hash_combine(seed, k.schedule_code); | ||
| 744 | boost::hash_combine(seed, k.schedule_type_code); | ||
| 745 | |||
| 746 | return seed; | ||
| 747 | } | ||
| 748 | |||
| 749 | size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k) { | ||
| 750 | size_t seed = 0; | ||
| 751 | |||
| 752 | boost::hash_combine(seed, k.data_owner_code); | ||
| 753 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 754 | boost::hash_combine(seed, k.schedule_code); | ||
| 755 | boost::hash_combine(seed, k.schedule_type_code); | ||
| 756 | boost::hash_combine(seed, k.line_planning_number); | ||
| 757 | boost::hash_combine(seed, k.journey_number); | ||
| 758 | boost::hash_combine(seed, k.stop_order); | ||
| 759 | |||
| 760 | return seed; | ||
| 761 | } | ||
| 762 | |||
| 763 | size_t hash_value(const Kv1OperatingDay::Key &k) { | ||
| 764 | size_t seed = 0; | ||
| 765 | |||
| 766 | boost::hash_combine(seed, k.data_owner_code); | ||
| 767 | boost::hash_combine(seed, k.organizational_unit_code); | ||
| 768 | boost::hash_combine(seed, k.schedule_code); | ||
| 769 | boost::hash_combine(seed, k.schedule_type_code); | ||
| 770 | boost::hash_combine(seed, k.valid_date); | ||
| 771 | |||
| 772 | return seed; | ||
| 773 | } | ||
diff --git a/lib/libtmi8/src/kv6_parquet.cpp b/lib/libtmi8/src/kv6_parquet.cpp new file mode 100644 index 0000000..ca70b7f --- /dev/null +++ b/lib/libtmi8/src/kv6_parquet.cpp | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <tmi8/kv6_parquet.hpp> | ||
| 4 | |||
| 5 | ParquetBuilder::ParquetBuilder() { | ||
| 6 | std::shared_ptr<arrow::Field> field_type, field_data_owner_code, field_line_planning_number, field_operating_day, | ||
| 7 | field_journey_number, field_reinforcement_number, field_timestamp, field_source, | ||
| 8 | field_punctuality, field_user_stop_code, field_passage_sequence_number, | ||
| 9 | field_vehicle_number, field_block_code, field_wheelchair_accessible, | ||
| 10 | field_number_of_coaches, field_rd_y, field_rd_x, field_distance_since_last_user_stop; | ||
| 11 | field_type = arrow::field("type", arrow::utf8()); | ||
| 12 | field_data_owner_code = arrow::field("data_owner_code", arrow::utf8()); | ||
| 13 | field_line_planning_number = arrow::field("line_planning_number", arrow::utf8()); | ||
| 14 | field_operating_day = arrow::field("operating_day", arrow::date32()); | ||
| 15 | field_journey_number = arrow::field("journey_number", arrow::uint32()); | ||
| 16 | field_reinforcement_number = arrow::field("reinforcement_number", arrow::uint8()); | ||
| 17 | field_timestamp = arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::SECOND)); | ||
| 18 | field_source = arrow::field("source", arrow::utf8()); | ||
| 19 | field_punctuality = arrow::field("punctuality", arrow::int16()); | ||
| 20 | field_user_stop_code = arrow::field("user_stop_code", arrow::utf8()); | ||
| 21 | field_passage_sequence_number = arrow::field("passage_sequence_number", arrow::uint16()); | ||
| 22 | field_vehicle_number = arrow::field("vehicle_number", arrow::uint32()); | ||
| 23 | field_block_code = arrow::field("block_code", arrow::uint32()); | ||
| 24 | field_wheelchair_accessible = arrow::field("wheelchair_accessible", arrow::utf8()); | ||
| 25 | field_number_of_coaches = arrow::field("number_of_coaches", arrow::uint8()); | ||
| 26 | field_rd_y = arrow::field("rd_y", arrow::int32()); | ||
| 27 | field_rd_x = arrow::field("rd_x", arrow::int32()); | ||
| 28 | field_distance_since_last_user_stop = arrow::field("distance_since_last_user_stop", arrow::uint32()); | ||
| 29 | |||
| 30 | schema = arrow::schema({ field_type, field_data_owner_code, field_line_planning_number, | ||
| 31 | field_operating_day, field_journey_number, | ||
| 32 | field_reinforcement_number, field_timestamp, field_source, | ||
| 33 | field_punctuality, field_user_stop_code, | ||
| 34 | field_passage_sequence_number, field_vehicle_number, | ||
| 35 | field_block_code, field_wheelchair_accessible, | ||
| 36 | field_number_of_coaches, field_rd_y, field_rd_x, | ||
| 37 | field_distance_since_last_user_stop }); | ||
| 38 | } | ||
| 39 | |||
| 40 | arrow::Result<std::shared_ptr<arrow::Table>> ParquetBuilder::getTable() { | ||
| 41 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> types, types.Finish()); | ||
| 42 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> data_owner_codes, data_owner_codes.Finish()); | ||
| 43 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> line_planning_numbers, line_planning_numbers.Finish()); | ||
| 44 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> operating_days, operating_days.Finish()); | ||
| 45 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> journey_numbers, journey_numbers.Finish()); | ||
| 46 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> reinforcement_numbers, reinforcement_numbers.Finish()); | ||
| 47 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> timestamps, timestamps.Finish()); | ||
| 48 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> sources, sources.Finish()); | ||
| 49 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> punctualities, punctualities.Finish()); | ||
| 50 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> user_stop_codes, user_stop_codes.Finish()); | ||
| 51 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> passage_sequence_numbers, passage_sequence_numbers.Finish()); | ||
| 52 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> vehicle_numbers, vehicle_numbers.Finish()); | ||
| 53 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> block_codes, block_codes.Finish()); | ||
| 54 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> wheelchair_accessibles, wheelchair_accessibles.Finish()); | ||
| 55 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> number_of_coaches, number_of_coaches.Finish()); | ||
| 56 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> rd_ys, rd_ys.Finish()); | ||
| 57 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> rd_xs, rd_xs.Finish()); | ||
| 58 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> distance_since_last_user_stops, distance_since_last_user_stops.Finish()); | ||
| 59 | |||
| 60 | std::vector<std::shared_ptr<arrow::Array>> columns = { types, data_owner_codes, line_planning_numbers, operating_days, | ||
| 61 | journey_numbers, reinforcement_numbers, timestamps, sources, | ||
| 62 | punctualities, user_stop_codes, passage_sequence_numbers, | ||
| 63 | vehicle_numbers, block_codes, wheelchair_accessibles, | ||
| 64 | number_of_coaches, rd_ys, rd_xs, | ||
| 65 | distance_since_last_user_stops }; | ||
| 66 | return arrow::Result(arrow::Table::Make(schema, columns)); | ||
| 67 | } | ||
| 68 | |||
| 69 | arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename) { | ||
| 70 | std::shared_ptr<parquet::WriterProperties> props = parquet::WriterProperties::Builder() | ||
| 71 | .compression(arrow::Compression::ZSTD) | ||
| 72 | ->created_by("oeuf-libtmi8") | ||
| 73 | ->version(parquet::ParquetVersion::PARQUET_2_6) | ||
| 74 | ->data_page_version(parquet::ParquetDataPageVersion::V2) | ||
| 75 | ->max_row_group_length(MAX_PARQUET_CHUNK) | ||
| 76 | ->build(); | ||
| 77 | |||
| 78 | std::shared_ptr<parquet::ArrowWriterProperties> arrow_props = parquet::ArrowWriterProperties::Builder() | ||
| 79 | .store_schema()->build(); | ||
| 80 | |||
| 81 | std::shared_ptr<arrow::io::FileOutputStream> out_file; | ||
| 82 | std::string filename_str = filename; | ||
| 83 | ARROW_ASSIGN_OR_RAISE(out_file, arrow::io::FileOutputStream::Open(filename_str + ".part")); | ||
| 84 | |||
| 85 | ARROW_ASSIGN_OR_RAISE(auto writer, | ||
| 86 | parquet::arrow::FileWriter::Open(*rbr.schema(), arrow::default_memory_pool(), out_file, props, arrow_props)); | ||
| 87 | for (const auto &batchr : rbr) { | ||
| 88 | ARROW_ASSIGN_OR_RAISE(auto batch, batchr); | ||
| 89 | ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); | ||
| 90 | } | ||
| 91 | ARROW_RETURN_NOT_OK(writer->Close()); | ||
| 92 | ARROW_RETURN_NOT_OK(out_file->Close()); | ||
| 93 | |||
| 94 | std::filesystem::rename(filename_str + ".part", filename); | ||
| 95 | |||
| 96 | return arrow::Status::OK(); | ||
| 97 | } | ||
| 98 | |||
| 99 | arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename) { | ||
| 100 | auto tbr = arrow::TableBatchReader(table); | ||
| 101 | return writeArrowRecordsAsParquetFile(tbr, filename); | ||
| 102 | } | ||
diff --git a/module/default.nix b/module/default.nix new file mode 100644 index 0000000..c891ceb --- /dev/null +++ b/module/default.nix | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | flake: { lib, config, pkgs, ... }: | ||
| 2 | with lib; | ||
| 3 | let | ||
| 4 | inherit (flake.packages.${pkgs.stdenv.hostPlatform.system}) oeuf-recvkv6; | ||
| 5 | inherit (flake.packages.${pkgs.stdenv.hostPlatform.system}) oeuf-archiver; | ||
| 6 | |||
| 7 | cfg = config.services.oeuf-recvkv6; | ||
| 8 | archiverCfg = config.services.oeuf-archiver; | ||
| 9 | in | ||
| 10 | { | ||
# Options for the oeuf-recvkv6 service.
options.services.oeuf-recvkv6 = {
  enable = mkEnableOption "oeuf-recvkv6";
  ndovProduction = mkEnableOption "usage of the NDOV Loket production ZeroMQ server";
  metricsAddr = mkOption {
    type = types.str;
    description = "Value passed to oeuf-recvkv6 through the METRICS_ADDR environment variable.";
  };
};
| 18 | |||
# Options for the oeuf-archiver timer and service.
options.services.oeuf-archiver = with types; {
  enable = mkEnableOption "oeuf-archiver";
  s3 = mkOption {
    description = "Settings of the S3 bucket used by oeuf-archiver.";
    type = submodule {
      options = {
        accessKeyIDFile = mkOption {
          type = str;
          description = "Path of a file whose contents are exported as S3_ACCESS_KEY_ID.";
        };
        secretAccessKeyFile = mkOption {
          type = str;
          description = "Path of a file whose contents are exported as S3_SECRET_ACCESS_KEY.";
        };
        provider = mkOption {
          type = str;
          description = "Value of the S3_PROVIDER environment variable.";
        };
        region = mkOption {
          type = str;
          description = "Value of the S3_REGION environment variable.";
        };
        endpoint = mkOption {
          type = str;
          description = "Value of the S3_ENDPOINT environment variable.";
        };
        bucket = mkOption {
          type = str;
          description = "Value of the S3_BUCKET environment variable.";
        };
      };
    };
  };
  prometheusPushURL = mkOption {
    type = str;
    description = "Value of the PROMETHEUS_PUSH_URL environment variable.";
  };
  supplementaryServiceGroups = mkOption {
    type = listOf str;
    # An empty default keeps existing configurations valid while no longer
    # forcing every user to set this option.
    default = [ ];
    description = "Groups set as SupplementaryGroups of the oeuf-archiver service.";
  };
};
| 52 | |||
# System configuration: a shared `oeuf` user/group whenever either service is
# enabled, plus the units of each enabled service.
config = mkIf (cfg.enable || archiverCfg.enable) (mkMerge [
  {
    users.users.oeuf = {
      description = "oeuf service user";
      isSystemUser = true;
      group = "oeuf";
    };

    users.groups.oeuf = { };
  }
  (mkIf cfg.enable {
    systemd.services.oeuf-recvkv6 = {
      # `after` only orders the unit; `wants` is what actually pulls
      # network-online.target into the transaction.
      wants = [ "network-online.target" ];
      after = [ "network-online.target" ];
      wantedBy = [ "multi-user.target" ];
      environment = {
        METRICS_ADDR = cfg.metricsAddr;
        NDOV_PRODUCTION = lib.boolToString cfg.ndovProduction;
      };
      serviceConfig = {
        User = config.users.users.oeuf.name;
        Group = config.users.users.oeuf.group;
        Restart = "always";
        StateDirectory = "oeuf";
        WorkingDirectory = "/var/lib/oeuf";
        ExecStart = "${lib.getBin oeuf-recvkv6}/bin/oeuf-recvkv6";
      };
    };
  })
  (mkIf archiverCfg.enable {
    systemd.timers.oeuf-archiver = {
      wantedBy = [ "timers.target" ];
      partOf = [ "oeuf-archiver.service" ];
      timerConfig = {
        OnBootSec = "5m";
        OnUnitActiveSec = "5m";
        Unit = "oeuf-archiver.service";
      };
    };

    systemd.services.oeuf-archiver = {
      # See the note on oeuf-recvkv6: ordering alone does not start the target.
      wants = [ "network-online.target" ];
      after = [ "network-online.target" ];
      environment = {
        S3_PROVIDER = archiverCfg.s3.provider;
        S3_REGION = archiverCfg.s3.region;
        S3_ENDPOINT = archiverCfg.s3.endpoint;
        S3_BUCKET = archiverCfg.s3.bucket;
        PROMETHEUS_PUSH_URL = archiverCfg.prometheusPushURL;
      };
      # The credentials are read from files at start-up; the paths are
      # shell-escaped so that spaces or special characters cannot break the
      # script.
      script = ''
        export S3_ACCESS_KEY_ID="$(cat ${lib.escapeShellArg archiverCfg.s3.accessKeyIDFile})"
        export S3_SECRET_ACCESS_KEY="$(cat ${lib.escapeShellArg archiverCfg.s3.secretAccessKeyFile})"
        ${lib.getBin oeuf-archiver}/bin/oeuf-archiver
      '';
      serviceConfig = {
        Type = "oneshot";
        User = config.users.users.oeuf.name;
        Group = config.users.users.oeuf.group;
        SupplementaryGroups = archiverCfg.supplementaryServiceGroups;
        StateDirectory = "oeuf";
        WorkingDirectory = "/var/lib/oeuf";
        AmbientCapabilities = "CAP_NET_BIND_SERVICE";
      };
    };
  })
]);
| 118 | } | ||
diff --git a/script/archiver/default.nix b/script/archiver/default.nix new file mode 100644 index 0000000..4a464e0 --- /dev/null +++ b/script/archiver/default.nix | |||
| @@ -0,0 +1,15 @@ | |||
# Packages oeuf-archiver.sh as `oeuf-archiver`, wrapped so that the tools it
# invokes are found on PATH.
{ pkgs ? import <nixpkgs> { } }:

let
  inherit (pkgs) lib stdenv makeWrapper;

  # Programs the script calls at run time.
  runtimeDeps = with pkgs; [ bash rclone oeuf-bundleparquet ];
in
stdenv.mkDerivation {
  name = "oeuf-archiver";
  src = ./.;

  buildInputs = runtimeDeps;
  nativeBuildInputs = [ makeWrapper ];
  installPhase = ''
    mkdir -p $out/bin
    cp oeuf-archiver.sh $out/bin/oeuf-archiver
    wrapProgram $out/bin/oeuf-archiver \
      --prefix PATH : ${lib.makeBinPath runtimeDeps}
  '';
}
diff --git a/script/archiver/oeuf-archiver.sh b/script/archiver/oeuf-archiver.sh new file mode 100755 index 0000000..478d4d9 --- /dev/null +++ b/script/archiver/oeuf-archiver.sh | |||
| @@ -0,0 +1,31 @@ | |||
#!/usr/bin/env bash
#
# Runs oeuf-bundleparquet and then moves every ./merged/oeuf-*.parquet file,
# together with its .meta.json companion, to the configured S3 bucket.
#
# Expects S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER, S3_REGION,
# S3_ENDPOINT and S3_BUCKET in the environment.

set -eux
set -o pipefail

# This option prevents the loop from running
# if it does not match any files
shopt -s nullglob

oeuf-bundleparquet

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
set +x # Don't print the secret access key to the log
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"
set -x

# Flags shared by every rclone invocation below.
rclone_s3_flags=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)

for file in ./merged/oeuf-*.parquet; do
  # Move the metadata file first; the Parquet file itself is only moved if
  # that succeeded. All expansions are quoted so that unusual characters in
  # a file or bucket name cannot split into several arguments.
  rclone move "${rclone_s3_flags[@]}" "$file.meta.json" ":s3:$S3_BUCKET" \
    && rclone move "${rclone_s3_flags[@]}" "$file" ":s3:$S3_BUCKET"
done
diff --git a/script/synckv6/default.nix b/script/synckv6/default.nix new file mode 100644 index 0000000..95a9331 --- /dev/null +++ b/script/synckv6/default.nix | |||
| @@ -0,0 +1,15 @@ | |||
# Packages oeuf-synckv6.sh as `oeuf-synckv6`, wrapped so that the tools it
# invokes are found on PATH.
{ pkgs ? import <nixpkgs> { } }:

let
  inherit (pkgs) lib stdenv makeWrapper;

  # Programs the script calls at run time.
  runtimeDeps = with pkgs; [ bash rclone ];
in
stdenv.mkDerivation {
  name = "oeuf-synckv6";
  src = ./.;

  buildInputs = runtimeDeps;
  nativeBuildInputs = [ makeWrapper ];
  installPhase = ''
    mkdir -p $out/bin
    cp oeuf-synckv6.sh $out/bin/oeuf-synckv6
    wrapProgram $out/bin/oeuf-synckv6 \
      --prefix PATH : ${lib.makeBinPath runtimeDeps}
  '';
}
diff --git a/script/synckv6/oeuf-synckv6.sh b/script/synckv6/oeuf-synckv6.sh new file mode 100755 index 0000000..6b24347 --- /dev/null +++ b/script/synckv6/oeuf-synckv6.sh | |||
| @@ -0,0 +1,43 @@ | |||
#!/usr/bin/env bash
#
# Copies into the current directory every file in the S3 bucket for which a
# matching "<file>.meta.json" object is also present in the bucket.
#
# Expects S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_PROVIDER, S3_REGION,
# S3_ENDPOINT and S3_BUCKET in the environment.

set -eu
set -o pipefail

export AWS_ACCESS_KEY_ID="$S3_ACCESS_KEY_ID"
export AWS_SECRET_ACCESS_KEY="$S3_SECRET_ACCESS_KEY"

set +x

# Flags shared by every rclone invocation below.
rclone_s3_flags=(
  --s3-provider "$S3_PROVIDER"
  --s3-region "$S3_REGION"
  --s3-endpoint "$S3_ENDPOINT"
  --s3-env-auth
)

all_files=()
declare -A metafiles

# Each `rclone ls` line is "<size> <path>"; the size is not needed. The path
# ends up in `filename` in full because it is the last variable given to read.
while IFS=' ' read -r _size filename; do
  if [[ "$filename" == *.parquet.meta.json ]]; then
    metafiles["$filename"]=1
  else
    # Quoted so a name containing spaces or glob characters stays one element.
    all_files+=("$filename")
  fi
done < <(rclone ls "${rclone_s3_flags[@]}" ":s3:$S3_BUCKET")

files=()
for filename in "${all_files[@]}"; do
  meta_key="$filename.meta.json"
  # Every stored value is 1, so a non-empty lookup means the key exists.
  if [[ -n "${metafiles[$meta_key]:-}" ]]; then
    files+=("$filename")
  fi
done

echo "Found ${#files[@]} relevant KV6 Parquet files"
echo "Synching this directory with these files"

printf "%s\n" "${files[@]}" | rclone copy \
  "${rclone_s3_flags[@]}" \
  --progress \
  --files-from - \
  ":s3:$S3_BUCKET" ./
diff --git a/src/augmentkv6/.envrc b/src/augmentkv6/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/augmentkv6/.envrc | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | source_env ../../ | ||
| 2 | export DEVMODE=1 | ||
diff --git a/src/augmentkv6/Makefile b/src/augmentkv6/Makefile new file mode 100644 index 0000000..cebb291 --- /dev/null +++ b/src/augmentkv6/Makefile | |||
| @@ -0,0 +1,21 @@ | |||
# Taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
#
# -Werror is only added when DEVMODE is set to a non-empty value.
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
  -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
  -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
  -D_GLIBCXX_ASSERTIONS \
  -fstrict-flex-arrays=3 \
  -fstack-clash-protection -fstack-protector-strong
LDFLAGS=-larrow -larrow_acero -larrow_dataset -lparquet -ltmi8 -Wl,-z,defs \
  -Wl,-z,nodlopen -Wl,-z,noexecstack \
  -Wl,-z,relro -Wl,-z,now

augmentkv6: main.cpp
	$(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

# -f: do not fail when the binary has not been built yet.
.PHONY: clean
clean:
	rm -f augmentkv6
diff --git a/src/augmentkv6/main.cpp b/src/augmentkv6/main.cpp new file mode 100644 index 0000000..81a54d3 --- /dev/null +++ b/src/augmentkv6/main.cpp | |||
| @@ -0,0 +1,510 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <chrono> | ||
| 4 | #include <cstdio> | ||
| 5 | #include <deque> | ||
| 6 | #include <filesystem> | ||
| 7 | #include <format> | ||
| 8 | #include <fstream> | ||
| 9 | #include <iostream> | ||
| 10 | #include <string> | ||
| 11 | #include <string_view> | ||
| 12 | #include <vector> | ||
| 13 | |||
| 14 | #include <arrow/acero/exec_plan.h> | ||
| 15 | #include <arrow/api.h> | ||
| 16 | #include <arrow/compute/api.h> | ||
| 17 | #include <arrow/dataset/api.h> | ||
| 18 | #include <arrow/filesystem/api.h> | ||
| 19 | #include <arrow/io/api.h> | ||
| 20 | #include <parquet/arrow/reader.h> | ||
| 21 | |||
| 22 | #include <tmi8/kv1_index.hpp> | ||
| 23 | #include <tmi8/kv1_lexer.hpp> | ||
| 24 | #include <tmi8/kv1_parser.hpp> | ||
| 25 | #include <tmi8/kv1_types.hpp> | ||
| 26 | #include <tmi8/kv6_parquet.hpp> | ||
| 27 | |||
| 28 | using namespace std::string_view_literals; | ||
| 29 | |||
| 30 | namespace ac = arrow::acero; | ||
| 31 | namespace ds = arrow::dataset; | ||
| 32 | namespace cp = arrow::compute; | ||
| 33 | using namespace arrow; | ||
| 34 | |||
// Clock used for timing measurements: std::chrono::high_resolution_clock when
// that clock is steady, std::chrono::steady_clock otherwise.
using TimingClock = std::conditional<
  std::chrono::high_resolution_clock::is_steady,
  std::chrono::high_resolution_clock,
  std::chrono::steady_clock>::type;
| 39 | |||
// Reads standard input until end-of-file and returns everything read as one
// string. Progress messages go to standard error. If reading fails, an error
// message is printed and the process exits with status 1.
std::string readKv1() {
  fputs("Reading KV1 from standard input\n", stderr);

  char buf[4096];
  std::string data;
  while (!feof(stdin) && !ferror(stdin)) {
    size_t read = fread(buf, sizeof(char), sizeof(buf), stdin);
    data.append(buf, read);
  }
  if (ferror(stdin)) {
    fputs("Error when reading from stdin\n", stderr);
    exit(1);
  }
  // %zu is the conversion specifier for size_t.
  fprintf(stderr, "Read %zu bytes\n", data.size());

  return data;
}
| 57 | |||
| 58 | std::vector<Kv1Token> lex() { | ||
| 59 | std::string data = readKv1(); | ||
| 60 | |||
| 61 | auto start = TimingClock::now(); | ||
| 62 | Kv1Lexer lexer(data); | ||
| 63 | lexer.lex(); | ||
| 64 | auto end = TimingClock::now(); | ||
| 65 | |||
| 66 | std::chrono::duration<double> elapsed{end - start}; | ||
| 67 | double bytes = static_cast<double>(data.size()) / 1'000'000; | ||
| 68 | double speed = bytes / elapsed.count(); | ||
| 69 | |||
| 70 | if (!lexer.errors.empty()) { | ||
| 71 | fputs("Lexer reported errors:\n", stderr); | ||
| 72 | for (const auto &error : lexer.errors) | ||
| 73 | fprintf(stderr, "- %s\n", error.c_str()); | ||
| 74 | exit(1); | ||
| 75 | } | ||
| 76 | |||
| 77 | fprintf(stderr, "Got %lu tokens\n", lexer.tokens.size()); | ||
| 78 | fprintf(stderr, "Duration: %f s\n", elapsed.count()); | ||
| 79 | fprintf(stderr, "Speed: %f MB/s\n", speed); | ||
| 80 | |||
| 81 | return std::move(lexer.tokens); | ||
| 82 | } | ||
| 83 | |||
| 84 | bool parse(Kv1Records &into) { | ||
| 85 | std::vector<Kv1Token> tokens = lex(); | ||
| 86 | |||
| 87 | Kv1Parser parser(tokens, into); | ||
| 88 | parser.parse(); | ||
| 89 | |||
| 90 | bool ok = true; | ||
| 91 | if (!parser.gerrors.empty()) { | ||
| 92 | ok = false; | ||
| 93 | fputs("Parser reported errors:\n", stderr); | ||
| 94 | for (const auto &error : parser.gerrors) | ||
| 95 | fprintf(stderr, "- %s\n", error.c_str()); | ||
| 96 | } | ||
| 97 | if (!parser.warns.empty()) { | ||
| 98 | fputs("Parser reported warnings:\n", stderr); | ||
| 99 | for (const auto &warn : parser.warns) | ||
| 100 | fprintf(stderr, "- %s\n", warn.c_str()); | ||
| 101 | } | ||
| 102 | |||
| 103 | fprintf(stderr, "Parsed %lu records\n", into.size()); | ||
| 104 | |||
| 105 | return ok; | ||
| 106 | } | ||
| 107 | |||
| 108 | void printParsedRecords(const Kv1Records &records) { | ||
| 109 | fputs("Parsed records:\n", stderr); | ||
| 110 | fprintf(stderr, " organizational_units: %lu\n", records.organizational_units.size()); | ||
| 111 | fprintf(stderr, " higher_organizational_units: %lu\n", records.higher_organizational_units.size()); | ||
| 112 | fprintf(stderr, " user_stop_points: %lu\n", records.user_stop_points.size()); | ||
| 113 | fprintf(stderr, " user_stop_areas: %lu\n", records.user_stop_areas.size()); | ||
| 114 | fprintf(stderr, " timing_links: %lu\n", records.timing_links.size()); | ||
| 115 | fprintf(stderr, " links: %lu\n", records.links.size()); | ||
| 116 | fprintf(stderr, " lines: %lu\n", records.lines.size()); | ||
| 117 | fprintf(stderr, " destinations: %lu\n", records.destinations.size()); | ||
| 118 | fprintf(stderr, " journey_patterns: %lu\n", records.journey_patterns.size()); | ||
| 119 | fprintf(stderr, " concession_financer_relations: %lu\n", records.concession_financer_relations.size()); | ||
| 120 | fprintf(stderr, " concession_areas: %lu\n", records.concession_areas.size()); | ||
| 121 | fprintf(stderr, " financers: %lu\n", records.financers.size()); | ||
| 122 | fprintf(stderr, " journey_pattern_timing_links: %lu\n", records.journey_pattern_timing_links.size()); | ||
| 123 | fprintf(stderr, " points: %lu\n", records.points.size()); | ||
| 124 | fprintf(stderr, " point_on_links: %lu\n", records.point_on_links.size()); | ||
| 125 | fprintf(stderr, " icons: %lu\n", records.icons.size()); | ||
| 126 | fprintf(stderr, " notices: %lu\n", records.notices.size()); | ||
| 127 | fprintf(stderr, " notice_assignments: %lu\n", records.notice_assignments.size()); | ||
| 128 | fprintf(stderr, " time_demand_groups: %lu\n", records.time_demand_groups.size()); | ||
| 129 | fprintf(stderr, " time_demand_group_run_times: %lu\n", records.time_demand_group_run_times.size()); | ||
| 130 | fprintf(stderr, " period_groups: %lu\n", records.period_groups.size()); | ||
| 131 | fprintf(stderr, " specific_days: %lu\n", records.specific_days.size()); | ||
| 132 | fprintf(stderr, " timetable_versions: %lu\n", records.timetable_versions.size()); | ||
| 133 | fprintf(stderr, " public_journeys: %lu\n", records.public_journeys.size()); | ||
| 134 | fprintf(stderr, " period_group_validities: %lu\n", records.period_group_validities.size()); | ||
| 135 | fprintf(stderr, " exceptional_operating_days: %lu\n", records.exceptional_operating_days.size()); | ||
| 136 | fprintf(stderr, " schedule_versions: %lu\n", records.schedule_versions.size()); | ||
| 137 | fprintf(stderr, " public_journey_passing_times: %lu\n", records.public_journey_passing_times.size()); | ||
| 138 | fprintf(stderr, " operating_days: %lu\n", records.operating_days.size()); | ||
| 139 | } | ||
| 140 | |||
| 141 | void printIndexSize(const Kv1Index &index) { | ||
| 142 | fputs("Index size:\n", stderr); | ||
| 143 | fprintf(stderr, " organizational_units: %lu\n", index.organizational_units.size()); | ||
| 144 | fprintf(stderr, " user_stop_points: %lu\n", index.user_stop_points.size()); | ||
| 145 | fprintf(stderr, " user_stop_areas: %lu\n", index.user_stop_areas.size()); | ||
| 146 | fprintf(stderr, " timing_links: %lu\n", index.timing_links.size()); | ||
| 147 | fprintf(stderr, " links: %lu\n", index.links.size()); | ||
| 148 | fprintf(stderr, " lines: %lu\n", index.lines.size()); | ||
| 149 | fprintf(stderr, " destinations: %lu\n", index.destinations.size()); | ||
| 150 | fprintf(stderr, " journey_patterns: %lu\n", index.journey_patterns.size()); | ||
| 151 | fprintf(stderr, " concession_financer_relations: %lu\n", index.concession_financer_relations.size()); | ||
| 152 | fprintf(stderr, " concession_areas: %lu\n", index.concession_areas.size()); | ||
| 153 | fprintf(stderr, " financers: %lu\n", index.financers.size()); | ||
| 154 | fprintf(stderr, " journey_pattern_timing_links: %lu\n", index.journey_pattern_timing_links.size()); | ||
| 155 | fprintf(stderr, " points: %lu\n", index.points.size()); | ||
| 156 | fprintf(stderr, " point_on_links: %lu\n", index.point_on_links.size()); | ||
| 157 | fprintf(stderr, " icons: %lu\n", index.icons.size()); | ||
| 158 | fprintf(stderr, " notices: %lu\n", index.notices.size()); | ||
| 159 | fprintf(stderr, " time_demand_groups: %lu\n", index.time_demand_groups.size()); | ||
| 160 | fprintf(stderr, " time_demand_group_run_times: %lu\n", index.time_demand_group_run_times.size()); | ||
| 161 | fprintf(stderr, " period_groups: %lu\n", index.period_groups.size()); | ||
| 162 | fprintf(stderr, " specific_days: %lu\n", index.specific_days.size()); | ||
| 163 | fprintf(stderr, " timetable_versions: %lu\n", index.timetable_versions.size()); | ||
| 164 | fprintf(stderr, " public_journeys: %lu\n", index.public_journeys.size()); | ||
| 165 | fprintf(stderr, " period_group_validities: %lu\n", index.period_group_validities.size()); | ||
| 166 | fprintf(stderr, " exceptional_operating_days: %lu\n", index.exceptional_operating_days.size()); | ||
| 167 | fprintf(stderr, " schedule_versions: %lu\n", index.schedule_versions.size()); | ||
| 168 | fprintf(stderr, " public_journey_passing_times: %lu\n", index.public_journey_passing_times.size()); | ||
| 169 | fprintf(stderr, " operating_days: %lu\n", index.operating_days.size()); | ||
| 170 | } | ||
| 171 | |||
| 172 | struct BasicJourneyKey { | ||
| 173 | std::string data_owner_code; | ||
| 174 | std::string line_planning_number; | ||
| 175 | int journey_number; | ||
| 176 | |||
| 177 | auto operator<=>(const BasicJourneyKey &) const = default; | ||
| 178 | }; | ||
| 179 | |||
| 180 | size_t hash_value(const BasicJourneyKey &k) { | ||
| 181 | size_t seed = 0; | ||
| 182 | |||
| 183 | boost::hash_combine(seed, k.data_owner_code); | ||
| 184 | boost::hash_combine(seed, k.line_planning_number); | ||
| 185 | boost::hash_combine(seed, k.journey_number); | ||
| 186 | |||
| 187 | return seed; | ||
| 188 | } | ||
| 189 | |||
| 190 | using BasicJourneyKeySet = std::unordered_set<BasicJourneyKey, boost::hash<BasicJourneyKey>>; | ||
| 191 | |||
| 192 | arrow::Result<BasicJourneyKeySet> basicJourneys(std::shared_ptr<arrow::Table> table) { | ||
| 193 | ac::TableSourceNodeOptions table_source_node_options(table); | ||
| 194 | ac::Declaration table_source("table_source", std::move(table_source_node_options)); | ||
| 195 | auto aggregate_options = ac::AggregateNodeOptions{ | ||
| 196 | /* .aggregates = */ {}, | ||
| 197 | /* .keys = */ { "data_owner_code", "line_planning_number", "journey_number" }, | ||
| 198 | }; | ||
| 199 | ac::Declaration aggregate("aggregate", { std::move(table_source) }, std::move(aggregate_options)); | ||
| 200 | |||
| 201 | std::shared_ptr<arrow::Table> result; | ||
| 202 | ARROW_ASSIGN_OR_RAISE(result, ac::DeclarationToTable(std::move(aggregate))); | ||
| 203 | |||
| 204 | std::shared_ptr<arrow::ChunkedArray> data_owner_codes = result->GetColumnByName("data_owner_code"); | ||
| 205 | std::shared_ptr<arrow::ChunkedArray> line_planning_numbers = result->GetColumnByName("line_planning_number"); | ||
| 206 | std::shared_ptr<arrow::ChunkedArray> journey_numbers = result->GetColumnByName("journey_number"); | ||
| 207 | |||
| 208 | int i_data_owner_codes_chunk = 0; | ||
| 209 | int i_journey_numbers_chunk = 0; | ||
| 210 | int i_line_planning_numbers_chunk = 0; | ||
| 211 | int i_in_data_owner_codes_chunk = 0; | ||
| 212 | int i_in_journey_numbers_chunk = 0; | ||
| 213 | int i_in_line_planning_numbers_chunk = 0; | ||
| 214 | |||
| 215 | BasicJourneyKeySet journeys; | ||
| 216 | |||
| 217 | for (int64_t i = 0; i < result->num_rows(); i++) { | ||
| 218 | auto data_owner_codes_chunk = std::static_pointer_cast<arrow::StringArray>(data_owner_codes->chunk(i_data_owner_codes_chunk)); | ||
| 219 | auto line_planning_numbers_chunk = std::static_pointer_cast<arrow::StringArray>(line_planning_numbers->chunk(i_line_planning_numbers_chunk)); | ||
| 220 | auto journey_numbers_chunk = std::static_pointer_cast<arrow::UInt32Array>(journey_numbers->chunk(i_journey_numbers_chunk)); | ||
| 221 | |||
| 222 | std::string_view data_owner_code = data_owner_codes_chunk->Value(i_in_data_owner_codes_chunk); | ||
| 223 | std::string_view line_planning_number = line_planning_numbers_chunk->Value(i_in_line_planning_numbers_chunk); | ||
| 224 | uint32_t journey_number = journey_numbers_chunk->Value(i_in_journey_numbers_chunk); | ||
| 225 | |||
| 226 | journeys.emplace( | ||
| 227 | std::string(data_owner_code), | ||
| 228 | std::string(line_planning_number), | ||
| 229 | journey_number | ||
| 230 | ); | ||
| 231 | |||
| 232 | i_in_data_owner_codes_chunk++; | ||
| 233 | i_in_line_planning_numbers_chunk++; | ||
| 234 | i_in_journey_numbers_chunk++; | ||
| 235 | if (i_in_data_owner_codes_chunk >= data_owner_codes_chunk->length()) { | ||
| 236 | i_data_owner_codes_chunk++; | ||
| 237 | i_in_data_owner_codes_chunk = 0; | ||
| 238 | } | ||
| 239 | if (i_in_line_planning_numbers_chunk >= line_planning_numbers_chunk->length()) { | ||
| 240 | i_line_planning_numbers_chunk++; | ||
| 241 | i_in_line_planning_numbers_chunk = 0; | ||
| 242 | } | ||
| 243 | if (i_in_journey_numbers_chunk >= journey_numbers_chunk->length()) { | ||
| 244 | i_journey_numbers_chunk++; | ||
| 245 | i_in_journey_numbers_chunk = 0; | ||
| 246 | } | ||
| 247 | } | ||
| 248 | |||
| 249 | return journeys; | ||
| 250 | } | ||
| 251 | |||
| 252 | struct DistanceKey { | ||
| 253 | BasicJourneyKey journey; | ||
| 254 | std::string last_passed_user_stop_code; | ||
| 255 | |||
| 256 | auto operator<=>(const DistanceKey &) const = default; | ||
| 257 | }; | ||
| 258 | |||
| 259 | size_t hash_value(const DistanceKey &k) { | ||
| 260 | size_t seed = 0; | ||
| 261 | |||
| 262 | boost::hash_combine(seed, k.journey); | ||
| 263 | boost::hash_combine(seed, k.last_passed_user_stop_code); | ||
| 264 | |||
| 265 | return seed; | ||
| 266 | } | ||
| 267 | |||
| 268 | struct DistanceTimingLink { | ||
| 269 | const Kv1JourneyPatternTimingLink *jopatili; | ||
| 270 | double distance_since_start_of_journey = 0; // at the start of the link | ||
| 271 | }; | ||
| 272 | |||
| 273 | using DistanceMap = std::unordered_map<DistanceKey, double, boost::hash<DistanceKey>>; | ||
| 274 | |||
| 275 | // Returns a map, where | ||
| 276 | // DataOwnerCode + LinePlanningNumber + JourneyNumber + UserStopCode -> | ||
| 277 | // Distance of Last User Stop | ||
| 278 | DistanceMap makeDistanceMap(Kv1Records &records, Kv1Index &index, BasicJourneyKeySet &journeys) { | ||
| 279 | std::unordered_map< | ||
| 280 | Kv1JourneyPattern::Key, | ||
| 281 | std::vector<DistanceTimingLink>, | ||
| 282 | boost::hash<Kv1JourneyPattern::Key>> jopatili_index; | ||
| 283 | std::unordered_map< | ||
| 284 | BasicJourneyKey, | ||
| 285 | const Kv1PublicJourney *, | ||
| 286 | boost::hash<BasicJourneyKey>> journey_index; | ||
| 287 | for (size_t i = 0; i < records.public_journeys.size(); i++) { | ||
| 288 | const Kv1PublicJourney *pujo = &records.public_journeys[i]; | ||
| 289 | |||
| 290 | BasicJourneyKey journey_key( | ||
| 291 | pujo->key.data_owner_code, | ||
| 292 | pujo->key.line_planning_number, | ||
| 293 | pujo->key.journey_number); | ||
| 294 | |||
| 295 | if (journeys.contains(journey_key)) { | ||
| 296 | journey_index[journey_key] = pujo; | ||
| 297 | |||
| 298 | Kv1JourneyPattern::Key jopa_key( | ||
| 299 | pujo->key.data_owner_code, | ||
| 300 | pujo->key.line_planning_number, | ||
| 301 | pujo->journey_pattern_code); | ||
| 302 | jopatili_index[jopa_key] = {}; | ||
| 303 | } | ||
| 304 | } | ||
| 305 | |||
| 306 | for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { | ||
| 307 | const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; | ||
| 308 | Kv1JourneyPattern::Key jopa_key( | ||
| 309 | jopatili->key.data_owner_code, | ||
| 310 | jopatili->key.line_planning_number, | ||
| 311 | jopatili->key.journey_pattern_code); | ||
| 312 | if (jopatili_index.contains(jopa_key)) { | ||
| 313 | jopatili_index[jopa_key].push_back(DistanceTimingLink(jopatili, 0)); | ||
| 314 | } | ||
| 315 | } | ||
| 316 | |||
| 317 | for (auto &[jopa_key, timing_links] : jopatili_index) { | ||
| 318 | std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) { | ||
| 319 | return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order; | ||
| 320 | }); | ||
| 321 | |||
| 322 | const std::string transport_type = index.journey_patterns[jopa_key]->p_line->transport_type; | ||
| 323 | |||
| 324 | for (size_t i = 1; i < timing_links.size(); i++) { | ||
| 325 | DistanceTimingLink *timing_link = &timing_links[i]; | ||
| 326 | DistanceTimingLink *prev_timing_link = &timing_links[i - 1]; | ||
| 327 | |||
| 328 | const Kv1Link::Key link_key( | ||
| 329 | prev_timing_link->jopatili->key.data_owner_code, | ||
| 330 | prev_timing_link->jopatili->user_stop_code_begin, | ||
| 331 | prev_timing_link->jopatili->user_stop_code_end, | ||
| 332 | transport_type); | ||
| 333 | double link_distance = index.links[link_key]->distance; | ||
| 334 | timing_link->distance_since_start_of_journey = | ||
| 335 | prev_timing_link->distance_since_start_of_journey + link_distance; | ||
| 336 | } | ||
| 337 | } | ||
| 338 | |||
| 339 | // DataOwnerCode + LinePlanningNumber + JourneyNumber + UserStopCode -> | ||
| 340 | // Distance of Last User Stop | ||
| 341 | DistanceMap distance_map; | ||
| 342 | |||
| 343 | for (const auto &journey : journeys) { | ||
| 344 | const Kv1PublicJourney *pujo = journey_index[journey]; | ||
| 345 | if (pujo == nullptr) { | ||
| 346 | std::cerr << "Warning: No PUJO found for [" << journey.data_owner_code << "] " | ||
| 347 | << journey.line_planning_number << "/" << journey.journey_number << std::endl; | ||
| 348 | continue; | ||
| 349 | } | ||
| 350 | Kv1JourneyPattern::Key jopa_key( | ||
| 351 | pujo->key.data_owner_code, | ||
| 352 | pujo->key.line_planning_number, | ||
| 353 | pujo->journey_pattern_code); | ||
| 354 | for (const auto &timing_link : jopatili_index[jopa_key]) { | ||
| 355 | DistanceKey key(journey, timing_link.jopatili->user_stop_code_begin); | ||
| 356 | distance_map[key] = timing_link.distance_since_start_of_journey; | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | return distance_map; | ||
| 361 | } | ||
| 362 | |||
| 363 | arrow::Result<std::shared_ptr<arrow::Table>> augment( | ||
| 364 | std::shared_ptr<arrow::Table> table, | ||
| 365 | const DistanceMap &distance_map | ||
| 366 | ) { | ||
| 367 | for (int i = 0; i < table->num_columns(); i++) { | ||
| 368 | if (table->column(i)->num_chunks() > 1) { | ||
| 369 | std::stringstream ss; | ||
| 370 | ss << "Error: Expected column " << i | ||
| 371 | << " (" << table->ColumnNames()[i] << ") to have 1 chunk, got " | ||
| 372 | << table->column(i)->num_chunks(); | ||
| 373 | return arrow::Status::Invalid(ss.str()); | ||
| 374 | } | ||
| 375 | } | ||
| 376 | |||
| 377 | auto data_owner_codes = std::static_pointer_cast<arrow::StringArray>(table->GetColumnByName("data_owner_code")->chunk(0)); | ||
| 378 | auto line_planning_numbers = std::static_pointer_cast<arrow::StringArray>(table->GetColumnByName("line_planning_number")->chunk(0)); | ||
| 379 | auto journey_numbers = std::static_pointer_cast<arrow::UInt32Array>(table->GetColumnByName("journey_number")->chunk(0)); | ||
| 380 | auto user_stop_codes = std::static_pointer_cast<arrow::StringArray>(table->GetColumnByName("user_stop_code")->chunk(0)); | ||
| 381 | auto distance_since_last_user_stops = std::static_pointer_cast<arrow::UInt32Array>(table->GetColumnByName("distance_since_last_user_stop")->chunk(0)); | ||
| 382 | auto timestamps = std::static_pointer_cast<arrow::TimestampArray>(table->GetColumnByName("timestamp")->chunk(0)); | ||
| 383 | |||
| 384 | auto timestamps_type = table->schema()->GetFieldByName("timestamp")->type(); | ||
| 385 | if (timestamps_type->id() != arrow::Type::TIMESTAMP) | ||
| 386 | return arrow::Status::Invalid("Field 'timestamp' does not have expected type TIMESTAMP"); | ||
| 387 | if (std::static_pointer_cast<arrow::TimestampType>(timestamps_type)->unit() != arrow::TimeUnit::MILLI) | ||
| 388 | return arrow::Status::Invalid("Field 'timestamp' does not have unit MILLI"); | ||
| 389 | if (!std::static_pointer_cast<arrow::TimestampType>(timestamps_type)->timezone().empty()) | ||
| 390 | return arrow::Status::Invalid("Field 'timestamp' should have empty time zone name"); | ||
| 391 | |||
| 392 | std::shared_ptr<arrow::Field> field_distance_since_start_of_journey = | ||
| 393 | arrow::field("distance_since_start_of_journey", arrow::uint32()); | ||
| 394 | std::shared_ptr<arrow::Field> field_day_of_week = | ||
| 395 | arrow::field("timestamp_iso_day_of_week", arrow::int64()); | ||
| 396 | std::shared_ptr<arrow::Field> field_date = | ||
| 397 | arrow::field("timestamp_date", arrow::date32()); | ||
| 398 | std::shared_ptr<arrow::Field> field_local_time = | ||
| 399 | arrow::field("timestamp_local_time", arrow::time32(arrow::TimeUnit::SECOND)); | ||
| 400 | arrow::UInt32Builder distance_since_start_of_journey_builder; | ||
| 401 | arrow::Int64Builder day_of_week_builder; | ||
| 402 | arrow::Date32Builder date_builder; | ||
| 403 | arrow::Time32Builder local_time_builder(arrow::time32(arrow::TimeUnit::SECOND), arrow::default_memory_pool()); | ||
| 404 | |||
| 405 | const std::chrono::time_zone *amsterdam = std::chrono::locate_zone("Europe/Amsterdam"); | ||
| 406 | |||
| 407 | for (int64_t i = 0; i < table->num_rows(); i++) { | ||
| 408 | DistanceKey key( | ||
| 409 | BasicJourneyKey( | ||
| 410 | std::string(data_owner_codes->Value(i)), | ||
| 411 | std::string(line_planning_numbers->Value(i)), | ||
| 412 | journey_numbers->Value(i)), | ||
| 413 | std::string(user_stop_codes->Value(i))); | ||
| 414 | |||
| 415 | uint32_t distance_since_last_user_stop = distance_since_last_user_stops->Value(i); | ||
| 416 | if (distance_map.contains(key)) { | ||
| 417 | uint32_t total_distance = distance_since_last_user_stop + static_cast<uint32_t>(distance_map.at(key)); | ||
| 418 | ARROW_RETURN_NOT_OK(distance_since_start_of_journey_builder.Append(total_distance)); | ||
| 419 | } else { | ||
| 420 | ARROW_RETURN_NOT_OK(distance_since_start_of_journey_builder.AppendNull()); | ||
| 421 | } | ||
| 422 | |||
| 423 | // Welp, this has gotten a bit complicated! | ||
| 424 | std::chrono::sys_seconds timestamp(std::chrono::floor<std::chrono::seconds>(std::chrono::milliseconds(timestamps->Value(i)))); | ||
| 425 | std::chrono::zoned_seconds zoned_timestamp(amsterdam, timestamp); | ||
| 426 | std::chrono::local_seconds local_timestamp(zoned_timestamp); | ||
| 427 | std::chrono::local_days local_date = std::chrono::floor<std::chrono::days>(local_timestamp); | ||
| 428 | std::chrono::year_month_day date(local_date); | ||
| 429 | std::chrono::weekday day_of_week(local_date); | ||
| 430 | std::chrono::hh_mm_ss<std::chrono::seconds> time(local_timestamp - local_date); | ||
| 431 | std::chrono::sys_days unix_date(date); | ||
| 432 | |||
| 433 | int64_t iso_day_of_week = day_of_week.iso_encoding(); | ||
| 434 | int32_t unix_days = static_cast<int32_t>(unix_date.time_since_epoch().count()); | ||
| 435 | int32_t secs_since_midnight = static_cast<int32_t>(std::chrono::seconds(time).count()); | ||
| 436 | |||
| 437 | ARROW_RETURN_NOT_OK(day_of_week_builder.Append(iso_day_of_week)); | ||
| 438 | ARROW_RETURN_NOT_OK(date_builder.Append(unix_days)); | ||
| 439 | ARROW_RETURN_NOT_OK(local_time_builder.Append(secs_since_midnight)); | ||
| 440 | } | ||
| 441 | |||
| 442 | ARROW_ASSIGN_OR_RAISE(auto distance_since_start_of_journey_col_chunk, distance_since_start_of_journey_builder.Finish()); | ||
| 443 | ARROW_ASSIGN_OR_RAISE(auto day_of_week_col_chunk, day_of_week_builder.Finish()); | ||
| 444 | ARROW_ASSIGN_OR_RAISE(auto date_col_chunk, date_builder.Finish()); | ||
| 445 | ARROW_ASSIGN_OR_RAISE(auto local_time_col_chunk, local_time_builder.Finish()); | ||
| 446 | auto distance_since_start_of_journey_col = | ||
| 447 | std::make_shared<arrow::ChunkedArray>(distance_since_start_of_journey_col_chunk); | ||
| 448 | auto day_of_week_col = std::make_shared<arrow::ChunkedArray>(day_of_week_col_chunk); | ||
| 449 | auto date_col = std::make_shared<arrow::ChunkedArray>(date_col_chunk); | ||
| 450 | auto local_time_col = std::make_shared<arrow::ChunkedArray>(local_time_col_chunk); | ||
| 451 | |||
| 452 | ARROW_ASSIGN_OR_RAISE(table, table->AddColumn( | ||
| 453 | table->num_columns(), | ||
| 454 | field_distance_since_start_of_journey, | ||
| 455 | distance_since_start_of_journey_col)); | ||
| 456 | ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_day_of_week, day_of_week_col)); | ||
| 457 | ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_date, date_col)); | ||
| 458 | ARROW_ASSIGN_OR_RAISE(table, table->AddColumn(table->num_columns(), field_local_time, local_time_col)); | ||
| 459 | |||
| 460 | return table; | ||
| 461 | } | ||
| 462 | |||
| 463 | arrow::Status processTables(Kv1Records &records, Kv1Index &index) { | ||
| 464 | std::shared_ptr<arrow::io::RandomAccessFile> input; | ||
| 465 | ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open("oeuf-input.parquet")); | ||
| 466 | |||
| 467 | std::unique_ptr<parquet::arrow::FileReader> arrow_reader; | ||
| 468 | ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &arrow_reader)); | ||
| 469 | |||
| 470 | std::shared_ptr<arrow::Table> table; | ||
| 471 | ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table)); | ||
| 472 | |||
| 473 | std::cerr << "Input KV6 file has " << table->num_rows() << " rows" << std::endl; | ||
| 474 | ARROW_ASSIGN_OR_RAISE(BasicJourneyKeySet journeys, basicJourneys(table)); | ||
| 475 | std::cerr << "Found " << journeys.size() << " distinct journeys" << std::endl; | ||
| 476 | DistanceMap distance_map = makeDistanceMap(records, index, journeys); | ||
| 477 | std::cerr << "Distance map has " << distance_map.size() << " keys" << std::endl; | ||
| 478 | |||
| 479 | std::cerr << "Creating augmented table" << std::endl; | ||
| 480 | ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Table> augmented, augment(table, distance_map)); | ||
| 481 | |||
| 482 | std::cerr << "Writing augmented table" << std::endl; | ||
| 483 | return writeArrowTableAsParquetFile(*augmented, "oeuf-augmented.parquet"); | ||
| 484 | } | ||
| 485 | |||
| 486 | int main(int argc, char *argv[]) { | ||
| 487 | Kv1Records records; | ||
| 488 | if (!parse(records)) { | ||
| 489 | fputs("Error parsing records, exiting\n", stderr); | ||
| 490 | return EXIT_FAILURE; | ||
| 491 | } | ||
| 492 | printParsedRecords(records); | ||
| 493 | fputs("Indexing...\n", stderr); | ||
| 494 | Kv1Index index(&records); | ||
| 495 | fprintf(stderr, "Indexed %lu records\n", index.size()); | ||
| 496 | // Only notice assignments are not indexed. If this equality is not valid, | ||
| 497 | // then this means that we had duplicate keys or that something else went | ||
| 498 | // wrong. That would really not be great. | ||
| 499 | assert(index.size() == records.size() - records.notice_assignments.size()); | ||
| 500 | printIndexSize(index); | ||
| 501 | fputs("Linking records...\n", stderr); | ||
| 502 | kv1LinkRecords(index); | ||
| 503 | fputs("Done linking\n", stderr); | ||
| 504 | |||
| 505 | arrow::Status st = processTables(records, index); | ||
| 506 | if (!st.ok()) { | ||
| 507 | std::cerr << "Failed to process tables: " << st << std::endl; | ||
| 508 | return EXIT_FAILURE; | ||
| 509 | } | ||
| 510 | } | ||
diff --git a/src/bundleparquet/.envrc b/src/bundleparquet/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/bundleparquet/.envrc | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | source_env ../../ | ||
| 2 | export DEVMODE=1 | ||
diff --git a/src/bundleparquet/Makefile b/src/bundleparquet/Makefile new file mode 100644 index 0000000..170304d --- /dev/null +++ b/src/bundleparquet/Makefile | |||
| @@ -0,0 +1,21 @@ | |||
# Taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
	-O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
	-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
	-D_GLIBCXX_ASSERTIONS \
	-fstrict-flex-arrays=3 \
	-fstack-clash-protection -fstack-protector-strong
LDFLAGS=-larrow -lcurl -lparquet -lprometheus-cpp-push -lprometheus-cpp-core -lz -ltmi8 -Wl,-z,defs \
	-Wl,-z,nodlopen -Wl,-z,noexecstack \
	-Wl,-z,relro -Wl,-z,now

# Builds the bundleparquet binary as a position-independent executable.
bundleparquet: main.cpp spliturl.cpp
	$(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

# -f keeps 'make clean' from failing when the binary has not been built yet.
.PHONY: clean
clean:
	rm -f bundleparquet
diff --git a/src/bundleparquet/main.cpp b/src/bundleparquet/main.cpp new file mode 100644 index 0000000..05fd881 --- /dev/null +++ b/src/bundleparquet/main.cpp | |||
| @@ -0,0 +1,213 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
#include <chrono>
#include <deque>
#include <filesystem>
#include <format>
#include <fstream>
#include <iostream>
#include <stdexcept>
| 9 | |||
| 10 | #include <arrow/api.h> | ||
| 11 | #include <arrow/io/api.h> | ||
| 12 | #include <parquet/arrow/reader.h> | ||
| 13 | |||
| 14 | #include <nlohmann/json.hpp> | ||
| 15 | |||
| 16 | #include <prometheus/counter.h> | ||
| 17 | #include <prometheus/gateway.h> | ||
| 18 | #include <prometheus/registry.h> | ||
| 19 | |||
| 20 | #include <tmi8/kv6_parquet.hpp> | ||
| 21 | |||
| 22 | #include "spliturl.hpp" | ||
| 23 | |||
// Lower and upper bound on the number of rows of a combined (merged) file.
static constexpr int MIN_COMBINED_ROWS = 1'000'000; // one million
static constexpr int MAX_COMBINED_ROWS = 2'000'000; // two million
| 26 | |||
// Metadata describing one Parquet file, as stored under the JSON keys of the
// same names in the file's '.meta.json' companion. All fields default to 0.
struct FileMetadata {
  int64_t min_timestamp{0};
  int64_t max_timestamp{0};
  int64_t rows_written{0};
};
| 32 | |||
| 33 | struct File { | ||
| 34 | FileMetadata metadata; | ||
| 35 | std::filesystem::path filename; | ||
| 36 | }; | ||
| 37 | |||
| 38 | FileMetadata readMetadataOf(std::filesystem::path filename) { | ||
| 39 | std::string meta_filename = std::string(filename) + ".meta.json"; | ||
| 40 | std::ifstream meta_file = std::ifstream(meta_filename, std::ifstream::in|std::ifstream::binary); | ||
| 41 | nlohmann::json meta_json; | ||
| 42 | meta_file >> meta_json; | ||
| 43 | FileMetadata meta = { | ||
| 44 | .min_timestamp = meta_json["min_timestamp"], | ||
| 45 | .max_timestamp = meta_json["max_timestamp"], | ||
| 46 | .rows_written = meta_json["rows_written"], | ||
| 47 | }; | ||
| 48 | return meta; | ||
| 49 | } | ||
| 50 | |||
// Merges a batch of files from the front of `files` into a single Parquet
// file in the 'merged' directory.
//
// Files are taken from the front of the queue for as long as the combined
// row count stays within MAX_COMBINED_ROWS; every file taken is removed from
// the queue. Then, one of three things happens:
//   - The queue ran out before MIN_COMBINED_ROWS was reached: nothing is
//     written and the files on disk are left untouched.
//   - The very first file is already too large to be combined with anything:
//     it is moved to 'merged' as-is, together with its metadata file.
//   - Otherwise the collected tables are concatenated and written, along with
//     a metadata file, after which the source files are deleted.
// `rows_written` is incremented by the number of rows that ended up in
// 'merged'. Returns the first Arrow error encountered, OK otherwise.
arrow::Status processFirstTables(std::deque<File> &files, prometheus::Counter &rows_written) {
  if (files.size() == 0) {
    std::cerr << "Did not find any files" << std::endl;
    return arrow::Status::OK();
  }

  int64_t rows = 0;

  std::vector<std::shared_ptr<arrow::Table>> tables;
  std::vector<std::filesystem::path> processed;
  int64_t min_timestamp = std::numeric_limits<int64_t>::max();
  int64_t max_timestamp = 0;

  bool over_capacity_risk = false;
  auto it = files.begin();
  while (it != files.end()) {
    const std::filesystem::path &filename = it->filename;
    const FileMetadata &metadata = it->metadata;

    // Decide whether this file still fits BEFORE touching it. A file that
    // does not fit is not part of the merged output, so it must neither be
    // opened nor contribute to the timestamp range of the merged file.
    if (rows + metadata.rows_written > MAX_COMBINED_ROWS) {
      over_capacity_risk = true;
      break;
    }

    std::shared_ptr<arrow::io::RandomAccessFile> input;
    ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(filename));

    std::unique_ptr<parquet::arrow::FileReader> arrow_reader;
    ARROW_RETURN_NOT_OK(parquet::arrow::OpenFile(input, arrow::default_memory_pool(), &arrow_reader));

    std::shared_ptr<arrow::Table> table;
    ARROW_RETURN_NOT_OK(arrow_reader->ReadTable(&table));

    if (metadata.min_timestamp < min_timestamp)
      min_timestamp = metadata.min_timestamp;
    if (metadata.max_timestamp > max_timestamp)
      max_timestamp = metadata.max_timestamp;

    tables.push_back(table);
    processed.push_back(filename);
    rows += metadata.rows_written;
    // Erasing invalidates `filename` and `metadata`; they are not used below.
    it = files.erase(it);
  }

  if (rows < MIN_COMBINED_ROWS && !over_capacity_risk) {
    std::cerr << "Found files, but not enough to satisfy the minimum amount of rows for the combined file" << std::endl;
    std::cerr << "(We have " << rows << "/" << MIN_COMBINED_ROWS << " rows at the moment, so "
              << static_cast<float>(rows)/static_cast<float>(MIN_COMBINED_ROWS)*100.f << "%)" << std::endl;
    return arrow::Status::OK();
  } else if (rows == 0 && over_capacity_risk) {
    // The first file alone exceeds the maximum; move it over unmodified.
    const std::filesystem::path &filename = files.front().filename;
    std::filesystem::rename(filename, "merged" / filename);
    std::filesystem::rename(std::string(filename) + ".meta.json", std::string("merged" / filename) + ".meta.json");
    rows_written.Increment(static_cast<double>(files.front().metadata.rows_written));
    files.pop_front();
    return arrow::Status::OK();
  }

  // Default options specify that the schemas are not unified, which is
  // luckily exactly what we want :)
  std::shared_ptr<arrow::Table> merged_table;
  ARROW_ASSIGN_OR_RAISE(merged_table, arrow::ConcatenateTables(tables));

  auto timestamp = std::chrono::round<std::chrono::seconds>(std::chrono::system_clock::now());
  std::string merged_filename = std::format("merged/oeuf-{:%FT%T%Ez}.parquet", timestamp);
  ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*merged_table, merged_filename));

  std::cerr << "Wrote merged table to " << merged_filename << std::endl;

  // The metadata is first written to a '.part' file and renamed afterwards,
  // so that a metadata file under its final name is always complete.
  std::ofstream metaf(merged_filename + ".meta.json.part", std::ios::binary);
  nlohmann::json meta{
    { "min_timestamp", min_timestamp },
    { "max_timestamp", max_timestamp },
    { "rows_written", rows },
  };
  metaf << meta;
  metaf.close();
  // Do not go on to delete the source files if the metadata was not written.
  if (metaf.fail())
    return arrow::Status::IOError("Could not write metadata file ", merged_filename, ".meta.json.part");
  std::filesystem::rename(merged_filename + ".meta.json.part", merged_filename + ".meta.json");

  std::cerr << "Wrote merged table metadata" << std::endl;
  rows_written.Increment(static_cast<double>(rows));

  for (const std::filesystem::path &old_file : processed) {
    std::filesystem::remove(old_file);
    std::filesystem::remove(std::string(old_file) + ".meta.json");
  }

  std::cerr << "Successfully wrote merged table, metadata and deleted old files" << std::endl;

  return arrow::Status::OK();
}
| 141 | |||
| 142 | arrow::Status processTables(std::deque<File> &files, prometheus::Counter &rows_written) { | ||
| 143 | while (!files.empty()) | ||
| 144 | ARROW_RETURN_NOT_OK(processFirstTables(files, rows_written)); | ||
| 145 | return arrow::Status::OK(); | ||
| 146 | } | ||
| 147 | |||
| 148 | int main(int argc, char *argv[]) { | ||
| 149 | std::filesystem::path cwd = std::filesystem::current_path(); | ||
| 150 | std::filesystem::create_directory(cwd / "merged"); | ||
| 151 | |||
| 152 | const char *prom_push_url = getenv("PROMETHEUS_PUSH_URL"); | ||
| 153 | if (!prom_push_url || strlen(prom_push_url) == 0) { | ||
| 154 | std::cerr << "Error: no PROMETHEUS_PUSH_URL set!" << std::endl; | ||
| 155 | return EXIT_FAILURE; | ||
| 156 | } | ||
| 157 | |||
| 158 | std::string split_err; | ||
| 159 | auto split_prom_push_url = splitUrl(prom_push_url, &split_err); | ||
| 160 | if (!split_prom_push_url) { | ||
| 161 | std::cerr << "Could not process URL in environment variable PROMETHEUS_PUSH_URL: " | ||
| 162 | << split_err << std::endl; | ||
| 163 | return EXIT_FAILURE; | ||
| 164 | } | ||
| 165 | std::cout << "Prometheus Push URL: " << split_prom_push_url->schemehost << ":" | ||
| 166 | << split_prom_push_url->portpath << std::endl; | ||
| 167 | |||
| 168 | prometheus::Gateway gateway{split_prom_push_url->schemehost, | ||
| 169 | split_prom_push_url->portpath, | ||
| 170 | "oeuf-archiver"}; | ||
| 171 | |||
| 172 | auto registry = std::make_shared<prometheus::Registry>(); | ||
| 173 | prometheus::Gauge &rows_available = prometheus::BuildGauge() | ||
| 174 | .Name("archiver_rows_available") | ||
| 175 | .Help("Number of rows available to the archiver") | ||
| 176 | .Register(*registry) | ||
| 177 | .Add({}); | ||
| 178 | prometheus::Counter &rows_written = prometheus::BuildCounter() | ||
| 179 | .Name("archiver_rows_written") | ||
| 180 | .Help("Number of rows written by the archiver") | ||
| 181 | .Register(*registry) | ||
| 182 | .Add({}); | ||
| 183 | gateway.RegisterCollectable(registry); | ||
| 184 | |||
| 185 | std::deque<File> files; | ||
| 186 | for (auto const &dir_entry : std::filesystem::directory_iterator{cwd}) { | ||
| 187 | if (!dir_entry.is_regular_file()) continue; | ||
| 188 | std::filesystem::path filename = dir_entry.path().filename(); | ||
| 189 | const std::string &filename_str = filename; | ||
| 190 | if (filename_str.starts_with("oeuf-") && filename_str.ends_with("+00:00.parquet")) { | ||
| 191 | try { | ||
| 192 | FileMetadata meta = readMetadataOf(filename); | ||
| 193 | File file = { .metadata = meta, .filename = filename }; | ||
| 194 | files.push_back(file); | ||
| 195 | |||
| 196 | rows_available.Increment(static_cast<double>(meta.rows_written)); | ||
| 197 | } catch (const std::exception &e) { | ||
| 198 | std::cerr << "Failed to read metadata of file " << filename << ": " << e.what() << std::endl; | ||
| 199 | return EXIT_FAILURE; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | } | ||
| 203 | |||
| 204 | std::sort(files.begin(), files.end(), | ||
| 205 | [](const File &f1, const File &f2) { return f1.filename < f2.filename; }); | ||
| 206 | arrow::Status st = processTables(files, rows_written); | ||
| 207 | if (!st.ok()) { | ||
| 208 | std::cerr << "Failed to process tables: " << st << std::endl; | ||
| 209 | return EXIT_FAILURE; | ||
| 210 | } | ||
| 211 | |||
| 212 | gateway.Push(); | ||
| 213 | } | ||
diff --git a/src/bundleparquet/spliturl.cpp b/src/bundleparquet/spliturl.cpp new file mode 100644 index 0000000..90fd821 --- /dev/null +++ b/src/bundleparquet/spliturl.cpp | |||
| @@ -0,0 +1,203 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <cstring> | ||
| 4 | #include <iostream> | ||
| 5 | #include <optional> | ||
| 6 | #include <sstream> | ||
| 7 | #include <string> | ||
| 8 | |||
| 9 | #include <curl/curl.h> | ||
| 10 | |||
| 11 | #include "spliturl.hpp" | ||
| 12 | |||
| 13 | // splitUrl takes a URL of the shape '[http[s]://]HOST[:PORT][/PATH]', and | ||
| 14 | // splits it into two URLs: | ||
| 15 | // - scheme + host -> '[http[s]://]HOST' | ||
| 16 | // - port + path -> '[PORT][/PATH]' | ||
| 17 | // In case an IPv6 address is provided, the host must enclosed in square | ||
| 18 | // brackets. The zone ID may also be indicated. Note that in the resulting | ||
| 19 | // parts, the colon preceding the port number is omitted. This is on purpose. | ||
| 20 | std::optional<SplitUrl> splitUrl(const std::string &url, std::string *error) { | ||
| 21 | std::stringstream errs; | ||
| 22 | std::optional<SplitUrl> result; | ||
| 23 | char *processed = nullptr; | ||
| 24 | char *scheme = nullptr; | ||
| 25 | char *user = nullptr; | ||
| 26 | char *password = nullptr; | ||
| 27 | char *zoneid = nullptr; | ||
| 28 | char *query = nullptr; | ||
| 29 | char *fragment = nullptr; | ||
| 30 | CURLU *schemehost = nullptr; | ||
| 31 | char *schemehost_url = nullptr; | ||
| 32 | char *portpath_url = nullptr; | ||
| 33 | |||
| 34 | // Parse the URL, allowing the user to omit the scheme. CURL will use 'https' | ||
| 35 | // by default if no scheme is specified. | ||
| 36 | |||
| 37 | CURLU *parsed = curl_url(); | ||
| 38 | CURLUcode rc = curl_url_set(parsed, CURLUPART_URL, url.c_str(), CURLU_DEFAULT_SCHEME); | ||
| 39 | if (rc != CURLUE_OK) { | ||
| 40 | errs << "Failed to parse URL: " << curl_url_strerror(rc); | ||
| 41 | goto Exit; | ||
| 42 | } | ||
| 43 | |||
| 44 | // As we parse the URL with the option CURLU_DEFAULT_SCHEME, the CURL API | ||
| 45 | // won't require the user to provide the scheme part of the URL. It will | ||
| 46 | // automatically default the scheme to https. However, we do not usually want | ||
| 47 | // it to default to HTTPS, but HTTP instead (as the use case, connecting to a | ||
| 48 | // PushGateway server, usually is served over a private network via HTTP). | ||
| 49 | // | ||
| 50 | // This is why we check if the scheme was put there by CURL and otherwise set | ||
| 51 | // it to HTTP. We also check for any other schemes that the user may have | ||
| 52 | // provided, and reject anything that is not http/https. | ||
| 53 | if (!url.starts_with("http://") && !url.starts_with("https://")) { | ||
| 54 | rc = curl_url_get(parsed, CURLUPART_SCHEME, &scheme, 0); | ||
| 55 | if (rc != CURLUE_OK) { | ||
| 56 | errs << "Could not get scheme from parsed URL: " << curl_url_strerror(rc); | ||
| 57 | goto Exit; | ||
| 58 | } | ||
| 59 | if (strcmp(scheme, "https")) { | ||
| 60 | errs << "Unexpected scheme" << scheme << "in provided URL (expected http or https)"; | ||
| 61 | goto Exit; | ||
| 62 | } | ||
| 63 | rc = curl_url_set(parsed, CURLUPART_SCHEME, "http", 0); | ||
| 64 | if (rc != CURLUE_OK) { | ||
| 65 | errs << "Could not set URL scheme to http: " << curl_url_strerror(rc); | ||
| 66 | goto Exit; | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | // Turn the parsed URL back into a string. | ||
| 71 | rc = curl_url_get(parsed, CURLUPART_URL, &processed, 0); | ||
| 72 | if (rc != CURLUE_OK) { | ||
| 73 | errs << "Failed to output parsed URL: " << curl_url_strerror(rc); | ||
| 74 | goto Exit; | ||
| 75 | } | ||
| 76 | |||
| 77 | // This part of the code checks if no prohibited parts are present in the URL | ||
| 78 | // (basic auth: (user, password), query, fragment). | ||
| 79 | |||
| 80 | rc = curl_url_get(parsed, CURLUPART_USER, &user, 0); | ||
| 81 | if (rc == CURLUE_OK && strlen(user) != 0) { | ||
| 82 | errs << "Provided URL should not contain a user part"; | ||
| 83 | goto Exit; | ||
| 84 | } else if (rc != CURLUE_NO_USER && rc != CURLUE_OK) { | ||
| 85 | errs << "Failed to get check user part existence in provided url: " << curl_url_strerror(rc); | ||
| 86 | goto Exit; | ||
| 87 | } | ||
| 88 | |||
| 89 | rc = curl_url_get(parsed, CURLUPART_PASSWORD, &password, 0); | ||
| 90 | if (rc == CURLUE_OK && strlen(password) != 0) { | ||
| 91 | errs << "Provided URL should not contain a password part"; | ||
| 92 | goto Exit; | ||
| 93 | } else if (rc != CURLUE_NO_PASSWORD && rc != CURLUE_OK) { | ||
| 94 | errs << "Failed to get check password part existence in provided url: " << curl_url_strerror(rc); | ||
| 95 | goto Exit; | ||
| 96 | } | ||
| 97 | |||
| 98 | rc = curl_url_get(parsed, CURLUPART_QUERY, &query, 0); | ||
| 99 | if (rc == CURLUE_OK && strlen(query) != 0) { | ||
| 100 | errs << "Provided URL should not contain a query part"; | ||
| 101 | goto Exit; | ||
| 102 | } else if (rc != CURLUE_NO_QUERY && rc != CURLUE_OK) { | ||
| 103 | errs << "Failed to get check query part existence in provided url: " << curl_url_strerror(rc); | ||
| 104 | goto Exit; | ||
| 105 | } | ||
| 106 | |||
| 107 | rc = curl_url_get(parsed, CURLUPART_FRAGMENT, &fragment, 0); | ||
| 108 | if (rc == CURLUE_OK && strlen(fragment) != 0) { | ||
| 109 | errs << "Provided URL should not contain a fragment part"; | ||
| 110 | goto Exit; | ||
| 111 | } else if (rc != CURLUE_NO_FRAGMENT && rc != CURLUE_OK) { | ||
| 112 | errs << "Failed to get check fragment part existence in provided url: " << curl_url_strerror(rc); | ||
| 113 | goto Exit; | ||
| 114 | } | ||
| 115 | |||
| 116 | // Now that we know that the provided URL makes sense, we can start doing | ||
| 117 | // some arts and crafts. We get started by copying the parsed URL into | ||
| 118 | // schemehost and simply delete all parts which are not scheme + host. | ||
| 119 | |||
| 120 | schemehost = curl_url_dup(parsed); | ||
| 121 | |||
| 122 | // CURL BUG WORKAROUND: CURLUPART_ZONEID is NOT copied by curl_url_dup! | ||
| 123 | // ^ fixed in CURL 8.3.0 after https://curl.se/mail/lib-2023-07/0047.html | ||
| 124 | rc = curl_url_get(parsed, CURLUPART_ZONEID, &zoneid, 0); | ||
| 125 | if (rc == CURLUE_OK) { | ||
| 126 | rc = curl_url_set(schemehost, CURLUPART_ZONEID, zoneid, 0); | ||
| 127 | if (rc != CURLUE_OK) { | ||
| 128 | errs << "Could not copy zone ID to duplicated URL: " << curl_url_strerror(rc); | ||
| 129 | goto Exit; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | rc = curl_url_set(schemehost, CURLUPART_PORT, nullptr, 0); | ||
| 133 | if (rc != CURLUE_OK) { | ||
| 134 | errs << "Could not unset port in duplicated URL: " << curl_url_strerror(rc); | ||
| 135 | goto Exit; | ||
| 136 | } | ||
| 137 | rc = curl_url_set(schemehost, CURLUPART_PATH, nullptr, 0); | ||
| 138 | if (rc != CURLUE_OK) { | ||
| 139 | errs << "Could not unset path in duplicated URL: " << curl_url_strerror(rc); | ||
| 140 | goto Exit; | ||
| 141 | } | ||
| 142 | |||
| 143 | // Okay, now we have the schemehost CURLU all ready to go. Note that a URL | ||
| 144 | // only consisting of a scheme and host is considered valid, so CURL will be | ||
| 145 | // more than happy to actually turn it into a string for us. Which is exactly | ||
| 146 | // what we do here :) | ||
| 147 | |||
| 148 | rc = curl_url_get(schemehost, CURLUPART_URL, &schemehost_url, 0); | ||
| 149 | if (rc != CURLUE_OK) { | ||
| 150 | errs << "Could not get scheme + host URL: " << curl_url_strerror(rc); | ||
| 151 | goto Exit; | ||
| 152 | } | ||
| 153 | |||
| 154 | // Remove any trailing slash after the scheme + host URL that CURL might have | ||
| 155 | // put there -- we still want to get a valid URL if we paste the port + path | ||
| 156 | // part behind it. | ||
| 157 | |||
| 158 | if (strlen(schemehost_url) > 0) { | ||
| 159 | if (schemehost_url[strlen(schemehost_url) - 1] != '/') { | ||
| 160 | errs << "Scheme + host URL does not end with a slash"; | ||
| 161 | goto Exit; | ||
| 162 | } | ||
| 163 | schemehost_url[strlen(schemehost_url) - 1] = '\0'; | ||
| 164 | } | ||
| 165 | |||
| 166 | // Look, this is really gross. Because the port + path part of the URL is not | ||
| 167 | // a valid URL itself, but the scheme + host should be a prefix of the full | ||
| 168 | // URL containing the port + path, we can simply check if it is indeed a | ||
| 169 | // prefix, and then strip it from the full URL, giving us the port + path | ||
| 170 | // (after deleting the colon preceding the port). | ||
| 171 | |||
| 172 | if (!std::string_view(processed).starts_with(schemehost_url)) { | ||
| 173 | errs << "Scheme + host URL is not a prefix of the processed URL"; | ||
| 174 | goto Exit; | ||
| 175 | } | ||
| 176 | |||
| 177 | portpath_url = processed + strlen(schemehost_url); | ||
| 178 | // We should not have the colon before the port, prometheus-cpp inserts it | ||
| 179 | if (strlen(portpath_url) > 0 && portpath_url[0] == ':') portpath_url++; | ||
| 180 | // We do not need a trailing slash | ||
| 181 | if (strlen(portpath_url) > 0 && portpath_url[strlen(portpath_url)-1] == '/') | ||
| 182 | portpath_url[strlen(portpath_url)-1] = '\0'; | ||
| 183 | |||
| 184 | // It has been done. BLECH | ||
| 185 | result = std::make_optional<SplitUrl>(schemehost_url, portpath_url); | ||
| 186 | |||
| 187 | Exit: | ||
| 188 | curl_free(processed); | ||
| 189 | curl_free(scheme); | ||
| 190 | curl_free(user); | ||
| 191 | curl_free(password); | ||
| 192 | curl_free(query); | ||
| 193 | curl_free(fragment); | ||
| 194 | curl_free(zoneid); | ||
| 195 | curl_free(schemehost_url); | ||
| 196 | curl_url_cleanup(schemehost); | ||
| 197 | curl_url_cleanup(parsed); | ||
| 198 | |||
| 199 | if (!result && error) | ||
| 200 | *error = errs.str(); | ||
| 201 | |||
| 202 | return result; | ||
| 203 | } | ||
diff --git a/src/bundleparquet/spliturl.hpp b/src/bundleparquet/spliturl.hpp new file mode 100644 index 0000000..d8150e0 --- /dev/null +++ b/src/bundleparquet/spliturl.hpp | |||
| @@ -0,0 +1,11 @@ | |||
// vim:set sw=2 ts=2 sts et:

#ifndef OEUF_BUNDLEPARQUET_SPLITURL_HPP
#define OEUF_BUNDLEPARQUET_SPLITURL_HPP

#include <optional>
#include <string>

// The two halves of a URL as produced by splitUrl().
struct SplitUrl {
  // Scheme and host, without a trailing slash.
  std::string schemehost;
  // Port and path, without the colon that precedes the port and without a
  // trailing slash.
  std::string portpath;
};

// Splits `url` into its scheme + host part and its port + path part.
//
// Returns std::nullopt on failure. In that case, if `error` is non-null, it
// receives a description of what went wrong.
std::optional<SplitUrl> splitUrl(const std::string &url, std::string *error = nullptr);

#endif // OEUF_BUNDLEPARQUET_SPLITURL_HPP
diff --git a/src/filterkv6/.envrc b/src/filterkv6/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/filterkv6/.envrc | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | source_env ../../ | ||
| 2 | export DEVMODE=1 | ||
diff --git a/src/filterkv6/Makefile b/src/filterkv6/Makefile new file mode 100644 index 0000000..13bb38e --- /dev/null +++ b/src/filterkv6/Makefile | |||
| @@ -0,0 +1,21 @@ | |||
# Taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
  -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
  -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
  -D_GLIBCXX_ASSERTIONS \
  -fstrict-flex-arrays=3 \
  -fstack-clash-protection -fstack-protector-strong
LDFLAGS=-larrow -larrow_dataset -lparquet -ltmi8 -Wl,-z,defs \
  -Wl,-z,nodlopen -Wl,-z,noexecstack \
  -Wl,-z,relro -Wl,-z,now

# Single translation unit: compile and link as a position-independent
# executable in one step.
filterkv6: main.cpp
	$(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

.PHONY: clean
clean:
	# -f: do not fail when the binary has not been built (yet).
	rm -f filterkv6
diff --git a/src/filterkv6/main.cpp b/src/filterkv6/main.cpp new file mode 100644 index 0000000..a32220a --- /dev/null +++ b/src/filterkv6/main.cpp | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <chrono> | ||
| 4 | #include <deque> | ||
| 5 | #include <filesystem> | ||
| 6 | #include <format> | ||
| 7 | #include <fstream> | ||
| 8 | #include <iostream> | ||
| 9 | |||
| 10 | #include <arrow/api.h> | ||
| 11 | #include <arrow/compute/api.h> | ||
| 12 | #include <arrow/filesystem/api.h> | ||
| 13 | #include <arrow/dataset/api.h> | ||
| 14 | #include <arrow/io/api.h> | ||
| 15 | |||
| 16 | #include <tmi8/kv6_parquet.hpp> | ||
| 17 | |||
| 18 | namespace ds = arrow::dataset; | ||
| 19 | namespace cp = arrow::compute; | ||
| 20 | using namespace arrow; | ||
| 21 | |||
| 22 | arrow::Status processTables(std::string lineno) { | ||
| 23 | auto filesystem = std::make_shared<fs::LocalFileSystem>(); | ||
| 24 | |||
| 25 | fs::FileSelector selector; | ||
| 26 | selector.base_dir = std::filesystem::current_path(); | ||
| 27 | selector.recursive = false; | ||
| 28 | |||
| 29 | auto format = std::static_pointer_cast<ds::FileFormat>(std::make_shared<ds::ParquetFileFormat>()); | ||
| 30 | |||
| 31 | ARROW_ASSIGN_OR_RAISE(auto factory, | ||
| 32 | ds::FileSystemDatasetFactory::Make(filesystem, selector, format, | ||
| 33 | ds::FileSystemFactoryOptions())); | ||
| 34 | |||
| 35 | ARROW_ASSIGN_OR_RAISE(auto dataset, factory->Finish()); | ||
| 36 | |||
| 37 | printf("Scanning dataset for line %s...\n", lineno.c_str()); | ||
| 38 | // Read specified columns with a row filter | ||
| 39 | ARROW_ASSIGN_OR_RAISE(auto scan_builder, dataset->NewScan()); | ||
| 40 | ARROW_RETURN_NOT_OK(scan_builder->Filter(cp::and_({ | ||
| 41 | cp::equal(cp::field_ref("line_planning_number"), cp::literal(lineno)), | ||
| 42 | cp::is_valid(cp::field_ref("rd_x")), | ||
| 43 | cp::is_valid(cp::field_ref("rd_y")), | ||
| 44 | }))); | ||
| 45 | |||
| 46 | ARROW_ASSIGN_OR_RAISE(auto scanner, scan_builder->Finish()); | ||
| 47 | ARROW_ASSIGN_OR_RAISE(auto table, scanner->ToTable()); | ||
| 48 | |||
| 49 | puts("Finished loading data, computing stable sort indices..."); | ||
| 50 | |||
| 51 | arrow::Datum sort_indices; | ||
| 52 | cp::SortOptions sort_options; | ||
| 53 | sort_options.sort_keys = { cp::SortKey("timestamp" /* ascending by default */) }; | ||
| 54 | ARROW_ASSIGN_OR_RAISE(sort_indices, cp::CallFunction("sort_indices", { table }, &sort_options)); | ||
| 55 | puts("Finished computing stable sort indices, creating sorted table..."); | ||
| 56 | |||
| 57 | arrow::Datum sorted; | ||
| 58 | ARROW_ASSIGN_OR_RAISE(sorted, cp::CallFunction("take", { table, sort_indices })); | ||
| 59 | |||
| 60 | puts("Writing sorted table to disk..."); | ||
| 61 | ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*sorted.table(), "merged/oeuf-merged.parquet")); | ||
| 62 | puts("Syncing..."); | ||
| 63 | sync(); | ||
| 64 | puts("Done. Have a nice day."); | ||
| 65 | |||
| 66 | return arrow::Status::OK(); | ||
| 67 | } | ||
| 68 | |||
// Warning that is shown both as part of the usage text and at program
// start-up. It stays a macro so that it can be concatenated with adjacent
// string literals.
#define NOTICE "Notice: This tool will fail if any non-Parquet files are present in the\n" \
               " current working directory. It does not load files which are present in\n" \
               " any possible subdirectories."

// printf-style usage text; the single %s is replaced by the program name.
const char help[] =
  "Usage: %s <LINENO>\n"
  "\n"
  " LINENO The LinePlanningNumber as in the KV1/KV6 data\n\n"
  NOTICE "\n";
| 78 | |||
| 79 | void exitHelp(const char *progname, int code = 1) { | ||
| 80 | printf(help, progname); | ||
| 81 | exit(code); | ||
| 82 | } | ||
| 83 | |||
| 84 | int main(int argc, char *argv[]) { | ||
| 85 | const char *progname = argv[0]; | ||
| 86 | if (argc != 2) { | ||
| 87 | puts("Error: incorrect number of arguments provided\n"); | ||
| 88 | exitHelp(progname); | ||
| 89 | } | ||
| 90 | char *lineno = argv[1]; | ||
| 91 | puts(NOTICE "\n"); | ||
| 92 | |||
| 93 | std::filesystem::path cwd = std::filesystem::current_path(); | ||
| 94 | std::filesystem::create_directory(cwd / "merged"); | ||
| 95 | |||
| 96 | puts("Running this program may take a while, especially on big datasets. If you're\n" | ||
| 97 | "processing the data of a single bus line over the course of multiple months,\n" | ||
| 98 | "you may see memory usage of up to 10 GiB. Make sure that you have sufficient\n" | ||
| 99 | "RAM available, to avoid overloading and subsequently freezing your system.\n"); | ||
| 100 | |||
| 101 | arrow::Status st = processTables(std::string(lineno)); | ||
| 102 | if (!st.ok()) { | ||
| 103 | std::cerr << "Failed to process tables: " << st << std::endl; | ||
| 104 | return EXIT_FAILURE; | ||
| 105 | } | ||
| 106 | } | ||
diff --git a/src/querykv1/.envrc b/src/querykv1/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/querykv1/.envrc | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | source_env ../../ | ||
| 2 | export DEVMODE=1 | ||
diff --git a/src/querykv1/.gitignore b/src/querykv1/.gitignore new file mode 100644 index 0000000..5761abc --- /dev/null +++ b/src/querykv1/.gitignore | |||
| @@ -0,0 +1 @@ | |||
| *.o | |||
diff --git a/src/querykv1/Makefile b/src/querykv1/Makefile new file mode 100644 index 0000000..a8791f5 --- /dev/null +++ b/src/querykv1/Makefile | |||
| @@ -0,0 +1,28 @@ | |||
# Taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
  -O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
  -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
  -D_GLIBCXX_ASSERTIONS \
  -fstrict-flex-arrays=3 \
  -fstack-clash-protection -fstack-protector-strong
LDFLAGS=-ltmi8 -Wl,-z,defs \
  -Wl,-z,nodlopen -Wl,-z,noexecstack \
  -Wl,-z,relro -Wl,-z,now

HDRS=cliopts.hpp daterange.hpp joparoute.hpp journeyinfo.hpp journeyroute.hpp journeys.hpp schedule.hpp
SRCS=main.cpp cliopts.cpp daterange.cpp joparoute.cpp journeyinfo.cpp journeyroute.cpp journeys.cpp schedule.cpp
OBJS=$(patsubst %.cpp,%.o,$(SRCS))

# The objects end up in a position-independent executable (-pie below), so
# they have to be compiled as position-independent code as well.
%.o: %.cpp $(HDRS)
	$(CXX) -fPIE -c -o $@ $< $(CXXFLAGS)

querykv1: $(OBJS)
	$(CXX) -fPIE -pie -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

.PHONY: clean
clean:
	# Remove the intermediate objects too; -f keeps this from failing when
	# nothing has been built (yet).
	rm -f querykv1 $(OBJS)
diff --git a/src/querykv1/cliopts.cpp b/src/querykv1/cliopts.cpp new file mode 100644 index 0000000..bef7a98 --- /dev/null +++ b/src/querykv1/cliopts.cpp | |||
| @@ -0,0 +1,456 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <cstdlib> | ||
| 4 | #include <cstdio> | ||
| 5 | #include <string> | ||
| 6 | #include <string_view> | ||
| 7 | |||
| 8 | #include <getopt.h> | ||
| 9 | |||
| 10 | #include "cliopts.hpp" | ||
| 11 | |||
| 12 | using namespace std::string_view_literals; | ||
| 13 | |||
// Placeholder stored for an option that was given without an argument:
// parseOptions() falls back to it when getopt leaves optarg null.
const char *opt_set = "";
// Null pointer, the same value every Options field starts out with.
// NOTE(review): presumably meant to denote "option not given" -- confirm
// against the users of this symbol.
const char *opt_unset = nullptr;
| 16 | |||
| 17 | const char help[] = R"(Usage: %1$s [OPTIONS] <COMMAND> | ||
| 18 | |||
| 19 | Global Options: | ||
| 20 | --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin | ||
| 21 | -h, --help Print this help | ||
| 22 | |||
| 23 | Commands: | ||
| 24 | joparoute Generate CSV for journey pattern route | ||
| 25 | journeyinfo Print some information on a journey | ||
| 26 | journeyroute Generate CSV for journey route | ||
| 27 | journeys List journeys of a specific line going from stop A to B | ||
| 28 | schedule Generate schedule | ||
| 29 | )"; | ||
| 30 | |||
| 31 | const char joparoute_help[] = R"(Usage: %1$s joparoute --line <NUMBER> --jopa <CODE> [OPTIONS] | ||
| 32 | |||
| 33 | Options: | ||
| 34 | --line <NUMBER> Line planning number as in schedule | ||
| 35 | --jopa <CODE> Journey pattern code as in KV1 data | ||
| 36 | -o <PATH> Path of file to write to, '-' for stdout | ||
| 37 | |||
| 38 | Global Options: | ||
| 39 | --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin | ||
| 40 | -h, --help Print this help | ||
| 41 | )"; | ||
| 42 | |||
| 43 | const char journeyroute_help[] = R"(Usage: %1$s journeyroute --line <NUMBER> [OPTIONS] | ||
| 44 | |||
| 45 | Options: | ||
| 46 | --line <NUMBER> Line planning number as in KV1 data | ||
| 47 | --journey <NUMBER> Journey number as in KV1 data | ||
| 48 | -o <PATH> Path of file to write to, '-' for stdout | ||
| 49 | |||
| 50 | Global Options: | ||
| 51 | --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin | ||
| 52 | -h, --help Print this help | ||
| 53 | )"; | ||
| 54 | |||
| 55 | const char journeys_help[] = R"(Usage: %1$s journeys --line <NUMBER> --begin <STOP> --end <STOP> [OPTIONS] | ||
| 56 | |||
| 57 | For the --begin and --end arguments, use the following format: | ||
| 58 | --begin/--end stop:<USRSTOP CODE> | ||
| 59 | --begin/--end star:<USRSTAR CODE> | ||
| 60 | |||
| 61 | Options: | ||
| 62 | --begin <STOP> User stop code/area of stop the journey should begin at | ||
| 63 | --end <STOP> User stop code/area of stop the journey should end at | ||
| 64 | --line <NUMBER> Line planning number to filter on | ||
| 65 | -o <PATH> Path of file to write to, '-' for stdout | ||
| 66 | |||
| 67 | Global Options: | ||
| 68 | --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin | ||
| 69 | -h, --help Print this help | ||
| 70 | )"; | ||
| 71 | |||
| 72 | const char journeyinfo_help[] = R"(Usage: %1$s journeyinfo --line <NUMBER> --journey <NUMBER> [OPTIONS] | ||
| 73 | |||
| 74 | Options: | ||
| 75 | --line <NUMBER> Line planning number to filter on | ||
| 76 | --journey <NUMBER> Journey number as in schedule | ||
| 77 | |||
| 78 | Global Options: | ||
| 79 | --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin | ||
| 80 | -h, --help Print this help | ||
| 81 | )"; | ||
| 82 | |||
| 83 | const char schedule_help[] = R"(Usage: %1$s schedule --line <NUMBER> [OPTIONS] | ||
| 84 | |||
| 85 | Options: | ||
| 86 | --line <NUMBER> Line planning number to generate schedule for | ||
| 87 | -o <PATH> Path of file to write to, '-' for stdout | ||
| 88 | |||
| 89 | Global Options: | ||
| 90 | --kv1 <PATH> Path to file containing all KV1 data, '-' for stdin | ||
| 91 | -h, --help Print this help | ||
| 92 | )"; | ||
| 93 | |||
| 94 | void journeyRouteValidateOptions(const char *progname, Options *options) { | ||
| 95 | #define X(name, argument, long_, short_) \ | ||
| 96 | if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \ | ||
| 97 | && #name != "journey_number"sv && #name != "help"sv && #name != "output_file_path"sv) \ | ||
| 98 | if (options->name) { \ | ||
| 99 | if (long_) { \ | ||
| 100 | if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeyroute subcommand\n\n", progname, static_cast<const char *>(long_), short_); \ | ||
| 101 | else fprintf(stderr, "%s: unexpected flag --%s for journeyroute subcommand\n\n", progname, static_cast<const char *>(long_)); \ | ||
| 102 | } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeyroute subcommand\n\n", progname, short_); \ | ||
| 103 | fprintf(stderr, journeyroute_help, progname); \ | ||
| 104 | exit(1); \ | ||
| 105 | } | ||
| 106 | LONG_OPTIONS | ||
| 107 | SHORT_OPTIONS | ||
| 108 | #undef X | ||
| 109 | |||
| 110 | if (options->positional.size() > 0) { | ||
| 111 | fprintf(stderr, "%s: unexpected positional argument(s) for journeyroute subcommand\n\n", progname); | ||
| 112 | for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); | ||
| 113 | fprintf(stderr, journeyroute_help, progname); | ||
| 114 | exit(1); | ||
| 115 | } | ||
| 116 | |||
| 117 | if (!options->kv1_file_path) | ||
| 118 | options->kv1_file_path = "-"; | ||
| 119 | if (!options->output_file_path) | ||
| 120 | options->output_file_path = "-"; | ||
| 121 | if (options->kv1_file_path == ""sv) { | ||
| 122 | fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); | ||
| 123 | fprintf(stderr, journeyroute_help, progname); | ||
| 124 | exit(1); | ||
| 125 | } | ||
| 126 | if (options->output_file_path == ""sv) { | ||
| 127 | fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); | ||
| 128 | fprintf(stderr, journeyroute_help, progname); | ||
| 129 | exit(1); | ||
| 130 | } | ||
| 131 | if (!options->journey_number || options->journey_number == ""sv) { | ||
| 132 | fprintf(stderr, "%s: journey number must be provided\n\n", progname); | ||
| 133 | fprintf(stderr, journeyroute_help, progname); | ||
| 134 | exit(1); | ||
| 135 | } | ||
| 136 | if (!options->line_planning_number || options->line_planning_number == ""sv) { | ||
| 137 | fprintf(stderr, "%s: line planning number must be provided\n\n", progname); | ||
| 138 | fprintf(stderr, journeyroute_help, progname); | ||
| 139 | exit(1); | ||
| 140 | } | ||
| 141 | } | ||
| 142 | |||
| 143 | void scheduleValidateOptions(const char *progname, Options *options) { | ||
| 144 | #define X(name, argument, long_, short_) \ | ||
| 145 | if (#name != "kv1_file_path"sv && #name != "help"sv \ | ||
| 146 | && #name != "line_planning_number"sv && #name != "output_file_path"sv) \ | ||
| 147 | if (options->name) { \ | ||
| 148 | if (long_) { \ | ||
| 149 | if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for schedule subcommand\n\n", progname, static_cast<const char *>(long_), short_); \ | ||
| 150 | else fprintf(stderr, "%s: unexpected flag --%s for schedule subcommand\n\n", progname, static_cast<const char *>(long_)); \ | ||
| 151 | } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for schedule subcommand\n\n", progname, short_); \ | ||
| 152 | fprintf(stderr, schedule_help, progname); \ | ||
| 153 | exit(1); \ | ||
| 154 | } | ||
| 155 | LONG_OPTIONS | ||
| 156 | SHORT_OPTIONS | ||
| 157 | #undef X | ||
| 158 | |||
| 159 | if (options->positional.size() > 0) { | ||
| 160 | fprintf(stderr, "%s: unexpected positional argument(s) for schedule subcommand\n\n", progname); | ||
| 161 | for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); | ||
| 162 | fprintf(stderr, schedule_help, progname); | ||
| 163 | exit(1); | ||
| 164 | } | ||
| 165 | |||
| 166 | if (!options->kv1_file_path) | ||
| 167 | options->kv1_file_path = "-"; | ||
| 168 | if (!options->output_file_path) | ||
| 169 | options->output_file_path = "-"; | ||
| 170 | if (options->kv1_file_path == ""sv) { | ||
| 171 | fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); | ||
| 172 | fprintf(stderr, schedule_help, progname); | ||
| 173 | exit(1); | ||
| 174 | } | ||
| 175 | if (options->output_file_path == ""sv) { | ||
| 176 | fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); | ||
| 177 | fprintf(stderr, schedule_help, progname); | ||
| 178 | exit(1); | ||
| 179 | } | ||
| 180 | if (!options->line_planning_number || options->line_planning_number == ""sv) { | ||
| 181 | fprintf(stderr, "%s: line planning number must be provided\n\n", progname); | ||
| 182 | fprintf(stderr, schedule_help, progname); | ||
| 183 | exit(1); | ||
| 184 | } | ||
| 185 | } | ||
| 186 | |||
| 187 | void journeysValidateOptions(const char *progname, Options *options) { | ||
| 188 | #define X(name, argument, long_, short_) \ | ||
| 189 | if (#name != "kv1_file_path"sv && #name != "help"sv \ | ||
| 190 | && #name != "line_planning_number"sv && #name != "output_file_path"sv \ | ||
| 191 | && #name != "begin_stop_code"sv && #name != "end_stop_code"sv) \ | ||
| 192 | if (options->name) { \ | ||
| 193 | if (long_) { \ | ||
| 194 | if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeys subcommand\n\n", progname, static_cast<const char *>(long_), short_); \ | ||
| 195 | else fprintf(stderr, "%s: unexpected flag --%s for journeys subcommand\n\n", progname, static_cast<const char *>(long_)); \ | ||
| 196 | } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeys subcommand\n\n", progname, short_); \ | ||
| 197 | fprintf(stderr, journeys_help, progname); \ | ||
| 198 | exit(1); \ | ||
| 199 | } | ||
| 200 | LONG_OPTIONS | ||
| 201 | SHORT_OPTIONS | ||
| 202 | #undef X | ||
| 203 | |||
| 204 | if (options->positional.size() > 0) { | ||
| 205 | fprintf(stderr, "%s: unexpected positional argument(s) for journeys subcommand\n\n", progname); | ||
| 206 | for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); | ||
| 207 | fprintf(stderr, journeys_help, progname); | ||
| 208 | exit(1); | ||
| 209 | } | ||
| 210 | |||
| 211 | if (!options->kv1_file_path) | ||
| 212 | options->kv1_file_path = "-"; | ||
| 213 | if (!options->output_file_path) | ||
| 214 | options->output_file_path = "-"; | ||
| 215 | if (options->kv1_file_path == ""sv) { | ||
| 216 | fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); | ||
| 217 | fprintf(stderr, journeys_help, progname); | ||
| 218 | exit(1); | ||
| 219 | } | ||
| 220 | if (options->output_file_path == ""sv) { | ||
| 221 | fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); | ||
| 222 | fprintf(stderr, journeys_help, progname); | ||
| 223 | exit(1); | ||
| 224 | } | ||
| 225 | if (!options->line_planning_number || options->line_planning_number == ""sv) { | ||
| 226 | fprintf(stderr, "%s: line planning number must be provided\n\n", progname); | ||
| 227 | fprintf(stderr, journeys_help, progname); | ||
| 228 | exit(1); | ||
| 229 | } | ||
| 230 | if (!options->begin_stop_code || options->begin_stop_code == ""sv) { | ||
| 231 | fprintf(stderr, "%s: start user stop code must be provided\n\n", progname); | ||
| 232 | fprintf(stderr, journeys_help, progname); | ||
| 233 | exit(1); | ||
| 234 | } | ||
| 235 | if (!options->end_stop_code || options->end_stop_code == ""sv) { | ||
| 236 | fprintf(stderr, "%s: end user stop code must be provided\n\n", progname); | ||
| 237 | fprintf(stderr, journeys_help, progname); | ||
| 238 | exit(1); | ||
| 239 | } | ||
| 240 | if (!std::string_view(options->begin_stop_code).starts_with("star:") | ||
| 241 | && !std::string_view(options->begin_stop_code).starts_with("stop:")) { | ||
| 242 | fprintf(stderr, "%s: begin user stop code must be prefixed with star:/stop:\n\n", progname); | ||
| 243 | fprintf(stderr, journeys_help, progname); | ||
| 244 | exit(1); | ||
| 245 | } | ||
| 246 | if (!std::string_view(options->end_stop_code).starts_with("star:") | ||
| 247 | && !std::string_view(options->end_stop_code).starts_with("stop:")) { | ||
| 248 | fprintf(stderr, "%s: end user stop code must be prefixed with star:/stop:\n\n", progname); | ||
| 249 | fprintf(stderr, journeys_help, progname); | ||
| 250 | exit(1); | ||
| 251 | } | ||
| 252 | } | ||
| 253 | |||
| 254 | void journeyInfoValidateOptions(const char *progname, Options *options) { | ||
| 255 | #define X(name, argument, long_, short_) \ | ||
| 256 | if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \ | ||
| 257 | && #name != "journey_number"sv && #name != "help"sv) \ | ||
| 258 | if (options->name) { \ | ||
| 259 | if (long_) { \ | ||
| 260 | if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for journeyinfo subcommand\n\n", progname, static_cast<const char *>(long_), short_); \ | ||
| 261 | else fprintf(stderr, "%s: unexpected flag --%s for journeyinfo subcommand\n\n", progname, static_cast<const char *>(long_)); \ | ||
| 262 | } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for journeyinfo subcommand\n\n", progname, short_); \ | ||
| 263 | fprintf(stderr, journeyinfo_help, progname); \ | ||
| 264 | exit(1); \ | ||
| 265 | } | ||
| 266 | LONG_OPTIONS | ||
| 267 | SHORT_OPTIONS | ||
| 268 | #undef X | ||
| 269 | |||
| 270 | if (options->positional.size() > 0) { | ||
| 271 | fprintf(stderr, "%s: unexpected positional argument(s) for journeyinfo subcommand\n\n", progname); | ||
| 272 | for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); | ||
| 273 | fprintf(stderr, journeyinfo_help, progname); | ||
| 274 | exit(1); | ||
| 275 | } | ||
| 276 | |||
| 277 | if (!options->kv1_file_path) | ||
| 278 | options->kv1_file_path = "-"; | ||
| 279 | if (options->kv1_file_path == ""sv) { | ||
| 280 | fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); | ||
| 281 | fprintf(stderr, journeyinfo_help, progname); | ||
| 282 | exit(1); | ||
| 283 | } | ||
| 284 | if (!options->journey_number || options->journey_number == ""sv) { | ||
| 285 | fprintf(stderr, "%s: journey number must be provided\n\n", progname); | ||
| 286 | fprintf(stderr, journeyinfo_help, progname); | ||
| 287 | exit(1); | ||
| 288 | } | ||
| 289 | if (!options->line_planning_number || options->line_planning_number == ""sv) { | ||
| 290 | fprintf(stderr, "%s: line planning number must be provided\n\n", progname); | ||
| 291 | fprintf(stderr, journeyinfo_help, progname); | ||
| 292 | exit(1); | ||
| 293 | } | ||
| 294 | } | ||
| 295 | |||
| 296 | void jopaRouteValidateOptions(const char *progname, Options *options) { | ||
| 297 | #define X(name, argument, long_, short_) \ | ||
| 298 | if (#name != "kv1_file_path"sv && #name != "line_planning_number"sv \ | ||
| 299 | && #name != "journey_pattern_code"sv && #name != "help"sv && #name != "output_file_path"sv) \ | ||
| 300 | if (options->name) { \ | ||
| 301 | if (long_) { \ | ||
| 302 | if (short_) fprintf(stderr, "%s: unexpected flag --%s (-%c) for joparoute subcommand\n\n", progname, static_cast<const char *>(long_), short_); \ | ||
| 303 | else fprintf(stderr, "%s: unexpected flag --%s for joparoute subcommand\n\n", progname, static_cast<const char *>(long_)); \ | ||
| 304 | } else if (short_) fprintf(stderr, "%s: unexpected flag -%c for joparoute subcommand\n\n", progname, short_); \ | ||
| 305 | fprintf(stderr, joparoute_help, progname); \ | ||
| 306 | exit(1); \ | ||
| 307 | } | ||
| 308 | LONG_OPTIONS | ||
| 309 | SHORT_OPTIONS | ||
| 310 | #undef X | ||
| 311 | |||
| 312 | if (options->positional.size() > 0) { | ||
| 313 | fprintf(stderr, "%s: unexpected positional argument(s) for joparoute subcommand\n\n", progname); | ||
| 314 | for (auto pos : options->positional) fprintf(stderr, "opt: %s\n", pos); | ||
| 315 | fprintf(stderr, joparoute_help, progname); | ||
| 316 | exit(1); | ||
| 317 | } | ||
| 318 | |||
| 319 | if (!options->kv1_file_path) | ||
| 320 | options->kv1_file_path = "-"; | ||
| 321 | if (!options->output_file_path) | ||
| 322 | options->output_file_path = "-"; | ||
| 323 | if (options->kv1_file_path == ""sv) { | ||
| 324 | fprintf(stderr, "%s: KV1 file path cannot be empty\n\n", progname); | ||
| 325 | fprintf(stderr, joparoute_help, progname); | ||
| 326 | exit(1); | ||
| 327 | } | ||
| 328 | if (options->output_file_path == ""sv) { | ||
| 329 | fprintf(stderr, "%s: output file path cannot be empty\n\n", progname); | ||
| 330 | fprintf(stderr, joparoute_help, progname); | ||
| 331 | exit(1); | ||
| 332 | } | ||
| 333 | if (!options->journey_pattern_code || options->journey_pattern_code == ""sv) { | ||
| 334 | fprintf(stderr, "%s: journey pattern code must be provided\n\n", progname); | ||
| 335 | fprintf(stderr, joparoute_help, progname); | ||
| 336 | exit(1); | ||
| 337 | } | ||
| 338 | if (!options->line_planning_number || options->line_planning_number == ""sv) { | ||
| 339 | fprintf(stderr, "%s: line planning number must be provided\n\n", progname); | ||
| 340 | fprintf(stderr, joparoute_help, progname); | ||
| 341 | exit(1); | ||
| 342 | } | ||
| 343 | } | ||
| 344 | |||
| 345 | struct ShortFlag { | ||
| 346 | int has_arg; | ||
| 347 | int c; | ||
| 348 | }; | ||
| 349 | |||
| 350 | template<ShortFlag ...flags> | ||
| 351 | const std::string mkargarr = | ||
| 352 | (std::string() | ||
| 353 | + ... | ||
| 354 | + (flags.c == 0 | ||
| 355 | ? "" | ||
| 356 | : std::string((const char[]){ flags.c, '\0' }) | ||
| 357 | + (flags.has_arg == required_argument | ||
| 358 | ? ":" | ||
| 359 | : flags.has_arg == optional_argument | ||
| 360 | ? "::" | ||
| 361 | : ""))); | ||
| 362 | |||
| 363 | #define X(name, has_arg, long_, short_) ShortFlag(has_arg, short_), | ||
| 364 | const std::string argarr = mkargarr<SHORT_OPTIONS LONG_OPTIONS ShortFlag(no_argument, 0)>; | ||
| 365 | #undef X | ||
| 366 | |||
| 367 | Options parseOptions(int argc, char *argv[]) { | ||
| 368 | const char *progname = argv[0]; | ||
| 369 | |||
| 370 | // Struct with options for augmentkv6. | ||
| 371 | Options options; | ||
| 372 | |||
| 373 | static option long_options[] = { | ||
| 374 | #define X(name, argument, long_, short_) { long_, argument, nullptr, short_ }, | ||
| 375 | LONG_OPTIONS | ||
| 376 | #undef X | ||
| 377 | { 0 }, | ||
| 378 | }; | ||
| 379 | |||
| 380 | int c; | ||
| 381 | int option_index = 0; | ||
| 382 | bool error = false; | ||
| 383 | while ((c = getopt_long(argc, argv, argarr.c_str(), long_options, &option_index)) != -1) { | ||
| 384 | // If a long option was used, c corresponds with val. We have val = 0 for | ||
| 385 | // options which have no short alternative, so checking for c = 0 gives us | ||
| 386 | // whether a long option with no short alternative was used. | ||
| 387 | // Below, we check for c = 'h', which corresponds with the long option | ||
| 388 | // '--help', for which val = 'h'. | ||
| 389 | if (c == 0) { | ||
| 390 | const char *name = long_options[option_index].name; | ||
| 391 | #define X(opt_name, opt_has_arg, opt_long, opt_short) \ | ||
| 392 | if (name == opt_long ## sv) { options.opt_name = optarg; continue; } | ||
| 393 | LONG_OPTIONS | ||
| 394 | #undef X | ||
| 395 | error = true; | ||
| 396 | } | ||
| 397 | #define X(opt_name, opt_has_arg, opt_long, opt_short) \ | ||
| 398 | if (c == opt_short) { options.opt_name = optarg ? optarg : opt_set; continue; } | ||
| 399 | LONG_OPTIONS | ||
| 400 | SHORT_OPTIONS | ||
| 401 | #undef X | ||
| 402 | error = true; | ||
| 403 | } | ||
| 404 | |||
| 405 | if (optind < argc) | ||
| 406 | options.subcommand = argv[optind++]; | ||
| 407 | while (optind < argc) | ||
| 408 | options.positional.push_back(argv[optind++]); | ||
| 409 | |||
| 410 | if (options.subcommand | ||
| 411 | && options.subcommand != "schedule"sv | ||
| 412 | && options.subcommand != "joparoute"sv | ||
| 413 | && options.subcommand != "journeyinfo"sv | ||
| 414 | && options.subcommand != "journeyroute"sv | ||
| 415 | && options.subcommand != "journeys"sv) { | ||
| 416 | fprintf(stderr, "%s: unknown subcommand '%s'\n\n", progname, options.subcommand); | ||
| 417 | fprintf(stderr, help, progname); | ||
| 418 | exit(1); | ||
| 419 | } | ||
| 420 | if (options.subcommand && error) { | ||
| 421 | fputc('\n', stderr); | ||
| 422 | if (options.subcommand == "joparoute"sv) fprintf(stderr, joparoute_help, progname); | ||
| 423 | if (options.subcommand == "journeyinfo"sv) fprintf(stderr, journeyinfo_help, progname); | ||
| 424 | if (options.subcommand == "journeyroute"sv) fprintf(stderr, journeyroute_help, progname); | ||
| 425 | if (options.subcommand == "journeys"sv) fprintf(stderr, journeys_help, progname); | ||
| 426 | if (options.subcommand == "schedule"sv) fprintf(stderr, schedule_help, progname); | ||
| 427 | exit(1); | ||
| 428 | } | ||
| 429 | if (error || !options.subcommand) { | ||
| 430 | if (!options.subcommand) fprintf(stderr, "%s: no subcommand provided\n", progname); | ||
| 431 | fputc('\n', stderr); | ||
| 432 | fprintf(stderr, help, progname); | ||
| 433 | exit(1); | ||
| 434 | } | ||
| 435 | if (options.help) { | ||
| 436 | if (options.subcommand == "joparoute"sv) fprintf(stderr, joparoute_help, progname); | ||
| 437 | if (options.subcommand == "journeyinfo"sv) fprintf(stderr, journeyinfo_help, progname); | ||
| 438 | if (options.subcommand == "journeyroute"sv) fprintf(stderr, journeyroute_help, progname); | ||
| 439 | if (options.subcommand == "journeys"sv) fprintf(stderr, journeys_help, progname); | ||
| 440 | if (options.subcommand == "schedule"sv) fprintf(stderr, schedule_help, progname); | ||
| 441 | exit(0); | ||
| 442 | } | ||
| 443 | |||
| 444 | if (options.subcommand == "joparoute"sv) | ||
| 445 | jopaRouteValidateOptions(progname, &options); | ||
| 446 | if (options.subcommand == "journeyinfo"sv) | ||
| 447 | journeyInfoValidateOptions(progname, &options); | ||
| 448 | if (options.subcommand == "journeyroute"sv) | ||
| 449 | journeyRouteValidateOptions(progname, &options); | ||
| 450 | if (options.subcommand == "journeys"sv) | ||
| 451 | journeysValidateOptions(progname, &options); | ||
| 452 | if (options.subcommand == "schedule"sv) | ||
| 453 | scheduleValidateOptions(progname, &options); | ||
| 454 | |||
| 455 | return options; | ||
| 456 | } | ||
diff --git a/src/querykv1/cliopts.hpp b/src/querykv1/cliopts.hpp new file mode 100644 index 0000000..df8630e --- /dev/null +++ b/src/querykv1/cliopts.hpp | |||
| @@ -0,0 +1,35 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_CLIOPTS_HPP | ||
| 4 | #define OEUF_QUERYKV1_CLIOPTS_HPP | ||
| 5 | |||
| 6 | #include <vector> | ||
| 7 | |||
// X-macro tables describing the command line options. Every row has the form
//   X(member name in Options, getopt argument requirement, long name, short name)
// Users define X, expand the tables, and undefine X again.

// Options that have a long name (and optionally a short one).
#define LONG_OPTIONS \
/* name                   req/opt/no arg      long       short */ \
  X(kv1_file_path,        required_argument,  "kv1",     0  ) \
  X(line_planning_number, required_argument,  "line",    0  ) \
  X(journey_number,       required_argument,  "journey", 0  ) \
  X(journey_pattern_code, required_argument,  "jopa",    0  ) \
  X(begin_stop_code,      required_argument,  "begin",   0  ) \
  X(end_stop_code,        required_argument,  "end",     0  ) \
  X(help,                 no_argument,        "help",    'h')

// Options that only have a short name.
#define SHORT_OPTIONS \
  X(output_file_path,     required_argument,  nullptr,   'o')

// Parsed command line. Each option in the tables above becomes one
// `const char *` member that is nullptr until the option has been seen.
struct Options {
  // Name of the selected subcommand; nullptr when none was given.
  const char *subcommand = nullptr;
  // Remaining non-option arguments.
  std::vector<const char *> positional;
#define X(field, has_arg, long_name, short_name) const char *field = nullptr;
  LONG_OPTIONS
  SHORT_OPTIONS
#undef X
};
| 29 | |||
| 30 | extern const char *opt_set; | ||
| 31 | extern const char *opt_unset; | ||
| 32 | |||
| 33 | Options parseOptions(int argc, char *argv[]); | ||
| 34 | |||
| 35 | #endif // OEUF_QUERYKV1_CLIOPTS_HPP | ||
diff --git a/src/querykv1/daterange.cpp b/src/querykv1/daterange.cpp new file mode 100644 index 0000000..5ce42bf --- /dev/null +++ b/src/querykv1/daterange.cpp | |||
| @@ -0,0 +1,91 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include "daterange.hpp" | ||
| 4 | |||
| 5 | static std::chrono::year_month_day nextDay(std::chrono::year_month_day ymd) { | ||
| 6 | return std::chrono::sys_days(ymd) + std::chrono::days(1); | ||
| 7 | } | ||
| 8 | |||
| 9 | // DateRange expresses the date range [from, thru]. | ||
| 10 | DateRange::Iterator &DateRange::Iterator::operator++() { | ||
| 11 | ymd_ = nextDay(ymd_); | ||
| 12 | return *this; | ||
| 13 | } | ||
| 14 | |||
| 15 | std::chrono::year_month_day DateRange::Iterator::operator*() const { | ||
| 16 | return ymd_; | ||
| 17 | } | ||
| 18 | |||
| 19 | std::chrono::year_month_day DateRange::Iterator::ymd() const { | ||
| 20 | return ymd_; | ||
| 21 | } | ||
| 22 | |||
| 23 | DateRange::Iterator::Iterator(std::chrono::year_month_day ymd) : ymd_(ymd) {} | ||
| 24 | |||
| 25 | DateRange::DateRange(std::chrono::year_month_day from, std::chrono::year_month_day thru) | ||
| 26 | : from_(from), thru_(thru) | ||
| 27 | {} | ||
| 28 | |||
| 29 | DateRange::Iterator DateRange::begin() const { | ||
| 30 | return DateRange::Iterator(from_); | ||
| 31 | } | ||
| 32 | |||
| 33 | DateRange::Iterator DateRange::end() const { | ||
| 34 | return DateRange::Iterator(nextDay(thru_)); | ||
| 35 | } | ||
| 36 | |||
| 37 | bool DateRange::valid() const { | ||
| 38 | return from_ <= thru_; | ||
| 39 | } | ||
| 40 | |||
| 41 | std::chrono::year_month_day DateRange::from() const { | ||
| 42 | return from_; | ||
| 43 | } | ||
| 44 | |||
| 45 | std::chrono::year_month_day DateRange::thru() const { | ||
| 46 | return thru_; | ||
| 47 | } | ||
| 48 | |||
| 49 | bool operator==(const DateRange::Iterator a, const DateRange::Iterator b) { | ||
| 50 | return *a == *b; | ||
| 51 | } | ||
| 52 | |||
| 53 | DateRangeSeq::DateRangeSeq(std::initializer_list<DateRange> ranges) | ||
| 54 | : DateRangeSeq(ranges.begin(), ranges.end()) | ||
| 55 | {} | ||
| 56 | |||
| 57 | DateRangeSeq DateRangeSeq::clampFrom(std::chrono::year_month_day from) const { | ||
| 58 | std::vector<DateRange> new_ranges; | ||
| 59 | new_ranges.reserve(ranges_.size()); | ||
| 60 | for (const DateRange range : ranges_) { | ||
| 61 | if (range.from() < from) { | ||
| 62 | if (range.thru() < from) | ||
| 63 | continue; | ||
| 64 | new_ranges.emplace_back(from, range.thru()); | ||
| 65 | } | ||
| 66 | new_ranges.push_back(range); | ||
| 67 | } | ||
| 68 | return DateRangeSeq(new_ranges.begin(), new_ranges.end()); | ||
| 69 | } | ||
| 70 | |||
| 71 | DateRangeSeq DateRangeSeq::clampThru(std::chrono::year_month_day thru) const { | ||
| 72 | std::vector<DateRange> new_ranges; | ||
| 73 | new_ranges.reserve(ranges_.size()); | ||
| 74 | for (const DateRange range : ranges_) { | ||
| 75 | if (range.thru() > thru) { | ||
| 76 | if (range.from() > thru) | ||
| 77 | continue; | ||
| 78 | new_ranges.emplace_back(range.from(), thru); | ||
| 79 | } | ||
| 80 | new_ranges.push_back(range); | ||
| 81 | } | ||
| 82 | return DateRangeSeq(new_ranges.begin(), new_ranges.end()); | ||
| 83 | } | ||
| 84 | |||
| 85 | std::vector<DateRange>::const_iterator DateRangeSeq::begin() const { | ||
| 86 | return ranges_.begin(); | ||
| 87 | } | ||
| 88 | |||
| 89 | std::vector<DateRange>::const_iterator DateRangeSeq::end() const { | ||
| 90 | return ranges_.end(); | ||
| 91 | } | ||
diff --git a/src/querykv1/daterange.hpp b/src/querykv1/daterange.hpp new file mode 100644 index 0000000..e34c39c --- /dev/null +++ b/src/querykv1/daterange.hpp | |||
| @@ -0,0 +1,118 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_DATERANGE_HPP | ||
| 4 | #define OEUF_QUERYKV1_DATERANGE_HPP | ||
| 5 | |||
#include <algorithm>
#include <cassert>
#include <chrono>
#include <concepts>
#include <cstddef>
#include <initializer_list>
#include <iterator>
#include <utility>
#include <vector>
| 12 | |||
| 13 | // DateRange expresses the date range [from, thru]. | ||
| 14 | class DateRange { | ||
| 15 | public: | ||
| 16 | class Iterator { | ||
| 17 | friend class DateRange; | ||
| 18 | |||
| 19 | public: | ||
| 20 | Iterator &operator++(); | ||
| 21 | |||
| 22 | std::chrono::year_month_day operator*() const; | ||
| 23 | std::chrono::year_month_day ymd() const; | ||
| 24 | |||
| 25 | private: | ||
| 26 | explicit Iterator(std::chrono::year_month_day ymd); | ||
| 27 | |||
| 28 | std::chrono::year_month_day ymd_; | ||
| 29 | }; | ||
| 30 | |||
| 31 | explicit DateRange(std::chrono::year_month_day from, std::chrono::year_month_day thru); | ||
| 32 | |||
| 33 | Iterator begin() const; | ||
| 34 | Iterator end() const; | ||
| 35 | bool valid() const; | ||
| 36 | std::chrono::year_month_day from() const; | ||
| 37 | std::chrono::year_month_day thru() const; | ||
| 38 | |||
| 39 | private: | ||
| 40 | std::chrono::year_month_day from_; | ||
| 41 | std::chrono::year_month_day thru_; | ||
| 42 | }; | ||
| 43 | |||
| 44 | bool operator==(const DateRange::Iterator a, const DateRange::Iterator b); | ||
| 45 | |||
| 46 | template<typename Tp, typename T> | ||
| 47 | concept DerefsTo = requires(Tp p) { | ||
| 48 | { *p } -> std::convertible_to<T>; | ||
| 49 | }; | ||
| 50 | |||
| 51 | class DateRangeSeq { | ||
| 52 | // The way LE and GE are ordered makes a difference for how the sorting | ||
| 53 | // (insertion based on lower_bound) works. Do not carelessly reorder this. | ||
| 54 | enum LeGe { | ||
| 55 | GE, // >= | ||
| 56 | LE, // <= | ||
| 57 | }; | ||
| 58 | |||
| 59 | std::vector<DateRange> ranges_; | ||
| 60 | |||
| 61 | public: | ||
| 62 | template<std::input_iterator InputIt> | ||
| 63 | requires DerefsTo<InputIt, DateRange> | ||
| 64 | explicit DateRangeSeq(InputIt begin, InputIt end) { | ||
| 65 | // We convert every inclusive date range [x, y] into (x, >=) and (y, <=) | ||
| 66 | // and put these into a list, using binary search to make sure that these | ||
| 67 | // stay ordered. We then reduce this list, removing tautological | ||
| 68 | // predicates, giving us a final list of ranges that do not overlap. | ||
| 69 | |||
| 70 | std::vector<std::pair<std::chrono::year_month_day, LeGe>> preds; | ||
| 71 | |||
| 72 | size_t n = 0; | ||
| 73 | for (auto it = begin; it != end; it++) { | ||
| 74 | auto &range = *it; | ||
| 75 | if (!range.valid()) continue; | ||
| 76 | |||
| 77 | auto a = std::make_pair(range.from(), GE); | ||
| 78 | auto b = std::make_pair(range.thru(), LE); | ||
| 79 | preds.insert(std::lower_bound(preds.begin(), preds.end(), a), a); | ||
| 80 | preds.insert(std::lower_bound(preds.begin(), preds.end(), b), b); | ||
| 81 | |||
| 82 | n++; | ||
| 83 | } | ||
| 84 | |||
| 85 | if (preds.empty()) | ||
| 86 | return; | ||
| 87 | |||
| 88 | assert(preds.size() >= 2); | ||
| 89 | assert(preds.front().second == GE); | ||
| 90 | assert(preds.back().second == LE); | ||
| 91 | |||
| 92 | std::chrono::year_month_day begin_ymd = preds[0].first; | ||
| 93 | for (size_t i = 1; i < preds.size(); i++) { | ||
| 94 | if (preds[i].second == LE && (i + 1 == preds.size() || preds[i + 1].second == GE)) { | ||
| 95 | std::chrono::year_month_day end_ymd = preds[i].first; | ||
| 96 | if (!ranges_.empty() && ranges_.back().thru() == begin_ymd) | ||
| 97 | ranges_.back() = DateRange(ranges_.back().from(), end_ymd); | ||
| 98 | else | ||
| 99 | ranges_.push_back(DateRange(begin_ymd, end_ymd)); | ||
| 100 | if (i + 1 != preds.size()) { | ||
| 101 | begin_ymd = preds[i + 1].first; | ||
| 102 | i++; | ||
| 103 | } | ||
| 104 | } | ||
| 105 | } | ||
| 106 | } | ||
| 107 | |||
| 108 | explicit DateRangeSeq(std::initializer_list<DateRange> ranges); | ||
| 109 | |||
| 110 | DateRangeSeq clampFrom(std::chrono::year_month_day from) const; | ||
| 111 | DateRangeSeq clampThru(std::chrono::year_month_day thru) const; | ||
| 112 | |||
| 113 | public: | ||
| 114 | std::vector<DateRange>::const_iterator begin() const; | ||
| 115 | std::vector<DateRange>::const_iterator end() const; | ||
| 116 | }; | ||
| 117 | |||
| 118 | #endif // OEUF_QUERYKV1_DATERANGE_HPP | ||
diff --git a/src/querykv1/grammar.abnf b/src/querykv1/grammar.abnf new file mode 100644 index 0000000..1c93760 --- /dev/null +++ b/src/querykv1/grammar.abnf | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | ; This grammar does *not* allow fields to contain LF, unless the entire content | ||
| 2 | ; of the field is quoted. The file is simply rejected otherwise. | ||
| 3 | ; I took the liberty to take some inspiration from the somewhat similar IETF RFC 4180. | ||
| 4 | |||
| 5 | document = [header NEWLINE] (comment / record / empty-line) *(NEWLINE (comment / record / empty-line)) [NEWLINE] / header | ||
| 6 | |||
| 7 | header = OPENBRACK *NOTCRLF | ||
| 8 | comment = SEMICOLON *NOTCRLF | ||
| 9 | |||
| 10 | empty-line = *WHITESPACE | ||
| 11 | |||
| 12 | record = field *(PIPE field) | ||
| 13 | field = *WHITESPACE field-data *WHITESPACE | ||
| 14 | field-data = escaped / unescaped | ||
| 15 | |||
| 16 | ; Unescaped fields are also allowed to contain double quotes, | ||
| 17 | ; they are just not interpreted in any special way. | ||
| 18 | escaped = DQUOTE *(TEXTDATA / WHITESPACE / NEWLINE / PIPE / 2DQUOTE) DQUOTE | ||
| 19 | unescaped = [TEXTDATA *(*WHITESPACE (TEXTDATA / DQUOTE))] | ||
| 20 | |||
| 21 | HTAB = %x09 ; <horizontal tab, "\t"> | ||
| 22 | LF = %x0A ; <line feed, "\n"> | ||
| 23 | VTAB = %x0B ; <vertical tab, "\v"> | ||
| 24 | FF = %x0C ; <form feed, "\f"> | ||
| 25 | CR = %x0D ; <carriage return, "\r"> | ||
| 26 | SPACE = %x20 ; <space, " "> | ||
| 27 | DQUOTE = %x22 ; " | ||
| 28 | SEMICOLON = %x3B ; ; | ||
| 29 | OPENBRACK = %x5B ; [ | ||
| 30 | PIPE = %x7C ; | | ||
| 31 | |||
| 32 | ; All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and OPENBRACK (%x5B) | ||
| 33 | ; Semicolon is included, as comments are only defined as 'lines starting with a semicolon'. | ||
| 34 | ; So it should be fine if a semicolon is part of a field, the rest of the line would not | ||
| 35 | ; be interpreted as a comment in that case. | ||
| 36 | TEXTDATA = %x00-08 / %x0E-1F / %x21 / %x23-5A / %x5C-7B / %x7D-10FFFF | ||
| 37 | |||
| 38 | ; Not including LF here even though TMI8/KV1 does not officially consider it | ||
| 39 | ; a newline, as newlines are defined as 'CR optionally followed by LF' | ||
| 40 | WHITESPACE = SPACE / FF / HTAB / VTAB | ||
| 41 | |||
| 42 | ; All codepoints excluding CR and LF | ||
| 43 | NOTCRLF = %x00-09 / %x0B-0C / %x0E-10FFFF | ||
| 44 | NEWLINE = CR [LF] | ||
diff --git a/src/querykv1/grammar.ebnf b/src/querykv1/grammar.ebnf new file mode 100644 index 0000000..94f8cde --- /dev/null +++ b/src/querykv1/grammar.ebnf | |||
| @@ -0,0 +1,47 @@ | |||
| 1 | /* This grammar does allow fields to contain stray LFs, not after any specific | ||
| 2 | * CR. I took the liberty to take some inspiration from the somewhat similar | ||
| 3 | * IETF RFC 4180. | ||
| 4 | */ | ||
| 5 | document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header | ||
| 6 | |||
| 7 | header ::= OPENBRACK NOTCR* | ||
| 8 | comment ::= SEMICOLON NOTCR* | ||
| 9 | |||
| 10 | empty-line ::= WHITESPACE* | ||
| 11 | |||
| 12 | record ::= field (PIPE field)* | ||
| 13 | field ::= WHITESPACE* field-data WHITESPACE* | ||
| 14 | field-data ::= DQUOTE escaped DQUOTE | unescaped | ||
| 15 | |||
| 16 | /* Unescaped fields are also allowed to contain double quotes, they are just | ||
| 17 | * not interpreted in any special way. | ||
| 18 | */ | ||
| 19 | escaped ::= (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* | ||
| 20 | unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)? | ||
| 21 | |||
| 22 | HTAB ::= #x09 /* <horizontal tab, "\t"> */ | ||
| 23 | LF ::= #x0A /* <line feed, "\n"> */ | ||
| 24 | VTAB ::= #x0B /* <vertical tab, "\v"> */ | ||
| 25 | FF ::= #x0C /* <form feed, "\f"> */ | ||
| 26 | CR ::= #x0D /* <carriage return, "\r"> */ | ||
| 27 | SPACE ::= #x20 /* <space, " "> */ | ||
| 28 | DQUOTE ::= #x22 /* " */ | ||
| 29 | SEMICOLON ::= #x3B /* ; */ | ||
| 30 | OPENBRACK ::= #x5B /* [ */ | ||
| 31 | PIPE ::= #x7C /* | */ | ||
| 32 | |||
| 33 | /* All codepoints, except CR, LF, SPACE, FF, HTAB, VTAB, PIPE, DQUOTE and OPENBRACK (#x5B). | ||
| 34 | * Semicolon is included, as comments are only defined as 'lines starting with | ||
| 35 | * a semicolon'. So it should be fine if a semicolon is part of a field, the | ||
| 36 | * rest of the line would not be interpreted as a comment in that case. | ||
| 37 | */ | ||
| 38 | TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF] | ||
| 39 | |||
| 40 | /* Including LF here as TMI8/KV1 does not consider it a newline, | ||
| 41 | * as newlines are defined as 'CR optionally followed by LF' | ||
| 42 | */ | ||
| 43 | WHITESPACE ::= SPACE | LF | FF | HTAB | VTAB | ||
| 44 | |||
| 45 | /* All codepoints excluding CR (LF is allowed, as it is not a newline here) */ | ||
| 46 | NOTCR ::= [#x00-#x0C#x0E-#x10FFFF] | ||
| 47 | NEWLINE ::= CR LF? | ||
diff --git a/src/querykv1/grammar.ebnf.bak b/src/querykv1/grammar.ebnf.bak new file mode 100644 index 0000000..b5acbf5 --- /dev/null +++ b/src/querykv1/grammar.ebnf.bak | |||
| @@ -0,0 +1,23 @@ | |||
| 1 | document ::= (header NEWLINE)? (comment | record | empty-line) (NEWLINE (comment | record | empty-line))* NEWLINE? | header | ||
| 2 | header ::= OPENBRACK NOTCRLF* | ||
| 3 | comment ::= SEMICOLON NOTCRLF* | ||
| 4 | empty-line ::= WHITESPACE* | ||
| 5 | record ::= field (PIPE field)* | ||
| 6 | field ::= WHITESPACE* field-data WHITESPACE* | ||
| 7 | field-data ::= escaped | unescaped | ||
| 8 | escaped ::= DQUOTE (TEXTDATA | WHITESPACE | NEWLINE | PIPE | DQUOTE DQUOTE)* DQUOTE | ||
| 9 | unescaped ::= (TEXTDATA (WHITESPACE* (TEXTDATA | DQUOTE))*)? | ||
| 10 | HTAB ::= #x09 | ||
| 11 | LF ::= #x0A | ||
| 12 | VTAB ::= #x0B | ||
| 13 | FF ::= #x0C | ||
| 14 | CR ::= #x0D | ||
| 15 | SPACE ::= #x20 | ||
| 16 | DQUOTE ::= #x22 | ||
| 17 | SEMICOLON ::= #x3B | ||
| 18 | OPENBRACK ::= #x5B | ||
| 19 | PIPE ::= #x7C | ||
| 20 | WHITESPACE ::= SPACE | FF | HTAB | VTAB | ||
| 21 | NOTCRLF ::= [#x00-#x09#x0B-#x0C#x0E-#x10FFFF] | ||
| 22 | TEXTDATA ::= [#x00-#x08#x0E-#x1F#x21#x23-#x5A#x5C-#x7B#x7D-#x10FFFF] | ||
| 23 | NEWLINE ::= CR LF? | ||
diff --git a/src/querykv1/joparoute.cpp b/src/querykv1/joparoute.cpp new file mode 100644 index 0000000..94ed359 --- /dev/null +++ b/src/querykv1/joparoute.cpp | |||
| @@ -0,0 +1,102 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <cstdio> | ||
| 4 | #include <iostream> | ||
| 5 | #include <string_view> | ||
| 6 | |||
| 7 | #include "joparoute.hpp" | ||
| 8 | |||
| 9 | using namespace std::string_view_literals; | ||
| 10 | |||
| 11 | void jopaRoute(const Options &options, Kv1Records &records, Kv1Index &index) { | ||
| 12 | FILE *out = stdout; | ||
| 13 | if (options.output_file_path != "-"sv) | ||
| 14 | out = fopen(options.output_file_path, "wb"); | ||
| 15 | if (!out) { | ||
| 16 | fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); | ||
| 17 | exit(EXIT_FAILURE); | ||
| 18 | } | ||
| 19 | |||
| 20 | const std::string data_owner_code = "CXX"; | ||
| 21 | Kv1JourneyPattern::Key jopa_key( | ||
| 22 | // Of course it is bad to hardcode this, but we really have no time to make | ||
| 23 | // everything nice and dynamic. We're only working with CXX data anyway, | ||
| 24 | // and provide no support for the 'Schedules and Passing Times' KV1 | ||
| 25 | // variant. | ||
| 26 | data_owner_code, | ||
| 27 | options.line_planning_number, | ||
| 28 | options.journey_pattern_code); | ||
| 29 | |||
| 30 | const Kv1JourneyPattern *jopa = index.journey_patterns[jopa_key]; | ||
| 31 | if (!jopa) { | ||
| 32 | std::cerr << "Journey pattern not found" << std::endl; | ||
| 33 | return; | ||
| 34 | } | ||
| 35 | const Kv1Line *line = jopa->p_line; | ||
| 36 | |||
| 37 | struct Point { | ||
| 38 | bool is_stop = false; | ||
| 39 | const Kv1JourneyPatternTimingLink *jopatili = nullptr; | ||
| 40 | const Kv1Link *link = nullptr; | ||
| 41 | const Kv1Point *point = nullptr; | ||
| 42 | double distance_since_start_of_link = 0; | ||
| 43 | double distance_since_start_of_journey = 0; | ||
| 44 | }; | ||
| 45 | std::vector<Point> points; | ||
| 46 | |||
| 47 | for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { | ||
| 48 | const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; | ||
| 49 | if (jopatili->key.line_planning_number == jopa->key.line_planning_number | ||
| 50 | && jopatili->key.journey_pattern_code == jopa->key.journey_pattern_code) { | ||
| 51 | const Kv1Link::Key link_key(data_owner_code, jopatili->user_stop_code_begin, | ||
| 52 | jopatili->user_stop_code_end, line->transport_type); | ||
| 53 | const Kv1Link *link = index.links[link_key]; | ||
| 54 | const Kv1UserStopPoint::Key link_begin_key(data_owner_code, jopatili->user_stop_code_begin); | ||
| 55 | const Kv1UserStopPoint::Key link_end_key(data_owner_code, jopatili->user_stop_code_end); | ||
| 56 | const Kv1UserStopPoint *link_begin = index.user_stop_points[link_begin_key]; | ||
| 57 | const Kv1UserStopPoint *link_end = index.user_stop_points[link_end_key]; | ||
| 58 | |||
| 59 | points.emplace_back(true, jopatili, link, link_begin->p_point, 0); | ||
| 60 | |||
| 61 | for (size_t j = 0; j < records.point_on_links.size(); j++) { | ||
| 62 | Kv1PointOnLink *pool = &records.point_on_links[j]; | ||
| 63 | if (pool->key.user_stop_code_begin == jopatili->user_stop_code_begin | ||
| 64 | && pool->key.user_stop_code_end == jopatili->user_stop_code_end | ||
| 65 | && pool->key.transport_type == jopatili->p_line->transport_type) { | ||
| 66 | points.emplace_back(false, jopatili, link, pool->p_point, pool->distance_since_start_of_link); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | |||
| 70 | points.emplace_back(true, jopatili, link, link_end->p_point, link->distance); | ||
| 71 | } | ||
| 72 | } | ||
| 73 | |||
| 74 | std::sort(points.begin(), points.end(), [](Point &a, Point &b) { | ||
| 75 | if (a.jopatili->key.timing_link_order != b.jopatili->key.timing_link_order) | ||
| 76 | return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order; | ||
| 77 | return a.distance_since_start_of_link < b.distance_since_start_of_link; | ||
| 78 | }); | ||
| 79 | |||
| 80 | double distance_since_start_of_journey = 0; | ||
| 81 | for (size_t i = 0; i < points.size(); i++) { | ||
| 82 | Point *p = &points[i]; | ||
| 83 | if (i > 0) { | ||
| 84 | Point *prev = &points[i - 1]; | ||
| 85 | if (p->link != prev->link) { | ||
| 86 | distance_since_start_of_journey += prev->link->distance; | ||
| 87 | } | ||
| 88 | } | ||
| 89 | p->distance_since_start_of_journey = distance_since_start_of_journey + p->distance_since_start_of_link; | ||
| 90 | } | ||
| 91 | |||
| 92 | fputs("is_stop,link_usrstop_begin,link_usrstop_end,point_code,rd_x,rd_y,distance_since_start_of_link,distance_since_start_of_journey\n", out); | ||
| 93 | for (const auto &point : points) { | ||
| 94 | fprintf(out, "%s,%s,%s,%s,%f,%f,%f,%f\n", | ||
| 95 | point.is_stop ? "true" : "false", | ||
| 96 | point.jopatili->user_stop_code_begin.c_str(), point.jopatili->user_stop_code_end.c_str(), | ||
| 97 | point.point->key.point_code.c_str(), point.point->location_x_ew, point.point->location_y_ns, | ||
| 98 | point.distance_since_start_of_link, point.distance_since_start_of_journey); | ||
| 99 | } | ||
| 100 | |||
| 101 | if (options.output_file_path != "-"sv) fclose(out); | ||
| 102 | } | ||
diff --git a/src/querykv1/joparoute.hpp b/src/querykv1/joparoute.hpp new file mode 100644 index 0000000..ade94e8 --- /dev/null +++ b/src/querykv1/joparoute.hpp | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_JOPAROUTE_HPP | ||
| 4 | #define OEUF_QUERYKV1_JOPAROUTE_HPP | ||
| 5 | |||
| 6 | #include <tmi8/kv1_types.hpp> | ||
| 7 | #include <tmi8/kv1_index.hpp> | ||
| 8 | |||
| 9 | #include "cliopts.hpp" | ||
| 10 | |||
| 11 | void jopaRoute(const Options &options, Kv1Records &records, Kv1Index &index); | ||
| 12 | |||
| 13 | #endif // OEUF_QUERYKV1_JOPAROUTE_HPP | ||
diff --git a/src/querykv1/journeyinfo.cpp b/src/querykv1/journeyinfo.cpp new file mode 100644 index 0000000..bd29490 --- /dev/null +++ b/src/querykv1/journeyinfo.cpp | |||
| @@ -0,0 +1,64 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <iostream> | ||
| 4 | |||
| 5 | #include "journeyinfo.hpp" | ||
| 6 | |||
| 7 | void journeyInfo(const Options &options, Kv1Records &records, Kv1Index &index) { | ||
| 8 | std::cout << "Info for journey " << options.line_planning_number | ||
| 9 | << "/" << options.journey_number << std::endl; | ||
| 10 | |||
| 11 | std::unordered_map<std::string, const Kv1UserStopPoint *> usrstops; | ||
| 12 | for (size_t i = 0; i < records.user_stop_points.size(); i++) { | ||
| 13 | const Kv1UserStopPoint *usrstop = &records.user_stop_points[i]; | ||
| 14 | usrstops[usrstop->key.user_stop_code] = usrstop; | ||
| 15 | } | ||
| 16 | |||
| 17 | for (const auto &pujo : records.public_journeys) { | ||
| 18 | if (pujo.key.line_planning_number != options.line_planning_number | ||
| 19 | || std::to_string(pujo.key.journey_number) != options.journey_number) | ||
| 20 | continue; | ||
| 21 | |||
| 22 | std::vector<const Kv1JourneyPatternTimingLink *> timing_links; | ||
| 23 | for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { | ||
| 24 | const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; | ||
| 25 | if (jopatili->key.line_planning_number != options.line_planning_number | ||
| 26 | || jopatili->key.journey_pattern_code != pujo.journey_pattern_code) | ||
| 27 | continue; | ||
| 28 | timing_links.push_back(jopatili); | ||
| 29 | } | ||
| 30 | |||
| 31 | std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) -> bool { | ||
| 32 | return a->key.timing_link_order < b->key.timing_link_order; | ||
| 33 | }); | ||
| 34 | auto begin_stop = timing_links.front()->user_stop_code_begin; | ||
| 35 | auto end_stop = timing_links.back()->user_stop_code_end; | ||
| 36 | |||
| 37 | const auto *begin = usrstops[begin_stop]; | ||
| 38 | const auto *end = usrstops[end_stop]; | ||
| 39 | |||
| 40 | std::cout << " Journey pattern: " << pujo.key.line_planning_number | ||
| 41 | << "/" << pujo.journey_pattern_code << std::endl | ||
| 42 | << " Begin stop: " << begin_stop | ||
| 43 | << "; name: " << std::quoted(begin->name) | ||
| 44 | << "; town: " << std::quoted(begin->town) << std::endl | ||
| 45 | << " End stop: " << end_stop | ||
| 46 | << "; name: " << std::quoted(end->name) | ||
| 47 | << "; town: " << std::quoted(end->town) << std::endl; | ||
| 48 | |||
| 49 | const auto *begin_star = begin->p_user_stop_area; | ||
| 50 | const auto *end_star = end->p_user_stop_area; | ||
| 51 | if (begin_star) | ||
| 52 | std::cout << " Begin stop area: " << begin_star->key.user_stop_area_code | ||
| 53 | << "; name: " << std::quoted(begin_star->name) | ||
| 54 | << ", town: " << std::quoted(begin_star->town) | ||
| 55 | << std::endl; | ||
| 56 | if (end_star) | ||
| 57 | std::cout << " End stop area: " << end_star->key.user_stop_area_code | ||
| 58 | << "; name: " << std::quoted(end_star->name) | ||
| 59 | << ", town: " << std::quoted(end_star->town) | ||
| 60 | << std::endl; | ||
| 61 | |||
| 62 | break; | ||
| 63 | } | ||
| 64 | } | ||
diff --git a/src/querykv1/journeyinfo.hpp b/src/querykv1/journeyinfo.hpp new file mode 100644 index 0000000..2a2118d --- /dev/null +++ b/src/querykv1/journeyinfo.hpp | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_JOURNEYINFO_HPP | ||
| 4 | #define OEUF_QUERYKV1_JOURNEYINFO_HPP | ||
| 5 | |||
| 6 | #include <tmi8/kv1_types.hpp> | ||
| 7 | #include <tmi8/kv1_index.hpp> | ||
| 8 | |||
| 9 | #include "cliopts.hpp" | ||
| 10 | |||
| 11 | void journeyInfo(const Options &options, Kv1Records &records, Kv1Index &index); | ||
| 12 | |||
| 13 | #endif // OEUF_QUERYKV1_JOURNEYINFO_HPP | ||
diff --git a/src/querykv1/journeyroute.cpp b/src/querykv1/journeyroute.cpp new file mode 100644 index 0000000..013ea1c --- /dev/null +++ b/src/querykv1/journeyroute.cpp | |||
| @@ -0,0 +1,96 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <iostream> | ||
| 4 | #include <string_view> | ||
| 5 | |||
| 6 | #include "journeyroute.hpp" | ||
| 7 | |||
| 8 | using namespace std::string_view_literals; | ||
| 9 | |||
| 10 | void journeyRoute(const Options &options, Kv1Records &records, Kv1Index &index) { | ||
| 11 | FILE *out = stdout; | ||
| 12 | if (options.output_file_path != "-"sv) | ||
| 13 | out = fopen(options.output_file_path, "wb"); | ||
| 14 | if (!out) { | ||
| 15 | fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); | ||
| 16 | exit(EXIT_FAILURE); | ||
| 17 | } | ||
| 18 | |||
| 19 | for (auto &pujo : records.public_journeys) { | ||
| 20 | if (pujo.key.line_planning_number == options.line_planning_number && std::to_string(pujo.key.journey_number) == options.journey_number) { | ||
| 21 | fprintf(stderr, "Got PUJO %s/%s:\n", options.line_planning_number, options.journey_number); | ||
| 22 | fprintf(stderr, " Day type: %s\n", pujo.key.day_type.c_str()); | ||
| 23 | auto &pegr = *pujo.p_period_group; | ||
| 24 | fprintf(stderr, " PEGR Code: %s\n", pegr.key.period_group_code.c_str()); | ||
| 25 | fprintf(stderr, " PEGR Description: %s\n", pegr.description.c_str()); | ||
| 26 | fprintf(stderr, " SPECDAY Code: %s\n", pujo.key.specific_day_code.c_str()); | ||
| 27 | auto &timdemgrp = *pujo.p_time_demand_group; | ||
| 28 | |||
| 29 | for (auto &pegrval : records.period_group_validities) { | ||
| 30 | if (pegrval.key.period_group_code == pegr.key.period_group_code) { | ||
| 31 | fprintf(stderr, "Got PEGRVAL for PEGR %s\n", pegr.key.period_group_code.c_str()); | ||
| 32 | std::cerr << " Valid from: " << pegrval.key.valid_from << std::endl; | ||
| 33 | std::cerr << " Valid thru: " << pegrval.valid_thru << std::endl; | ||
| 34 | } | ||
| 35 | } | ||
| 36 | |||
| 37 | struct Point { | ||
| 38 | Kv1JourneyPatternTimingLink *jopatili = nullptr; | ||
| 39 | Kv1TimeDemandGroupRunTime *timdemrnt = nullptr; | ||
| 40 | double distance_since_start_of_link = 0; | ||
| 41 | double rd_x = 0; | ||
| 42 | double rd_y = 0; | ||
| 43 | double total_time_s = 0; | ||
| 44 | }; | ||
| 45 | std::vector<Point> points; | ||
| 46 | |||
| 47 | for (size_t i = 0; i < records.time_demand_group_run_times.size(); i++) { | ||
| 48 | Kv1TimeDemandGroupRunTime *timdemrnt = &records.time_demand_group_run_times[i]; | ||
| 49 | if (timdemrnt->key.line_planning_number == timdemgrp.key.line_planning_number | ||
| 50 | && timdemrnt->key.journey_pattern_code == timdemgrp.key.journey_pattern_code | ||
| 51 | && timdemrnt->key.time_demand_group_code == timdemgrp.key.time_demand_group_code) { | ||
| 52 | Kv1JourneyPatternTimingLink *jopatili = timdemrnt->p_journey_pattern_timing_link; | ||
| 53 | for (auto &pool : records.point_on_links) { | ||
| 54 | if (pool.key.user_stop_code_begin == timdemrnt->user_stop_code_begin | ||
| 55 | && pool.key.user_stop_code_end == timdemrnt->user_stop_code_end | ||
| 56 | && pool.key.transport_type == jopatili->p_line->transport_type) { | ||
| 57 | points.emplace_back( | ||
| 58 | jopatili, | ||
| 59 | timdemrnt, | ||
| 60 | pool.distance_since_start_of_link, | ||
| 61 | pool.p_point->location_x_ew, | ||
| 62 | pool.p_point->location_y_ns | ||
| 63 | ); | ||
| 64 | } | ||
| 65 | } | ||
| 66 | } | ||
| 67 | } | ||
| 68 | |||
| 69 | std::sort(points.begin(), points.end(), [](Point &a, Point &b) { | ||
| 70 | if (a.jopatili->key.timing_link_order != b.jopatili->key.timing_link_order) | ||
| 71 | return a.jopatili->key.timing_link_order < b.jopatili->key.timing_link_order; | ||
| 72 | return a.distance_since_start_of_link < b.distance_since_start_of_link; | ||
| 73 | }); | ||
| 74 | |||
| 75 | double total_time_s = 0; | ||
| 76 | for (size_t i = 0; i < points.size(); i++) { | ||
| 77 | Point *p = &points[i]; | ||
| 78 | p->total_time_s = total_time_s; | ||
| 79 | if (i > 0) { | ||
| 80 | Point *prev = &points[i - 1]; | ||
| 81 | if (p->timdemrnt != prev->timdemrnt) { | ||
| 82 | total_time_s += prev->timdemrnt->total_drive_time_s; | ||
| 83 | prev->total_time_s = total_time_s; | ||
| 84 | } | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | fputs("rd_x,rd_y,total_time_s,is_timing_stop\n", out); | ||
| 89 | for (const auto &point : points) { | ||
| 90 | fprintf(out, "%f,%f,%f,%d\n", point.rd_x, point.rd_y, point.total_time_s, point.jopatili->is_timing_stop); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | } | ||
| 94 | |||
| 95 | if (options.output_file_path != "-"sv) fclose(out); | ||
| 96 | } | ||
diff --git a/src/querykv1/journeyroute.hpp b/src/querykv1/journeyroute.hpp new file mode 100644 index 0000000..ccd996c --- /dev/null +++ b/src/querykv1/journeyroute.hpp | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_JOURNEYROUTE_HPP | ||
| 4 | #define OEUF_QUERYKV1_JOURNEYROUTE_HPP | ||
| 5 | |||
| 6 | #include <tmi8/kv1_types.hpp> | ||
| 7 | #include <tmi8/kv1_index.hpp> | ||
| 8 | |||
| 9 | #include "cliopts.hpp" | ||
| 10 | |||
| 11 | void journeyRoute(const Options &options, Kv1Records &records, Kv1Index &index); | ||
| 12 | |||
| 13 | #endif // OEUF_QUERYKV1_JOURNEYROUTE_HPP | ||
diff --git a/src/querykv1/journeys.cpp b/src/querykv1/journeys.cpp new file mode 100644 index 0000000..96566b2 --- /dev/null +++ b/src/querykv1/journeys.cpp | |||
| @@ -0,0 +1,95 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <iostream> | ||
| 4 | #include <map> | ||
| 5 | #include <string_view> | ||
| 6 | #include <unordered_set> | ||
| 7 | |||
| 8 | #include "journeys.hpp" | ||
| 9 | |||
| 10 | using namespace std::string_view_literals; | ||
| 11 | |||
| 12 | void journeys(const Options &options, Kv1Records &records, Kv1Index &index) { | ||
| 13 | const std::string_view want_begin_stop_code(options.begin_stop_code); | ||
| 14 | const std::string_view want_end_stop_code(options.end_stop_code); | ||
| 15 | |||
| 16 | FILE *out = stdout; | ||
| 17 | if (options.output_file_path != "-"sv) | ||
| 18 | out = fopen(options.output_file_path, "wb"); | ||
| 19 | if (!out) { | ||
| 20 | fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); | ||
| 21 | exit(EXIT_FAILURE); | ||
| 22 | } | ||
| 23 | |||
| 24 | std::cerr << "Generating journeys for " << options.line_planning_number << ", going from stop " | ||
| 25 | << options.begin_stop_code << " to " << options.end_stop_code << std::endl; | ||
| 26 | |||
| 27 | std::unordered_map<std::string, const Kv1UserStopPoint *> usrstops; | ||
| 28 | for (size_t i = 0; i < records.user_stop_points.size(); i++) { | ||
| 29 | const Kv1UserStopPoint *usrstop = &records.user_stop_points[i]; | ||
| 30 | usrstops[usrstop->key.user_stop_code] = usrstop; | ||
| 31 | } | ||
| 32 | |||
| 33 | std::unordered_set<std::string> journey_pattern_codes; | ||
| 34 | for (const auto &jopa : records.journey_patterns) { | ||
| 35 | if (jopa.key.line_planning_number != options.line_planning_number) | ||
| 36 | continue; | ||
| 37 | journey_pattern_codes.insert(jopa.key.journey_pattern_code); | ||
| 38 | } | ||
| 39 | |||
| 40 | std::unordered_map<std::string, std::vector<const Kv1JourneyPatternTimingLink *>> jopatilis; | ||
| 41 | for (size_t i = 0; i < records.journey_pattern_timing_links.size(); i++) { | ||
| 42 | const Kv1JourneyPatternTimingLink *jopatili = &records.journey_pattern_timing_links[i]; | ||
| 43 | if (jopatili->key.line_planning_number != options.line_planning_number | ||
| 44 | || !journey_pattern_codes.contains(jopatili->key.journey_pattern_code)) | ||
| 45 | continue; | ||
| 46 | jopatilis[jopatili->key.journey_pattern_code].push_back(jopatili); | ||
| 47 | } | ||
| 48 | |||
| 49 | std::unordered_set<std::string> valid_jopas; | ||
| 50 | for (auto &[journey_pattern_code, timing_links] : jopatilis) { | ||
| 51 | std::sort(timing_links.begin(), timing_links.end(), [](auto a, auto b) -> bool { | ||
| 52 | return a->key.timing_link_order < b->key.timing_link_order; | ||
| 53 | }); | ||
| 54 | auto begin_stop = timing_links.front()->user_stop_code_begin; | ||
| 55 | auto end_stop = timing_links.back()->user_stop_code_end; | ||
| 56 | |||
| 57 | const auto *begin = usrstops[begin_stop]; | ||
| 58 | const auto *end = usrstops[end_stop]; | ||
| 59 | |||
| 60 | bool begin_stop_ok = false; | ||
| 61 | if (want_begin_stop_code.starts_with("stop:")) | ||
| 62 | begin_stop_ok = want_begin_stop_code.substr(5) == begin_stop; | ||
| 63 | else if (want_begin_stop_code.starts_with("star:")) | ||
| 64 | begin_stop_ok = want_begin_stop_code.substr(5) == begin->user_stop_area_code; | ||
| 65 | |||
| 66 | bool end_stop_ok = false; | ||
| 67 | if (want_end_stop_code.starts_with("stop:")) | ||
| 68 | end_stop_ok = want_end_stop_code.substr(5) == end_stop; | ||
| 69 | else if (want_end_stop_code.starts_with("star:")) | ||
| 70 | end_stop_ok = want_end_stop_code.substr(5) == end->user_stop_area_code; | ||
| 71 | |||
| 72 | if (begin_stop_ok && end_stop_ok) { | ||
| 73 | valid_jopas.insert(journey_pattern_code); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | std::map<int, std::pair<std::string, std::string>> valid_journeys; | ||
| 78 | for (const auto &pujo : records.public_journeys) { | ||
| 79 | if (pujo.key.line_planning_number == options.line_planning_number | ||
| 80 | && valid_jopas.contains(pujo.journey_pattern_code)) { | ||
| 81 | valid_journeys[pujo.key.journey_number] = { | ||
| 82 | pujo.time_demand_group_code, | ||
| 83 | pujo.journey_pattern_code, | ||
| 84 | }; | ||
| 85 | } | ||
| 86 | } | ||
| 87 | |||
| 88 | fputs("journey_number,time_demand_group_code,journey_pattern_code\n", out); | ||
| 89 | for (const auto &[journey_number, timdemgrp_jopa] : valid_journeys) { | ||
| 90 | const auto &[time_demand_group_code, journey_pattern_code] = timdemgrp_jopa; | ||
| 91 | fprintf(out, "%d,%s,%s\n", journey_number, time_demand_group_code.c_str(), journey_pattern_code.c_str()); | ||
| 92 | } | ||
| 93 | |||
| 94 | if (options.output_file_path != "-"sv) fclose(out); | ||
| 95 | } | ||
diff --git a/src/querykv1/journeys.hpp b/src/querykv1/journeys.hpp new file mode 100644 index 0000000..cf615c7 --- /dev/null +++ b/src/querykv1/journeys.hpp | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_JOURNEYS_HPP | ||
| 4 | #define OEUF_QUERYKV1_JOURNEYS_HPP | ||
| 5 | |||
| 6 | #include <tmi8/kv1_types.hpp> | ||
| 7 | #include <tmi8/kv1_index.hpp> | ||
| 8 | |||
| 9 | #include "cliopts.hpp" | ||
| 10 | |||
| 11 | void journeys(const Options &options, Kv1Records &records, Kv1Index &index); | ||
| 12 | |||
| 13 | #endif // OEUF_QUERYKV1_JOURNEYS_HPP | ||
diff --git a/src/querykv1/main.cpp b/src/querykv1/main.cpp new file mode 100644 index 0000000..6c606ba --- /dev/null +++ b/src/querykv1/main.cpp | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <chrono> | ||
| 4 | #include <cstdio> | ||
| 5 | #include <string> | ||
| 6 | #include <string_view> | ||
| 7 | #include <vector> | ||
| 8 | |||
| 9 | #include <tmi8/kv1_types.hpp> | ||
| 10 | #include <tmi8/kv1_index.hpp> | ||
| 11 | #include <tmi8/kv1_lexer.hpp> | ||
| 12 | #include <tmi8/kv1_parser.hpp> | ||
| 13 | |||
| 14 | #include "cliopts.hpp" | ||
| 15 | #include "joparoute.hpp" | ||
| 16 | #include "journeyinfo.hpp" | ||
| 17 | #include "journeyroute.hpp" | ||
| 18 | #include "journeys.hpp" | ||
| 19 | #include "schedule.hpp" | ||
| 20 | |||
| 21 | using namespace std::string_view_literals; | ||
| 22 | |||
// Clock used for measuring how long lexing takes. The high-resolution clock
// is chosen only when it is steady; otherwise steady_clock is used, so the
// selected clock is steady either way.
using TimingClock = std::conditional<
  std::chrono::high_resolution_clock::is_steady,
  std::chrono::high_resolution_clock,
  std::chrono::steady_clock>::type;
| 27 | |||
// Reads an entire KV1 file into memory and returns its bytes.
//
// path: file to read, or "-" to read from standard input.
//
// Progress and errors are reported on stderr. The process exits with status 1
// when the file cannot be opened or a read error occurs.
std::string readKv1(const char *path) {
  const bool from_stdin = std::string_view(path) == "-";

  FILE *in = stdin;
  if (from_stdin) fputs("Reading KV1 from standard input\n", stderr);
  else in = fopen(path, "rb");
  if (!in) {
    // strerror() is standard C; this program never changes the locale, so
    // the text is the same untranslated description strerrordesc_np() gives.
    fprintf(stderr, "Open %s: %s\n", path, strerror(errno));
    exit(1);
  }

  char buf[4096];
  std::string data;
  while (!feof(in) && !ferror(in)) {
    const size_t got = fread(buf, sizeof(char), sizeof(buf), in);
    data.append(buf, got);
  }
  if (ferror(in)) {
    if (from_stdin)
      fputs("Error when reading from stdin\n", stderr);
    else
      fprintf(stderr, "Error reading from file \"%s\"\n", path);
    exit(1);
  }
  // %zu is the conversion for size_t (%lu is only right where size_t happens
  // to be unsigned long).
  fprintf(stderr, "Read %zu bytes\n", data.size());

  if (!from_stdin)
    fclose(in);

  return data;
}
| 57 | |||
| 58 | std::vector<Kv1Token> lex(const char *path) { | ||
| 59 | std::string data = readKv1(path); | ||
| 60 | |||
| 61 | auto start = TimingClock::now(); | ||
| 62 | Kv1Lexer lexer(data); | ||
| 63 | lexer.lex(); | ||
| 64 | auto end = TimingClock::now(); | ||
| 65 | |||
| 66 | std::chrono::duration<double> elapsed{end - start}; | ||
| 67 | double bytes = static_cast<double>(data.size()) / 1'000'000; | ||
| 68 | double speed = bytes / elapsed.count(); | ||
| 69 | |||
| 70 | if (!lexer.errors.empty()) { | ||
| 71 | fputs("Lexer reported errors:\n", stderr); | ||
| 72 | for (const auto &error : lexer.errors) | ||
| 73 | fprintf(stderr, "- %s\n", error.c_str()); | ||
| 74 | exit(1); | ||
| 75 | } | ||
| 76 | |||
| 77 | fprintf(stderr, "Got %lu tokens\n", lexer.tokens.size()); | ||
| 78 | fprintf(stderr, "Duration: %f s\n", elapsed.count()); | ||
| 79 | fprintf(stderr, "Speed: %f MB/s\n", speed); | ||
| 80 | |||
| 81 | return std::move(lexer.tokens); | ||
| 82 | } | ||
| 83 | |||
| 84 | bool parse(const char *path, Kv1Records &into) { | ||
| 85 | std::vector<Kv1Token> tokens = lex(path); | ||
| 86 | |||
| 87 | Kv1Parser parser(tokens, into); | ||
| 88 | parser.parse(); | ||
| 89 | |||
| 90 | bool ok = true; | ||
| 91 | if (!parser.gerrors.empty()) { | ||
| 92 | ok = false; | ||
| 93 | fputs("Parser reported errors:\n", stderr); | ||
| 94 | for (const auto &error : parser.gerrors) | ||
| 95 | fprintf(stderr, "- %s\n", error.c_str()); | ||
| 96 | } | ||
| 97 | if (!parser.warns.empty()) { | ||
| 98 | fputs("Parser reported warnings:\n", stderr); | ||
| 99 | for (const auto &warn : parser.warns) | ||
| 100 | fprintf(stderr, "- %s\n", warn.c_str()); | ||
| 101 | } | ||
| 102 | |||
| 103 | fprintf(stderr, "Parsed %lu records\n", into.size()); | ||
| 104 | |||
| 105 | return ok; | ||
| 106 | } | ||
| 107 | |||
| 108 | void printParsedRecords(const Kv1Records &records) { | ||
| 109 | fputs("Parsed records:\n", stderr); | ||
| 110 | fprintf(stderr, " organizational_units: %lu\n", records.organizational_units.size()); | ||
| 111 | fprintf(stderr, " higher_organizational_units: %lu\n", records.higher_organizational_units.size()); | ||
| 112 | fprintf(stderr, " user_stop_points: %lu\n", records.user_stop_points.size()); | ||
| 113 | fprintf(stderr, " user_stop_areas: %lu\n", records.user_stop_areas.size()); | ||
| 114 | fprintf(stderr, " timing_links: %lu\n", records.timing_links.size()); | ||
| 115 | fprintf(stderr, " links: %lu\n", records.links.size()); | ||
| 116 | fprintf(stderr, " lines: %lu\n", records.lines.size()); | ||
| 117 | fprintf(stderr, " destinations: %lu\n", records.destinations.size()); | ||
| 118 | fprintf(stderr, " journey_patterns: %lu\n", records.journey_patterns.size()); | ||
| 119 | fprintf(stderr, " concession_financer_relations: %lu\n", records.concession_financer_relations.size()); | ||
| 120 | fprintf(stderr, " concession_areas: %lu\n", records.concession_areas.size()); | ||
| 121 | fprintf(stderr, " financers: %lu\n", records.financers.size()); | ||
| 122 | fprintf(stderr, " journey_pattern_timing_links: %lu\n", records.journey_pattern_timing_links.size()); | ||
| 123 | fprintf(stderr, " points: %lu\n", records.points.size()); | ||
| 124 | fprintf(stderr, " point_on_links: %lu\n", records.point_on_links.size()); | ||
| 125 | fprintf(stderr, " icons: %lu\n", records.icons.size()); | ||
| 126 | fprintf(stderr, " notices: %lu\n", records.notices.size()); | ||
| 127 | fprintf(stderr, " notice_assignments: %lu\n", records.notice_assignments.size()); | ||
| 128 | fprintf(stderr, " time_demand_groups: %lu\n", records.time_demand_groups.size()); | ||
| 129 | fprintf(stderr, " time_demand_group_run_times: %lu\n", records.time_demand_group_run_times.size()); | ||
| 130 | fprintf(stderr, " period_groups: %lu\n", records.period_groups.size()); | ||
| 131 | fprintf(stderr, " specific_days: %lu\n", records.specific_days.size()); | ||
| 132 | fprintf(stderr, " timetable_versions: %lu\n", records.timetable_versions.size()); | ||
| 133 | fprintf(stderr, " public_journeys: %lu\n", records.public_journeys.size()); | ||
| 134 | fprintf(stderr, " period_group_validities: %lu\n", records.period_group_validities.size()); | ||
| 135 | fprintf(stderr, " exceptional_operating_days: %lu\n", records.exceptional_operating_days.size()); | ||
| 136 | fprintf(stderr, " schedule_versions: %lu\n", records.schedule_versions.size()); | ||
| 137 | fprintf(stderr, " public_journey_passing_times: %lu\n", records.public_journey_passing_times.size()); | ||
| 138 | fprintf(stderr, " operating_days: %lu\n", records.operating_days.size()); | ||
| 139 | } | ||
| 140 | |||
| 141 | void printIndexSize(const Kv1Index &index) { | ||
| 142 | fputs("Index size:\n", stderr); | ||
| 143 | fprintf(stderr, " organizational_units: %lu\n", index.organizational_units.size()); | ||
| 144 | fprintf(stderr, " user_stop_points: %lu\n", index.user_stop_points.size()); | ||
| 145 | fprintf(stderr, " user_stop_areas: %lu\n", index.user_stop_areas.size()); | ||
| 146 | fprintf(stderr, " timing_links: %lu\n", index.timing_links.size()); | ||
| 147 | fprintf(stderr, " links: %lu\n", index.links.size()); | ||
| 148 | fprintf(stderr, " lines: %lu\n", index.lines.size()); | ||
| 149 | fprintf(stderr, " destinations: %lu\n", index.destinations.size()); | ||
| 150 | fprintf(stderr, " journey_patterns: %lu\n", index.journey_patterns.size()); | ||
| 151 | fprintf(stderr, " concession_financer_relations: %lu\n", index.concession_financer_relations.size()); | ||
| 152 | fprintf(stderr, " concession_areas: %lu\n", index.concession_areas.size()); | ||
| 153 | fprintf(stderr, " financers: %lu\n", index.financers.size()); | ||
| 154 | fprintf(stderr, " journey_pattern_timing_links: %lu\n", index.journey_pattern_timing_links.size()); | ||
| 155 | fprintf(stderr, " points: %lu\n", index.points.size()); | ||
| 156 | fprintf(stderr, " point_on_links: %lu\n", index.point_on_links.size()); | ||
| 157 | fprintf(stderr, " icons: %lu\n", index.icons.size()); | ||
| 158 | fprintf(stderr, " notices: %lu\n", index.notices.size()); | ||
| 159 | fprintf(stderr, " time_demand_groups: %lu\n", index.time_demand_groups.size()); | ||
| 160 | fprintf(stderr, " time_demand_group_run_times: %lu\n", index.time_demand_group_run_times.size()); | ||
| 161 | fprintf(stderr, " period_groups: %lu\n", index.period_groups.size()); | ||
| 162 | fprintf(stderr, " specific_days: %lu\n", index.specific_days.size()); | ||
| 163 | fprintf(stderr, " timetable_versions: %lu\n", index.timetable_versions.size()); | ||
| 164 | fprintf(stderr, " public_journeys: %lu\n", index.public_journeys.size()); | ||
| 165 | fprintf(stderr, " period_group_validities: %lu\n", index.period_group_validities.size()); | ||
| 166 | fprintf(stderr, " exceptional_operating_days: %lu\n", index.exceptional_operating_days.size()); | ||
| 167 | fprintf(stderr, " schedule_versions: %lu\n", index.schedule_versions.size()); | ||
| 168 | fprintf(stderr, " public_journey_passing_times: %lu\n", index.public_journey_passing_times.size()); | ||
| 169 | fprintf(stderr, " operating_days: %lu\n", index.operating_days.size()); | ||
| 170 | } | ||
| 171 | |||
| 172 | int main(int argc, char *argv[]) { | ||
| 173 | Options options = parseOptions(argc, argv); | ||
| 174 | |||
| 175 | Kv1Records records; | ||
| 176 | if (!parse(options.kv1_file_path, records)) { | ||
| 177 | fputs("Error parsing records, exiting\n", stderr); | ||
| 178 | return EXIT_FAILURE; | ||
| 179 | } | ||
| 180 | printParsedRecords(records); | ||
| 181 | fputs("Indexing...\n", stderr); | ||
| 182 | Kv1Index index(&records); | ||
| 183 | fprintf(stderr, "Indexed %lu records\n", index.size()); | ||
| 184 | // Only notice assignments are not indexed. If this equality is not valid, | ||
| 185 | // then this means that we had duplicate keys or that something else went | ||
| 186 | // wrong. That would really not be great. | ||
| 187 | assert(index.size() == records.size() - records.notice_assignments.size()); | ||
| 188 | printIndexSize(index); | ||
| 189 | fputs("Linking records...\n", stderr); | ||
| 190 | kv1LinkRecords(index); | ||
| 191 | fputs("Done linking\n", stderr); | ||
| 192 | |||
| 193 | if (options.subcommand == "joparoute"sv) jopaRoute(options, records, index); | ||
| 194 | if (options.subcommand == "journeyroute"sv) journeyRoute(options, records, index); | ||
| 195 | if (options.subcommand == "journeys"sv) journeys(options, records, index); | ||
| 196 | if (options.subcommand == "journeyinfo"sv) journeyInfo(options, records, index); | ||
| 197 | if (options.subcommand == "schedule"sv) schedule(options, records, index); | ||
| 198 | } | ||
diff --git a/src/querykv1/schedule.cpp b/src/querykv1/schedule.cpp new file mode 100644 index 0000000..2bcfe0a --- /dev/null +++ b/src/querykv1/schedule.cpp | |||
| @@ -0,0 +1,63 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
#include <iostream>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>
| 7 | |||
| 8 | #include "daterange.hpp" | ||
| 9 | #include "schedule.hpp" | ||
| 10 | |||
| 11 | using namespace std::string_view_literals; | ||
| 12 | |||
| 13 | void schedule(const Options &options, Kv1Records &records, Kv1Index &index) { | ||
| 14 | FILE *out = stdout; | ||
| 15 | if (options.output_file_path != "-"sv) | ||
| 16 | out = fopen(options.output_file_path, "wb"); | ||
| 17 | if (!out) { | ||
| 18 | fprintf(stderr, "Open %s: %s\n", options.output_file_path, strerrordesc_np(errno)); | ||
| 19 | exit(EXIT_FAILURE); | ||
| 20 | } | ||
| 21 | |||
| 22 | std::cerr << "Generating schedule for " << options.line_planning_number << std::endl; | ||
| 23 | |||
| 24 | std::unordered_multimap<std::string, Kv1PeriodGroupValidity> period_group_validities; | ||
| 25 | for (const auto &pegr : records.period_group_validities) | ||
| 26 | period_group_validities.insert({ pegr.key.period_group_code, pegr }); | ||
| 27 | std::unordered_multimap<std::string, Kv1PublicJourney> public_journeys; | ||
| 28 | for (const auto &pujo : records.public_journeys) | ||
| 29 | public_journeys.insert({ pujo.key.timetable_version_code, pujo }); | ||
| 30 | |||
| 31 | std::cout << "line_planning_number,journey_number,date,departure_time" << std::endl; | ||
| 32 | for (const auto &tive : records.timetable_versions) { | ||
| 33 | std::vector<DateRange> tive_pegrval_ranges; | ||
| 34 | |||
| 35 | auto pegrval_range = period_group_validities.equal_range(tive.key.period_group_code); | ||
| 36 | for (auto it = pegrval_range.first; it != pegrval_range.second; it++) { | ||
| 37 | const auto &[_, pegrval] = *it; | ||
| 38 | tive_pegrval_ranges.emplace_back(pegrval.key.valid_from, pegrval.valid_thru); | ||
| 39 | } | ||
| 40 | |||
| 41 | DateRangeSeq seq(tive_pegrval_ranges.begin(), tive_pegrval_ranges.end()); | ||
| 42 | seq = seq.clampFrom(tive.valid_from); | ||
| 43 | if (tive.valid_thru) | ||
| 44 | seq = seq.clampThru(*tive.valid_thru); | ||
| 45 | |||
| 46 | for (const auto &range : seq) for (auto date : range) { | ||
| 47 | auto weekday = std::chrono::year_month_weekday(std::chrono::sys_days(date)).weekday(); | ||
| 48 | |||
| 49 | auto pujo_range = public_journeys.equal_range(tive.key.timetable_version_code); | ||
| 50 | for (auto itt = pujo_range.first; itt != pujo_range.second; itt++) { | ||
| 51 | const auto &[_, pujo] = *itt; | ||
| 52 | |||
| 53 | if (pujo.key.line_planning_number == options.line_planning_number && pujo.key.day_type.size() == 7 | ||
| 54 | && pujo.key.day_type[weekday.iso_encoding() - 1] == static_cast<char>('0' + weekday.iso_encoding())) { | ||
| 55 | std::cout << pujo.key.line_planning_number << "," << pujo.key.journey_number << "," | ||
| 56 | << date << "," << pujo.departure_time << std::endl; | ||
| 57 | } | ||
| 58 | } | ||
| 59 | } | ||
| 60 | } | ||
| 61 | |||
| 62 | if (options.output_file_path != "-"sv) fclose(out); | ||
| 63 | } | ||
diff --git a/src/querykv1/schedule.hpp b/src/querykv1/schedule.hpp new file mode 100644 index 0000000..100bd4c --- /dev/null +++ b/src/querykv1/schedule.hpp | |||
| @@ -0,0 +1,13 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #ifndef OEUF_QUERYKV1_SCHEDULE_HPP | ||
| 4 | #define OEUF_QUERYKV1_SCHEDULE_HPP | ||
| 5 | |||
| 6 | #include <tmi8/kv1_types.hpp> | ||
| 7 | #include <tmi8/kv1_index.hpp> | ||
| 8 | |||
| 9 | #include "cliopts.hpp" | ||
| 10 | |||
| 11 | void schedule(const Options &options, Kv1Records &records, Kv1Index &index); | ||
| 12 | |||
| 13 | #endif // OEUF_QUERYKV1_SCHEDULE_HPP | ||
diff --git a/src/recvkv6/.envrc b/src/recvkv6/.envrc new file mode 100644 index 0000000..694e74f --- /dev/null +++ b/src/recvkv6/.envrc | |||
| @@ -0,0 +1,2 @@ | |||
| 1 | source_env ../../ | ||
| 2 | export DEVMODE=1 | ||
diff --git a/src/recvkv6/Makefile b/src/recvkv6/Makefile new file mode 100644 index 0000000..12ff7fb --- /dev/null +++ b/src/recvkv6/Makefile | |||
| @@ -0,0 +1,21 @@ | |||
# Compiler and linker hardening flags taken from:
# Open Source Security Foundation (OpenSSF), “Compiler Options Hardening Guide
# for C and C++,” OpenSSF Best Practices Working Group. Accessed: Dec. 01,
# 2023. [Online]. Available:
# https://best.openssf.org/Compiler-Hardening-Guides/Compiler-Options-Hardening-Guide-for-C-and-C++.html
#
# Setting DEVMODE to a non-empty value turns warnings into errors (-Werror).
CXXFLAGS=-std=c++2b -g -fno-omit-frame-pointer $(if $(DEVMODE),-Werror,)\
	-O2 -Wall -Wformat=2 -Wconversion -Wtrampolines -Wimplicit-fallthrough \
	-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 \
	-D_GLIBCXX_ASSERTIONS \
	-fstrict-flex-arrays=3 \
	-fstack-clash-protection -fstack-protector-strong
LDFLAGS=-lzmq -larrow -lparquet -lprometheus-cpp-pull -lprometheus-cpp-core -lz -ltmi8 -Wl,-z,defs \
	-Wl,-z,nodlopen -Wl,-z,noexecstack \
	-Wl,-z,relro -Wl,-z,now

recvkv6: main.cpp
	$(CXX) -o $@ $^ $(CXXFLAGS) $(LDFLAGS)

# -f keeps `make clean` from failing when the binary has not been built.
.PHONY: clean
clean:
	rm -f recvkv6
diff --git a/src/recvkv6/main.cpp b/src/recvkv6/main.cpp new file mode 100644 index 0000000..2ac3669 --- /dev/null +++ b/src/recvkv6/main.cpp | |||
| @@ -0,0 +1,1300 @@ | |||
| 1 | // vim:set sw=2 ts=2 sts et: | ||
| 2 | |||
| 3 | #include <array> | ||
| 4 | #include <cassert> | ||
| 5 | #include <chrono> | ||
| 6 | #include <csignal> | ||
| 7 | #include <cstring> | ||
| 8 | #include <filesystem> | ||
| 9 | #include <format> | ||
| 10 | #include <fstream> | ||
| 11 | #include <iostream> | ||
| 12 | #include <optional> | ||
| 13 | #include <stack> | ||
| 14 | #include <string> | ||
| 15 | #include <sstream> | ||
| 16 | #include <vector> | ||
| 17 | |||
| 18 | #include <zlib.h> | ||
| 19 | #include <zmq.h> | ||
| 20 | |||
| 21 | #include <nlohmann/json.hpp> | ||
| 22 | |||
| 23 | #include <prometheus/counter.h> | ||
| 24 | #include <prometheus/exposer.h> | ||
| 25 | #include <prometheus/histogram.h> | ||
| 26 | #include <prometheus/registry.h> | ||
| 27 | |||
| 28 | #include <rapidxml/rapidxml.hpp> | ||
| 29 | |||
| 30 | #include <tmi8/kv6_parquet.hpp> | ||
| 31 | |||
| 32 | #define CHUNK 16384 | ||
| 33 | |||
| 34 | struct RawMessage { | ||
| 35 | public: | ||
| 36 | // Takes ownership of envelope and body | ||
| 37 | RawMessage(zmq_msg_t envelope, zmq_msg_t body) | ||
| 38 | : envelope(envelope), body(body) | ||
| 39 | {} | ||
| 40 | |||
| 41 | // Prevent copying | ||
| 42 | RawMessage(const RawMessage &) = delete; | ||
| 43 | RawMessage &operator=(RawMessage const &) = delete; | ||
| 44 | |||
| 45 | std::string_view getEnvelope() { | ||
| 46 | return static_cast<const char *>(zmq_msg_data(&envelope)); | ||
| 47 | } | ||
| 48 | |||
| 49 | char *getBody() { | ||
| 50 | return static_cast<char *>(zmq_msg_data(&body)); | ||
| 51 | } | ||
| 52 | |||
| 53 | size_t getBodySize() { | ||
| 54 | return zmq_msg_size(&body); | ||
| 55 | } | ||
| 56 | |||
| 57 | ~RawMessage() { | ||
| 58 | zmq_msg_close(&envelope); | ||
| 59 | zmq_msg_close(&body); | ||
| 60 | } | ||
| 61 | |||
| 62 | private: | ||
| 63 | zmq_msg_t envelope; | ||
| 64 | zmq_msg_t body; | ||
| 65 | }; | ||
| 66 | |||
| 67 | std::optional<RawMessage> recvMsg(void *socket) { | ||
| 68 | while (true) { | ||
| 69 | zmq_msg_t envelope, body; | ||
| 70 | int rc = zmq_msg_init(&envelope); | ||
| 71 | assert(rc == 0); | ||
| 72 | rc = zmq_msg_init(&body); | ||
| 73 | assert(rc == 0); | ||
| 74 | |||
| 75 | rc = zmq_msg_recv(&envelope, socket, 0); | ||
| 76 | if (rc == -1) return std::nullopt; | ||
| 77 | |||
| 78 | int more; | ||
| 79 | size_t more_size = sizeof(more); | ||
| 80 | rc = zmq_getsockopt(socket, ZMQ_RCVMORE, &more, &more_size); | ||
| 81 | if (!more) { | ||
| 82 | zmq_msg_close(&envelope); | ||
| 83 | zmq_msg_close(&body); | ||
| 84 | continue; | ||
| 85 | } | ||
| 86 | |||
| 87 | rc = zmq_msg_recv(&body, socket, 0); | ||
| 88 | if (rc == -1) return std::nullopt; | ||
| 89 | |||
| 90 | rc = zmq_getsockopt(socket, ZMQ_RCVMORE, &more, &more_size); | ||
| 91 | assert(!more); | ||
| 92 | |||
| 93 | return std::make_optional<RawMessage>(envelope, body); | ||
| 94 | } | ||
| 95 | } | ||
| 96 | |||
| 97 | // Ensures that <return value>[output_size] == 0 | ||
| 98 | char *decompress(char *raw, unsigned int input_size, unsigned int &output_size) { | ||
| 99 | assert(input_size <= UINT32_MAX); | ||
| 100 | |||
| 101 | z_stream strm; | ||
| 102 | strm.next_in = reinterpret_cast<unsigned char *>(raw); | ||
| 103 | strm.avail_in = input_size; | ||
| 104 | strm.zalloc = Z_NULL; | ||
| 105 | strm.zfree = Z_NULL; | ||
| 106 | strm.opaque = Z_NULL; | ||
| 107 | int rc = inflateInit2(&strm, 32); | ||
| 108 | assert(rc == Z_OK); | ||
| 109 | |||
| 110 | unsigned int buf_cap = CHUNK; | ||
| 111 | unsigned int buf_len = 0; | ||
| 112 | char *buf = static_cast<char *>(malloc(CHUNK)); | ||
| 113 | do { | ||
| 114 | if (buf_len + CHUNK > buf_cap) { | ||
| 115 | assert(buf_cap <= UINT32_MAX); | ||
| 116 | buf_cap *= 2; | ||
| 117 | buf = static_cast<char *>(realloc(buf, buf_cap)); | ||
| 118 | } | ||
| 119 | strm.avail_out = buf_cap - buf_len; | ||
| 120 | strm.next_out = reinterpret_cast<unsigned char *>(buf + buf_len); | ||
| 121 | |||
| 122 | unsigned long old_total = strm.total_out; | ||
| 123 | rc = inflate(&strm, Z_FINISH); | ||
| 124 | unsigned progress = static_cast<unsigned int>(strm.total_out - old_total); | ||
| 125 | buf_len += progress; | ||
| 126 | assert(progress != 0 || rc == Z_STREAM_END); | ||
| 127 | } while (strm.total_in < input_size); | ||
| 128 | |||
| 129 | if (buf_len == buf_cap) { | ||
| 130 | buf = static_cast<char *>(realloc(buf, buf_len + 1)); | ||
| 131 | } | ||
| 132 | buf[buf_len] = 0; | ||
| 133 | output_size = buf_len; | ||
| 134 | |||
| 135 | rc = inflateEnd(&strm); | ||
| 136 | assert(rc == Z_OK); | ||
| 137 | |||
| 138 | return buf; | ||
| 139 | } | ||
| 140 | |||
| 141 | struct Date { | ||
| 142 | int16_t year = 0; | ||
| 143 | uint8_t month = 0; | ||
| 144 | uint8_t day = 0; | ||
| 145 | |||
| 146 | static bool parse(Date &dest, std::string_view src) { | ||
| 147 | dest.year = 0, dest.month = 0, dest.day = 0; | ||
| 148 | |||
| 149 | int16_t y_mul_fac = 1; | ||
| 150 | bool extended = false; | ||
| 151 | |||
| 152 | size_t plus = src.find('+'); | ||
| 153 | if (plus != std::string_view::npos) { | ||
| 154 | extended = true; | ||
| 155 | src = src.substr(1); // remove plus sign from the start | ||
| 156 | } | ||
| 157 | if (!extended) { | ||
| 158 | size_t min_or_dash = src.find('-'); | ||
| 159 | if (min_or_dash == std::string_view::npos) return false; | ||
| 160 | if (min_or_dash == 0) { | ||
| 161 | y_mul_fac = -1; // it's a minus sign | ||
| 162 | src = src.substr(1); // remove minus sign at the start | ||
| 163 | } | ||
| 164 | } | ||
| 165 | |||
| 166 | int y_chars = 0; | ||
| 167 | while (src.size() > 0 && src[0] >= '0' && src[0] <= '9') { | ||
| 168 | dest.year = static_cast<int16_t>(dest.year * 10 + src[0] - '0'); | ||
| 169 | src = src.substr(1); | ||
| 170 | y_chars++; | ||
| 171 | } | ||
| 172 | if (src.size() == 0) { dest.year = 0; return false; } | ||
| 173 | if (src[0] != '-') { dest.year = 0; return false; } | ||
| 174 | src = src.substr(1); // remove dash | ||
| 175 | if (y_chars < 4 || (y_chars > 4 && !extended)) { dest.year = 0; return false; } | ||
| 176 | dest.year *= y_mul_fac; | ||
| 177 | |||
| 178 | bool rest_correct = src.size() == 5 | ||
| 179 | && src[0] >= '0' && src[0] <= '9' | ||
| 180 | && src[1] >= '0' && src[1] <= '9' | ||
| 181 | && src[3] >= '0' && src[3] <= '9' | ||
| 182 | && src[4] >= '0' && src[4] <= '9'; | ||
| 183 | if (!rest_correct) { dest.year = 0; return false; } | ||
| 184 | dest.month = static_cast<uint8_t>((src[0] - '0') * 10 + src[1] - '0'); | ||
| 185 | dest.day = static_cast<uint8_t>((src[3] - '0') * 10 + src[4] - '0'); | ||
| 186 | if (dest.month > 12 || dest.day > 31) { | ||
| 187 | dest.year = 0, dest.month = 0, dest.day = 0; | ||
| 188 | return false; | ||
| 189 | } | ||
| 190 | return true; | ||
| 191 | } | ||
| 192 | |||
| 193 | std::string toString() const { | ||
| 194 | if (year < 0 || year > 9999 || month < 0 || month > 12 || day < 0 || day > 31) | ||
| 195 | throw std::invalid_argument("one or more date components (year, month, day) out of range"); | ||
| 196 | char data[11] = "XXXX-XX-XX"; | ||
| 197 | sprintf(data, "%04u-%02u-%02u", year, month, day); | ||
| 198 | return data; | ||
| 199 | } | ||
| 200 | |||
| 201 | std::chrono::days toUnixDays() const { | ||
| 202 | std::chrono::year_month_day ymd{std::chrono::year(year), std::chrono::month(month), std::chrono::day(day)}; | ||
| 203 | // This is valid since C++20: as of C++20, the system clock is defined to measure the | ||
| 204 | // Unix Time, the amount of seconds since Thursday 1 January 1970, without leap seconds. | ||
| 205 | std::chrono::days since_epoch = std::chrono::sys_days(ymd).time_since_epoch(); | ||
| 206 | return since_epoch; | ||
| 207 | } | ||
| 208 | }; | ||
| 209 | |||
// A wall-clock time of day with second precision.
struct Time {
  uint8_t hour = 0;
  uint8_t minute = 0;
  uint8_t second = 0;

  // Parses "HH:MM:SS" from src into dest.
  //
  // Returns true on success. On failure (malformed text, hour above 23,
  // minute or second above 59) returns false and leaves dest set to 00:00:00.
  static bool parse(Time &dest, std::string_view src) {
    // Reset first so that every failure leaves the same, defined state.
    dest.hour = 0, dest.minute = 0, dest.second = 0;

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const bool okay = src.size() == 8
                   && is_digit(src[0]) && is_digit(src[1])
                   && src[2] == ':'
                   && is_digit(src[3]) && is_digit(src[4])
                   && src[5] == ':'
                   && is_digit(src[6]) && is_digit(src[7]);
    if (!okay) return false;

    const int hour = (src[0] - '0') * 10 + src[1] - '0';
    const int minute = (src[3] - '0') * 10 + src[4] - '0';
    const int second = (src[6] - '0') * 10 + src[7] - '0';
    if (hour > 23 || minute > 59 || second > 59) return false;

    dest.hour = static_cast<uint8_t>(hour);
    dest.minute = static_cast<uint8_t>(minute);
    dest.second = static_cast<uint8_t>(second);
    return true;
  }

  // Formats the time as "HH:MM:SS".
  // Throws std::invalid_argument if the hour is above 23 or the minute or
  // second is above 59.
  std::string toString() const {
    if (hour > 23 || minute > 59 || second > 59)
      throw std::invalid_argument("one or more time components (hour, minute, second) out of range");
    char data[32];
    snprintf(data, sizeof(data), "%02u:%02u:%02u", static_cast<unsigned int>(hour),
             static_cast<unsigned int>(minute), static_cast<unsigned int>(second));
    return data;
  }
};
| 244 | |||
// Time zone designator: the offset from UTC, in minutes, that follows the
// time in an ISO 8601 timestamp ("Z", "+HH:MM" or "-HH:MM").
struct Tzd {
  int16_t minutes = 0;

  // Parses "Z", "+HH:MM" or "-HH:MM" from src into dest.
  //
  // Returns true on success. On failure (malformed text, hours above 23 or
  // minutes above 59) returns false and leaves dest at an offset of zero.
  static bool parse(Tzd &dest, std::string_view src) {
    dest.minutes = 0;

    if (src.size() == 0) return false;
    if (src == "Z") return true;

    int sign = 1;
    if (src[0] == '-') sign = -1;
    else if (src[0] != '+') return false;
    src = src.substr(1);

    const auto is_digit = [](char c) { return c >= '0' && c <= '9'; };
    const bool okay = src.size() == 5
                   && is_digit(src[0]) && is_digit(src[1])
                   && src[2] == ':'
                   && is_digit(src[3]) && is_digit(src[4]);
    if (!okay) return false;

    const int hours = (src[0] - '0') * 10 + src[1] - '0';
    const int mins = (src[3] - '0') * 10 + src[4] - '0';
    if (hours > 23 || mins > 59) return false;
    dest.minutes = static_cast<int16_t>(sign * (60 * hours + mins));
    return true;
  }

  // Formats the offset as "Z" when it is zero, otherwise as "+HH:MM" or
  // "-HH:MM".
  // Throws std::invalid_argument if the offset is 24 hours or more.
  std::string toString() const {
    if (minutes == 0)
      return "Z";

    const bool negative = minutes < 0;
    const int hours_off = abs(minutes / 60);
    const int mins_off = abs(minutes) - hours_off*60;
    if (hours_off > 23 || mins_off > 59)
      throw std::invalid_argument("offset out of range");
    char data[32];
    // Both values are ints, so they are printed with %d.
    snprintf(data, sizeof(data), "%c%02d:%02d", negative ? '-' : '+', hours_off, mins_off);
    return data;
  }
};
| 288 | |||
| 289 | struct Timestamp { | ||
| 290 | Date date; | ||
| 291 | Tzd off; | ||
| 292 | Time time; | ||
| 293 | |||
| 294 | static bool parse(Timestamp &dest, std::string_view src) { | ||
| 295 | size_t t = src.find('T'); | ||
| 296 | if (t == std::string_view::npos || t + 1 >= src.size()) return false; | ||
| 297 | |||
| 298 | std::string_view date = src.substr(0, t); | ||
| 299 | std::string_view time_and_tzd = src.substr(t + 1); | ||
| 300 | if (time_and_tzd.size() < 9) return false; | ||
| 301 | if (!Date::parse(dest.date, date)) return false; | ||
| 302 | |||
| 303 | std::string_view time = time_and_tzd.substr(0, 8); | ||
| 304 | std::string_view tzd = time_and_tzd.substr(8); | ||
| 305 | if (!Time::parse(dest.time, time)) return false; | ||
| 306 | return Tzd::parse(dest.off, tzd); | ||
| 307 | } | ||
| 308 | |||
| 309 | std::string toString() const { | ||
| 310 | return date.toString() + "T" + time.toString() + off.toString(); | ||
| 311 | } | ||
| 312 | |||
| 313 | std::chrono::seconds toUnixSeconds() const { | ||
| 314 | std::chrono::year_month_day ymd(std::chrono::year(date.year), | ||
| 315 | std::chrono::month(date.month), | ||
| 316 | std::chrono::day(date.day)); | ||
| 317 | std::chrono::sys_days sys_days(ymd); | ||
| 318 | std::chrono::time_point<std::chrono::utc_clock, std::chrono::days> utc_days(sys_days.time_since_epoch()); | ||
| 319 | std::chrono::utc_seconds utc_seconds = std::chrono::time_point_cast<std::chrono::seconds>(utc_days); | ||
| 320 | utc_seconds += std::chrono::hours(time.hour) + std::chrono::minutes(time.minute) + | ||
| 321 | std::chrono::seconds(time.second) - std::chrono::minutes(off.minutes); | ||
| 322 | std::chrono::sys_seconds sys_seconds = std::chrono::utc_clock::to_sys(utc_seconds); | ||
| 323 | std::chrono::seconds unix = sys_seconds.time_since_epoch(); | ||
| 324 | return unix; | ||
| 325 | } | ||
| 326 | }; | ||
| 327 | |||
// XML namespace URL that elements must resolve to (or have no namespace
// at all) in order to be treated as TMI8 KV6 elements.
static const std::string_view TMI8_XML_NS{"http://bison.connekt.nl/tmi8/kv6/msg"};
| 329 | |||
// Kinds of KV6 position-info records. The numeric values are consecutive
// and are used directly as indices into per-type lookup tables.
enum Kv6RecordType {
  KV6T_UNKNOWN = 0,
  KV6T_DELAY,      // 1
  KV6T_INIT,       // 2
  KV6T_ARRIVAL,    // 3
  KV6T_ON_STOP,    // 4
  KV6T_DEPARTURE,  // 5
  KV6T_ON_ROUTE,   // 6
  KV6T_ON_PATH,    // 7
  KV6T_OFF_ROUTE,  // 8
  KV6T_END,        // 9

  // Aliases for the first and last enumerators above. These must be
  // updated whenever an enumerator is added at either end!
  _KV6T_FIRST_TYPE = KV6T_UNKNOWN,
  _KV6T_LAST_TYPE = KV6T_END,
};
| 346 | |||
// Bit flags identifying the individual fields of a KV6 record. Every field
// owns exactly one bit so that sets of fields can be combined with `|` and
// tested with `&`.
enum Kv6Field {
  KV6F_NONE                          = 0,
  KV6F_DATA_OWNER_CODE               = 1 << 0,
  KV6F_LINE_PLANNING_NUMBER          = 1 << 1,
  KV6F_OPERATING_DAY                 = 1 << 2,
  KV6F_JOURNEY_NUMBER                = 1 << 3,
  KV6F_REINFORCEMENT_NUMBER          = 1 << 4,
  KV6F_TIMESTAMP                     = 1 << 5,
  KV6F_SOURCE                        = 1 << 6,
  KV6F_PUNCTUALITY                   = 1 << 7,
  KV6F_USER_STOP_CODE                = 1 << 8,
  KV6F_PASSAGE_SEQUENCE_NUMBER       = 1 << 9,
  KV6F_VEHICLE_NUMBER                = 1 << 10,
  KV6F_BLOCK_CODE                    = 1 << 11,
  KV6F_WHEELCHAIR_ACCESSIBLE         = 1 << 12,
  KV6F_NUMBER_OF_COACHES             = 1 << 13,
  KV6F_RD_Y                          = 1 << 14,
  KV6F_RD_X                          = 1 << 15,
  KV6F_DISTANCE_SINCE_LAST_USER_STOP = 1 << 16,
};
| 367 | |||
| 368 | static constexpr Kv6Field KV6T_REQUIRED_FIELDS[_KV6T_LAST_TYPE + 1] = { | ||
| 369 | // KV6T_UNKNOWN | ||
| 370 | KV6F_NONE, | ||
| 371 | // KV6T_DELAY | ||
| 372 | static_cast<Kv6Field>( | ||
| 373 | KV6F_DATA_OWNER_CODE | ||
| 374 | | KV6F_LINE_PLANNING_NUMBER | ||
| 375 | | KV6F_OPERATING_DAY | ||
| 376 | | KV6F_JOURNEY_NUMBER | ||
| 377 | | KV6F_REINFORCEMENT_NUMBER | ||
| 378 | | KV6F_TIMESTAMP | ||
| 379 | | KV6F_SOURCE | ||
| 380 | | KV6F_PUNCTUALITY), | ||
| 381 | // KV6T_INIT | ||
| 382 | static_cast<Kv6Field>( | ||
| 383 | KV6F_DATA_OWNER_CODE | ||
| 384 | | KV6F_LINE_PLANNING_NUMBER | ||
| 385 | | KV6F_OPERATING_DAY | ||
| 386 | | KV6F_JOURNEY_NUMBER | ||
| 387 | | KV6F_REINFORCEMENT_NUMBER | ||
| 388 | | KV6F_TIMESTAMP | ||
| 389 | | KV6F_SOURCE | ||
| 390 | | KV6F_USER_STOP_CODE | ||
| 391 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 392 | | KV6F_VEHICLE_NUMBER | ||
| 393 | | KV6F_BLOCK_CODE | ||
| 394 | | KV6F_WHEELCHAIR_ACCESSIBLE | ||
| 395 | | KV6F_NUMBER_OF_COACHES), | ||
| 396 | // KV6T_ARRIVAL | ||
| 397 | static_cast<Kv6Field>( | ||
| 398 | KV6F_DATA_OWNER_CODE | ||
| 399 | | KV6F_LINE_PLANNING_NUMBER | ||
| 400 | | KV6F_OPERATING_DAY | ||
| 401 | | KV6F_JOURNEY_NUMBER | ||
| 402 | | KV6F_REINFORCEMENT_NUMBER | ||
| 403 | | KV6F_USER_STOP_CODE | ||
| 404 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 405 | | KV6F_TIMESTAMP | ||
| 406 | | KV6F_SOURCE | ||
| 407 | | KV6F_VEHICLE_NUMBER | ||
| 408 | | KV6F_PUNCTUALITY), | ||
| 409 | // KV6T_ON_STOP | ||
| 410 | static_cast<Kv6Field>( | ||
| 411 | KV6F_DATA_OWNER_CODE | ||
| 412 | | KV6F_LINE_PLANNING_NUMBER | ||
| 413 | | KV6F_OPERATING_DAY | ||
| 414 | | KV6F_JOURNEY_NUMBER | ||
| 415 | | KV6F_REINFORCEMENT_NUMBER | ||
| 416 | | KV6F_USER_STOP_CODE | ||
| 417 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 418 | | KV6F_TIMESTAMP | ||
| 419 | | KV6F_SOURCE | ||
| 420 | | KV6F_VEHICLE_NUMBER | ||
| 421 | | KV6F_PUNCTUALITY), | ||
| 422 | // KV6T_DEPARTURE | ||
| 423 | static_cast<Kv6Field>( | ||
| 424 | KV6F_DATA_OWNER_CODE | ||
| 425 | | KV6F_LINE_PLANNING_NUMBER | ||
| 426 | | KV6F_OPERATING_DAY | ||
| 427 | | KV6F_JOURNEY_NUMBER | ||
| 428 | | KV6F_REINFORCEMENT_NUMBER | ||
| 429 | | KV6F_USER_STOP_CODE | ||
| 430 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 431 | | KV6F_TIMESTAMP | ||
| 432 | | KV6F_SOURCE | ||
| 433 | | KV6F_VEHICLE_NUMBER | ||
| 434 | | KV6F_PUNCTUALITY), | ||
| 435 | // KV6T_ON_ROUTE | ||
| 436 | static_cast<Kv6Field>( | ||
| 437 | KV6F_DATA_OWNER_CODE | ||
| 438 | | KV6F_LINE_PLANNING_NUMBER | ||
| 439 | | KV6F_OPERATING_DAY | ||
| 440 | | KV6F_JOURNEY_NUMBER | ||
| 441 | | KV6F_REINFORCEMENT_NUMBER | ||
| 442 | | KV6F_USER_STOP_CODE | ||
| 443 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 444 | | KV6F_TIMESTAMP | ||
| 445 | | KV6F_SOURCE | ||
| 446 | | KV6F_VEHICLE_NUMBER | ||
| 447 | | KV6F_PUNCTUALITY | ||
| 448 | | KV6F_RD_X | ||
| 449 | | KV6F_RD_Y), | ||
| 450 | // KV6T_ON_PATH | ||
| 451 | KV6F_NONE, | ||
| 452 | // KV6T_OFF_ROUTE | ||
| 453 | static_cast<Kv6Field>( | ||
| 454 | KV6F_DATA_OWNER_CODE | ||
| 455 | | KV6F_LINE_PLANNING_NUMBER | ||
| 456 | | KV6F_OPERATING_DAY | ||
| 457 | | KV6F_JOURNEY_NUMBER | ||
| 458 | | KV6F_REINFORCEMENT_NUMBER | ||
| 459 | | KV6F_TIMESTAMP | ||
| 460 | | KV6F_SOURCE | ||
| 461 | | KV6F_USER_STOP_CODE | ||
| 462 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 463 | | KV6F_VEHICLE_NUMBER | ||
| 464 | | KV6F_RD_X | ||
| 465 | | KV6F_RD_Y), | ||
| 466 | // KV6T_END | ||
| 467 | static_cast<Kv6Field>( | ||
| 468 | KV6F_DATA_OWNER_CODE | ||
| 469 | | KV6F_LINE_PLANNING_NUMBER | ||
| 470 | | KV6F_OPERATING_DAY | ||
| 471 | | KV6F_JOURNEY_NUMBER | ||
| 472 | | KV6F_REINFORCEMENT_NUMBER | ||
| 473 | | KV6F_TIMESTAMP | ||
| 474 | | KV6F_SOURCE | ||
| 475 | | KV6F_USER_STOP_CODE | ||
| 476 | | KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 477 | | KV6F_VEHICLE_NUMBER), | ||
| 478 | }; | ||
| 479 | |||
| 480 | static constexpr Kv6Field KV6T_OPTIONAL_FIELDS[_KV6T_LAST_TYPE + 1] = { | ||
| 481 | // KV6T_UNKNOWN | ||
| 482 | KV6F_NONE, | ||
| 483 | // KV6T_DELAY | ||
| 484 | KV6F_NONE, | ||
| 485 | // KV6T_INIT | ||
| 486 | KV6F_NONE, | ||
| 487 | // KV6T_ARRIVAL | ||
| 488 | static_cast<Kv6Field>(KV6F_RD_X | KV6F_RD_Y), | ||
| 489 | // KV6T_ON_STOP | ||
| 490 | static_cast<Kv6Field>(KV6F_RD_X | KV6F_RD_Y), | ||
| 491 | // KV6T_DEPARTURE | ||
| 492 | static_cast<Kv6Field>(KV6F_RD_X | KV6F_RD_Y), | ||
| 493 | // KV6T_ON_ROUTE | ||
| 494 | KV6F_DISTANCE_SINCE_LAST_USER_STOP, | ||
| 495 | // KV6T_ON_PATH | ||
| 496 | KV6F_NONE, | ||
| 497 | // KV6T_OFF_ROUTE | ||
| 498 | KV6F_NONE, | ||
| 499 | // KV6T_END | ||
| 500 | KV6F_NONE, | ||
| 501 | }; | ||
| 502 | |||
| 503 | struct Kv6Record { | ||
| 504 | Kv6RecordType type = KV6T_UNKNOWN; | ||
| 505 | Kv6Field presence = KV6F_NONE; | ||
| 506 | Kv6Field next = KV6F_NONE; | ||
| 507 | std::string data_owner_code; | ||
| 508 | std::string line_planning_number; | ||
| 509 | std::string source; | ||
| 510 | std::string user_stop_code; | ||
| 511 | std::string wheelchair_accessible; | ||
| 512 | Date operating_day; | ||
| 513 | Timestamp timestamp; | ||
| 514 | uint32_t block_code = 0; | ||
| 515 | uint32_t journey_number = 0; | ||
| 516 | uint32_t vehicle_number = 0; | ||
| 517 | int32_t rd_x = 0; | ||
| 518 | int32_t rd_y = 0; | ||
| 519 | // The TMI8 specification is unclear: this field | ||
| 520 | // might actually be called distancesincelaststop | ||
| 521 | uint32_t distance_since_last_user_stop = 0; | ||
| 522 | uint16_t passage_sequence_number = 0; | ||
| 523 | int16_t punctuality = 0; | ||
| 524 | uint8_t number_of_coaches = 0; | ||
| 525 | uint8_t reinforcement_number = 0; | ||
| 526 | |||
| 527 | void markPresent(Kv6Field field) { | ||
| 528 | presence = static_cast<Kv6Field>(presence | field); | ||
| 529 | } | ||
| 530 | |||
| 531 | void removeUnsupportedFields() { | ||
| 532 | Kv6Field required_fields = KV6T_REQUIRED_FIELDS[type]; | ||
| 533 | Kv6Field optional_fields = KV6T_OPTIONAL_FIELDS[type]; | ||
| 534 | Kv6Field supported_fields = static_cast<Kv6Field>(required_fields | optional_fields); | ||
| 535 | presence = static_cast<Kv6Field>(presence & supported_fields); | ||
| 536 | } | ||
| 537 | |||
| 538 | bool valid() { | ||
| 539 | Kv6Field required_fields = KV6T_REQUIRED_FIELDS[type]; | ||
| 540 | Kv6Field optional_fields = KV6T_OPTIONAL_FIELDS[type]; | ||
| 541 | Kv6Field supported_fields = static_cast<Kv6Field>(required_fields | optional_fields); | ||
| 542 | |||
| 543 | Kv6Field required_field_presence = static_cast<Kv6Field>(presence & required_fields); | ||
| 544 | Kv6Field unsupported_field_presence = static_cast<Kv6Field>(presence & ~supported_fields); | ||
| 545 | |||
| 546 | return required_field_presence == required_fields && !unsupported_field_presence; | ||
| 547 | } | ||
| 548 | }; | ||
| 549 | |||
// Bit flags identifying the header fields of a VV_TM_PUSH message. Every
// field owns exactly one bit so that sets of fields can be combined.
enum Tmi8VvTmPushInfoField {
  TMI8F_NONE          = 0,
  TMI8F_SUBSCRIBER_ID = 1 << 0,
  TMI8F_VERSION       = 1 << 1,
  TMI8F_DOSSIER_NAME  = 1 << 2,
  TMI8F_TIMESTAMP     = 1 << 3,
};
| 557 | |||
| 558 | struct Tmi8VvTmPushInfo { | ||
| 559 | Tmi8VvTmPushInfoField next = TMI8F_NONE; | ||
| 560 | Tmi8VvTmPushInfoField presence = TMI8F_NONE; | ||
| 561 | std::string subscriber_id; | ||
| 562 | std::string version; | ||
| 563 | std::string dossier_name; | ||
| 564 | Timestamp timestamp; | ||
| 565 | std::vector<Kv6Record> messages; | ||
| 566 | |||
| 567 | void markPresent(Tmi8VvTmPushInfoField field) { | ||
| 568 | presence = static_cast<Tmi8VvTmPushInfoField>(presence | field); | ||
| 569 | } | ||
| 570 | |||
| 571 | bool valid() { | ||
| 572 | const Tmi8VvTmPushInfoField REQUIRED_FIELDS = | ||
| 573 | static_cast<Tmi8VvTmPushInfoField>( | ||
| 574 | TMI8F_SUBSCRIBER_ID | ||
| 575 | | TMI8F_VERSION | ||
| 576 | | TMI8F_DOSSIER_NAME | ||
| 577 | | TMI8F_TIMESTAMP); | ||
| 578 | return (presence & REQUIRED_FIELDS) == REQUIRED_FIELDS; | ||
| 579 | } | ||
| 580 | }; | ||
| 581 | |||
| 582 | static const std::array<std::string_view, _KV6T_LAST_TYPE + 1> KV6_POS_INFO_RECORD_TYPES = { | ||
| 583 | "UNKNOWN", "DELAY", "INIT", "ARRIVAL", "ONSTOP", "DEPARTURE", "ONROUTE", "ONPATH", "OFFROUTE", "END", | ||
| 584 | }; | ||
| 585 | |||
| 586 | std::optional<std::string_view> findKv6PosInfoRecordTypeName(Kv6RecordType type) { | ||
| 587 | if (type > _KV6T_LAST_TYPE) | ||
| 588 | return std::nullopt; | ||
| 589 | return KV6_POS_INFO_RECORD_TYPES[type]; | ||
| 590 | } | ||
| 591 | |||
| 592 | const std::array<std::tuple<std::string_view, Kv6Field>, 17> KV6_POS_INFO_RECORD_FIELDS = {{ | ||
| 593 | { "dataownercode", KV6F_DATA_OWNER_CODE }, | ||
| 594 | { "lineplanningnumber", KV6F_LINE_PLANNING_NUMBER }, | ||
| 595 | { "operatingday", KV6F_OPERATING_DAY }, | ||
| 596 | { "journeynumber", KV6F_JOURNEY_NUMBER }, | ||
| 597 | { "reinforcementnumber", KV6F_REINFORCEMENT_NUMBER }, | ||
| 598 | { "timestamp", KV6F_TIMESTAMP }, | ||
| 599 | { "source", KV6F_SOURCE }, | ||
| 600 | { "punctuality", KV6F_PUNCTUALITY }, | ||
| 601 | { "userstopcode", KV6F_USER_STOP_CODE }, | ||
| 602 | { "passagesequencenumber", KV6F_PASSAGE_SEQUENCE_NUMBER }, | ||
| 603 | { "vehiclenumber", KV6F_VEHICLE_NUMBER }, | ||
| 604 | { "blockcode", KV6F_BLOCK_CODE }, | ||
| 605 | { "wheelchairaccessible", KV6F_WHEELCHAIR_ACCESSIBLE }, | ||
| 606 | { "numberofcoaches", KV6F_NUMBER_OF_COACHES }, | ||
| 607 | { "rd-y", KV6F_RD_Y }, | ||
| 608 | { "rd-x", KV6F_RD_X }, | ||
| 609 | { "distancesincelastuserstop", KV6F_DISTANCE_SINCE_LAST_USER_STOP }, | ||
| 610 | }}; | ||
| 611 | |||
| 612 | // Returns the maximum amount of digits such that it is guaranteed that | ||
| 613 | // a corresponding amount of repeated 9's can be represented by the type. | ||
| 614 | template<std::integral T> | ||
| 615 | constexpr size_t maxDigits() { | ||
| 616 | size_t digits = 0; | ||
| 617 | for (T x = std::numeric_limits<T>::max(); x != 0; x /= 10) digits++; | ||
| 618 | return digits - 1; | ||
| 619 | } | ||
| 620 | |||
| 621 | template<size_t MaxDigits, std::unsigned_integral T> | ||
| 622 | constexpr bool parseUnsigned(T &out, std::string_view src) { | ||
| 623 | static_assert(MaxDigits <= maxDigits<T>()); | ||
| 624 | if (src.size() > MaxDigits) return false; | ||
| 625 | T res = 0; | ||
| 626 | while (src.size() > 0) { | ||
| 627 | if (src[0] < '0' || src[0] > '9') return false; | ||
| 628 | res = static_cast<T>(res * 10 + src[0] - '0'); | ||
| 629 | src = src.substr(1); | ||
| 630 | } | ||
| 631 | out = res; | ||
| 632 | return true; | ||
| 633 | } | ||
| 634 | |||
| 635 | template<size_t MaxDigits, std::signed_integral T> | ||
| 636 | constexpr bool parseSigned(T &out, std::string_view src) { | ||
| 637 | static_assert(MaxDigits <= maxDigits<T>()); | ||
| 638 | if (src.size() == 0) return false; | ||
| 639 | bool negative = src[0] == '-'; | ||
| 640 | if (negative) src = src.substr(1); | ||
| 641 | if (src.size() > MaxDigits) return false; | ||
| 642 | T res = 0; | ||
| 643 | while (src.size() > 0) { | ||
| 644 | if (src[0] < '0' || src[0] > '9') return false; | ||
| 645 | res = static_cast<T>(res * 10 + src[0] - '0'); | ||
| 646 | src = src.substr(1); | ||
| 647 | } | ||
| 648 | out = negative ? -res : res; | ||
| 649 | return true; | ||
| 650 | } | ||
| 651 | |||
// One XML namespace declaration, linked to the declarations that were
// already known when it was encountered.
struct Xmlns {
  // Previously known declaration, or null when this is the last one
  const Xmlns *next;
  // Declared prefix; empty for the default namespace
  std::string_view prefix;
  // URL the prefix stands for
  std::string_view url;
};
| 657 | |||
| 658 | std::optional<std::string_view> resolve(std::string_view prefix, const Xmlns *nss) { | ||
| 659 | while (nss) | ||
| 660 | if (nss->prefix == prefix) | ||
| 661 | return nss->url; | ||
| 662 | else | ||
| 663 | nss = nss->next; | ||
| 664 | return std::nullopt; | ||
| 665 | } | ||
| 666 | |||
| 667 | template<typename T> | ||
| 668 | void withXmlnss(const rapidxml::xml_attribute<> *attr, const Xmlns *nss, const T &fn) { | ||
| 669 | while (attr) { | ||
| 670 | std::string_view name(attr->name(), attr->name_size()); | ||
| 671 | if (name.starts_with("xmlns")) { | ||
| 672 | if (name.size() == 5) { // just xmlns | ||
| 673 | Xmlns ns0 = { | ||
| 674 | .next = nss, | ||
| 675 | .url = std::string_view(attr->value(), attr->value_size()), | ||
| 676 | }; | ||
| 677 | withXmlnss(attr->next_attribute(), &ns0, fn); | ||
| 678 | return; | ||
| 679 | } else if (name.size() > 6 && name[5] == ':') { // xmlns:<something> | ||
| 680 | Xmlns ns0 = { | ||
| 681 | .next = nss, | ||
| 682 | .prefix = name.substr(6), | ||
| 683 | .url = std::string_view(attr->value(), attr->value_size()), | ||
| 684 | }; | ||
| 685 | withXmlnss(attr->next_attribute(), &ns0, fn); | ||
| 686 | return; | ||
| 687 | } | ||
| 688 | } | ||
| 689 | attr = attr->next_attribute(); | ||
| 690 | } | ||
| 691 | fn(nss); | ||
| 692 | } | ||
| 693 | |||
| 694 | template<typename T> | ||
| 695 | void ifResolvable(const rapidxml::xml_node<> &node, const Xmlns *nss, const T &fn) { | ||
| 696 | std::string_view name(node.name(), node.name_size()); | ||
| 697 | std::string_view ns; | ||
| 698 | size_t colon = name.find(':'); | ||
| 699 | |||
| 700 | if (colon != std::string_view::npos) { | ||
| 701 | if (colon >= name.size() - 1) // last character | ||
| 702 | return; | ||
| 703 | ns = name.substr(0, colon); | ||
| 704 | name = name.substr(colon + 1); | ||
| 705 | } | ||
| 706 | |||
| 707 | withXmlnss(node.first_attribute(), nss, [&](const Xmlns *nss) { | ||
| 708 | std::optional<std::string_view> ns_url = resolve(ns, nss); | ||
| 709 | if (!ns_url && !ns.empty()) return; | ||
| 710 | if (!ns_url) fn(std::string_view(), name, nss); | ||
| 711 | else fn(*ns_url, name, nss); | ||
| 712 | }); | ||
| 713 | } | ||
| 714 | |||
| 715 | template<typename T> | ||
| 716 | void ifTmi8Element(const rapidxml::xml_node<> &node, const Xmlns *nss, const T &fn) { | ||
| 717 | ifResolvable(node, nss, [&](std::string_view ns_url, std::string_view name, const Xmlns *nss) { | ||
| 718 | if (node.type() == rapidxml::node_element && (ns_url.empty() || ns_url == TMI8_XML_NS)) fn(name, nss); | ||
| 719 | }); | ||
| 720 | } | ||
| 721 | |||
| 722 | bool onlyTextElement(const rapidxml::xml_node<> &node) { | ||
| 723 | return node.type() == rapidxml::node_element | ||
| 724 | && node.first_node() | ||
| 725 | && node.first_node() == node.last_node() | ||
| 726 | && node.first_node()->type() == rapidxml::node_data; | ||
| 727 | } | ||
| 728 | |||
| 729 | std::string_view getValue(const rapidxml::xml_node<> &node) { | ||
| 730 | return std::string_view(node.value(), node.value_size()); | ||
| 731 | } | ||
| 732 | |||
// Copies `val` into `into` provided it is at most `max_len` bytes long.
// Returns whether the copy took place; `into` is left untouched otherwise.
bool parseStringValue(std::string &into, size_t max_len, std::string_view val) {
  const bool fits = val.size() <= max_len;
  if (fits)
    into.assign(val);
  return fits;
}
| 739 | |||
| 740 | struct Kv6Parser { | ||
| 741 | std::stringstream &errs; | ||
| 742 | std::stringstream &warns; | ||
| 743 | |||
| 744 | void error(std::string_view msg) { | ||
| 745 | errs << msg << '\n'; | ||
| 746 | } | ||
| 747 | |||
| 748 | void warn(std::string_view msg) { | ||
| 749 | warns << msg << '\n'; | ||
| 750 | } | ||
| 751 | |||
| 752 | #define PERRASSERT(msg, ...) do { if (!(__VA_ARGS__)) { error(msg); return; } } while (false) | ||
| 753 | #define PWARNASSERT(msg, ...) do { if (!(__VA_ARGS__)) { warn(msg); return; } } while (false) | ||
| 754 | |||
| 755 | std::optional<Kv6Record> parseKv6PosInfoRecord(Kv6RecordType type, const rapidxml::xml_node<> &node, const Xmlns *nss) { | ||
| 756 | Kv6Record fields = { .type = type }; | ||
| 757 | for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) { | ||
| 758 | ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) { | ||
| 759 | for (const auto &[fname, field] : KV6_POS_INFO_RECORD_FIELDS) { | ||
| 760 | if (field == KV6F_NONE) | ||
| 761 | continue; | ||
| 762 | if (fname == name) { | ||
| 763 | PWARNASSERT("Expected KV6 record field element to only contain data", | ||
| 764 | onlyTextElement(*child)); | ||
| 765 | std::string_view childval = getValue(*child); | ||
| 766 | switch (field) { | ||
| 767 | case KV6F_DATA_OWNER_CODE: | ||
| 768 | PWARNASSERT("Invalid value for dataownercode", | ||
| 769 | parseStringValue(fields.data_owner_code, 10, childval)); | ||
| 770 | break; | ||
| 771 | case KV6F_LINE_PLANNING_NUMBER: | ||
| 772 | PWARNASSERT("Invalid value for lineplanningnumber", | ||
| 773 | parseStringValue(fields.line_planning_number, 10, childval)); | ||
| 774 | break; | ||
| 775 | case KV6F_OPERATING_DAY: | ||
| 776 | PWARNASSERT("Invalid value for operatatingday: not a valid date", | ||
| 777 | Date::parse(fields.operating_day, childval)); | ||
| 778 | break; | ||
| 779 | case KV6F_JOURNEY_NUMBER: | ||
| 780 | PWARNASSERT("Invalid value for journeynumber:" | ||
| 781 | " not a valid unsigned number with at most six digits", | ||
| 782 | parseUnsigned<6>(fields.journey_number, childval)); | ||
| 783 | break; | ||
| 784 | case KV6F_REINFORCEMENT_NUMBER: | ||
| 785 | PWARNASSERT("Invalid value for reinforcementnumber:" | ||
| 786 | " not a valid unsigned number with at most two digits", | ||
| 787 | parseUnsigned<2>(fields.reinforcement_number, childval)); | ||
| 788 | break; | ||
| 789 | case KV6F_TIMESTAMP: | ||
| 790 | PWARNASSERT("Invalid value for timestamp: not a valid timestamp", | ||
| 791 | Timestamp::parse(fields.timestamp, childval)); | ||
| 792 | break; | ||
| 793 | case KV6F_SOURCE: | ||
| 794 | PWARNASSERT("Invalid value for source:" | ||
| 795 | " not a valid string of at most 10 bytes", | ||
| 796 | parseStringValue(fields.source, 10, childval)); | ||
| 797 | break; | ||
| 798 | case KV6F_PUNCTUALITY: | ||
| 799 | PWARNASSERT("Invalid value for punctuality:" | ||
| 800 | " not a valid signed number with at most four digits", | ||
| 801 | parseSigned<4>(fields.punctuality, childval)); | ||
| 802 | break; | ||
| 803 | case KV6F_USER_STOP_CODE: | ||
| 804 | PWARNASSERT("Invalid value for userstopcode:" | ||
| 805 | " not a valid string of at most 10 bytes", | ||
| 806 | parseStringValue(fields.user_stop_code, 10, childval)); | ||
| 807 | break; | ||
| 808 | case KV6F_PASSAGE_SEQUENCE_NUMBER: | ||
| 809 | PWARNASSERT("Invalid value for passagesequencenumber:" | ||
| 810 | " not a valid unsigned number with at most four digits", | ||
| 811 | parseUnsigned<4>(fields.passage_sequence_number, childval)); | ||
| 812 | break; | ||
| 813 | case KV6F_VEHICLE_NUMBER: | ||
| 814 | PWARNASSERT("Invalid value for vehiclenumber:" | ||
| 815 | " not a valid unsigned number with at most six digits", | ||
| 816 | parseUnsigned<6>(fields.vehicle_number, childval)); | ||
| 817 | break; | ||
| 818 | case KV6F_BLOCK_CODE: | ||
| 819 | PWARNASSERT("Invalid value for blockcode:" | ||
| 820 | " not a valid unsigned number with at most eight digits", | ||
| 821 | parseUnsigned<8>(fields.block_code, childval)); | ||
| 822 | break; | ||
| 823 | case KV6F_WHEELCHAIR_ACCESSIBLE: | ||
| 824 | PWARNASSERT("Invalid value for wheelchairaccessible:" | ||
| 825 | " not a valid value for wheelchair accessibility", | ||
| 826 | childval == "ACCESSIBLE" | ||
| 827 | || childval == "NOTACCESSIBLE" | ||
| 828 | || childval == "UNKNOWN"); | ||
| 829 | fields.wheelchair_accessible = childval; | ||
| 830 | break; | ||
| 831 | case KV6F_NUMBER_OF_COACHES: | ||
| 832 | PWARNASSERT("Invalid for numberofcoaches:" | ||
| 833 | " not a valid unsigned number with at most two digits", | ||
| 834 | parseUnsigned<2>(fields.number_of_coaches, childval)); | ||
| 835 | break; | ||
| 836 | case KV6F_RD_X: | ||
| 837 | PWARNASSERT("Invalid value for rd-x:" | ||
| 838 | " not a valid signed number with at most six digits", | ||
| 839 | parseSigned<6>(fields.rd_x, childval)); | ||
| 840 | break; | ||
| 841 | case KV6F_RD_Y: | ||
| 842 | PWARNASSERT("Invalid value for rd-y:" | ||
| 843 | " not a valid signed number with at most six digits", | ||
| 844 | parseSigned<6>(fields.rd_y, childval)); | ||
| 845 | break; | ||
| 846 | case KV6F_DISTANCE_SINCE_LAST_USER_STOP: | ||
| 847 | PWARNASSERT("Invalid value for distancesincelastuserstop:" | ||
| 848 | " not a valid unsigned number with at most five digits", | ||
| 849 | parseUnsigned<5>(fields.distance_since_last_user_stop, childval)); | ||
| 850 | break; | ||
| 851 | case KV6F_NONE: | ||
| 852 | error("NONE field type case should be unreachable in parseKv6PosInfoRecord"); | ||
| 853 | return; | ||
| 854 | } | ||
| 855 | fields.markPresent(field); | ||
| 856 | break; | ||
| 857 | } | ||
| 858 | } | ||
| 859 | }); | ||
| 860 | } | ||
| 861 | |||
| 862 | fields.removeUnsupportedFields(); | ||
| 863 | |||
| 864 | if (!fields.valid()) | ||
| 865 | return std::nullopt; | ||
| 866 | return fields; | ||
| 867 | } | ||
| 868 | |||
| 869 | std::vector<Kv6Record> parseKv6PosInfo(const rapidxml::xml_node<> &node, const Xmlns *nss) { | ||
| 870 | std::vector<Kv6Record> records; | ||
| 871 | for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) { | ||
| 872 | ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) { | ||
| 873 | for (auto type = _KV6T_FIRST_TYPE; | ||
| 874 | type != _KV6T_LAST_TYPE; | ||
| 875 | type = static_cast<Kv6RecordType>(type + 1)) { | ||
| 876 | if (type == KV6T_UNKNOWN) | ||
| 877 | continue; | ||
| 878 | if (KV6_POS_INFO_RECORD_TYPES[type] == name) { | ||
| 879 | auto record = parseKv6PosInfoRecord(type, *child, nss); | ||
| 880 | if (record) { | ||
| 881 | records.push_back(*record); | ||
| 882 | } | ||
| 883 | } | ||
| 884 | } | ||
| 885 | }); | ||
| 886 | } | ||
| 887 | return records; | ||
| 888 | } | ||
| 889 | |||
| 890 | std::optional<Tmi8VvTmPushInfo> parseVvTmPush(const rapidxml::xml_node<> &node, const Xmlns *nss) { | ||
| 891 | Tmi8VvTmPushInfo info; | ||
| 892 | for (const rapidxml::xml_node<> *child = node.first_node(); child; child = child->next_sibling()) { | ||
| 893 | ifTmi8Element(*child, nss, [&](std::string_view name, const Xmlns *nss) { | ||
| 894 | if (name == "Timestamp") { | ||
| 895 | PERRASSERT("Invalid value for Timestamp: Bad format", onlyTextElement(*child)); | ||
| 896 | PERRASSERT("Invalid value for Timestamp: Invalid timestamp", Timestamp::parse(info.timestamp, getValue(*child))); | ||
| 897 | info.markPresent(TMI8F_TIMESTAMP); | ||
| 898 | } else if (name == "SubscriberID") { | ||
| 899 | PERRASSERT("Invalid value for SubscriberID: Bad format", onlyTextElement(*child)); | ||
| 900 | info.subscriber_id = getValue(*child); | ||
| 901 | info.markPresent(TMI8F_SUBSCRIBER_ID); | ||
| 902 | } else if (name == "Version") { | ||
| 903 | PERRASSERT("Invalid value for Version: Bad format", onlyTextElement(*child)); | ||
| 904 | info.version = getValue(*child); | ||
| 905 | info.markPresent(TMI8F_VERSION); | ||
| 906 | } else if (name == "DossierName") { | ||
| 907 | PERRASSERT("Invalid value for DossierName: Bad format", onlyTextElement(*child)); | ||
| 908 | info.dossier_name = getValue(*child); | ||
| 909 | info.markPresent(TMI8F_DOSSIER_NAME); | ||
| 910 | } else if (name == "KV6posinfo") { | ||
| 911 | info.messages = parseKv6PosInfo(*child, nss); | ||
| 912 | } | ||
| 913 | }); | ||
| 914 | } | ||
| 915 | |||
| 916 | if (!info.valid()) | ||
| 917 | return std::nullopt; | ||
| 918 | return info; | ||
| 919 | } | ||
| 920 | |||
| 921 | std::optional<Tmi8VvTmPushInfo> parse(const rapidxml::xml_document<> &doc) { | ||
| 922 | std::optional<Tmi8VvTmPushInfo> msg; | ||
| 923 | for (const rapidxml::xml_node<> *node = doc.first_node(); node; node = node->next_sibling()) { | ||
| 924 | ifTmi8Element(*node, nullptr /* nss */, [&](std::string_view name, const Xmlns *nss) { | ||
| 925 | if (name == "VV_TM_PUSH") { | ||
| 926 | if (msg) { | ||
| 927 | error("Duplicated VV_TM_PUSH"); | ||
| 928 | return; | ||
| 929 | } | ||
| 930 | msg = parseVvTmPush(*node, nss); | ||
| 931 | if (!msg) { | ||
| 932 | error("Invalid VV_TM_PUSH"); | ||
| 933 | } | ||
| 934 | } | ||
| 935 | }); | ||
| 936 | } | ||
| 937 | if (!msg) | ||
| 938 | error("Expected to find VV_TM_PUSH"); | ||
| 939 | return msg; | ||
| 940 | } | ||
| 941 | }; | ||
| 942 | |||
| 943 | std::optional<Tmi8VvTmPushInfo> parseXml(const rapidxml::xml_document<> &doc, std::stringstream &errs, std::stringstream &warns) { | ||
| 944 | Kv6Parser parser = { errs, warns }; | ||
| 945 | return parser.parse(doc); | ||
| 946 | } | ||
| 947 | |||
| 948 | struct Metrics { | ||
| 949 | prometheus::Counter &messages_counter_ok; | ||
| 950 | prometheus::Counter &messages_counter_error; | ||
| 951 | prometheus::Counter &messages_counter_warning; | ||
| 952 | prometheus::Counter &rows_written_counter; | ||
| 953 | prometheus::Histogram &records_hist; | ||
| 954 | prometheus::Histogram &message_parse_hist; | ||
| 955 | prometheus::Histogram &payload_size_hist; | ||
| 956 | |||
| 957 | using BucketBoundaries = prometheus::Histogram::BucketBoundaries; | ||
| 958 | |||
| 959 | enum class ParseStatus { | ||
| 960 | OK, | ||
| 961 | WARNING, | ||
| 962 | ERROR, | ||
| 963 | }; | ||
| 964 | |||
| 965 | Metrics(std::shared_ptr<prometheus::Registry> registry) : | ||
| 966 | Metrics(registry, prometheus::BuildCounter() | ||
| 967 | .Name("kv6_vv_tm_push_messages_total") | ||
| 968 | .Help("Number of KV6 VV_TM_PUSH messages received") | ||
| 969 | .Register(*registry)) | ||
| 970 | {} | ||
| 971 | |||
| 972 | void addMeasurement(std::chrono::duration<double> took_secs, size_t payload_size, size_t records, ParseStatus parsed) { | ||
| 973 | double millis = took_secs.count() * 1000.0; | ||
| 974 | |||
| 975 | if (parsed == ParseStatus::OK) messages_counter_ok.Increment(); | ||
| 976 | else if (parsed == ParseStatus::WARNING) messages_counter_warning.Increment(); | ||
| 977 | else if (parsed == ParseStatus::ERROR) messages_counter_error.Increment(); | ||
| 978 | records_hist.Observe(static_cast<double>(records)); | ||
| 979 | message_parse_hist.Observe(millis); | ||
| 980 | payload_size_hist.Observe(static_cast<double>(payload_size)); | ||
| 981 | } | ||
| 982 | |||
| 983 | void rowsWritten(int64_t rows) { | ||
| 984 | rows_written_counter.Increment(static_cast<double>(rows)); | ||
| 985 | } | ||
| 986 | |||
| 987 | private: | ||
| 988 | Metrics(std::shared_ptr<prometheus::Registry> registry, | ||
| 989 | prometheus::Family<prometheus::Counter> &messages_counter) : | ||
| 990 | messages_counter_ok(messages_counter | ||
| 991 | .Add({{ "status", "ok" }})), | ||
| 992 | messages_counter_error(messages_counter | ||
| 993 | .Add({{ "status", "error" }})), | ||
| 994 | messages_counter_warning(messages_counter | ||
| 995 | .Add({{ "status", "warning" }})), | ||
| 996 | rows_written_counter(prometheus::BuildCounter() | ||
| 997 | .Name("kv6_vv_tm_push_records_written") | ||
| 998 | .Help("Numer of VV_TM_PUSH records written to disk") | ||
| 999 | .Register(*registry) | ||
| 1000 | .Add({})), | ||
| 1001 | records_hist(prometheus::BuildHistogram() | ||
| 1002 | .Name("kv6_vv_tm_push_records_amount") | ||
| 1003 | .Help("Number of KV6 VV_TM_PUSH records") | ||
| 1004 | .Register(*registry) | ||
| 1005 | .Add({}, BucketBoundaries{ 5.0, 10.0, 20.0, 50.0, 100.0, 250.0, 500.0 })), | ||
| 1006 | message_parse_hist(prometheus::BuildHistogram() | ||
| 1007 | .Name("kv6_vv_tm_push_message_parse_millis") | ||
| 1008 | .Help("Milliseconds taken to parse KV6 VV_TM_PUSH messages") | ||
| 1009 | .Register(*registry) | ||
| 1010 | .Add({}, BucketBoundaries{ 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 100.0, 1000.0, 2000.0 })), | ||
| 1011 | payload_size_hist(prometheus::BuildHistogram() | ||
| 1012 | .Name("kv6_payload_size") | ||
| 1013 | .Help("Sizes of KV6 ZeroMQ message payloads") | ||
| 1014 | .Register(*registry) | ||
| 1015 | .Add({}, BucketBoundaries{ 500.0, 1000.0, 2500.0, 5000.0, 10000.0, 25000.0, 50000.0 })) | ||
| 1016 | {} | ||
| 1017 | }; | ||
| 1018 | |||
| 1019 | // Note: it *must* hold that decompressed[size] == 0 | ||
| 1020 | std::optional<Tmi8VvTmPushInfo> parseMsg(char *decompressed, size_t size, Metrics &metrics, std::stringstream &errs, std::stringstream &warns) { | ||
| 1021 | auto start = std::chrono::steady_clock::now(); | ||
| 1022 | |||
| 1023 | std::optional<Tmi8VvTmPushInfo> info; | ||
| 1024 | |||
| 1025 | if (decompressed[size] != 0) { | ||
| 1026 | errs << "Not parsing: missing null terminator" << '\n'; | ||
| 1027 | } else { | ||
| 1028 | rapidxml::xml_document<> doc; | ||
| 1029 | constexpr int PARSE_FLAGS = rapidxml::parse_trim_whitespace | ||
| 1030 | | rapidxml::parse_no_string_terminators | ||
| 1031 | | rapidxml::parse_validate_closing_tags; | ||
| 1032 | |||
| 1033 | try { | ||
| 1034 | doc.parse<PARSE_FLAGS>(decompressed); | ||
| 1035 | info = parseXml(doc, errs, warns); | ||
| 1036 | } catch (const rapidxml::parse_error &err) { | ||
| 1037 | errs << "XML parsing failed" << '\n'; | ||
| 1038 | } | ||
| 1039 | } | ||
| 1040 | |||
| 1041 | auto end = std::chrono::steady_clock::now(); | ||
| 1042 | std::chrono::duration<double> took = end - start; | ||
| 1043 | |||
| 1044 | if (info) | ||
| 1045 | if (warns.view().empty()) | ||
| 1046 | metrics.addMeasurement(took, size, info->messages.size(), Metrics::ParseStatus::OK); | ||
| 1047 | else | ||
| 1048 | metrics.addMeasurement(took, size, info->messages.size(), Metrics::ParseStatus::WARNING); | ||
| 1049 | else | ||
| 1050 | metrics.addMeasurement(took, size, 0, Metrics::ParseStatus::ERROR); | ||
| 1051 | |||
| 1052 | return info; | ||
| 1053 | } | ||
| 1054 | |||
// Set to non-zero by the signal handler below to request shutdown.
// It is written from a signal handler, so it must be a volatile sig_atomic_t:
// a plain bool gives no guarantee that the write is seen by the interrupted code.
volatile sig_atomic_t terminate = 0;

// Signal handler for SIGINT/SIGTERM: only raises the shutdown flag.
void onSigIntOrTerm(int /* signum */) {
  terminate = 1;
}
| 1060 | |||
| 1061 | arrow::Result<std::shared_ptr<arrow::Table>> getTable(const std::vector<Kv6Record> &messages, size_t &rows_written) { | ||
| 1062 | ParquetBuilder builder; | ||
| 1063 | |||
| 1064 | for (const auto &msg : messages) { | ||
| 1065 | Kv6Field present = msg.presence; | ||
| 1066 | Kv6Field required = KV6T_REQUIRED_FIELDS[msg.type]; | ||
| 1067 | Kv6Field optional = KV6T_OPTIONAL_FIELDS[msg.type]; | ||
| 1068 | if ((~msg.presence & required) != 0) { | ||
| 1069 | std::cout << "Invalid message: not all required fields present; skipping" << std::endl; | ||
| 1070 | continue; | ||
| 1071 | } | ||
| 1072 | Kv6Field used = static_cast<Kv6Field>(present & (required | optional)); | ||
| 1073 | rows_written++; | ||
| 1074 | |||
| 1075 | // RD-X and RD-Y fix: some datatypes have these fields marked as required, but still give option | ||
| 1076 | // of not providing these fields by setting them to -1. We want this normalized, where these | ||
| 1077 | // fields are instead simply marked as not present. | ||
| 1078 | if ((used & KV6F_RD_X) && msg.rd_x == -1) | ||
| 1079 | used = static_cast<Kv6Field>(used & ~KV6F_RD_X); | ||
| 1080 | if ((used & KV6F_RD_Y) && msg.rd_y == -1) | ||
| 1081 | used = static_cast<Kv6Field>(used & ~KV6F_RD_Y); | ||
| 1082 | |||
| 1083 | ARROW_RETURN_NOT_OK(builder.types.Append(*findKv6PosInfoRecordTypeName(msg.type))); | ||
| 1084 | ARROW_RETURN_NOT_OK(used & KV6F_DATA_OWNER_CODE | ||
| 1085 | ? builder.data_owner_codes.Append(msg.data_owner_code) | ||
| 1086 | : builder.data_owner_codes.AppendNull()); | ||
| 1087 | ARROW_RETURN_NOT_OK(used & KV6F_LINE_PLANNING_NUMBER | ||
| 1088 | ? builder.line_planning_numbers.Append(msg.line_planning_number) | ||
| 1089 | : builder.line_planning_numbers.AppendNull()); | ||
| 1090 | ARROW_RETURN_NOT_OK(used & KV6F_OPERATING_DAY | ||
| 1091 | ? builder.operating_days.Append(static_cast<int32_t>(msg.operating_day.toUnixDays().count())) | ||
| 1092 | : builder.operating_days.AppendNull()); | ||
| 1093 | ARROW_RETURN_NOT_OK(used & KV6F_JOURNEY_NUMBER | ||
| 1094 | ? builder.journey_numbers.Append(msg.journey_number) | ||
| 1095 | : builder.journey_numbers.AppendNull()); | ||
| 1096 | ARROW_RETURN_NOT_OK(used & KV6F_REINFORCEMENT_NUMBER | ||
| 1097 | ? builder.reinforcement_numbers.Append(msg.reinforcement_number) | ||
| 1098 | : builder.reinforcement_numbers.AppendNull()); | ||
| 1099 | ARROW_RETURN_NOT_OK(used & KV6F_TIMESTAMP | ||
| 1100 | ? builder.timestamps.Append(msg.timestamp.toUnixSeconds().count()) | ||
| 1101 | : builder.timestamps.AppendNull()); | ||
| 1102 | ARROW_RETURN_NOT_OK(used & KV6F_SOURCE | ||
| 1103 | ? builder.sources.Append(msg.source) | ||
| 1104 | : builder.sources.AppendNull()); | ||
| 1105 | ARROW_RETURN_NOT_OK(used & KV6F_PUNCTUALITY | ||
| 1106 | ? builder.punctualities.Append(msg.punctuality) | ||
| 1107 | : builder.punctualities.AppendNull()); | ||
| 1108 | ARROW_RETURN_NOT_OK(used & KV6F_USER_STOP_CODE | ||
| 1109 | ? builder.user_stop_codes.Append(msg.user_stop_code) | ||
| 1110 | : builder.user_stop_codes.AppendNull()); | ||
| 1111 | ARROW_RETURN_NOT_OK(used & KV6F_PASSAGE_SEQUENCE_NUMBER | ||
| 1112 | ? builder.passage_sequence_numbers.Append(msg.passage_sequence_number) | ||
| 1113 | : builder.passage_sequence_numbers.AppendNull()); | ||
| 1114 | ARROW_RETURN_NOT_OK(used & KV6F_VEHICLE_NUMBER | ||
| 1115 | ? builder.vehicle_numbers.Append(msg.vehicle_number) | ||
| 1116 | : builder.vehicle_numbers.AppendNull()); | ||
| 1117 | ARROW_RETURN_NOT_OK(used & KV6F_BLOCK_CODE | ||
| 1118 | ? builder.block_codes.Append(msg.block_code) | ||
| 1119 | : builder.block_codes.AppendNull()); | ||
| 1120 | ARROW_RETURN_NOT_OK(used & KV6F_WHEELCHAIR_ACCESSIBLE | ||
| 1121 | ? builder.wheelchair_accessibles.Append(msg.wheelchair_accessible) | ||
| 1122 | : builder.wheelchair_accessibles.AppendNull()); | ||
| 1123 | ARROW_RETURN_NOT_OK(used & KV6F_NUMBER_OF_COACHES | ||
| 1124 | ? builder.number_of_coaches.Append(msg.number_of_coaches) | ||
| 1125 | : builder.number_of_coaches.AppendNull()); | ||
| 1126 | ARROW_RETURN_NOT_OK(used & KV6F_RD_Y | ||
| 1127 | ? builder.rd_ys.Append(msg.rd_y) | ||
| 1128 | : builder.rd_ys.AppendNull()); | ||
| 1129 | ARROW_RETURN_NOT_OK(used & KV6F_RD_X | ||
| 1130 | ? builder.rd_xs.Append(msg.rd_x) | ||
| 1131 | : builder.rd_xs.AppendNull()); | ||
| 1132 | ARROW_RETURN_NOT_OK(used & KV6F_DISTANCE_SINCE_LAST_USER_STOP | ||
| 1133 | ? builder.distance_since_last_user_stops.Append(msg.distance_since_last_user_stop) | ||
| 1134 | : builder.distance_since_last_user_stops.AppendNull()); | ||
| 1135 | } | ||
| 1136 | |||
| 1137 | return builder.getTable(); | ||
| 1138 | } | ||
| 1139 | |||
| 1140 | std::tuple<int64_t, int64_t> getMinMaxTimestamp(const std::vector<Kv6Record> &messages) { | ||
| 1141 | if (messages.size() == 0) | ||
| 1142 | return { 0, 0 }; | ||
| 1143 | int64_t min = std::numeric_limits<int64_t>::max(); | ||
| 1144 | int64_t max = 0; | ||
| 1145 | for (const auto &message : messages) { | ||
| 1146 | if (~message.presence & KV6F_TIMESTAMP) | ||
| 1147 | continue; | ||
| 1148 | int64_t seconds = message.timestamp.toUnixSeconds().count(); | ||
| 1149 | if (seconds < min) | ||
| 1150 | min = seconds; | ||
| 1151 | if (seconds > max) | ||
| 1152 | max = seconds; | ||
| 1153 | } | ||
| 1154 | if (min == std::numeric_limits<decltype(min)>::max()) | ||
| 1155 | return { 0, 0 }; // this is stupid | ||
| 1156 | return { min, max }; | ||
| 1157 | } | ||
| 1158 | |||
// Writes the given records to a timestamped Parquet file in the working
// directory, followed by a "<file>.meta.json" sidecar holding the min/max
// timestamp and the number of rows written.
//
// The sidecar is first written as "<file>.meta.json.part" and then renamed, so
// a ".meta.json" file only appears once its contents are complete.
//
// Returns OK on success. Failures while building the table, writing the Parquet
// file, writing the sidecar, or renaming it are all reported through the
// returned arrow::Status. metrics.rowsWritten() is only called on full success.
arrow::Status writeParquet(const std::vector<Kv6Record> &messages, Metrics &metrics) {
  size_t rows_written = 0;
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Table> table, getTable(messages, rows_written));

  auto timestamp = std::chrono::round<std::chrono::seconds>(std::chrono::utc_clock::now());
  std::string filename = std::format("oeuf-{:%FT%T%Ez}.parquet", timestamp);
  ARROW_RETURN_NOT_OK(writeArrowTableAsParquetFile(*table, filename));
  std::cout << "Wrote Parquet file " << filename << std::endl;

  auto [min_timestamp, max_timestamp] = getMinMaxTimestamp(messages);
  const std::string meta_filename = filename + ".meta.json";
  const std::string meta_part_filename = meta_filename + ".part";
  {
    std::ofstream metaf(meta_part_filename, std::ios::binary);
    nlohmann::json meta{
      { "min_timestamp", min_timestamp },
      { "max_timestamp", max_timestamp },
      { "rows_written", rows_written },
    };
    metaf << meta;
    metaf.close();
    // Covers a failed open, a failed write and a failed flush-on-close.
    if (!metaf)
      return arrow::Status::IOError("Failed to write metadata file ", meta_part_filename);
  }

  // Use the non-throwing overload: this function reports errors via Status.
  std::error_code ec;
  std::filesystem::rename(meta_part_filename, meta_filename, ec);
  if (ec)
    return arrow::Status::IOError("Failed to rename ", meta_part_filename, " to ", meta_filename, ": ", ec.message());

  metrics.rowsWritten(rows_written);

  return arrow::Status::OK();
}
| 1183 | |||
| 1184 | using SteadyTime = std::chrono::steady_clock::time_point; | ||
| 1185 | |||
// Writes a diagnostic dump for a message to "oeuf-error-<UTC time>.txt" in the
// working directory: first the error messages, then the warnings, then the
// received text itself, each under its own banner line.
//
// Returns the name of the dump file.
std::string dumpFailedMsg(std::string_view txt, std::string_view errs, std::string_view warns) {
  const auto now = std::chrono::round<std::chrono::seconds>(std::chrono::utc_clock::now());
  std::string dump_name = std::format("oeuf-error-{:%FT%T%Ez}.txt", now);

  std::ofstream out(dump_name, std::ios::binary);
  out << "======= ERROR MESSAGES ========" << std::endl
      << errs
      << "======= WARNING MESSAGES ======" << std::endl
      << warns
      << "======= RECEIVED MESSAGE ======" << std::endl
      << txt << std::endl;
  out.close();

  return dump_name;
}
| 1199 | |||
| 1200 | void handleMsg(RawMessage &msg, Metrics &metrics, SteadyTime &last_output, std::vector<Kv6Record> &msg_buf) { | ||
| 1201 | unsigned int decompressed_size = 0; | ||
| 1202 | if (msg.getBodySize() > std::numeric_limits<unsigned int>::max()) | ||
| 1203 | std::cout << "parseMsg failed due to too large message" << std::endl; | ||
| 1204 | char *decompressed = decompress(msg.getBody(), static_cast<unsigned int>(msg.getBodySize()), decompressed_size); | ||
| 1205 | |||
| 1206 | std::stringstream errs; | ||
| 1207 | std::stringstream warns; | ||
| 1208 | // We know that decompressed[decompressed_size] == 0 because decompress() ensures this. | ||
| 1209 | auto parsed_msg = parseMsg(decompressed, decompressed_size, metrics, errs, warns); | ||
| 1210 | if (parsed_msg) { | ||
| 1211 | const Tmi8VvTmPushInfo &info = *parsed_msg; | ||
| 1212 | auto new_msgs_it = info.messages.begin(); | ||
| 1213 | while (new_msgs_it != info.messages.end()) { | ||
| 1214 | size_t remaining_space = MAX_PARQUET_CHUNK - msg_buf.size(); | ||
| 1215 | size_t new_msgs_left = info.messages.end() - new_msgs_it; | ||
| 1216 | auto new_msgs_start = new_msgs_it; | ||
| 1217 | auto new_msgs_end = new_msgs_start + std::min(remaining_space, new_msgs_left); | ||
| 1218 | new_msgs_it = new_msgs_end; | ||
| 1219 | msg_buf.insert(msg_buf.end(), new_msgs_start, new_msgs_end); | ||
| 1220 | |||
| 1221 | bool time_expired = std::chrono::steady_clock::now() - last_output > std::chrono::minutes(5); | ||
| 1222 | if (msg_buf.size() >= MAX_PARQUET_CHUNK || (new_msgs_it == info.messages.end() && time_expired)) { | ||
| 1223 | arrow::Status status = writeParquet(msg_buf, metrics); | ||
| 1224 | if (!status.ok()) | ||
| 1225 | std::cout << "Writing Parquet file failed: " << status << std::endl; | ||
| 1226 | msg_buf.clear(); | ||
| 1227 | last_output = std::chrono::steady_clock::now(); | ||
| 1228 | } | ||
| 1229 | } | ||
| 1230 | if (!errs.view().empty() || !warns.view().empty()) { | ||
| 1231 | std::filesystem::path dump_file = dumpFailedMsg(std::string_view(decompressed, decompressed_size), errs.str(), warns.str()); | ||
| 1232 | std::cout << "parseMsg finished with warnings: details dumped to " << dump_file << std::endl; | ||
| 1233 | } | ||
| 1234 | } else { | ||
| 1235 | std::filesystem::path dump_file = dumpFailedMsg(std::string_view(decompressed, decompressed_size), errs.str(), warns.str()); | ||
| 1236 | std::cout << "parseMsg failed: error details dumped to " << dump_file << std::endl; | ||
| 1237 | } | ||
| 1238 | free(decompressed); | ||
| 1239 | } | ||
| 1240 | |||
| 1241 | int main(int argc, char *argv[]) { | ||
| 1242 | std::cout << "Working directory: " << std::filesystem::current_path() << std::endl; | ||
| 1243 | |||
| 1244 | const char *metrics_addr = getenv("METRICS_ADDR"); | ||
| 1245 | if (!metrics_addr || strlen(metrics_addr) == 0) { | ||
| 1246 | std::cout << "Error: no METRICS_ADDR set!" << std::endl; | ||
| 1247 | exit(EXIT_FAILURE); | ||
| 1248 | } | ||
| 1249 | prometheus::Exposer exposer{metrics_addr}; | ||
| 1250 | |||
| 1251 | bool prod = false; | ||
| 1252 | const char *prod_env = getenv("NDOV_PRODUCTION"); | ||
| 1253 | if (prod_env && strcmp(prod_env, "true") == 0) prod = true; | ||
| 1254 | |||
| 1255 | void *zmq_context = zmq_ctx_new(); | ||
| 1256 | void *zmq_subscriber = zmq_socket(zmq_context, ZMQ_SUB); | ||
| 1257 | int rc = zmq_connect(zmq_subscriber, prod ? "tcp://pubsub.ndovloket.nl:7658" : "tcp://pubsub.besteffort.ndovloket.nl:7658"); | ||
| 1258 | assert(rc == 0); | ||
| 1259 | |||
| 1260 | const char *topic = "/CXX/KV6posinfo"; | ||
| 1261 | rc = zmq_setsockopt(zmq_subscriber, ZMQ_SUBSCRIBE, topic, strlen(topic)); | ||
| 1262 | assert(rc == 0); | ||
| 1263 | |||
| 1264 | signal(SIGINT, onSigIntOrTerm); | ||
| 1265 | signal(SIGTERM, onSigIntOrTerm); | ||
| 1266 | |||
| 1267 | SteadyTime last_output = std::chrono::steady_clock::now(); | ||
| 1268 | |||
| 1269 | auto registry = std::make_shared<prometheus::Registry>(); | ||
| 1270 | Metrics metrics(registry); | ||
| 1271 | exposer.RegisterCollectable(registry); | ||
| 1272 | |||
| 1273 | std::vector<Kv6Record> msg_buf; | ||
| 1274 | while (!terminate) { | ||
| 1275 | std::optional<RawMessage> msg = recvMsg(zmq_subscriber); | ||
| 1276 | if (!msg) { | ||
| 1277 | if (!terminate) | ||
| 1278 | perror("recvMsg"); | ||
| 1279 | continue; | ||
| 1280 | } | ||
| 1281 | handleMsg(*msg, metrics, last_output, msg_buf); | ||
| 1282 | } | ||
| 1283 | |||
| 1284 | std::cout << "Terminating" << std::endl; | ||
| 1285 | if (msg_buf.size() > 0) { | ||
| 1286 | arrow::Status status = writeParquet(msg_buf, metrics); | ||
| 1287 | if (!status.ok()) std::cout << "Writing final Parquet file failed: " << status << std::endl; | ||
| 1288 | else std::cout << "Final data written" << std::endl; | ||
| 1289 | msg_buf.clear(); | ||
| 1290 | } | ||
| 1291 | |||
| 1292 | if (zmq_close(zmq_subscriber)) | ||
| 1293 | perror("zmq_close"); | ||
| 1294 | if (zmq_ctx_destroy(zmq_context)) | ||
| 1295 | perror("zmq_ctx_destroy"); | ||
| 1296 | |||
| 1297 | std::cout << "Bye" << std::endl; | ||
| 1298 | |||
| 1299 | return 0; | ||
| 1300 | } | ||