From 17a3ea880402338420699e03bcb24181e4ff3924 Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Thu, 2 May 2024 20:27:40 +0200 Subject: Initial commit Based on dc4ba6a --- lib/libtmi8/src/kv1_index.cpp | 461 ++++++++++++++ lib/libtmi8/src/kv1_lexer.cpp | 152 +++++ lib/libtmi8/src/kv1_parser.cpp | 1258 +++++++++++++++++++++++++++++++++++++++ lib/libtmi8/src/kv1_types.cpp | 773 ++++++++++++++++++++++++ lib/libtmi8/src/kv6_parquet.cpp | 102 ++++ 5 files changed, 2746 insertions(+) create mode 100644 lib/libtmi8/src/kv1_index.cpp create mode 100644 lib/libtmi8/src/kv1_lexer.cpp create mode 100644 lib/libtmi8/src/kv1_parser.cpp create mode 100644 lib/libtmi8/src/kv1_types.cpp create mode 100644 lib/libtmi8/src/kv6_parquet.cpp (limited to 'lib/libtmi8/src') diff --git a/lib/libtmi8/src/kv1_index.cpp b/lib/libtmi8/src/kv1_index.cpp new file mode 100644 index 0000000..23e9596 --- /dev/null +++ b/lib/libtmi8/src/kv1_index.cpp @@ -0,0 +1,461 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +
// Fills `map` with one entry per element of `records`, keyed by the element's
// `key` member and holding the address of the element itself (not a copy).
// When several elements share a key, the last one wins.
template<typename Map, typename Records>
static void indexByKey(Map &map, Records &records) {
  map.reserve(records.size());
  for (auto &record : records)
    map[record.key] = &record;
}

// Returns the value stored under `key` in `map`, or a value-initialised
// mapped value (a null pointer for the pointer-valued index maps) when the
// key is absent. Unlike operator[], a failed lookup does not insert anything,
// so resolving a dangling reference leaves the index untouched.
template<typename Map, typename Key>
static typename Map::mapped_type lookupRecord(const Map &map, const Key &key) {
  auto it = map.find(key);
  return it != map.end() ? it->second : typename Map::mapped_type{};
}

// Builds a key -> record lookup table for every keyed record collection in
// `records`.
//
// The tables store the addresses of the elements inside `*records`, so
// `*records` has to outlive the index.
// NOTE(review): presumably the collections must also not be resized after
// indexing, since that would move the elements -- confirm the container type.
Kv1Index::Kv1Index(Kv1Records *records) : records(records) {
  indexByKey(organizational_units,          records->organizational_units);
  indexByKey(higher_organizational_units,   records->higher_organizational_units);
  indexByKey(user_stop_points,              records->user_stop_points);
  indexByKey(user_stop_areas,               records->user_stop_areas);
  indexByKey(timing_links,                  records->timing_links);
  indexByKey(links,                         records->links);
  indexByKey(lines,                         records->lines);
  indexByKey(destinations,                  records->destinations);
  indexByKey(journey_patterns,              records->journey_patterns);
  indexByKey(concession_financer_relations, records->concession_financer_relations);
  indexByKey(concession_areas,              records->concession_areas);
  indexByKey(financers,                     records->financers);
  indexByKey(journey_pattern_timing_links,  records->journey_pattern_timing_links);
  indexByKey(points,                        records->points);
  indexByKey(point_on_links,                records->point_on_links);
  indexByKey(icons,                         records->icons);
  indexByKey(notices,                       records->notices);
  indexByKey(time_demand_groups,            records->time_demand_groups);
  indexByKey(time_demand_group_run_times,   records->time_demand_group_run_times);
  indexByKey(period_groups,                 records->period_groups);
  indexByKey(specific_days,                 records->specific_days);
  indexByKey(timetable_versions,            records->timetable_versions);
  indexByKey(public_journeys,               records->public_journeys);
  indexByKey(period_group_validities,       records->period_group_validities);
  indexByKey(exceptional_operating_days,    records->exceptional_operating_days);
  indexByKey(schedule_versions,             records->schedule_versions);
  indexByKey(public_journey_passing_times,  records->public_journey_passing_times);
  indexByKey(operating_days,                records->operating_days);
}

// Returns the total number of entries over all lookup tables of the index.
size_t Kv1Index::size() const {
  return organizational_units.size()
       + higher_organizational_units.size()
       + user_stop_points.size()
       + user_stop_areas.size()
       + timing_links.size()
       + links.size()
       + lines.size()
       + destinations.size()
       + journey_patterns.size()
       + concession_financer_relations.size()
       + concession_areas.size()
       + financers.size()
       + journey_pattern_timing_links.size()
       + points.size()
       + point_on_links.size()
       + icons.size()
       + notices.size()
       + time_demand_groups.size()
       + time_demand_group_run_times.size()
       + period_groups.size()
       + specific_days.size()
       + timetable_versions.size()
       + public_journeys.size()
       + period_group_validities.size()
       + exceptional_operating_days.size()
       + schedule_versions.size()
       + public_journey_passing_times.size()
       + operating_days.size();
}

// Resolves the references between records: for every record in
// `index.records`, the keys of the records it refers to are assembled from its
// own fields and looked up in `index`, and the result is stored in the
// matching `p_*` member.
//
// A reference whose target is not present in the index is stored as the
// value-initialised lookup result (null for pointers). Optional references
// (line icons, the user stop area of a stop, the financer of a concession
// financer relation) are only looked up when the referring field is set; their
// `p_*` member is left as it was otherwise.
void kv1LinkRecords(Kv1Index &index) {
  auto &records = *index.records;

  for (auto &orunorun : records.higher_organizational_units) {
    const auto &owner = orunorun.key.data_owner_code;
    Kv1OrganizationalUnit::Key orun_parent_key(owner, orunorun.key.organizational_unit_code_parent);
    Kv1OrganizationalUnit::Key orun_child_key(owner, orunorun.key.organizational_unit_code_child);
    orunorun.p_organizational_unit_parent = lookupRecord(index.organizational_units, orun_parent_key);
    orunorun.p_organizational_unit_child = lookupRecord(index.organizational_units, orun_child_key);
  }

  for (auto &usrstop : records.user_stop_points) {
    const auto &owner = usrstop.key.data_owner_code;
    Kv1Point::Key point_key(owner, usrstop.key.user_stop_code);
    usrstop.p_point = lookupRecord(index.points, point_key);
    if (!usrstop.user_stop_area_code.empty()) {
      Kv1UserStopArea::Key usrstar_key(owner, usrstop.user_stop_area_code);
      usrstop.p_user_stop_area = lookupRecord(index.user_stop_areas, usrstar_key);
    }
  }

  for (auto &tili : records.timing_links) {
    const auto &owner = tili.key.data_owner_code;
    Kv1UserStopPoint::Key usrstop_begin_key(owner, tili.key.user_stop_code_begin);
    Kv1UserStopPoint::Key usrstop_end_key(owner, tili.key.user_stop_code_end);
    tili.p_user_stop_begin = lookupRecord(index.user_stop_points, usrstop_begin_key);
    tili.p_user_stop_end = lookupRecord(index.user_stop_points, usrstop_end_key);
  }

  for (auto &link : records.links) {
    const auto &owner = link.key.data_owner_code;
    Kv1UserStopPoint::Key usrstop_begin_key(owner, link.key.user_stop_code_begin);
    Kv1UserStopPoint::Key usrstop_end_key(owner, link.key.user_stop_code_end);
    link.p_user_stop_begin = lookupRecord(index.user_stop_points, usrstop_begin_key);
    link.p_user_stop_end = lookupRecord(index.user_stop_points, usrstop_end_key);
  }

  for (auto &line : records.lines) {
    if (!line.line_icon)
      continue;
    Kv1Icon::Key icon_key(line.key.data_owner_code, *line.line_icon);
    line.p_line_icon = lookupRecord(index.icons, icon_key);
  }

  for (auto &jopa : records.journey_patterns) {
    Kv1Line::Key line_key(jopa.key.data_owner_code, jopa.key.line_planning_number);
    jopa.p_line = lookupRecord(index.lines, line_key);
  }

  for (auto &confinrel : records.concession_financer_relations) {
    const auto &owner = confinrel.key.data_owner_code;
    Kv1ConcessionArea::Key conarea_key(owner, confinrel.concession_area_code);
    confinrel.p_concession_area = lookupRecord(index.concession_areas, conarea_key);
    if (!confinrel.financer_code.empty()) {
      Kv1Financer::Key financer_key(owner, confinrel.financer_code);
      confinrel.p_financer = lookupRecord(index.financers, financer_key);
    }
  }

  for (auto &jopatili : records.journey_pattern_timing_links) {
    const auto &owner = jopatili.key.data_owner_code;
    Kv1Line::Key line_key(owner, jopatili.key.line_planning_number);
    Kv1JourneyPattern::Key jopa_key(
      owner,
      jopatili.key.line_planning_number,
      jopatili.key.journey_pattern_code);
    Kv1UserStopPoint::Key usrstop_begin_key(owner, jopatili.user_stop_code_begin);
    Kv1UserStopPoint::Key usrstop_end_key(owner, jopatili.user_stop_code_end);
    Kv1ConcessionFinancerRelation::Key confinrel_key(owner, jopatili.con_fin_rel_code);
    Kv1Destination::Key dest_key(owner, jopatili.dest_code);
    jopatili.p_line = lookupRecord(index.lines, line_key);
    jopatili.p_journey_pattern = lookupRecord(index.journey_patterns, jopa_key);
    jopatili.p_user_stop_begin = lookupRecord(index.user_stop_points, usrstop_begin_key);
    jopatili.p_user_stop_end = lookupRecord(index.user_stop_points, usrstop_end_key);
    jopatili.p_con_fin_rel = lookupRecord(index.concession_financer_relations, confinrel_key);
    jopatili.p_dest = lookupRecord(index.destinations, dest_key);
    if (jopatili.line_dest_icon) {
      Kv1Icon::Key icon_key{
        owner,
        *jopatili.line_dest_icon,
      };
      jopatili.p_line_dest_icon = lookupRecord(index.icons, icon_key);
    }
  }

  for (auto &pool : records.point_on_links) {
    const auto &owner = pool.key.data_owner_code;
    Kv1UserStopPoint::Key usrstop_begin_key(owner, pool.key.user_stop_code_begin);
    Kv1UserStopPoint::Key usrstop_end_key(owner, pool.key.user_stop_code_end);
    // The point carries its own data owner code, separate from the link's.
    Kv1Point::Key point_key(pool.key.point_data_owner_code, pool.key.point_code);
    pool.p_user_stop_begin = lookupRecord(index.user_stop_points, usrstop_begin_key);
    pool.p_user_stop_end = lookupRecord(index.user_stop_points, usrstop_end_key);
    pool.p_point = lookupRecord(index.points, point_key);
  }

  for (auto &ntcassgnm : records.notice_assignments) {
    Kv1Notice::Key notice_key(ntcassgnm.data_owner_code, ntcassgnm.notice_code);
    ntcassgnm.p_notice = lookupRecord(index.notices, notice_key);
  }

  for (auto &timdemgrp : records.time_demand_groups) {
    const auto &owner = timdemgrp.key.data_owner_code;
    Kv1Line::Key line_key(owner, timdemgrp.key.line_planning_number);
    Kv1JourneyPattern::Key jopa_key(
      owner,
      timdemgrp.key.line_planning_number,
      timdemgrp.key.journey_pattern_code);
    timdemgrp.p_line = lookupRecord(index.lines, line_key);
    timdemgrp.p_journey_pattern = lookupRecord(index.journey_patterns, jopa_key);
  }

  for (auto &timdemrnt : records.time_demand_group_run_times) {
    const auto &owner = timdemrnt.key.data_owner_code;
    Kv1Line::Key line_key(owner, timdemrnt.key.line_planning_number);
    Kv1JourneyPattern::Key jopa_key(
      owner,
      timdemrnt.key.line_planning_number,
      timdemrnt.key.journey_pattern_code);
    Kv1TimeDemandGroup::Key timdemgrp_key(
      owner,
      timdemrnt.key.line_planning_number,
      timdemrnt.key.journey_pattern_code,
      timdemrnt.key.time_demand_group_code);
    Kv1UserStopPoint::Key usrstop_begin_key(owner, timdemrnt.user_stop_code_begin);
    Kv1UserStopPoint::Key usrstop_end_key(owner, timdemrnt.user_stop_code_end);
    Kv1JourneyPatternTimingLink::Key jopatili_key(
      owner,
      timdemrnt.key.line_planning_number,
      timdemrnt.key.journey_pattern_code,
      timdemrnt.key.timing_link_order);
    timdemrnt.p_line = lookupRecord(index.lines, line_key);
    timdemrnt.p_user_stop_end = lookupRecord(index.user_stop_points, usrstop_end_key);
    timdemrnt.p_user_stop_begin = lookupRecord(index.user_stop_points, usrstop_begin_key);
    timdemrnt.p_journey_pattern = lookupRecord(index.journey_patterns, jopa_key);
    timdemrnt.p_time_demand_group = lookupRecord(index.time_demand_groups, timdemgrp_key);
    timdemrnt.p_journey_pattern_timing_link = lookupRecord(index.journey_pattern_timing_links, jopatili_key);
  }

  for (auto &tive : records.timetable_versions) {
    const auto &owner = tive.key.data_owner_code;
    Kv1OrganizationalUnit::Key orun_key(owner, tive.key.organizational_unit_code);
    Kv1PeriodGroup::Key pegr_key(owner, tive.key.period_group_code);
    Kv1SpecificDay::Key specday_key(owner, tive.key.specific_day_code);
    tive.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
    tive.p_period_group = lookupRecord(index.period_groups, pegr_key);
    tive.p_specific_day = lookupRecord(index.specific_days, specday_key);
  }

  for (auto &pujo : records.public_journeys) {
    const auto &owner = pujo.key.data_owner_code;
    Kv1TimetableVersion::Key tive_key(
      owner,
      pujo.key.organizational_unit_code,
      pujo.key.timetable_version_code,
      pujo.key.period_group_code,
      pujo.key.specific_day_code);
    Kv1OrganizationalUnit::Key orun_key(owner, pujo.key.organizational_unit_code);
    Kv1PeriodGroup::Key pegr_key(owner, pujo.key.period_group_code);
    Kv1SpecificDay::Key specday_key(owner, pujo.key.specific_day_code);
    Kv1Line::Key line_key(owner, pujo.key.line_planning_number);
    Kv1TimeDemandGroup::Key timdemgrp_key(
      owner,
      pujo.key.line_planning_number,
      pujo.journey_pattern_code,
      pujo.time_demand_group_code);
    Kv1JourneyPattern::Key jopa_key(
      owner,
      pujo.key.line_planning_number,
      pujo.journey_pattern_code);
    pujo.p_timetable_version = lookupRecord(index.timetable_versions, tive_key);
    pujo.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
    pujo.p_period_group = lookupRecord(index.period_groups, pegr_key);
    pujo.p_specific_day = lookupRecord(index.specific_days, specday_key);
    pujo.p_line = lookupRecord(index.lines, line_key);
    pujo.p_time_demand_group = lookupRecord(index.time_demand_groups, timdemgrp_key);
    pujo.p_journey_pattern = lookupRecord(index.journey_patterns, jopa_key);
  }

  for (auto &pegrval : records.period_group_validities) {
    const auto &owner = pegrval.key.data_owner_code;
    Kv1OrganizationalUnit::Key orun_key(owner, pegrval.key.organizational_unit_code);
    Kv1PeriodGroup::Key pegr_key(owner, pegrval.key.period_group_code);
    pegrval.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
    pegrval.p_period_group = lookupRecord(index.period_groups, pegr_key);
  }

  for (auto &excopday : records.exceptional_operating_days) {
    const auto &owner = excopday.key.data_owner_code;
    Kv1OrganizationalUnit::Key orun_key(owner, excopday.key.organizational_unit_code);
    Kv1SpecificDay::Key specday_key(owner, excopday.specific_day_code);
    Kv1PeriodGroup::Key pegr_key(owner, excopday.period_group_code);
    excopday.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
    excopday.p_specific_day = lookupRecord(index.specific_days, specday_key);
    excopday.p_period_group = lookupRecord(index.period_groups, pegr_key);
  }

  for (auto &schedvers : records.schedule_versions) {
    Kv1OrganizationalUnit::Key orun_key(
      schedvers.key.data_owner_code,
      schedvers.key.organizational_unit_code);
    schedvers.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
  }

  for (auto &pujopass : records.public_journey_passing_times) {
    const auto &owner = pujopass.key.data_owner_code;
    Kv1OrganizationalUnit::Key orun_key(owner, pujopass.key.organizational_unit_code);
    Kv1ScheduleVersion::Key schedvers_key(
      owner,
      pujopass.key.organizational_unit_code,
      pujopass.key.schedule_code,
      pujopass.key.schedule_type_code);
    Kv1Line::Key line_key(owner, pujopass.key.line_planning_number);
    Kv1JourneyPattern::Key jopa_key(
      owner,
      pujopass.key.line_planning_number,
      pujopass.journey_pattern_code);
    Kv1UserStopPoint::Key usrstop_key(owner, pujopass.user_stop_code);
    pujopass.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
    pujopass.p_schedule_version = lookupRecord(index.schedule_versions, schedvers_key);
    pujopass.p_line = lookupRecord(index.lines, line_key);
    pujopass.p_journey_pattern = lookupRecord(index.journey_patterns, jopa_key);
    pujopass.p_user_stop = lookupRecord(index.user_stop_points, usrstop_key);
  }

  for (auto &operday : records.operating_days) {
    const auto &owner = operday.key.data_owner_code;
    Kv1OrganizationalUnit::Key orun_key(owner, operday.key.organizational_unit_code);
    Kv1ScheduleVersion::Key schedvers_key(
      owner,
      operday.key.organizational_unit_code,
      operday.key.schedule_code,
      operday.key.schedule_type_code);
    operday.p_organizational_unit = lookupRecord(index.organizational_units, orun_key);
    operday.p_schedule_version = lookupRecord(index.schedule_versions, schedvers_key);
  }
}
diff --git a/lib/libtmi8/src/kv1_lexer.cpp b/lib/libtmi8/src/kv1_lexer.cpp new file mode 100644 index 0000000..028127b --- /dev/null +++ b/lib/libtmi8/src/kv1_lexer.cpp @@ -0,0 +1,152 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +
// Creates a lexer over `input`. Only a view of the input is kept; `slice`
// starts out covering all of it and is the part that still has to be lexed.
Kv1Lexer::Kv1Lexer(std::string_view input)
  : input(input), slice(input)
{}

// Does not eat newline character.
+void Kv1Lexer::eatRestOfLine() { + size_t end = slice.size(); + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '\r' || slice[i] == '\n') { + end = i; + break; + } + } + slice = slice.substr(end); +} + +void Kv1Lexer::lexOptionalHeader() { + if (slice.starts_with('[')) eatRestOfLine(); +} + +void Kv1Lexer::lexOptionalComment() { + if (slice.starts_with(';')) eatRestOfLine(); +} + +inline bool Kv1Lexer::isWhitespace(int c) { + return c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; +} + +void Kv1Lexer::readQuotedColumn() { + Kv1Token token{ .type = KV1_TOKEN_CELL }; + + if (slice.size() == 0 || slice[0] != '"') { + errors.push_back("(internal error) readQuotedColumn: slice[0] != '\"'"); + return; + } + slice = slice.substr(1); + while (true) { + size_t quote = slice.find('"'); + if (quote == std::string_view::npos) { + errors.push_back("readQuotedColumn: no matching closing quote found"); + return; + } + if (quote+1 == slice.size() || slice[quote + 1] != '"') { + token.data.append(slice.substr(0, quote)); + break; + } + token.data.append(slice.substr(0, quote + 1)); + slice = slice.substr(quote + 2); + } + + size_t end = slice.size(); + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') { + end = i; + break; + } + if (!isWhitespace(slice[i])) { + errors.push_back("readQuotedColumn: encountered non-whitespace character after closing quote"); + return; + } + } + if (end != std::string_view::npos) slice = slice.substr(end); + else slice = slice.substr(slice.size()); + + tokens.push_back(std::move(token)); +} + +void Kv1Lexer::readUnquotedColumn() { + size_t end = slice.size(); + size_t content_end = 0; + for (size_t i = 0; i < slice.size(); i++) { + if (slice[i] == '|' || slice[i] == '\r' || slice[i] == '\n') { + end = i; + break; + } else if (!isWhitespace(slice[i])) { + content_end = i + 1; + } + } + tokens.emplace_back(KV1_TOKEN_CELL, std::string(slice.substr(0, content_end))); + if 
(end != std::string_view::npos) slice = slice.substr(end); + else slice = slice.substr(slice.size()); +} + +void Kv1Lexer::lexRow() { + size_t cols = 0; + while (slice.size() > 0 && slice[0] != '\r' && slice[0] != '\n') { + if (slice[0] == '"') readQuotedColumn(); + else readUnquotedColumn(); + if (!errors.empty()) return; + cols++; + if (slice.size() != 0) { + if (slice[0] == '|') { + slice = slice.substr(1); + // A newline/eof right after pipe? That means an empty field at the end + // of the record, we also want to emit that as a token. + if (slice.size() == 0 || slice[0] == '\r' || slice[0] == '\n') { + tokens.push_back({ .type = KV1_TOKEN_CELL }); + } + } else if (slice[0] == '\r') { + if (slice.size() > 1 && slice[1] == '\n') slice = slice.substr(2); + else slice = slice.substr(1); + break; + } else if (slice[0] == '\n') { + slice = slice.substr(1); + break; + } else { + errors.push_back("lexRow: expected CR, LF or |"); + return; + } + } + } + tokens.push_back({ .type = KV1_TOKEN_ROW_END }); +} + +// Returns true when a line ending was consumed. 
bool Kv1Lexer::eatWhitespace() {
  // Skips blanks (space, form feed, horizontal and vertical tab) and at most
  // one line ending (CR, LF or CRLF). Stops in front of the first other
  // character, in which case false is returned.
  for (size_t i = 0; i < slice.size(); i++) {
    const char c = slice[i];
    if (c == '\r') {
      slice = slice.substr(i + 1);
      // `slice` now starts right behind the CR, so the LF of a CRLF pair, if
      // there is one, sits at index 0.
      if (!slice.empty() && slice[0] == '\n')
        slice = slice.substr(1);
      return true;
    }
    if (c == '\n') {
      slice = slice.substr(i + 1);
      return true;
    }

    if (c != ' ' && c != '\f' && c != '\t' && c != '\v') {
      slice = slice.substr(i);
      return false;
    }
  }
  // Nothing but blanks was left. Consume them, so that trailing blanks at the
  // end of the input are not lexed as a row holding a single empty cell.
  slice = slice.substr(slice.size());
  return false;
}

// Lexes the whole input into `tokens`. An optional '[' header line at the very
// start and ';' comment lines are skipped. Lexing stops at the end of the
// input or as soon as an error has been recorded in `errors`.
void Kv1Lexer::lex() {
  lexOptionalHeader();
  eatWhitespace();

  while (errors.empty() && !slice.empty()) {
    lexOptionalComment();
    bool newline = eatWhitespace();
    if (newline) continue;
    // We are now either (1) at the end of the file or (2) at the start of some column data
    if (errors.empty()) lexRow();
  }
}
diff --git a/lib/libtmi8/src/kv1_parser.cpp b/lib/libtmi8/src/kv1_parser.cpp new file mode 100644 index 0000000..ac0c6bf --- /dev/null +++ b/lib/libtmi8/src/kv1_parser.cpp @@ -0,0 +1,1258 @@ +// vim:set sw=2 ts=2 sts et: + +#include + +
using rune = uint32_t;

// Decodes the UTF-8 sequence at the start of `s`.
//
// Returns the number of bytes to skip to get past the sequence, which is 0
// only when `s` is empty and never more than s.size(). When `dest` is given,
// it receives the decoded code point, or U+FFFD when the lead byte is not a
// valid lead byte, when a continuation byte is malformed or when the sequence
// is cut off by the end of `s`. Overlong encodings and surrogates are not
// rejected.
static size_t decodeUtf8Cp(std::string_view s, rune *dest = nullptr) {
  rune res = 0xFFFD;
  size_t length = 1;

  if (s.size() == 0)
    return 0;
  const uint8_t *b = reinterpret_cast<const uint8_t *>(s.data());
  const auto isContinuation = [](uint8_t byte) { return (byte & 0xC0) == 0x80; };

  if (!(b[0] & 0x80)) {
    res = static_cast<rune>(b[0]);
  } else if ((b[0] & 0xE0) == 0xC0) {
    length = 2;
    if (s.size() >= 2 && isContinuation(b[1])) {
      res = static_cast<rune>(b[0] & 0x1F) << 6;
      res |= static_cast<rune>(b[1] & 0x3F);
    }
  } else if ((b[0] & 0xF0) == 0xE0) {
    length = 3;
    if (s.size() >= 3 && isContinuation(b[1]) && isContinuation(b[2])) {
      res = static_cast<rune>(b[0] & 0x0F) << 12;
      res |= static_cast<rune>(b[1] & 0x3F) << 6;
      res |= static_cast<rune>(b[2] & 0x3F);
    }
  } else if ((b[0] & 0xF8) == 0xF0) {
    // Every lead byte of the form 11110xxx starts a four byte sequence, not
    // just 0xF0.
    length = 4;
    if (s.size() >= 4 && isContinuation(b[1]) && isContinuation(b[2]) && isContinuation(b[3])) {
      res = static_cast<rune>(b[0] & 0x07) << 18;
      res |= static_cast<rune>(b[1] & 0x3F) << 12;
      res |= static_cast<rune>(b[2] & 0x3F) << 6;
      res |= static_cast<rune>(b[3] & 0x3F);
    }
  }

  if (dest)
    *dest = res;
  // A sequence that is cut off by the end of the input must not make the
  // caller skip past the end (std::string_view::substr throws in that case).
  return length <= s.size() ? length : s.size();
}

// Counts the number of codepoints in a UTF-8 string. Returns SIZE_MAX if
// decodeUtf8Cp reports a length of zero for a non-empty remainder.
// NOTE(review): decodeUtf8Cp reports malformed input as U+FFFD with a non-zero
// length, so malformed sequences are counted as code points here and SIZE_MAX
// is not returned for them -- confirm whether callers rely on SIZE_MAX.
static size_t stringViewLengthUtf8(std::string_view sv) {
  size_t codepoints = 0;
  while (sv.size() > 0) {
    size_t codepoint_size = decodeUtf8Cp(sv);
    if (codepoint_size == 0) return SIZE_MAX;
    codepoints++;
    sv = sv.substr(codepoint_size);
  }
  return codepoints;
}

// Creates a parser that reads `tokens` and appends the records it parses to
// `parse_into`.
Kv1Parser::Kv1Parser(std::vector<Kv1Token> tokens, Kv1Records &parse_into)
  : tokens(std::move(tokens)),
    records(parse_into)
{}

// Returns whether all tokens have been consumed.
bool Kv1Parser::atEnd() const {
  return pos >= tokens.size();
}

// Skips any row end tokens at the current position.
void Kv1Parser::eatRowEnds() {
  while (!atEnd() && tokens[pos].type == KV1_TOKEN_ROW_END) pos++;
}

// Returns the current token, or nullptr when all tokens have been consumed.
const Kv1Token *Kv1Parser::cur() const {
  if (atEnd()) return nullptr;
  return &tokens[pos];
}

// Consumes the current token if it is a cell and returns its content. If the
// row or the input has ended instead, an error mentioning `parsing_what` is
// added to `record_errors`, nothing is consumed and nullptr is returned.
const std::string *Kv1Parser::eatCell(std::string_view parsing_what) {
  const Kv1Token *tok = cur();
  if (!tok) {
    record_errors.push_back(std::format("Expected cell but got end of file when parsing {}", parsing_what));
    return nullptr;
  }
  if (tok->type == KV1_TOKEN_ROW_END) {
    record_errors.push_back(std::format("Expected cell but got end of row when parsing {}", parsing_what));
    return nullptr;
  }
  pos++;
  return &tok->data;
}

// Validates a string field: a mandatory field may not be empty, and no field
// may be longer than `max_length` code points. Violations are reported under
// the name `field`.
void Kv1Parser::requireString(std::string_view field, bool mandatory, size_t max_length, std::string_view value) {
  if (value.empty() && mandatory) {
    record_errors.push_back(std::format("{} has length zero but is required", field));
    return;
  }
  size_t codepoints = stringViewLengthUtf8(value);
  if (codepoints == SIZE_MAX) {
    // NOTE(review): this is the only check that reports to global_errors
    // rather than record_errors -- confirm that this is intended.
    global_errors.push_back(std::format("{} contains invalid UTF-8 code points", field));
    return;
  }
  if (codepoints > max_length) {
    // Report the length that was compared against the maximum (code points),
    // not the number of bytes.
    record_errors.push_back(std::format("{} has length ({}) that is greater than maximum length ({})",
                                        field, codepoints, max_length));
  }
}

// Parses "1"/"true" as true and "0"/"false" as false; anything else yields no
// value.
static inline std::optional<bool> parseBoolean(std::string_view src) {
  if (src == "1") return true;
  if (src == "0") return false;
  if (src == "true") return true;
  if (src == "false") return false;
  return std::nullopt;
}

// Parses a boolean field. An empty value yields no value, and is an error only
// when the field is mandatory. A value that is not a boolean is an error.
std::optional<bool> Kv1Parser::requireBoolean(std::string_view field, bool mandatory, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} is required, but has no value", field));
    return std::nullopt;
  }
  auto parsed = parseBoolean(value);
  if (!parsed.has_value())
    record_errors.push_back(std::format("{} should have value \"1\", \"0\", \"true\" or \"false\"", field));
  return parsed;
}

// Returns the number of decimal digits of `x`, ignoring the sign. Note that
// zero has no digits by this definition.
static inline size_t countDigits(long x) {
  size_t digits = 0;
  while (x != 0) { digits++; x /= 10; }
  return digits;
}

// Parses a numeric field in fixed notation (no exponent). An empty value
// yields no value, and is an error only when the field is mandatory. The whole
// value has to be a number, and its integral part may have at most
// `max_digits` digits; otherwise an error is recorded and no value is
// returned.
std::optional<double> Kv1Parser::requireNumber(std::string_view field, bool mandatory, size_t max_digits, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} has no value but is required", field));
    return std::nullopt;
  }

  double parsed;
  auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed);
  if (ec != std::errc()) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }
  if (ptr != value.data() + value.size()) {
    record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field));
    return std::nullopt;
  }

  size_t digits = countDigits(static_cast<long>(parsed));
  if (digits > max_digits) {
    record_errors.push_back(std::format("{} contains more digits (in the integral part) ({}) than allowed ({})",
                                        field, digits, max_digits));
    return std::nullopt;
  }

  return parsed;
}

// Returns whether `c` is a decimal digit or an upper case hexadecimal digit.
static inline bool isHexDigit(char c) {
  return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F');
}

// Returns the value of the hexadecimal digit `c`, or 0 when `c` is not a digit
// accepted by isHexDigit.
static inline uint8_t fromHex(char c) {
  if (c >= '0' && c <= '9') return static_cast<uint8_t>(c - '0');
  else if (c >= 'A' && c <= 'F') return static_cast<uint8_t>(c - 'A' + 10);
  return 0;
}

// Parses a color written as exactly six upper case hexadecimal digits
// (RRGGBB).
static std::optional<RgbColor> parseRgbColor(std::string_view src) {
  bool valid = src.size() == 6
    && isHexDigit(src[0]) && isHexDigit(src[1])
    && isHexDigit(src[2]) && isHexDigit(src[3])
    && isHexDigit(src[4]) && isHexDigit(src[5]);
  if (!valid) return std::nullopt;
  uint8_t r = static_cast<uint8_t>(fromHex(src[0]) << 4) + fromHex(src[1]);
  uint8_t g = static_cast<uint8_t>(fromHex(src[2]) << 4) + fromHex(src[3]);
  uint8_t b = static_cast<uint8_t>(fromHex(src[4]) << 4) + fromHex(src[5]);
  return RgbColor{ r, g, b };
}

// Parses a color field. An empty value yields no value, and is an error only
// when the field is mandatory. A value that is not a color is an error.
std::optional<RgbColor> Kv1Parser::requireRgbColor(std::string_view field, bool mandatory, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} is required, but has no value", field));
    return std::nullopt;
  }
  auto parsed = parseRgbColor(value);
  if (!parsed.has_value())
    record_errors.push_back(std::format("{} should be an RGB color, i.e. a sequence of six hexadecimally represented nibbles", field));
  return parsed;
}

// Parses a coordinate field in fixed notation (no exponent). An empty value
// yields no value, and is an error only when the field is mandatory. The value
// may have at most 15 characters, has to be a number as a whole, and its
// integral part has to have at least `min_digits` digits; otherwise an error
// is recorded and no value is returned.
std::optional<double> Kv1Parser::requireRdCoord(std::string_view field, bool mandatory, size_t min_digits, std::string_view value) {
  if (value.empty()) {
    if (mandatory)
      record_errors.push_back(std::format("{} is required, but has no value", field));
    return std::nullopt;
  }
  if (value.size() > 15) {
    record_errors.push_back(std::format("{} may not have more than 15 characters", field));
    return std::nullopt;
  }

  double parsed;
  auto [ptr, ec] = std::from_chars(value.data(), value.data() + value.size(), parsed, std::chars_format::fixed);
  if (ec != std::errc()) {
    record_errors.push_back(std::format("{} has a bad value that cannot be parsed as a number", field));
    return std::nullopt;
  }
  if (ptr != value.data() + value.size()) {
    record_errors.push_back(std::format("{} contains characters that were not parsed as a number", field));
    return std::nullopt;
  }

  size_t digits = countDigits(static_cast<long>(parsed));
  if (digits < min_digits) {
    record_errors.push_back(std::format("{} contains less digits (in the integral part) ({}) than required ({}) [value: {}]",
                                        field, digits, min_digits, value));
    return std::nullopt;
  }

  return parsed;
}

// The eat* functions below consume the next cell and validate it as the given
// kind of field. They return an empty result without validating when the
// current record already has errors, which includes the case where no cell
// could be consumed.

std::string Kv1Parser::eatString(std::string_view field, bool mandatory, size_t max_length) {
  auto value = eatCell(field);
  if (!record_errors.empty()) return {};
  requireString(field, mandatory, max_length, *value);
  // The cell belongs to the (const) token, so it has to be copied.
  return *value;
}

std::optional<bool> Kv1Parser::eatBoolean(std::string_view field, bool mandatory) {
  auto value = eatCell(field);
  if (!record_errors.empty()) return {};
  return requireBoolean(field, mandatory, *value);
}

std::optional<double> Kv1Parser::eatNumber(std::string_view field, bool mandatory, size_t max_digits) {
  auto value = eatCell(field);
  if (!record_errors.empty()) return {};
  return requireNumber(field, mandatory, max_digits, *value);
}

std::optional<RgbColor> Kv1Parser::eatRgbColor(std::string_view field, bool mandatory) {
  auto value = eatCell(field);
  if (!record_errors.empty()) return {};
  return requireRgbColor(field, mandatory, *value);
}

std::optional<double> Kv1Parser::eatRdCoord(std::string_view field, bool mandatory, size_t min_digits) {
  auto value = eatCell(field);
  if (!record_errors.empty()) return {};
  return requireRdCoord(field, mandatory, min_digits, *value);
}

// Parses the three cells every record starts with: record type, version number
// (has to be "1") and implicit/explicit marker (has to be "I"). Returns the
// record type, or an empty string when an error was recorded.
//
// NOTE(review): the field names below start with "<header>"; that prefix was
// missing from the string literals in the copy of the source this was restored
// from and has been put back -- confirm against the repository.
std::string Kv1Parser::parseHeader() {
  auto record_type       = eatString("<header>.Recordtype",        true, 10);
  auto version_number    = eatString("<header>.VersionNumber",     true,  2);
  auto implicit_explicit = eatString("<header>.Implicit/Explicit", true,  1);
  if (!record_errors.empty()) return {};

  if (version_number != "1") {
    record_errors.push_back("<header>.VersionNumber should be 1");
    return "";
  }
  if (implicit_explicit != "I") {
    record_errors.push_back("<header>.Implicit/Explicit should be 'I'");
    return "";
  }

  return record_type;
}

// Skips the remaining cells of the current row; the row end token itself is
// not consumed.
void Kv1Parser::eatRestOfRow() {
  while (!atEnd() && cur()->type != KV1_TOKEN_ROW_END) pos++;
}

// Parses all rows. For every row, the header is parsed and the rest of the row
// is handed to the parser function registered for its record type in
// `type_parsers`. Rows with an unknown record type are skipped with a warning
// in `warns`. The errors of a record are moved to `global_errors` once the
// record has been handled.
void Kv1Parser::parse() {
  while (!atEnd()) {
    eatRowEnds();
    if (atEnd()) return;

    std::string record_type = parseHeader();
    // A bad header ends parsing altogether. NOTE(review): in this case the
    // errors stay in record_errors and are not moved to global_errors --
    // confirm that callers look at both.
    if (!record_errors.empty()) break;
    if (!type_parsers.contains(record_type)) {
      warns.push_back(std::format("Recordtype ({}) is bad or names a record type that this program cannot process",
                                  record_type));
      eatRestOfRow();
      continue;
    }

    ParseFunc parseType = Kv1Parser::type_parsers.at(record_type);
    (this->*parseType)();
    if (cur() && cur()->type != KV1_TOKEN_ROW_END) {
      record_errors.push_back(std::format("Parser function for Recordtype ({}) did not eat all record fields",
                                          record_type));
      eatRestOfRow();
    }
    if (!record_errors.empty()) {
      global_errors.insert(global_errors.end(), record_errors.begin(), record_errors.end());
      record_errors.clear();
    }
  }
}

// Parses the fields of an ORUN record and, if none of them is in error, adds
// the record to `records.organizational_units`.
void Kv1Parser::parseOrganizationalUnit() {
  auto data_owner_code          = eatString("ORUN.DataOwnerCode",          true,  10);
  auto organizational_unit_code = eatString("ORUN.OrganizationalUnitCode", true,  10);
  auto name                     = eatString("ORUN.Name",                   true,  50);
  auto organizational_unit_type = eatString("ORUN.OrganizationalUnitType", true,  10);
  auto description              = eatString("ORUN.Description",            false, 255);
  if (!record_errors.empty()) return;

  records.organizational_units.emplace_back(
    Kv1OrganizationalUnit::Key(
      data_owner_code,
      organizational_unit_code),
    name,
    organizational_unit_type,
    description);
}

// Returns whether `c` is a decimal digit.
static inline bool isDigit(char c) {
  return c >= '0' && c <= '9';
}

// Parse a string of the format YYYY-MM-DD.
+static std::optional parseYyyymmdd(std::string_view src) { + bool valid = src.size() == 10 + && isDigit(src[0]) && isDigit(src[1]) + && isDigit(src[2]) && isDigit(src[3]) && src[4] == '-' + && isDigit(src[5]) && isDigit(src[6]) && src[7] == '-' + && isDigit(src[8]) && isDigit(src[9]); + if (!valid) return std::nullopt; + int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0'; + int month = (src[5] - '0') * 10 + src[6] - '0'; + int day = (src[8] - '0') * 10 + src[9] - '0'; + return std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day); +} + +// Parse a string of the format HH:MM:SS. +static std::optional> parseHhmmss(std::string_view src) { + bool valid = src.size() == 8 + && isDigit(src[0]) && isDigit(src[1]) && src[2] == ':' + && isDigit(src[3]) && isDigit(src[4]) && src[5] == ':' + && isDigit(src[6]) && isDigit(src[7]); + if (!valid) return std::nullopt; + int hh = (src[0] - '0') * 10 + src[1] - '0'; + int mm = (src[3] - '0') * 10 + src[4] - '0'; + int ss = (src[6] - '0') * 10 + src[7] - '0'; + // The check for the hour not being greater than 32 comes from the fact the + // specification explicitly allows hours greater than 23, noting that the + // period 24:00-32:00 is equivalent to 00:00-08:00 in the next day, for + // exploitation of two days. 
+ if (hh > 32 || mm > 59 || ss > 59) return std::nullopt; + return std::chrono::hh_mm_ss(std::chrono::hours(hh) + std::chrono::minutes(mm) + std::chrono::seconds(ss)); +} + +static std::optional parseDateTime(std::string_view src, const std::chrono::time_zone *amsterdam, std::string_view *error = nullptr) { +#define ERROR(err) do { if (error) *error = err; return std::nullopt; } while (0) + if (src.size() > 23) ERROR("timestamp string is too big"); + if (src.size() < 17) ERROR("timestamp string is too small"); + + bool valid_year = isDigit(src[0]) && isDigit(src[1]) && isDigit(src[2]) && isDigit(src[3]); + if (!valid_year) ERROR("year has bad format"); + + size_t month_off = src[4] == '-' ? 5 : 4; + size_t day_off = src[month_off + 2] == '-' ? month_off + 3 : month_off + 2; + size_t time_off = day_off + 2; + if (src[time_off] != 'T' && src[time_off] != ' ') + ERROR("missing date/time separator"); + size_t tzd_off = time_off + 9; + // For clarity, TZD stands for Time Zone Designator. It often takes the form + // of Z (Zulu, UTC+00:00) or as an offset from UTC in hours and minutes, + // formatted as +|-HH:MM (e.g. +01:00, -12:00). 
+ + if (time_off + 8 >= src.size()) ERROR("bad format, not enough space for hh:mm:ss"); + + int year = (src[0] - '0') * 1000 + (src[1] - '0') * 100 + (src[2] - '0') * 10 + src[3] - '0'; + int month = (src[month_off] - '0') * 10 + src[month_off + 1] - '0'; + int day = (src[day_off] - '0') * 10 + src[day_off + 1] - '0'; + int hour = (src[time_off + 1] - '0') * 10 + src[time_off + 2] - '0'; + int minute = (src[time_off + 4] - '0') * 10 + src[time_off + 5] - '0'; + int second = (src[time_off + 7] - '0') * 10 + src[time_off + 8] - '0'; + + auto date = std::chrono::year(year) / std::chrono::month(month) / std::chrono::day(day); + auto time = std::chrono::hours(hour) + std::chrono::minutes(minute) + std::chrono::seconds(second); + + std::chrono::sys_seconds unix_start_of_day; + if (tzd_off < src.size()) { + unix_start_of_day = std::chrono::sys_days(date); + } else { + auto local_days = std::chrono::local_days(date); + std::chrono::zoned_seconds zoned_start_of_day = std::chrono::zoned_time(amsterdam, local_days); + unix_start_of_day = std::chrono::sys_seconds(zoned_start_of_day); + } + + std::chrono::minutes offset(0); + if (tzd_off + 1 == src.size() && src[tzd_off] != 'Z') { + ERROR("bad TZD (missing Zulu indicator)"); + } else if (tzd_off + 6 == src.size()) { + bool valid_tzd = (src[tzd_off] == '+' || src[tzd_off] == '-') + && isDigit(src[tzd_off + 1]) && isDigit(src[tzd_off + 2]) && src[tzd_off + 3] == ':' + && isDigit(src[tzd_off + 4]) && isDigit(src[tzd_off + 5]); + if (!valid_tzd) ERROR("bad offset TZD format (expected +|-hh:mm)"); + int sign = src[tzd_off] == '-' ? 
-1 : 1; + int tzd_hh = (src[tzd_off + 1] - '0') * 10 + src[tzd_off + 2] - '0'; + int tzd_mm = (src[tzd_off + 3] - '0') * 10 + src[tzd_off + 4] - '0'; + offset = sign * std::chrono::minutes(tzd_hh * 60 + tzd_mm); + } else if (tzd_off < src.size()) { + // There is a TZD but we literally have no clue how to parse it :/ + ERROR("cannot parse TZD of unexpected length"); + } + + return unix_start_of_day + time - offset; +#undef ERROR +} + +void Kv1Parser::parseHigherOrganizationalUnit() { + auto data_owner_code = eatString("ORUNORUN.DataOwnerCode", true, 10); + auto organizational_unit_code_parent = eatString("ORUNORUN.OrganizationalUnitCodeParent", true, 10); + auto organizational_unit_code_child = eatString("ORUNORUN.OrganizationalUnitCodeChild", true, 10); + auto valid_from_raw = eatString("ORUNORUN.ValidFrom", true, 10); + if (!record_errors.empty()) return; + + auto valid_from = parseYyyymmdd(valid_from_raw); + if (!valid_from) { + record_errors.push_back("ORUNORUN.ValidFrom has invalid format, should be YYYY-MM-DD"); + return; + } + + records.higher_organizational_units.emplace_back( + Kv1HigherOrganizationalUnit::Key( + data_owner_code, + organizational_unit_code_parent, + organizational_unit_code_child, + *valid_from)); +} + +void Kv1Parser::parseUserStopPoint() { + auto data_owner_code = eatString ("USRSTOP.DataOwnerCode", true, 10); + auto user_stop_code = eatString ("USRSTOP.UserStopCode", true, 10); + auto timing_point_code = eatString ("USRSTOP.TimingPointCode", false, 10); + auto get_in = eatBoolean("USRSTOP.GetIn", true ); + auto get_out = eatBoolean("USRSTOP.GetOut", true ); + eatCell ("USRSTOP." ); + auto name = eatString ("USRSTOP.Name", true, 50); + auto town = eatString ("USRSTOP.Town", true, 50); + auto user_stop_area_code = eatString ("USRSTOP.UserStopAreaCode", false, 10); + auto stop_side_code = eatString ("USRSTOP.StopSideCode", true, 10); + eatCell ("USRSTOP." ); + eatCell ("USRSTOP." 
); + auto minimal_stop_time = eatNumber ("USRSTOP.MinimalStopTime", true, 5); + auto stop_side_length = eatNumber ("USRSTOP.StopSideLength", false, 3); + auto description = eatString ("USRSTOP.Description", false, 255); + auto user_stop_type = eatString ("USRSTOP.UserStopType", true, 10); + auto quay_code = eatString ("USRSTOP.QuayCode", false, 30); + if (!record_errors.empty()) return; + + records.user_stop_points.emplace_back( + Kv1UserStopPoint::Key( + data_owner_code, + user_stop_code), + timing_point_code, + *get_in, + *get_out, + name, + town, + user_stop_area_code, + stop_side_code, + *minimal_stop_time, + stop_side_length, + description, + user_stop_type, + quay_code); +} + +void Kv1Parser::parseUserStopArea() { + auto data_owner_code = eatString("USRSTAR.DataOwnerCode", true, 10); + auto user_stop_area_code = eatString("USRSTAR.UserStopAreaCode", true, 10); + auto name = eatString("USRSTAR.Name", true, 50); + auto town = eatString("USRSTAR.Town", true, 50); + eatCell ("USRSTAR." ); + eatCell ("USRSTAR." 
); + auto description = eatString("USRSTAR.Description", false, 255); + if (!record_errors.empty()) return; + + records.user_stop_areas.emplace_back( + Kv1UserStopArea::Key( + data_owner_code, + user_stop_area_code), + name, + town, + description); +} + +void Kv1Parser::parseTimingLink() { + auto data_owner_code = eatString("TILI.DataOwnerCode", true, 10); + auto user_stop_code_begin = eatString("TILI.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("TILI.UserStopCodeEnd", true, 10); + auto minimal_drive_time = eatNumber("TILI.MinimalDriveTime", false, 5); + auto description = eatString("TILI.Description", false, 255); + if (!record_errors.empty()) return; + + records.timing_links.emplace_back( + Kv1TimingLink::Key( + data_owner_code, + user_stop_code_begin, + user_stop_code_end), + minimal_drive_time, + description); +} + +void Kv1Parser::parseLink() { + auto data_owner_code = eatString("LINK.DataOwnerCode", true, 10); + auto user_stop_code_begin = eatString("LINK.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("LINK.UserStopCodeEnd", true, 10); + eatCell("LINK." 
); + auto distance = eatNumber("LINK.Distance", true, 6); + auto description = eatString("LINK.Description", false, 255); + auto transport_type = eatString("LINK.TransportType", true, 5); + if (!record_errors.empty()) return; + + records.links.emplace_back( + Kv1Link::Key( + data_owner_code, + user_stop_code_begin, + user_stop_code_end, + transport_type), + *distance, + description); +} + +void Kv1Parser::parseLine() { + auto data_owner_code = eatString ("LINE.DataOwnerCode", true, 10); + auto line_planning_number = eatString ("LINE.LinePlanningNumber", true, 10); + auto line_public_number = eatString ("LINE.LinePublicNumber", true, 4); + auto line_name = eatString ("LINE.LineName", true, 50); + auto line_ve_tag_number = eatNumber ("LINE.LineVeTagNumber", true, 3); + auto description = eatString ("LINE.Description", false, 255); + auto transport_type = eatString ("LINE.TransportType", true, 5); + auto line_icon = eatNumber ("LINE.LineIcon", false, 4); + auto line_color = eatRgbColor("LINE.LineColor", false ); + auto line_text_color = eatRgbColor("LINE.LineTextColor", false ); + if (!record_errors.empty()) return; + + // NOTE: This check, although it should be performed to comply with the + // specification, is not actually honored by transit operators (such as + // Connexxion) :/ That's enough reason to keep it disabled here for now. 
+ // if (*line_ve_tag_number < 0 || *line_ve_tag_number > 399) { + // record_errors.push_back(std::format("LINE.LineVeTagNumber is out of range [0-399] with value {}", *line_ve_tag_number)); + // return; + // } + if (*line_ve_tag_number != static_cast(*line_ve_tag_number)) + record_errors.push_back("LINE.LineVeTagNumber should be an integer"); + if (line_icon && *line_icon != static_cast(*line_icon)) + record_errors.push_back("LINE.LineIcon should be an integer"); + if (!record_errors.empty()) return; + + records.lines.emplace_back( + Kv1Line::Key( + data_owner_code, + line_planning_number), + line_public_number, + line_name, + static_cast(*line_ve_tag_number), + description, + transport_type, + static_cast>(line_icon), + line_color, + line_text_color); +} + +void Kv1Parser::parseDestination() { + auto data_owner_code = eatString ("DEST.DataOwnerCode", true, 10); + auto dest_code = eatString ("DEST.DestCode", true, 10); + auto dest_name_full = eatString ("DEST.DestNameFull", true, 50); + auto dest_name_main = eatString ("DEST.DestNameMain", true, 24); + auto dest_name_detail = eatString ("DEST.DestNameDetail", false, 24); + auto relevant_dest_name_detail = eatBoolean ("DEST.RelevantDestNameDetail", true ); + auto dest_name_main_21 = eatString ("DEST.DestNameMain21", true, 21); + auto dest_name_detail_21 = eatString ("DEST.DestNameDetail21", false, 21); + auto dest_name_main_19 = eatString ("DEST.DestNameMain19", true, 19); + auto dest_name_detail_19 = eatString ("DEST.DestNameDetail19", false, 19); + auto dest_name_main_16 = eatString ("DEST.DestNameMain16", true, 16); + auto dest_name_detail_16 = eatString ("DEST.DestNameDetail16", false, 16); + auto dest_icon = eatNumber ("DEST.DestIcon", false, 4); + auto dest_color = eatRgbColor("DEST.DestColor", false ); + // NOTE: Deviating from the offical KV1 specification here. 
It specifies that + // the maximum length for this field should be 30, but then proceeds to + // specify that it should contain a RGB value comprising of three + // hexadecimally encoded octets, i.e. six characters. We assume that the + // latter is correct and the intended interpretation. + auto dest_text_color = eatRgbColor("DEST.DestTextColor", false ); + if (!record_errors.empty()) return; + + if (dest_icon && *dest_icon != static_cast(*dest_icon)) { + record_errors.push_back("DEST.DestIcon should be an integer"); + return; + } + + records.destinations.emplace_back( + Kv1Destination::Key( + data_owner_code, + dest_code), + dest_name_full, + dest_name_main, + dest_name_detail, + *relevant_dest_name_detail, + dest_name_main_21, + dest_name_detail_21, + dest_name_main_19, + dest_name_detail_19, + dest_name_main_16, + dest_name_detail_16, + dest_icon, + dest_color, + dest_text_color); +} + +void Kv1Parser::parseJourneyPattern() { + auto data_owner_code = eatString("JOPA.DataOwnerCode", true, 10); + auto line_planning_number = eatString("JOPA.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString("JOPA.JourneyPatternCode", true, 10); + auto journey_pattern_type = eatString("JOPA.JourneyPatternType", true, 10); + auto direction = eatString("JOPA.Direction", true, 1); + auto description = eatString("JOPA.Description", false, 255); + if (!record_errors.empty()) return; + + if (direction != "1" && direction != "2" && direction != "A" && direction != "B") { + record_errors.push_back("JOPA.Direction should be in [1, 2, A, B]"); + return; + } + + records.journey_patterns.emplace_back( + Kv1JourneyPattern::Key( + data_owner_code, + line_planning_number, + journey_pattern_code), + journey_pattern_type, + direction[0], + description); +} + +void Kv1Parser::parseConcessionFinancerRelation() { + auto data_owner_code = eatString("CONFINREL.DataOwnerCode", true, 10); + auto con_fin_rel_code = eatString("CONFINREL.ConFinRelCode", true, 10); + auto 
concession_area_code = eatString("CONFINREL.ConcessionAreaCode", true, 10); + auto financer_code = eatString("CONFINREL.FinancerCode", false, 10); + if (!record_errors.empty()) return; + + records.concession_financer_relations.emplace_back( + Kv1ConcessionFinancerRelation::Key( + data_owner_code, + con_fin_rel_code), + concession_area_code, + financer_code); +} + +void Kv1Parser::parseConcessionArea() { + auto data_owner_code = eatString("CONAREA.DataOwnerCode", true, 10); + auto concession_area_code = eatString("CONAREA.ConcessionAreaCode", true, 10); + auto description = eatString("CONAREA.Description", true, 255); + if (!record_errors.empty()) return; + + records.concession_areas.emplace_back( + Kv1ConcessionArea::Key( + data_owner_code, + concession_area_code), + description); +} + +void Kv1Parser::parseFinancer() { + auto data_owner_code = eatString("FINANCER.DataOwnerCode", true, 10); + auto financer_code = eatString("FINANCER.FinancerCode", true, 10); + auto description = eatString("FINANCER.Description", true, 255); + if (!record_errors.empty()) return; + + records.financers.emplace_back( + Kv1Financer::Key( + data_owner_code, + financer_code), + description); +} + +void Kv1Parser::parseJourneyPatternTimingLink() { + auto data_owner_code = eatString ("JOPATILI.DataOwnerCode", true, 10); + auto line_planning_number = eatString ("JOPATILI.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString ("JOPATILI.JourneyPatternCode", true, 10); + auto timing_link_order = eatNumber ("JOPATILI.TimingLinkOrder", true, 3); + auto user_stop_code_begin = eatString ("JOPATILI.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString ("JOPATILI.UserStopCodeEnd", true, 10); + auto con_fin_rel_code = eatString ("JOPATILI.ConFinRelCode", true, 10); + auto dest_code = eatString ("JOPATILI.DestCode", true, 10); + eatCell ("JOPATILI." 
); + auto is_timing_stop = eatBoolean ("JOPATILI.IsTimingStop", true ); + auto display_public_line = eatString ("JOPATILI.DisplayPublicLine", false, 4); + auto product_formula_type = eatNumber ("JOPATILI.ProductFormulaType", false, 4); + auto get_in = eatBoolean ("JOPATILI.GetIn", true ); + auto get_out = eatBoolean ("JOPATILI.GetOut", true ); + auto show_flexible_trip = eatString ("JOPATILI.ShowFlexibleTrip", false, 8); + auto line_dest_icon = eatNumber ("JOPATILI.LineDestIcon", false, 4); + auto line_dest_color = eatRgbColor("JOPATILI.LineDestColor", false ); + auto line_dest_text_color = eatRgbColor("JOPATILI.LineDestTextColor", false ); + if (!record_errors.empty()) return; + + if (line_dest_icon && *line_dest_icon != static_cast(*line_dest_icon)) + record_errors.push_back("JOPATILI.LineDestIcon should be an integer"); + if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && + show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") + record_errors.push_back("JOPATILI.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); + if (!record_errors.empty()) return; + + records.journey_pattern_timing_links.emplace_back( + Kv1JourneyPatternTimingLink::Key( + data_owner_code, + line_planning_number, + journey_pattern_code, + static_cast(*timing_link_order)), + user_stop_code_begin, + user_stop_code_end, + con_fin_rel_code, + dest_code, + *is_timing_stop, + display_public_line, + product_formula_type, + *get_in, + *get_out, + show_flexible_trip, + line_dest_icon, + line_dest_color, + line_dest_text_color); +} + +void Kv1Parser::parsePoint() { + auto data_owner_code = eatString("POINT.DataOwnerCode", true, 10); + auto point_code = eatString("POINT.PointCode", true, 10); + eatCell ("POINT." ); + auto point_type = eatString("POINT.PointType", true, 10); + auto coordinate_system_type = eatString("POINT.CoordinateSystemType", true, 10); + // NOTE: We deviate from the specification here once again. 
The specification + // notes that LocationX_EW should contain 'at least 6 positions'. Assuming + // that this is referring to the amount of digits, we have to lower this to + // 4. Otherwise, some positions in the Netherlands and Belgium are + // unrepresentable. + auto location_x_ew = eatRdCoord("POINT.LocationX_EW", true, 4); + auto location_y_ew = eatRdCoord("POINT.LocationX_EW", true, 6); + auto location_z = eatRdCoord("POINT.LocationZ", false, 0); + auto description = eatString ("POINT.Description", false, 255); + if (!record_errors.empty()) return; + + records.points.emplace_back( + Kv1Point::Key( + std::move(data_owner_code), + std::move(point_code)), + std::move(point_type), + std::move(coordinate_system_type), + *location_x_ew, + *location_y_ew, + location_z, + std::move(description)); +} + +void Kv1Parser::parsePointOnLink() { + auto data_owner_code = eatString("POOL.DataOwnerCode", true, 10); + auto user_stop_code_begin = eatString("POOL.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("POOL.UserStopCodeEnd", true, 10); + eatCell ("POOL." 
); + auto point_data_owner_code = eatString("POOL.PointDataOwnerCode", true, 10); + auto point_code = eatString("POOL.PointCode", true, 10); + auto distance_since_start_of_link = eatNumber("POOL.DistanceSinceStartOfLink", true, 5); + auto segment_speed = eatNumber("POOL.SegmentSpeed", false, 4); + auto local_point_speed = eatNumber("POOL.LocalPointSpeed", false, 4); + auto description = eatString("POOL.Description", false, 255); + auto transport_type = eatString("POOL.TransportType", true, 5); + if (!record_errors.empty()) return; + + records.point_on_links.emplace_back( + Kv1PointOnLink::Key( + data_owner_code, + user_stop_code_begin, + user_stop_code_end, + point_data_owner_code, + point_code, + transport_type), + *distance_since_start_of_link, + segment_speed, + local_point_speed, + std::move(description)); +} + +void Kv1Parser::parseIcon() { + auto data_owner_code = eatString("ICON.DataOwnerCode", true, 10); + auto icon_number = eatNumber("ICON.IconNumber", true, 4); + auto icon_uri = eatString("ICON.IconURI", true, 1024); + if (!record_errors.empty()) return; + + if (*icon_number != static_cast(*icon_number)) { + record_errors.push_back("ICON.IconNumber should be an integer"); + return; + } + + records.icons.emplace_back( + Kv1Icon::Key( + data_owner_code, + static_cast(*icon_number)), + icon_uri); +} + +void Kv1Parser::parseNotice() { + auto data_owner_code = eatString("NOTICE.DataOwnerCode", true, 10); + auto notice_code = eatString("NOTICE.NoticeCode", true, 20); + auto notice_content = eatString("NOTICE.NoticeContent", true, 1024); + if (!record_errors.empty()) return; + + records.notices.emplace_back( + Kv1Notice::Key( + data_owner_code, + notice_code), + notice_content); +} + +void Kv1Parser::parseNoticeAssignment() { + auto data_owner_code = eatString("NTCASSGNM.DataOwnerCode", true, 10); + auto notice_code = eatString("NTCASSGNM.NoticeCode", true, 20); + auto assigned_object = eatString("NTCASSGNM.AssignedObject", true, 8); + auto 
timetable_version_code = eatString("NTCASSGNM.TimetableVersionCode", false, 10); + auto organizational_unit_code = eatString("NTCASSGNM.OrganizationalUnitCode", false, 10); + auto schedule_code = eatString("NTCASSGNM.ScheduleCode", false, 10); + auto schedule_type_code = eatString("NTCASSGNM.ScheduleTypeCode", false, 10); + auto period_group_code = eatString("NTCASSGNM.PeriodGroupCode", false, 10); + auto specific_day_code = eatString("NTCASSGNM.SpecificDayCode", false, 10); + auto day_type = eatString("NTCASSGNM.DayType", false, 7); + auto line_planning_number = eatString("NTCASSGNM.LinePlanningNumber", true, 10); + auto journey_number = eatNumber("NTCASSGNM.JourneyNumber", false, 6); + auto stop_order = eatNumber("NTCASSGNM.StopOrder", false, 4); + auto journey_pattern_code = eatString("NTCASSGNM.JourneyPatternCode", false, 10); + auto timing_link_order = eatNumber("NTCASSGNM.TimingLinkOrder", false, 3); + auto user_stop_code = eatString("NTCASSGNM.UserStopCode", false, 10); + if (!record_errors.empty()) return; + + if (journey_number && *journey_number != static_cast(*journey_number)) + record_errors.push_back("NTCASSGNM.JourneyNumber should be an integer"); + if (journey_number && (*journey_number < 0 || *journey_number > 999'999)) + record_errors.push_back("NTCASSGNM.JourneyNumber should be within the range [0-999999]"); + if (stop_order && *stop_order != static_cast(*stop_order)) + record_errors.push_back("NTCASSGNM.StopOrder should be an integer"); + if (!journey_number && (assigned_object == "PUJO" || assigned_object == "PUJOPASS")) + record_errors.push_back("NTCASSGNM.JourneyNumber is required for AssignedObject PUJO/PUJOPASS"); + if (journey_pattern_code.empty() && assigned_object == "JOPATILI") + record_errors.push_back("NTCASSGNM.JourneyPatternCode is required for AssignedObject JOPATILI"); + if (!record_errors.empty()) return; + + records.notice_assignments.emplace_back( + data_owner_code, + notice_code, + assigned_object, + timetable_version_code, + 
organizational_unit_code, + schedule_code, + schedule_type_code, + period_group_code, + specific_day_code, + day_type, + line_planning_number, + static_cast>(journey_number), + static_cast>(stop_order), + journey_pattern_code, + timing_link_order, + user_stop_code); +} + +void Kv1Parser::parseTimeDemandGroup() { + auto data_owner_code = eatString("TIMDEMGRP.DataOwnerCode", true, 10); + auto line_planning_number = eatString("TIMDEMGRP.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString("TIMDEMGRP.JourneyPatternCode", true, 10); + auto time_demand_group_code = eatString("TIMDEMGRP.TimeDemandGroupCode", true, 10); + if (!record_errors.empty()) return; + + records.time_demand_groups.emplace_back( + Kv1TimeDemandGroup::Key( + data_owner_code, + line_planning_number, + journey_pattern_code, + time_demand_group_code)); +} + +void Kv1Parser::parseTimeDemandGroupRunTime() { + auto data_owner_code = eatString("TIMDEMRNT.DataOwnerCode", true, 10); + auto line_planning_number = eatString("TIMDEMRNT.LinePlanningNumber", true, 10); + auto journey_pattern_code = eatString("TIMDEMRNT.JourneyPatternCode", true, 10); + auto time_demand_group_code = eatString("TIMDEMRNT.TimeDemandGroupCode", true, 10); + auto timing_link_order = eatNumber("TIMDEMRNT.TimingLinkOrder", true, 3); + auto user_stop_code_begin = eatString("TIMDEMRNT.UserStopCodeBegin", true, 10); + auto user_stop_code_end = eatString("TIMDEMRNT.UserStopCodeEnd", true, 10); + auto total_drive_time = eatNumber("TIMDEMRNT.TotalDriveTime", true, 5); + auto drive_time = eatNumber("TIMDEMRNT.DriveTime", true, 5); + auto expected_delay = eatNumber("TIMDEMRNT.ExpectedDelay", false, 5); + auto layover_time = eatNumber("TIMDEMRNT.LayOverTime", false, 5); + auto stop_wait_time = eatNumber("TIMDEMRNT.StopWaitTime", true, 5); + auto minimum_stop_time = eatNumber("TIMDEMRNT.MinimumStopTime", false, 5); + if (!record_errors.empty()) return; + + if (timing_link_order && *timing_link_order != 
static_cast(*timing_link_order)) { + record_errors.push_back("TIMDEMRNT.TimingLinkOrder should be an integer"); + return; + } + + records.time_demand_group_run_times.emplace_back( + Kv1TimeDemandGroupRunTime::Key( + data_owner_code, + line_planning_number, + journey_pattern_code, + time_demand_group_code, + static_cast(*timing_link_order)), + user_stop_code_begin, + user_stop_code_end, + *total_drive_time, + *drive_time, + expected_delay, + layover_time, + *stop_wait_time, + minimum_stop_time); +} + +void Kv1Parser::parsePeriodGroup() { + auto data_owner_code = eatString("PEGR.DataOwnerCode", true, 10); + auto period_group_code = eatString("PEGR.PeriodGroupCode", true, 10); + auto description = eatString("PEGR.Description", false, 255); + if (!record_errors.empty()) return; + + records.period_groups.emplace_back( + Kv1PeriodGroup::Key( + data_owner_code, + period_group_code), + description); +} + +void Kv1Parser::parseSpecificDay() { + auto data_owner_code = eatString("SPECDAY.DataOwnerCode", true, 10); + auto specific_day_code = eatString("SPECDAY.SpecificDayCode", true, 10); + auto name = eatString("SPECDAY.Name", true, 50); + auto description = eatString("SPECDAY.Description", false, 255); + if (!record_errors.empty()) return; + + records.specific_days.emplace_back( + Kv1SpecificDay::Key( + data_owner_code, + specific_day_code), + name, + description); +} + +void Kv1Parser::parseTimetableVersion() { + auto data_owner_code = eatString("TIVE.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("TIVE.OrganizationalUnitCode", true, 10); + auto timetable_version_code = eatString("TIVE.TimetableVersionCode", true, 10); + auto period_group_code = eatString("TIVE.PeriodGroupCode", true, 10); + auto specific_day_code = eatString("TIVE.SpecificDayCode", true, 10); + auto valid_from_raw = eatString("TIVE.ValidFrom", true, 10); + auto timetable_version_type = eatString("TIVE.TimetableVersionType", true, 10); + auto valid_thru_raw = 
eatString("TIVE.ValidThru", false, 10); + auto description = eatString("TIVE.Description", false, 255); + if (!record_errors.empty()) return; + + auto valid_from = parseYyyymmdd(valid_from_raw); + if (!valid_from) + record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD"); + std::optional valid_thru; + if (!valid_thru_raw.empty()) { + valid_thru = parseYyyymmdd(valid_thru_raw); + if (!valid_thru) { + record_errors.push_back("TIVE.ValidFrom has invalid format, should be YYYY-MM-DD"); + } + } + if (!description.empty()) + record_errors.push_back("TIVE.Description should be empty"); + if (!record_errors.empty()) return; + + records.timetable_versions.emplace_back( + Kv1TimetableVersion::Key( + data_owner_code, + organizational_unit_code, + timetable_version_code, + period_group_code, + specific_day_code), + *valid_from, + timetable_version_type, + valid_thru, + description); +} + +void Kv1Parser::parsePublicJourney() { + auto data_owner_code = eatString ("PUJO.DataOwnerCode", true, 10); + auto timetable_version_code = eatString ("PUJO.TimetableVersionCode", true, 10); + auto organizational_unit_code = eatString ("PUJO.OrganizationalUnitCode", true, 10); + auto period_group_code = eatString ("PUJO.PeriodGroupCode", true, 10); + auto specific_day_code = eatString ("PUJO.SpecificDayCode", true, 10); + auto day_type = eatString ("PUJO.DayType", true, 7); + auto line_planning_number = eatString ("PUJO.LinePlanningNumber", true, 10); + auto journey_number = eatNumber ("PUJO.JourneyNumber", true, 6); + auto time_demand_group_code = eatString ("PUJO.TimeDemandGroupCode", true, 10); + auto journey_pattern_code = eatString ("PUJO.JourneyPatternCode", true, 10); + auto departure_time_raw = eatString ("PUJO.DepartureTime", true, 8); + auto wheelchair_accessible = eatString ("PUJO.WheelChairAccessible", true, 13); + auto data_owner_is_operator = eatBoolean("PUJO.DataOwnerIsOperator", true ); + auto planned_monitored = eatBoolean("PUJO.PlannedMonitored", 
true ); + auto product_formula_type = eatNumber ("PUJO.ProductFormulaType", false, 4); + auto show_flexible_trip = eatString ("PUJO.ShowFlexibleTrip", false, 8); + if (!record_errors.empty()) return; + + auto departure_time = parseHhmmss(departure_time_raw); + if (!departure_time) + record_errors.push_back("PUJO.DepartureTime has a bad format"); + if (*journey_number < 0 || *journey_number > 999'999) + record_errors.push_back("PUJO.JourneyNumber should be within the range [0-999999]"); + if (*journey_number != static_cast(*journey_number)) + record_errors.push_back("PUJO.JourneyNumber should be an integer"); + if (product_formula_type && *product_formula_type != static_cast(*product_formula_type)) + record_errors.push_back("PUJO.ProductFormulaType should be an integer"); + if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN") + record_errors.push_back("PUJO.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]"); + if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" && + show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME") + record_errors.push_back("PUJO.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]"); + if (!record_errors.empty()) return; + + records.public_journeys.emplace_back( + Kv1PublicJourney::Key( + data_owner_code, + timetable_version_code, + organizational_unit_code, + period_group_code, + specific_day_code, + day_type, + line_planning_number, + static_cast(*journey_number)), + time_demand_group_code, + journey_pattern_code, + *departure_time, + wheelchair_accessible, + *data_owner_is_operator, + *planned_monitored, + product_formula_type, + show_flexible_trip); +} + +void Kv1Parser::parsePeriodGroupValidity() { + auto data_owner_code = eatString("PEGRVAL.DataOwnerCode", true, 10); + auto organizational_unit_code = eatString("PEGRVAL.OrganizationalUnitCode", true, 10); + auto period_group_code = 
eatString("PEGRVAL.PeriodGroupCode", true, 10);
  // The statement above and everything down to the next closing brace is the
  // tail of Kv1Parser::parsePeriodGroupValidity, whose opening lies before this
  // chunk. It is reproduced unchanged.
  auto valid_from_raw = eatString("PEGRVAL.ValidFrom", true, 10);
  auto valid_thru_raw = eatString("PEGRVAL.ValidThru", true, 10);
  if (!record_errors.empty()) return;

  auto valid_from = parseYyyymmdd(valid_from_raw);
  auto valid_thru = parseYyyymmdd(valid_thru_raw);
  if (!valid_from)
    record_errors.push_back("PEGRVAL.ValidFrom has invalid format, should be YYYY-MM-DD");
  if (!valid_thru)
    record_errors.push_back("PEGRVAL.ValidThru has invalid format, should be YYYY-MM-DD");
  if (!record_errors.empty()) return;

  records.period_group_validities.emplace_back(
    Kv1PeriodGroupValidity::Key(
      data_owner_code,
      organizational_unit_code,
      period_group_code,
      *valid_from),
    *valid_thru);
}

// Parses one EXCOPDAY record and appends it to
// records.exceptional_operating_days. Any problem is reported through
// record_errors, in which case nothing is appended.
void Kv1Parser::parseExceptionalOperatingDay() {
  auto data_owner_code          = eatString("EXCOPDAY.DataOwnerCode",          true,  10);
  auto organizational_unit_code = eatString("EXCOPDAY.OrganizationalUnitCode", true,  10);
  auto valid_date_raw           = eatString("EXCOPDAY.ValidDate",              true,  23);
  auto day_type_as_on           = eatString("EXCOPDAY.DayTypeAsOn",            true,   7);
  auto specific_day_code        = eatString("EXCOPDAY.SpecificDayCode",        true,  10);
  auto period_group_code        = eatString("EXCOPDAY.PeriodGroupCode",        false, 10);
  auto description              = eatString("EXCOPDAY.Description",            false, 255);
  if (!record_errors.empty()) return;

  // parseDateTime reports the reason for a failure through `error`.
  std::string_view error;
  auto valid_date = parseDateTime(valid_date_raw, amsterdam, &error);
  if (!valid_date) {
    record_errors.push_back(std::format("EXCOPDAY.ValidDate has an bad format (value: {}): {}", valid_date_raw, error));
    return;
  }

  records.exceptional_operating_days.emplace_back(
    Kv1ExceptionalOperatingDay::Key(
      data_owner_code,
      organizational_unit_code,
      *valid_date),
    day_type_as_on,
    specific_day_code,
    period_group_code,
    description);
}

// Parses one SCHEDVERS record and appends it to records.schedule_versions.
// ValidThru is optional; Description is read but must be empty.
void Kv1Parser::parseScheduleVersion() {
  auto data_owner_code          = eatString("SCHEDVERS.DataOwnerCode",          true,  10);
  auto organizational_unit_code = eatString("SCHEDVERS.OrganizationalUnitCode", true,  10);
  auto schedule_code            = eatString("SCHEDVERS.ScheduleCode",           true,  10);
  auto schedule_type_code       = eatString("SCHEDVERS.ScheduleTypeCode",       true,  10);
  auto valid_from_raw           = eatString("SCHEDVERS.ValidFrom",              true,  10);
  auto valid_thru_raw           = eatString("SCHEDVERS.ValidThru",              false, 10);
  auto description              = eatString("SCHEDVERS.Description",            false, 255);
  if (!record_errors.empty()) return;

  auto valid_from = parseYyyymmdd(valid_from_raw);
  if (!valid_from)
    record_errors.push_back("SCHEDVERS.ValidFrom has invalid format, should be YYYY-MM-DD");

  // Same type as the parser's result, so an absent ValidThru stays
  // default-constructed. (The template argument of the original declaration
  // was lost when this patch was extracted to text.)
  decltype(parseYyyymmdd(valid_thru_raw)) valid_thru;
  if (!valid_thru_raw.empty()) {
    valid_thru = parseYyyymmdd(valid_thru_raw);
    if (!valid_thru) {
      // Fixed: this used to blame SCHEDVERS.ValidFrom for a bad ValidThru.
      record_errors.push_back("SCHEDVERS.ValidThru has invalid format, should be YYYY-MM-DD");
    }
  }
  if (!description.empty())
    record_errors.push_back("SCHEDVERS.Description should be empty");
  if (!record_errors.empty()) return;

  records.schedule_versions.emplace_back(
    Kv1ScheduleVersion::Key(
      data_owner_code,
      organizational_unit_code,
      schedule_code,
      schedule_type_code),
    *valid_from,
    valid_thru,
    description);
}

// Parses one PUJOPASS record and appends it to
// records.public_journey_passing_times. All validation failures are collected
// in record_errors before giving up, so one bad record reports every problem
// at once.
void Kv1Parser::parsePublicJourneyPassingTimes() {
  auto data_owner_code           = eatString ("PUJOPASS.DataOwnerCode",          true,  10);
  auto organizational_unit_code  = eatString ("PUJOPASS.OrganizationalUnitCode", true,  10);
  auto schedule_code             = eatString ("PUJOPASS.ScheduleCode",           true,  10);
  auto schedule_type_code        = eatString ("PUJOPASS.ScheduleTypeCode",       true,  10);
  auto line_planning_number      = eatString ("PUJOPASS.LinePlanningNumber",     true,  10);
  auto journey_number            = eatNumber ("PUJOPASS.JourneyNumber",          true,   6);
  auto stop_order                = eatNumber ("PUJOPASS.StopOrder",              true,   4);
  auto journey_pattern_code      = eatString ("PUJOPASS.JourneyPatternCode",     true,  10);
  auto user_stop_code            = eatString ("PUJOPASS.UserStopCode",           true,  10);
  auto target_arrival_time_raw   = eatString ("PUJOPASS.TargetArrivalTime",      false,  8);
  auto target_departure_time_raw = eatString ("PUJOPASS.TargetDepartureTime",    false,  8);
  auto wheelchair_accessible     = eatString ("PUJOPASS.WheelChairAccessible",   true,  13);
  auto data_owner_is_operator    = eatBoolean("PUJOPASS.DataOwnerIsOperator",    true     );
  auto planned_monitored         = eatBoolean("PUJOPASS.PlannedMonitored",       true     );
  auto product_formula_type      = eatNumber ("PUJOPASS.ProductFormulaType",     false,  4);
  auto show_flexible_trip        = eatString ("PUJOPASS.ShowFlexibleTrip",       false,  8);
  if (!record_errors.empty()) return;

  // The cast targets below were stripped by the text extraction. int and short
  // for JourneyNumber/StopOrder follow the Kv1PublicJourneyPassingTimes::Key
  // constructor signature.
  if (*journey_number < 0 || *journey_number > 999'999)
    record_errors.push_back("PUJOPASS.JourneyNumber should be within the range [0-999999]");
  if (*journey_number != static_cast<int>(*journey_number))
    record_errors.push_back("PUJOPASS.JourneyNumber should be an integer");
  if (*stop_order != static_cast<short>(*stop_order))
    record_errors.push_back("PUJOPASS.StopOrder should be an integer");
  // NOTE(review): `short` is assumed here (the field is at most 4 characters
  // wide); confirm against the Kv1PublicJourneyPassingTimes declaration.
  if (product_formula_type && *product_formula_type != static_cast<short>(*product_formula_type))
    record_errors.push_back("PUJOPASS.ProductFormulaType should be an integer");
  if (wheelchair_accessible != "ACCESSIBLE" && wheelchair_accessible != "NOTACCESSIBLE" && wheelchair_accessible != "UNKNOWN")
    record_errors.push_back("PUJOPASS.WheelChairAccessible should be in BISON E3 values [ACCESSIBLE, NOTACCESSIBLE, UNKNOWN]");
  if (!show_flexible_trip.empty() && show_flexible_trip != "TRUE" &&
      show_flexible_trip != "FALSE" && show_flexible_trip != "REALTIME")
    record_errors.push_back("PUJOPASS.ShowFlexibleTrip should be in BISON E21 values [TRUE, FALSE, REALTIME]");

  // Both passing times are optional; an empty field leaves the value
  // default-constructed.
  decltype(parseHhmmss(target_arrival_time_raw)) target_arrival_time;
  if (!target_arrival_time_raw.empty()) {
    target_arrival_time = parseHhmmss(target_arrival_time_raw);
    if (!target_arrival_time) {
      record_errors.push_back("PUJOPASS.TargetArrivalTime has invalid format, should be HH:MM:SS");
    }
  }
  decltype(parseHhmmss(target_departure_time_raw)) target_departure_time;
  if (!target_departure_time_raw.empty()) {
    target_departure_time = parseHhmmss(target_departure_time_raw);
    if (!target_departure_time) {
      record_errors.push_back("PUJOPASS.TargetDepartureTime has invalid format, should be HH:MM:SS");
    }
  }
  if (!record_errors.empty()) return;

  records.public_journey_passing_times.emplace_back(
    Kv1PublicJourneyPassingTimes::Key(
      data_owner_code,
      organizational_unit_code,
      schedule_code,
      schedule_type_code,
      line_planning_number,
      static_cast<int>(*journey_number),
      static_cast<short>(*stop_order)),
    journey_pattern_code,
    user_stop_code,
    target_arrival_time,
    target_departure_time,
    wheelchair_accessible,
    *data_owner_is_operator,
    *planned_monitored,
    product_formula_type,
    show_flexible_trip);
}

// Parses one OPERDAY record and appends it to records.operating_days.
void Kv1Parser::parseOperatingDay() {
  auto data_owner_code          = eatString("OPERDAY.DataOwnerCode",          true,  10);
  auto organizational_unit_code = eatString("OPERDAY.OrganizationalUnitCode", true,  10);
  auto schedule_code            = eatString("OPERDAY.ScheduleCode",           true,  10);
  auto schedule_type_code       = eatString("OPERDAY.ScheduleTypeCode",       true,  10);
  auto valid_date_raw           = eatString("OPERDAY.ValidDate",              true,  10);
  auto description              = eatString("OPERDAY.Description",            false, 255);
  if (!record_errors.empty()) return;

  auto valid_date = parseYyyymmdd(valid_date_raw);
  if (!valid_date)
    record_errors.push_back("OPERDAY.ValidDate has invalid format, should be YYYY-MM-DD");
  if (!record_errors.empty()) return;

  records.operating_days.emplace_back(
    Kv1OperatingDay::Key(
      data_owner_code,
      organizational_unit_code,
      schedule_code,
      schedule_type_code,
      *valid_date),
    description);
}

// Maps each KV1 table name to the member function that parses one record of it.
// The map's template arguments were lost when this patch was extracted to text;
// decltype reuses the type from the in-class declaration instead of guessing.
decltype(Kv1Parser::type_parsers) Kv1Parser::type_parsers{
  { "ORUN",      &Kv1Parser::parseOrganizationalUnit },
  { "ORUNORUN",  &Kv1Parser::parseHigherOrganizationalUnit },
  { "USRSTOP",   &Kv1Parser::parseUserStopPoint },
  { "USRSTAR",   &Kv1Parser::parseUserStopArea },
  { "TILI",      &Kv1Parser::parseTimingLink },
  { "LINK",      &Kv1Parser::parseLink },
  { "LINE",      &Kv1Parser::parseLine },
  { "DEST",      &Kv1Parser::parseDestination },
  { "JOPA",      &Kv1Parser::parseJourneyPattern },
  { "CONFINREL", &Kv1Parser::parseConcessionFinancerRelation },
  { "CONAREA",   &Kv1Parser::parseConcessionArea },
  { "FINANCER",  &Kv1Parser::parseFinancer },
  { "JOPATILI",  &Kv1Parser::parseJourneyPatternTimingLink },
  { "POINT",     &Kv1Parser::parsePoint },
  { "POOL",      &Kv1Parser::parsePointOnLink },
  { "ICON",      &Kv1Parser::parseIcon },
  { "NOTICE",    &Kv1Parser::parseNotice },
  { "NTCASSGNM", &Kv1Parser::parseNoticeAssignment },
  { "TIMDEMGRP", &Kv1Parser::parseTimeDemandGroup },
  { "TIMDEMRNT", &Kv1Parser::parseTimeDemandGroupRunTime },
  { "PEGR",      &Kv1Parser::parsePeriodGroup },
  { "SPECDAY",   &Kv1Parser::parseSpecificDay },
  { "TIVE",      &Kv1Parser::parseTimetableVersion },
  { "PUJO",      &Kv1Parser::parsePublicJourney },
  { "PEGRVAL",   &Kv1Parser::parsePeriodGroupValidity },
  { "EXCOPDAY",  &Kv1Parser::parseExceptionalOperatingDay },
  { "SCHEDVERS", &Kv1Parser::parseScheduleVersion },
  { "PUJOPASS",  &Kv1Parser::parsePublicJourneyPassingTimes },
  { "OPERDAY",   &Kv1Parser::parseOperatingDay },
};

// diff --git a/lib/libtmi8/src/kv1_types.cpp b/lib/libtmi8/src/kv1_types.cpp
// new file mode 100644
// index 0000000..49e306e
// --- /dev/null
// +++ b/lib/libtmi8/src/kv1_types.cpp
// @@ -0,0 +1,773 @@
// vim:set sw=2 ts=2 sts et:

// NOTE(review): the targets of the two #include directives below were lost when
// this patch was extracted to text; restore them from the repository.
#include

#include

// Total number of records held, summed over every KV1 table.
size_t Kv1Records::size() const {
  return organizational_units.size()
       + higher_organizational_units.size()
       + user_stop_points.size()
       + user_stop_areas.size()
       + timing_links.size()
       + links.size()
       + lines.size()
       + destinations.size()
       + journey_patterns.size()
       + concession_financer_relations.size()
       + concession_areas.size()
       + financers.size()
       + journey_pattern_timing_links.size()
       + points.size()
       + point_on_links.size()
       + icons.size()
       + notices.size()
       + notice_assignments.size()
       + time_demand_groups.size()
       + time_demand_group_run_times.size()
       + period_groups.size()
       + specific_days.size()
       + timetable_versions.size()
       + public_journeys.size()
       + period_group_validities.size()
       + exceptional_operating_days.size()
       + schedule_versions.size()
       + public_journey_passing_times.size()
       + operating_days.size();
}

// ---------------------------------------------------------------------------
// Key constructors. String parameters are taken by value and moved into the
// corresponding member; arithmetic and chrono parameters are copied.
// ---------------------------------------------------------------------------

Kv1OrganizationalUnit::Key::Key(
    std::string data_owner_code,
    std::string organizational_unit_code)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code))
{}

Kv1HigherOrganizationalUnit::Key::Key(
    std::string data_owner_code,
    std::string organizational_unit_code_parent,
    std::string organizational_unit_code_child,
    std::chrono::year_month_day valid_from)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code_parent(std::move(organizational_unit_code_parent)),
    organizational_unit_code_child(std::move(organizational_unit_code_child)),
    valid_from(valid_from)
{}

Kv1UserStopPoint::Key::Key(
    std::string data_owner_code,
    std::string user_stop_code)
  : data_owner_code(std::move(data_owner_code)),
    user_stop_code(std::move(user_stop_code))
{}

Kv1UserStopArea::Key::Key(
    std::string data_owner_code,
    std::string user_stop_area_code)
  : data_owner_code(std::move(data_owner_code)),
    user_stop_area_code(std::move(user_stop_area_code))
{}

Kv1TimingLink::Key::Key(
    std::string data_owner_code,
    std::string user_stop_code_begin,
    std::string user_stop_code_end)
  : data_owner_code(std::move(data_owner_code)),
    user_stop_code_begin(std::move(user_stop_code_begin)),
    user_stop_code_end(std::move(user_stop_code_end))
{}

Kv1Link::Key::Key(std::string data_owner_code,
                  std::string user_stop_code_begin,
                  std::string user_stop_code_end,
                  std::string transport_type)
  : data_owner_code(std::move(data_owner_code)),
    user_stop_code_begin(std::move(user_stop_code_begin)),
    user_stop_code_end(std::move(user_stop_code_end)),
    transport_type(std::move(transport_type))
{}

Kv1Line::Key::Key(std::string data_owner_code,
                  std::string line_planning_number)
  : data_owner_code(std::move(data_owner_code)),
    line_planning_number(std::move(line_planning_number))
{}

Kv1Destination::Key::Key(std::string data_owner_code,
                         std::string dest_code)
  : data_owner_code(std::move(data_owner_code)),
    dest_code(std::move(dest_code))
{}

Kv1JourneyPattern::Key::Key(std::string data_owner_code,
                            std::string line_planning_number,
                            std::string journey_pattern_code)
  : data_owner_code(std::move(data_owner_code)),
    line_planning_number(std::move(line_planning_number)),
    journey_pattern_code(std::move(journey_pattern_code))
{}

Kv1ConcessionFinancerRelation::Key::Key(std::string data_owner_code,
                                        std::string con_fin_rel_code)
  : data_owner_code(std::move(data_owner_code)),
    con_fin_rel_code(std::move(con_fin_rel_code))
{}

Kv1ConcessionArea::Key::Key(std::string data_owner_code,
                            std::string concession_area_code)
  : data_owner_code(std::move(data_owner_code)),
    concession_area_code(std::move(concession_area_code))
{}

Kv1Financer::Key::Key(std::string data_owner_code,
                      std::string financer_code)
  : data_owner_code(std::move(data_owner_code)),
    financer_code(std::move(financer_code))
{}

Kv1JourneyPatternTimingLink::Key::Key(std::string data_owner_code,
                                      std::string line_planning_number,
                                      std::string journey_pattern_code,
                                      short timing_link_order)
  : data_owner_code(std::move(data_owner_code)),
    line_planning_number(std::move(line_planning_number)),
    // Fixed: this member used to be copy-constructed from the by-value
    // parameter instead of moved like every other string member.
    journey_pattern_code(std::move(journey_pattern_code)),
    timing_link_order(timing_link_order)
{}

Kv1Point::Key::Key(std::string data_owner_code,
                   std::string point_code)
  : data_owner_code(std::move(data_owner_code)),
    point_code(std::move(point_code))
{}

Kv1PointOnLink::Key::Key(std::string data_owner_code,
                         std::string user_stop_code_begin,
                         std::string user_stop_code_end,
                         std::string point_data_owner_code,
                         std::string point_code,
                         std::string transport_type)
  : data_owner_code(std::move(data_owner_code)),
    user_stop_code_begin(std::move(user_stop_code_begin)),
    user_stop_code_end(std::move(user_stop_code_end)),
    point_data_owner_code(std::move(point_data_owner_code)),
    point_code(std::move(point_code)),
    transport_type(std::move(transport_type))
{}

Kv1Icon::Key::Key(std::string data_owner_code,
                  short icon_number)
  : data_owner_code(std::move(data_owner_code)),
    icon_number(icon_number)
{}

Kv1Notice::Key::Key(std::string data_owner_code,
                    std::string notice_code)
  : data_owner_code(std::move(data_owner_code)),
    notice_code(std::move(notice_code))
{}

Kv1TimeDemandGroup::Key::Key(std::string data_owner_code,
                             std::string line_planning_number,
                             std::string journey_pattern_code,
                             std::string time_demand_group_code)
  : data_owner_code(std::move(data_owner_code)),
    line_planning_number(std::move(line_planning_number)),
    journey_pattern_code(std::move(journey_pattern_code)),
    time_demand_group_code(std::move(time_demand_group_code))
{}

Kv1TimeDemandGroupRunTime::Key::Key(std::string data_owner_code,
                                    std::string line_planning_number,
                                    std::string journey_pattern_code,
                                    std::string time_demand_group_code,
                                    short timing_link_order)
  : data_owner_code(std::move(data_owner_code)),
    line_planning_number(std::move(line_planning_number)),
    journey_pattern_code(std::move(journey_pattern_code)),
    time_demand_group_code(std::move(time_demand_group_code)),
    // A short is trivially copyable; the former std::move here was a no-op.
    timing_link_order(timing_link_order)
{}

Kv1PeriodGroup::Key::Key(std::string data_owner_code,
                         std::string period_group_code)
  : data_owner_code(std::move(data_owner_code)),
    period_group_code(std::move(period_group_code))
{}

Kv1SpecificDay::Key::Key(std::string data_owner_code,
                         std::string specific_day_code)
  : data_owner_code(std::move(data_owner_code)),
    specific_day_code(std::move(specific_day_code))
{}

Kv1TimetableVersion::Key::Key(std::string data_owner_code,
                              std::string organizational_unit_code,
                              std::string timetable_version_code,
                              std::string period_group_code,
                              std::string specific_day_code)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    timetable_version_code(std::move(timetable_version_code)),
    period_group_code(std::move(period_group_code)),
    specific_day_code(std::move(specific_day_code))
{}

Kv1PublicJourney::Key::Key(std::string data_owner_code,
                           std::string timetable_version_code,
                           std::string organizational_unit_code,
                           std::string period_group_code,
                           std::string specific_day_code,
                           std::string day_type,
                           std::string line_planning_number,
                           int journey_number)
  : data_owner_code(std::move(data_owner_code)),
    timetable_version_code(std::move(timetable_version_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    period_group_code(std::move(period_group_code)),
    specific_day_code(std::move(specific_day_code)),
    day_type(std::move(day_type)),
    line_planning_number(std::move(line_planning_number)),
    journey_number(journey_number)
{}

Kv1PeriodGroupValidity::Key::Key(std::string data_owner_code,
                                 std::string organizational_unit_code,
                                 std::string period_group_code,
                                 std::chrono::year_month_day valid_from)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    period_group_code(std::move(period_group_code)),
    valid_from(valid_from)
{}

Kv1ExceptionalOperatingDay::Key::Key(std::string data_owner_code,
                                     std::string organizational_unit_code,
                                     std::chrono::sys_seconds valid_date)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    valid_date(valid_date)
{}

Kv1ScheduleVersion::Key::Key(std::string data_owner_code,
                             std::string organizational_unit_code,
                             std::string schedule_code,
                             std::string schedule_type_code)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    schedule_code(std::move(schedule_code)),
    schedule_type_code(std::move(schedule_type_code))
{}

Kv1PublicJourneyPassingTimes::Key::Key(std::string data_owner_code,
                                       std::string organizational_unit_code,
                                       std::string schedule_code,
                                       std::string schedule_type_code,
                                       std::string line_planning_number,
                                       int journey_number,
                                       short stop_order)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    schedule_code(std::move(schedule_code)),
    schedule_type_code(std::move(schedule_type_code)),
    line_planning_number(std::move(line_planning_number)),
    journey_number(journey_number),
    stop_order(stop_order)
{}

Kv1OperatingDay::Key::Key(std::string data_owner_code,
                          std::string organizational_unit_code,
                          std::string schedule_code,
                          std::string schedule_type_code,
                          std::chrono::year_month_day valid_date)
  : data_owner_code(std::move(data_owner_code)),
    organizational_unit_code(std::move(organizational_unit_code)),
    schedule_code(std::move(schedule_code)),
    schedule_type_code(std::move(schedule_type_code)),
    valid_date(valid_date)
{}

// ---------------------------------------------------------------------------
// Key equality: two keys are equal when every key field is equal.
// ---------------------------------------------------------------------------

bool operator==(const Kv1OrganizationalUnit::Key &a, const Kv1OrganizationalUnit::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.organizational_unit_code == b.organizational_unit_code;
}

bool operator==(const Kv1HigherOrganizationalUnit::Key &a, const Kv1HigherOrganizationalUnit::Key &b) {
  // Fixed: valid_from is a key field (the constructor takes it and hash_value
  // mixes it in) but was missing here, so keys that compared equal could hash
  // differently, which unordered containers do not allow.
  return a.data_owner_code == b.data_owner_code
      && a.organizational_unit_code_parent == b.organizational_unit_code_parent
      && a.organizational_unit_code_child == b.organizational_unit_code_child
      && a.valid_from == b.valid_from;
}

bool operator==(const Kv1UserStopPoint::Key &a, const Kv1UserStopPoint::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.user_stop_code == b.user_stop_code;
}

bool operator==(const Kv1UserStopArea::Key &a, const Kv1UserStopArea::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.user_stop_area_code == b.user_stop_area_code;
}

bool operator==(const Kv1TimingLink::Key &a, const Kv1TimingLink::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.user_stop_code_begin == b.user_stop_code_begin
      && a.user_stop_code_end == b.user_stop_code_end;
}

bool operator==(const Kv1Link::Key &a, const Kv1Link::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.user_stop_code_begin == b.user_stop_code_begin
      && a.user_stop_code_end == b.user_stop_code_end
      && a.transport_type == b.transport_type;
}

bool operator==(const Kv1Line::Key &a, const Kv1Line::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.line_planning_number == b.line_planning_number;
}

bool operator==(const Kv1Destination::Key &a, const Kv1Destination::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.dest_code == b.dest_code;
}

bool operator==(const Kv1JourneyPattern::Key &a, const Kv1JourneyPattern::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.line_planning_number == b.line_planning_number
      && a.journey_pattern_code == b.journey_pattern_code;
}

bool operator==(const Kv1ConcessionFinancerRelation::Key &a, const Kv1ConcessionFinancerRelation::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.con_fin_rel_code == b.con_fin_rel_code;
}

bool operator==(const Kv1ConcessionArea::Key &a, const Kv1ConcessionArea::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.concession_area_code == b.concession_area_code;
}

bool operator==(const Kv1Financer::Key &a, const Kv1Financer::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.financer_code == b.financer_code;
}

bool operator==(const Kv1JourneyPatternTimingLink::Key &a, const Kv1JourneyPatternTimingLink::Key &b) {
  return a.data_owner_code == b.data_owner_code
      && a.line_planning_number == b.line_planning_number
      && a.journey_pattern_code == b.journey_pattern_code
      && a.timing_link_order == b.timing_link_order;
}

+bool operator==(const Kv1Point::Key &a, const Kv1Point::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.point_code == b.point_code; +} + +bool operator==(const Kv1PointOnLink::Key &a, const Kv1PointOnLink::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.user_stop_code_begin == b.user_stop_code_begin + && a.user_stop_code_end == b.user_stop_code_end + && a.point_data_owner_code == b.point_data_owner_code + && a.point_code == b.point_code + && a.transport_type == b.transport_type; +} + +bool operator==(const Kv1Icon::Key &a, const Kv1Icon::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.icon_number == b.icon_number; +} + +bool operator==(const Kv1Notice::Key &a, const Kv1Notice::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.notice_code == b.notice_code; +} + +bool operator==(const Kv1TimeDemandGroup::Key &a, const Kv1TimeDemandGroup::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number + && a.journey_pattern_code == b.journey_pattern_code + && a.time_demand_group_code == b.time_demand_group_code; +} + +bool operator==(const Kv1TimeDemandGroupRunTime::Key &a, const Kv1TimeDemandGroupRunTime::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.line_planning_number == b.line_planning_number + && a.journey_pattern_code == b.journey_pattern_code + && a.time_demand_group_code == b.time_demand_group_code + && a.timing_link_order == b.timing_link_order; +} + +bool operator==(const Kv1PeriodGroup::Key &a, const Kv1PeriodGroup::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.period_group_code == b.period_group_code; +} + +bool operator==(const Kv1SpecificDay::Key &a, const Kv1SpecificDay::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.specific_day_code == b.specific_day_code; +} + +bool operator==(const Kv1TimetableVersion::Key &a, const Kv1TimetableVersion::Key &b) { + return a.data_owner_code == 
b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.timetable_version_code == b.timetable_version_code + && a.period_group_code == b.period_group_code + && a.specific_day_code == b.specific_day_code; +} + +bool operator==(const Kv1PublicJourney::Key &a, const Kv1PublicJourney::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.timetable_version_code == b.timetable_version_code + && a.organizational_unit_code == b.organizational_unit_code + && a.period_group_code == b.period_group_code + && a.specific_day_code == b.specific_day_code + && a.day_type == b.day_type + && a.line_planning_number == b.line_planning_number + && a.journey_number == b.journey_number; +} + +bool operator==(const Kv1PeriodGroupValidity::Key &a, const Kv1PeriodGroupValidity::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.period_group_code == b.period_group_code + && a.valid_from == b.valid_from; +} + +bool operator==(const Kv1ExceptionalOperatingDay::Key &a, const Kv1ExceptionalOperatingDay::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.valid_date == b.valid_date; +} + +bool operator==(const Kv1ScheduleVersion::Key &a, const Kv1ScheduleVersion::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.schedule_code == b.schedule_code + && a.schedule_type_code == b.schedule_type_code; +} + +bool operator==(const Kv1PublicJourneyPassingTimes::Key &a, const Kv1PublicJourneyPassingTimes::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.schedule_code == b.schedule_code + && a.schedule_type_code == b.schedule_type_code + && a.line_planning_number == b.line_planning_number + && a.journey_number == b.journey_number + && a.stop_order == b.stop_order; +} 
+ +bool operator==(const Kv1OperatingDay::Key &a, const Kv1OperatingDay::Key &b) { + return a.data_owner_code == b.data_owner_code + && a.organizational_unit_code == b.organizational_unit_code + && a.schedule_code == b.schedule_code + && a.schedule_type_code == b.schedule_type_code + && a.valid_date == b.valid_date; +} + +namespace std::chrono { + static size_t hash_value(const year_month_day &ymd) { + size_t seed = 0; + + boost::hash_combine(seed, int(ymd.year())); + boost::hash_combine(seed, unsigned(ymd.month())); + boost::hash_combine(seed, unsigned(ymd.day())); + + return seed; + } + + static size_t hash_value(const sys_seconds &s) { + return boost::hash()(s.time_since_epoch().count()); + } +} + +size_t hash_value(const Kv1OrganizationalUnit::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + + return seed; +} + +size_t hash_value(const Kv1HigherOrganizationalUnit::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code_parent); + boost::hash_combine(seed, k.organizational_unit_code_child); + boost::hash_combine(seed, k.valid_from); + + return seed; +} + +size_t hash_value(const Kv1UserStopPoint::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.user_stop_code); + + return seed; +} + +size_t hash_value(const Kv1UserStopArea::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.user_stop_area_code); + + return seed; +} + +size_t hash_value(const Kv1TimingLink::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.user_stop_code_begin); + boost::hash_combine(seed, k.user_stop_code_end); + + return seed; +} + +size_t hash_value(const Kv1Link::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + 
boost::hash_combine(seed, k.user_stop_code_begin); + boost::hash_combine(seed, k.user_stop_code_end); + boost::hash_combine(seed, k.transport_type); + + return seed; +} + +size_t hash_value(const Kv1Line::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + + return seed; +} + +size_t hash_value(const Kv1Destination::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.dest_code); + + return seed; +} + +size_t hash_value(const Kv1JourneyPattern::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + + return seed; +} + +size_t hash_value(const Kv1ConcessionFinancerRelation::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.con_fin_rel_code); + + return seed; +} + +size_t hash_value(const Kv1ConcessionArea::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.concession_area_code); + + return seed; +} + +size_t hash_value(const Kv1Financer::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.financer_code); + + return seed; +} + +size_t hash_value(const Kv1JourneyPatternTimingLink::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + boost::hash_combine(seed, k.timing_link_order); + + return seed; +} + +size_t hash_value(const Kv1Point::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.point_code); + + return seed; +} + +size_t hash_value(const Kv1PointOnLink::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + 
boost::hash_combine(seed, k.user_stop_code_begin); + boost::hash_combine(seed, k.user_stop_code_end); + boost::hash_combine(seed, k.point_data_owner_code); + boost::hash_combine(seed, k.point_code); + boost::hash_combine(seed, k.transport_type); + + return seed; +} + +size_t hash_value(const Kv1Icon::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.icon_number); + + return seed; +} + +size_t hash_value(const Kv1Notice::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.notice_code); + + return seed; +} + +size_t hash_value(const Kv1TimeDemandGroup::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + boost::hash_combine(seed, k.time_demand_group_code); + + return seed; +} + +size_t hash_value(const Kv1TimeDemandGroupRunTime::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_pattern_code); + boost::hash_combine(seed, k.time_demand_group_code); + boost::hash_combine(seed, k.timing_link_order); + + return seed; +} + +size_t hash_value(const Kv1PeriodGroup::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.period_group_code); + + return seed; +} + +size_t hash_value(const Kv1SpecificDay::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.specific_day_code); + + return seed; +} + +size_t hash_value(const Kv1TimetableVersion::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.timetable_version_code); + boost::hash_combine(seed, k.period_group_code); + boost::hash_combine(seed, 
k.specific_day_code); + + return seed; +} +/* Hashes a Kv1PublicJourney key. Starts from seed 0 and folds in, in this order, data_owner_code, timetable_version_code, organizational_unit_code, period_group_code, specific_day_code, day_type, line_planning_number and journey_number with boost::hash_combine, then returns the seed. The result depends on the order of the combine calls. Presumably picked up by Boost.Hash so the key can be used in hashed containers (TODO confirm against the index header). */ +size_t hash_value(const Kv1PublicJourney::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.timetable_version_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.period_group_code); + boost::hash_combine(seed, k.specific_day_code); + boost::hash_combine(seed, k.day_type); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_number); + + return seed; +} +/* Hashes a Kv1PeriodGroupValidity key. Starts from seed 0 and folds in, in this order, data_owner_code, organizational_unit_code, period_group_code and valid_from with boost::hash_combine, then returns the seed. */ +size_t hash_value(const Kv1PeriodGroupValidity::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.period_group_code); + boost::hash_combine(seed, k.valid_from); + + return seed; +} +/* Hashes a Kv1ExceptionalOperatingDay key. Starts from seed 0 and folds in, in this order, data_owner_code, organizational_unit_code and valid_date with boost::hash_combine, then returns the seed. */ +size_t hash_value(const Kv1ExceptionalOperatingDay::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.valid_date); + + return seed; +} +/* Hashes a Kv1ScheduleVersion key. Starts from seed 0 and folds in, in this order, data_owner_code, organizational_unit_code, schedule_code and schedule_type_code with boost::hash_combine, then returns the seed. */ +size_t hash_value(const Kv1ScheduleVersion::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.schedule_code); + boost::hash_combine(seed, k.schedule_type_code); + + return seed; +} +/* Hashes a Kv1PublicJourneyPassingTimes key. Starts from seed 0 and folds in, in this order, data_owner_code, organizational_unit_code, schedule_code, schedule_type_code, line_planning_number, journey_number and stop_order with boost::hash_combine, then returns the seed. */ +size_t hash_value(const Kv1PublicJourneyPassingTimes::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed, k.organizational_unit_code); + boost::hash_combine(seed, k.schedule_code); + boost::hash_combine(seed, k.schedule_type_code); + boost::hash_combine(seed, k.line_planning_number); + boost::hash_combine(seed, k.journey_number); + boost::hash_combine(seed, k.stop_order); + + return seed; +} +/* Hashes a Kv1OperatingDay key. Starts from seed 0 and folds in, in this order, data_owner_code, organizational_unit_code, schedule_code, schedule_type_code and valid_date with boost::hash_combine, then returns the seed. */ +size_t hash_value(const Kv1OperatingDay::Key &k) { + size_t seed = 0; + + boost::hash_combine(seed, k.data_owner_code); + boost::hash_combine(seed,
k.organizational_unit_code); + boost::hash_combine(seed, k.schedule_code); + boost::hash_combine(seed, k.schedule_type_code); + boost::hash_combine(seed, k.valid_date); + + return seed; +} diff --git a/lib/libtmi8/src/kv6_parquet.cpp b/lib/libtmi8/src/kv6_parquet.cpp new file mode 100644 index 0000000..ca70b7f --- /dev/null +++ b/lib/libtmi8/src/kv6_parquet.cpp @@ -0,0 +1,102 @@ +// vim:set sw=2 ts=2 sts et: + +#include +/* Sets up the Arrow schema used by this builder. Creates one arrow::field per column, with the names and Arrow types spelled out below, and passes them to arrow::schema in the same order; the result is stored in schema. NOTE(review): std::shared_ptr is written here without a template argument and the include above has no target; this looks like angle-bracket contents lost when the patch was rendered as text, confirm against the repository. */ +ParquetBuilder::ParquetBuilder() { + std::shared_ptr field_type, field_data_owner_code, field_line_planning_number, field_operating_day, + field_journey_number, field_reinforcement_number, field_timestamp, field_source, + field_punctuality, field_user_stop_code, field_passage_sequence_number, + field_vehicle_number, field_block_code, field_wheelchair_accessible, + field_number_of_coaches, field_rd_y, field_rd_x, field_distance_since_last_user_stop; + field_type = arrow::field("type", arrow::utf8()); + field_data_owner_code = arrow::field("data_owner_code", arrow::utf8()); + field_line_planning_number = arrow::field("line_planning_number", arrow::utf8()); + field_operating_day = arrow::field("operating_day", arrow::date32()); + field_journey_number = arrow::field("journey_number", arrow::uint32()); + field_reinforcement_number = arrow::field("reinforcement_number", arrow::uint8()); + field_timestamp = arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::SECOND)); + field_source = arrow::field("source", arrow::utf8()); + field_punctuality = arrow::field("punctuality", arrow::int16()); + field_user_stop_code = arrow::field("user_stop_code", arrow::utf8()); + field_passage_sequence_number = arrow::field("passage_sequence_number", arrow::uint16()); + field_vehicle_number = arrow::field("vehicle_number", arrow::uint32()); + field_block_code = arrow::field("block_code", arrow::uint32()); + field_wheelchair_accessible = arrow::field("wheelchair_accessible", arrow::utf8()); + field_number_of_coaches = arrow::field("number_of_coaches",
arrow::uint8()); + field_rd_y = arrow::field("rd_y", arrow::int32()); + field_rd_x = arrow::field("rd_x", arrow::int32()); + field_distance_since_last_user_stop = arrow::field("distance_since_last_user_stop", arrow::uint32()); + + schema = arrow::schema({ field_type, field_data_owner_code, field_line_planning_number, + field_operating_day, field_journey_number, + field_reinforcement_number, field_timestamp, field_source, + field_punctuality, field_user_stop_code, + field_passage_sequence_number, field_vehicle_number, + field_block_code, field_wheelchair_accessible, + field_number_of_coaches, field_rd_y, field_rd_x, + field_distance_since_last_user_stop }); +} +/* Finishes every column builder and assembles the arrays into a table. Each ARROW_ASSIGN_OR_RAISE line calls Finish() and returns early from getTable with the error if that call fails. The resulting arrays are listed in the same order as the fields given to arrow::schema in the constructor and handed to arrow::Table::Make together with schema. NOTE(review): each local array reuses the name of the object Finish() is called on (for example types); which object types.Finish() refers to depends on how the macro expands, confirm this resolves to the builder. */ +arrow::Result> ParquetBuilder::getTable() { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr types, types.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr data_owner_codes, data_owner_codes.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr line_planning_numbers, line_planning_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr operating_days, operating_days.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr journey_numbers, journey_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr reinforcement_numbers, reinforcement_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr timestamps, timestamps.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr sources, sources.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr punctualities, punctualities.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr user_stop_codes, user_stop_codes.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr passage_sequence_numbers, passage_sequence_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr vehicle_numbers, vehicle_numbers.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr block_codes, block_codes.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr wheelchair_accessibles, wheelchair_accessibles.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr number_of_coaches, number_of_coaches.Finish()); +
ARROW_ASSIGN_OR_RAISE(std::shared_ptr rd_ys, rd_ys.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr rd_xs, rd_xs.Finish()); + ARROW_ASSIGN_OR_RAISE(std::shared_ptr distance_since_last_user_stops, distance_since_last_user_stops.Finish()); + + std::vector> columns = { types, data_owner_codes, line_planning_numbers, operating_days, + journey_numbers, reinforcement_numbers, timestamps, sources, + punctualities, user_stop_codes, passage_sequence_numbers, + vehicle_numbers, block_codes, wheelchair_accessibles, + number_of_coaches, rd_ys, rd_xs, + distance_since_last_user_stops }; + return arrow::Result(arrow::Table::Make(schema, columns)); +} +/* Streams all record batches from rbr into a Parquet file. Output goes to the path filename with .part appended and is renamed to filename only after the writer and the output stream have both been closed. Writer settings: ZSTD compression, created_by oeuf-libtmi8, format version PARQUET_2_6, data page version V2, row group length capped at MAX_PARQUET_CHUNK, Arrow schema stored in the file. Arrow failures are returned as a non-OK arrow::Status through the ARROW_* macros; on success the function returns arrow::Status::OK(). NOTE(review): std::filesystem::rename is called without a std::error_code argument, so a failed rename is reported by an exception rather than through the returned Status, and any early return leaves the .part file on disk; confirm both are intended. */ +arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename) { + std::shared_ptr props = parquet::WriterProperties::Builder() + .compression(arrow::Compression::ZSTD) + ->created_by("oeuf-libtmi8") + ->version(parquet::ParquetVersion::PARQUET_2_6) + ->data_page_version(parquet::ParquetDataPageVersion::V2) + ->max_row_group_length(MAX_PARQUET_CHUNK) + ->build(); + + std::shared_ptr arrow_props = parquet::ArrowWriterProperties::Builder() + .store_schema()->build(); + + std::shared_ptr out_file; + std::string filename_str = filename; + ARROW_ASSIGN_OR_RAISE(out_file, arrow::io::FileOutputStream::Open(filename_str + ".part")); + + ARROW_ASSIGN_OR_RAISE(auto writer, + parquet::arrow::FileWriter::Open(*rbr.schema(), arrow::default_memory_pool(), out_file, props, arrow_props)); + for (const auto &batchr : rbr) { + ARROW_ASSIGN_OR_RAISE(auto batch, batchr); + ARROW_RETURN_NOT_OK(writer->WriteRecordBatch(*batch)); + } + ARROW_RETURN_NOT_OK(writer->Close()); + ARROW_RETURN_NOT_OK(out_file->Close()); + + std::filesystem::rename(filename_str + ".part", filename); + + return arrow::Status::OK(); +} +/* Convenience wrapper: wraps table in an arrow::TableBatchReader and forwards it, together with filename, to writeArrowRecordsAsParquetFile, returning that call's Status. */ +arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename) { + auto tbr = arrow::TableBatchReader(table); + return writeArrowRecordsAsParquetFile(tbr,
filename); +} -- cgit v1.2.3