From 17a3ea880402338420699e03bcb24181e4ff3924 Mon Sep 17 00:00:00 2001 From: Rutger Broekhoff Date: Thu, 2 May 2024 20:27:40 +0200 Subject: Initial commit Based on dc4ba6a --- lib/libtmi8/include/tmi8/kv6_parquet.hpp | 46 ++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 lib/libtmi8/include/tmi8/kv6_parquet.hpp (limited to 'lib/libtmi8/include/tmi8/kv6_parquet.hpp') diff --git a/lib/libtmi8/include/tmi8/kv6_parquet.hpp b/lib/libtmi8/include/tmi8/kv6_parquet.hpp new file mode 100644 index 0000000..33b57ca --- /dev/null +++ b/lib/libtmi8/include/tmi8/kv6_parquet.hpp @@ -0,0 +1,46 @@ +// vim:set sw=2 ts=2 sts et: + +#ifndef OEUF_LIBTMI8_KV6_PARQUET_HPP +#define OEUF_LIBTMI8_KV6_PARQUET_HPP + +#include + +#include +#include +#include + +static const size_t MAX_PARQUET_CHUNK = 10000; + +struct ParquetBuilder { + ParquetBuilder(); + arrow::Result> getTable(); + + std::shared_ptr schema; + + arrow::StringBuilder types; + arrow::StringBuilder data_owner_codes; + arrow::StringBuilder line_planning_numbers; + arrow::Date32Builder operating_days; + arrow::UInt32Builder journey_numbers; + arrow::UInt8Builder reinforcement_numbers; + arrow::TimestampBuilder timestamps{arrow::timestamp(arrow::TimeUnit::SECOND), arrow::default_memory_pool()}; + arrow::StringBuilder sources; + arrow::Int16Builder punctualities; + arrow::StringBuilder user_stop_codes; + arrow::UInt16Builder passage_sequence_numbers; + arrow::UInt32Builder vehicle_numbers; + arrow::UInt32Builder block_codes; + arrow::StringBuilder wheelchair_accessibles; + arrow::UInt8Builder number_of_coaches; + arrow::Int32Builder rd_ys; + arrow::Int32Builder rd_xs; + arrow::UInt32Builder distance_since_last_user_stops; +}; + +[[nodiscard]] +arrow::Status writeArrowRecordsAsParquetFile(arrow::RecordBatchReader &rbr, std::filesystem::path filename); + +[[nodiscard]] +arrow::Status writeArrowTableAsParquetFile(const arrow::Table &table, std::filesystem::path filename); + +#endif // OEUF_LIBTMI8_KV6_PARQUET_HPP -- cgit v1.2.3