-- Origin: commit 86c8896ee69b068368b4ef9a4c3923285907c328
-- Author: Rutger Broekhoff
-- Date:   Tue, 18 Mar 2025 15:29:27 +0100
-- Subject: Parsing ING statements (POC)
-- Adds app/Import/Ing/CurrentAccountCsv.hs (new file, 356 insertions).
--
-- NOTE(review): this module was recovered from a rendered patch in which line
-- breaks and indentation were collapsed. Runs of spaces inside string literals
-- (notably the regular expressions below) may have been collapsed as well --
-- confirm the literals against the repository before relying on them.
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Parsing of ING current-account CSV exports into 'PrimTx' records.
--
-- Besides the plain CSV columns, the free-text "Notifications" column is
-- decomposed into a structured 'MoreData' value, selected by the transaction
-- type and debit/credit direction of the row.
module Import.Ing.CurrentAccountCsv where

import Control.Applicative ((<|>))
import Control.Monad (when)
import Data.ByteString.Lazy qualified as BS
import Data.Csv ((.:))
import Data.Csv qualified as C
import Data.Decimal (Decimal)
import Data.Functor ((<&>))
import Data.Iban (Iban)
import Data.Text qualified as T
import Data.Time.Calendar (Day)
import Data.Time.Clock (UTCTime)
import Data.Time.Zones (TZ, loadTZFromDB)
import Data.Vector qualified as V
import Import.Ing.Shared
  ( DebitCredit (Credit, Debit),
    dateCP,
    decimalCP,
    ibanCP,
    maybeCP,
    scsvOptions,
    timestampCP,
  )
import System.IO (Handle)
import Text.Regex.TDFA ((=~~))

-- | Kind of transaction. Each constructor is annotated with the two-letter
-- value accepted by 'parseCode' and, where 'parseType' knows one, the
-- "Transaction type" text.
data TransactionType
  = AcceptGiro -- AC (acceptgiro)
  | AtmWithdrawal -- GM (geldautomaat, Giromaat)
  | BatchPayment -- VZ (verzamelbetaling); 'Batch payment'
  | BranchPosting -- FL (filiaalboeking)
  | Deposit -- ST (storting)
  | DirectDebit -- IC (incasso); 'SEPA direct debit'
  | Ideal -- ID (iDEAL); 'iDEAL'
  | OnlineBanking -- GT (internetbankieren, Girotel); 'Online Banking'
  | OfficeWithdrawal -- PK (opname kantoor, postkantoor)
  | PaymentTerminal -- BA (betaalautomaat); 'Payment terminal'
  | PeriodicTransfer -- PO (periodieke overschrijving)
  | PhoneBanking -- GF (telefonisch bankieren, Girofoon)
  | Transfer -- OV (overboeking); 'Transfer'
  | Various -- DV (diversen)
  deriving (Eq, Show)

-- | Map the two-letter "Code" column onto a 'TransactionType'.
-- Fails the parser for any code that is not listed.
parseCode :: T.Text -> C.Parser TransactionType
parseCode "AC" = return AcceptGiro
parseCode "GM" = return AtmWithdrawal
parseCode "VZ" = return BatchPayment
parseCode "FL" = return BranchPosting
parseCode "ST" = return Deposit
parseCode "IC" = return DirectDebit
parseCode "ID" = return Ideal
parseCode "GT" = return OnlineBanking
parseCode "PK" = return OfficeWithdrawal
parseCode "BA" = return PaymentTerminal
parseCode "PO" = return PeriodicTransfer
parseCode "GF" = return PhoneBanking
parseCode "OV" = return Transfer
parseCode "DV" = return Various
parseCode t = fail $ "Unknown transaction code '" ++ T.unpack t ++ "'"

-- | Map the "Transaction type" column onto a 'TransactionType'.
-- Only the six descriptions listed here are recognised; anything else fails
-- the parser.
parseType :: T.Text -> C.Parser TransactionType
parseType "SEPA direct debit" = return DirectDebit
parseType "Batch payment" = return BatchPayment
parseType "Online Banking" = return OnlineBanking
parseType "Payment terminal" = return PaymentTerminal
parseType "Transfer" = return Transfer
parseType "iDEAL" = return Ideal
parseType t = fail $ "Unknown transaction type '" ++ T.unpack t ++ "'"

-- | One parsed CSV row, as assembled by 'parseNamedRecord'.
data PrimTx = PrimTx
  { ptDate :: !Day, -- "Date" column
    ptDesc :: !(Maybe T.Text), -- "Name / Description"; Nothing for NOTPROVIDED
    ptAccount :: !Iban, -- "Account" column
    ptCounterparty :: !(Maybe Iban), -- "Counterparty" column
    ptDebitCredit :: !DebitCredit, -- "Debit/credit" column
    ptAmount :: !Decimal, -- "Amount (EUR)" column
    ptResBal :: !Decimal, -- "Resulting balance" column
    ptTag :: !T.Text, -- "Tag" column
    ptMoreData :: !MoreData -- structured form of the "Notifications" column
  }
  deriving (Show)

-- | Structured contents of the "Notifications" column. Which constructor is
-- produced depends on the transaction type and direction; see
-- 'notificationsCP'.
data MoreData
  = -- | Payment terminal, debit.
    PaymentTerminalData
      { ptCardSequenceNo :: !T.Text,
        ptTimestamp :: !UTCTime,
        ptTransaction :: !T.Text,
        ptTerminal :: !T.Text,
        ptValueDate :: !Day,
        ptGooglePay :: !Bool
      }
  | -- | Transfer, credit.
    DepositTransferData
      { dtName :: !T.Text,
        dtDescription :: !T.Text,
        dtIban :: !Iban,
        dtReference :: !T.Text,
        dtValueDate :: !Day
      }
  | -- | Transfer, debit ("Afronding" to an "Oranje spaarrekening").
    RoundingSavingsDeposit
      { rsdSavingsAccount :: !T.Text,
        rsdValueDate :: !Day
      }
  | -- | Online banking, credit.
    OnlineBankingCredit
      { obcName :: !T.Text,
        obcDescription :: !T.Text,
        obcIban :: !Iban,
        obcTimestamp :: !UTCTime,
        obcValueDate :: !Day
      }
  | -- | Online banking, debit. The timestamp is optional in the notification.
    OnlineBankingDebit
      { obdName :: !T.Text,
        obdDescription :: !T.Text,
        obdIban :: !Iban,
        obdTimestamp :: !(Maybe UTCTime),
        obdValueDate :: !Day
      }
  | -- | SEPA direct debit, debit.
    RecurrentDirectDebitData
      { rddName :: !T.Text,
        rddDescription :: !T.Text,
        rddIban :: !Iban,
        rddReference :: !T.Text,
        rddMandateId :: !T.Text,
        rddCreditorId :: !T.Text,
        rddOtherParty :: !(Maybe T.Text),
        rddValueDate :: !Day
      }
  | -- | iDEAL, debit.
    IdealDebitData
      { idName :: !T.Text,
        idDescription :: !T.Text,
        idIban :: !Iban,
        idTimestamp :: !UTCTime,
        idReference :: !T.Text,
        idValueDate :: !Day
      }
  | -- | Payment terminal, credit ("Cashback transaction").
    PaymentTerminalCashbackData
      { ptcCardSequenceNo :: !T.Text,
        ptcTimestamp :: !UTCTime,
        ptcTransaction :: !T.Text,
        ptcTerminal :: !T.Text,
        ptcValueDate :: !Day
      }
  | -- | Batch payment, credit.
    BatchPaymentData
      { bpName :: !T.Text,
        bpDescription :: !T.Text,
        bpIban :: !Iban,
        bpReference :: !T.Text,
        bpValueDate :: !Day
      }
  deriving (Show)

-- | Turn the literal placeholder @NOTPROVIDED@ into 'Nothing'; any other text
-- is kept as is.
maybeNotProvided :: T.Text -> Maybe T.Text
maybeNotProvided t = if t == "NOTPROVIDED" then Nothing else Just t

-- | Parse a "Value date" as found inside notifications (@dd/mm/yyyy@).
valueDateCP :: T.Text -> C.Parser Day
valueDateCP = dateCP "%d/%m/%Y"

-- | The part of a row that 'notificationsCP' needs in order to pick a parser:
-- booking date, transaction type and direction.
data PartTx = PartTx !Day !TransactionType !DebitCredit

-- | Match a notification text against a regular expression and return the
-- texts of its capture groups (groups that did not participate are empty).
--
-- The first argument names the kind of transaction being parsed and is only
-- used in the error message. On a failed match the parser fails with that
-- label and the offending text, rather than with the context-free message
-- that '=~~' would produce on its own.
notificationGroupsCP :: String -> String -> T.Text -> C.Parser [T.Text]
notificationGroupsCP what regex t =
  case t =~~ regex :: Maybe (T.Text, T.Text, T.Text, [T.Text]) of
    Just (_, _, _, groups) -> return groups
    Nothing ->
      fail $
        "Notifications of "
          ++ what
          ++ " transaction have an unexpected format: '"
          ++ T.unpack t
          ++ "'"

-- | Parse the "Notifications" column into 'MoreData'.
--
-- The 'TZ' argument is handed to 'timestampCP' for notifications that carry a
-- timestamp. The parser is chosen by the transaction type and direction in
-- the 'PartTx'; combinations without a dedicated parser fail.
notificationsCP :: TZ -> PartTx -> T.Text -> C.Parser MoreData
notificationsCP _ (PartTx _ Transfer Credit) t = do
  [name, desc, ibanTxt, ref, valDateTxt] <-
    notificationGroupsCP
      "Transfer/Credit"
      "^Name: (.*) Description: (.*) IBAN: ([A-Z0-9]+) Reference: (.*) Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  iban <- ibanCP ibanTxt
  valDate <- valueDateCP valDateTxt
  return $
    DepositTransferData
      { dtName = name,
        dtDescription = desc,
        dtIban = iban,
        dtReference = ref,
        dtValueDate = valDate
      }
notificationsCP _ (PartTx _ Transfer Debit) t = do
  [savingsAccount, valDateTxt] <-
    notificationGroupsCP
      "Transfer/Debit"
      "^To Oranje spaarrekening ([A-Z0-9]+) Afronding Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  valDate <- valueDateCP valDateTxt
  return $
    RoundingSavingsDeposit
      { rsdSavingsAccount = savingsAccount,
        rsdValueDate = valDate
      }
notificationsCP amsTz (PartTx _ PaymentTerminal Debit) t = do
  -- Group 4 is the whole "Term:" alternation; groups 5 and 6 are its two
  -- branches (with and without the " Google Pay" suffix).
  [cardSeqNo, timestampTxt, transaction, _, gpayTerm, noGpayTerm, valDateTxt] <-
    notificationGroupsCP
      "Payment terminal/Debit"
      "^Card sequence no.: ([0-9]+) ? ([0-9]{2}/[0-9]{2}/[0-9]{4} [0-9]{2}:[0-9]{2}) Transaction: (.*) Term: ((.+) Google Pay|(.+)) Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  timestamp <- timestampCP "%d/%m/%Y %H:%M" amsTz timestampTxt
  valDate <- valueDateCP valDateTxt
  return $
    PaymentTerminalData
      { ptCardSequenceNo = cardSeqNo,
        ptTimestamp = timestamp,
        ptTransaction = transaction,
        ptTerminal = if T.null gpayTerm then noGpayTerm else gpayTerm,
        ptValueDate = valDate,
        -- Google Pay is assumed whenever the plain branch captured nothing.
        ptGooglePay = T.null noGpayTerm
      }
notificationsCP amsTz (PartTx _ PaymentTerminal Credit) t = do
  [cardSeqNo, timestampTxt, transaction, term, valDateTxt] <-
    notificationGroupsCP
      "Payment terminal/Credit"
      "^Card sequence no.: ([0-9]+) ? ([0-9]{2}/[0-9]{2}/[0-9]{4} [0-9]{2}:[0-9]{2}) Transaction: (.*) Term: (.*) Cashback transaction Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  timestamp <- timestampCP "%d/%m/%Y %H:%M" amsTz timestampTxt
  valDate <- valueDateCP valDateTxt
  return $
    PaymentTerminalCashbackData
      { ptcCardSequenceNo = cardSeqNo,
        ptcTimestamp = timestamp,
        ptcTransaction = transaction,
        ptcTerminal = term,
        ptcValueDate = valDate
      }
notificationsCP amsTz (PartTx _ OnlineBanking Credit) t = do
  [name, desc, ibanTxt, timestampTxt, valDateTxt] <-
    notificationGroupsCP
      "Online Banking/Credit"
      "^Name: (.*) Description: (.*) IBAN: ([A-Z0-9]+) Date/time: ([0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2}) Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  iban <- ibanCP ibanTxt
  timestamp <- timestampCP "%d-%m-%Y %H:%M:%S" amsTz timestampTxt
  valDate <- valueDateCP valDateTxt
  return $
    OnlineBankingCredit
      { obcName = name,
        obcDescription = desc,
        obcIban = iban,
        obcTimestamp = timestamp,
        obcValueDate = valDate
      }
notificationsCP amsTz (PartTx _ OnlineBanking Debit) t = do
  -- Group 4 is the optional "Date/time: ... " wrapper; group 5 the timestamp.
  [name, desc, ibanTxt, _, timestampTxt, valDateTxt] <-
    notificationGroupsCP
      "Online Banking/Debit"
      "^Name: (.*) Description: (.*) IBAN: ([A-Z0-9]+) (Date/time: ([0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2}) )?Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  iban <- ibanCP ibanTxt
  timestamp <-
    if T.null timestampTxt
      then pure Nothing
      else Just <$> timestampCP "%d-%m-%Y %H:%M:%S" amsTz timestampTxt
  valDate <- valueDateCP valDateTxt
  return $
    OnlineBankingDebit
      { obdName = name,
        obdDescription = desc,
        obdIban = iban,
        obdTimestamp = timestamp,
        obdValueDate = valDate
      }
notificationsCP _ (PartTx date DirectDebit Debit) t = normalRecurrentDirectDebit <|> ingInsurancePayment
  where
    normalRecurrentDirectDebit = do
      -- Group 7 is the optional "Other party: ... " wrapper; group 8 its value.
      [name, desc, ibanTxt, ref, mandateId, creditorId, _, otherParty, valDateTxt] <-
        notificationGroupsCP
          "SEPA direct debit/Debit"
          "^Name: (.*) Description: (.*) IBAN: ([A-Z0-9]+) Reference: (.*) Mandate ID: (.*) Creditor ID: (.*) Recurrent SEPA direct debit (Other party: (.*) )?Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
          t
      iban <- ibanCP ibanTxt
      valDate <- valueDateCP valDateTxt
      return $
        RecurrentDirectDebitData
          { rddName = name,
            rddDescription = desc,
            rddIban = iban,
            rddReference = ref,
            rddMandateId = mandateId,
            rddCreditorId = creditorId,
            rddOtherParty = if T.null otherParty then Nothing else Just otherParty,
            rddValueDate = valDate
          }
    -- Fallback shape without a "Value date:" suffix; the booking date of the
    -- row is used as the value date instead.
    ingInsurancePayment = do
      [name, desc, ibanTxt, ref, mandateId, creditorId] <-
        notificationGroupsCP
          "SEPA direct debit/Debit (ING Verzekeren)"
          "^Name: (.* ING Verzekeren) Description: (.*) IBAN: ([A-Z0-9]+) Reference: (.*) Mandate ID: (.*) Creditor ID: (.*) Recurrent SEPA direct debit$"
          t
      iban <- ibanCP ibanTxt
      return $
        RecurrentDirectDebitData
          { rddName = name,
            rddDescription = desc,
            rddIban = iban,
            rddReference = ref,
            rddMandateId = mandateId,
            rddCreditorId = creditorId,
            rddOtherParty = Nothing,
            rddValueDate = date
          }
notificationsCP amsTz (PartTx _ Ideal Debit) t = do
  [name, desc, ibanTxt, timestampTxt, ref, valDateTxt] <-
    notificationGroupsCP
      "iDEAL/Debit"
      "^Name: (.*) Description: (.*) IBAN: ([A-Z0-9]+) Reference: ([0-9]{2}-[0-9]{2}-[0-9]{4} [0-9]{2}:[0-9]{2}) ([0-9]+) Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  iban <- ibanCP ibanTxt
  timestamp <- timestampCP "%d-%m-%Y %H:%M" amsTz timestampTxt
  valDate <- valueDateCP valDateTxt
  return $
    IdealDebitData
      { idName = name,
        idDescription = desc,
        idIban = iban,
        idTimestamp = timestamp,
        idReference = ref,
        idValueDate = valDate
      }
notificationsCP _ (PartTx _ BatchPayment Credit) t = do
  [name, desc, ibanTxt, ref, valDateTxt] <-
    notificationGroupsCP
      "Batch payment/Credit"
      "^Name: (.*) Description: (.*) IBAN: ([A-Z0-9]+) Reference: (.*) Value date: ([0-9]{2}/[0-9]{2}/[0-9]{4})$"
      t
  iban <- ibanCP ibanTxt
  valDate <- valueDateCP valDateTxt
  return $
    BatchPaymentData
      { bpName = name,
        bpDescription = desc,
        bpIban = iban,
        bpReference = ref,
        bpValueDate = valDate
      }
notificationsCP _ (PartTx _ ty cd) _ = fail $ "Unmatched type and debit/credit combination (" ++ show ty ++ ", " ++ show cd ++ ")"

-- | Parse the "Debit/credit" column; only the exact texts @Debit@ and
-- @Credit@ are accepted.
debitCreditCP :: T.Text -> C.Parser DebitCredit
debitCreditCP "Debit" = return Debit
debitCreditCP "Credit" = return Credit
debitCreditCP t = fail ("Unknown debit/credit value '" ++ T.unpack t ++ "'")

-- | Parse one CSV row (addressed by column name) into a 'PrimTx'.
--
-- The "Code" and "Transaction type" columns are parsed independently and must
-- denote the same 'TransactionType'; otherwise the row is rejected. The 'TZ'
-- argument is forwarded to 'notificationsCP'.
parseNamedRecord :: TZ -> C.NamedRecord -> C.Parser PrimTx
parseNamedRecord amsTz m = do
  date <- m .: "Date" >>= dateCP "%0Y%m%d"
  debitCredit <- m .: "Debit/credit" >>= debitCreditCP
  codeText <- m .: "Code"
  tyText <- m .: "Transaction type"
  tyFromCode <- parseCode codeText
  ty <- parseType tyText
  -- Reject rows whose two type columns contradict each other before looking
  -- at any of the remaining columns.
  when (ty /= tyFromCode) $
    fail $
      "Code '" ++ T.unpack codeText ++ "' and transaction type '" ++ T.unpack tyText ++ "' do not agree"
  PrimTx date
    <$> (m .: "Name / Description" <&> maybeNotProvided)
    <*> (m .: "Account" >>= ibanCP)
    <*> (m .: "Counterparty" >>= maybeCP ibanCP)
    <*> pure debitCredit
    <*> (m .: "Amount (EUR)" >>= decimalCP)
    <*> (m .: "Resulting balance" >>= decimalCP)
    <*> m .: "Tag"
    <*> (m .: "Notifications" >>= notificationsCP amsTz (PartTx date ty debitCredit))

-- | Read and decode a complete statement from the given handle.
--
-- Loads the @Europe/Amsterdam@ time zone, reads the whole handle, and decodes
-- it with 'scsvOptions' and 'parseNamedRecord'. Fails in 'IO' when decoding
-- fails or when the header row is not exactly the expected list of column
-- names in the expected order.
readFile :: Handle -> IO (V.Vector PrimTx)
readFile h = do
  tz <- loadTZFromDB "Europe/Amsterdam"
  contents <- BS.hGetContents h
  case C.decodeByNameWithP (parseNamedRecord tz) scsvOptions contents of
    Left err -> fail err
    Right
      ( [ "Date",
          "Name / Description",
          "Account",
          "Counterparty",
          "Code",
          "Debit/credit",
          "Amount (EUR)",
          "Transaction type",
          "Notifications",
          "Resulting balance",
          "Tag"
          ],
        txs
        ) ->
        return txs
    Right _ ->
      fail "Headers do not match expected pattern"

-- cgit v1.2.3