#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter

Converts bank statements from various formats to YNAB-compatible CSV files
"""

import os
import sys
import glob
import re
import argparse
import fnmatch
import logging
from pathlib import Path

import pandas as pd

try:
    import yaml
except ImportError:
    # PyYAML is only needed when a config file is actually read; the
    # converter itself works without it.
    yaml = None

logger = logging.getLogger()

# Token patterns used by convert_memo. They are applied with ``match`` and
# therefore only anchor at the start of a word (except the currency code,
# which must be the whole word).
_CARD_SUFFIX_RE = re.compile(r'\*(\d{4})')
_MEMO_DATE_RE = re.compile(r'\d{2}\.\d{2}')
_CURRENCY_CODE_RE = re.compile(r'^[A-Z]{3}$')
_CURRENCY_AMOUNT_RE = re.compile(r'[\d]+\.[\d]+')


def parse_norwegian_number(value):
    """
    Convert Norwegian number format (comma decimal) to float

    Whitespace, including the non-breaking spaces used as thousands
    separators, is ignored.

    Args:
        value: Cell value as read by pandas (string, number or NaN)

    Returns:
        float: Parsed value; 0.0 for missing, empty or unparseable input
    """
    if pd.isna(value) or value == '':
        return 0.0

    # str.split() without arguments splits on any Unicode whitespace, so
    # joining the pieces drops regular and non-breaking spaces alike.
    cleaned = "".join(str(value).split()).replace(',', '.')
    if not cleaned:
        return 0.0

    try:
        return float(cleaned)
    except ValueError:
        # Keep the historical fallback to 0.0, but never silently: a
        # dropped amount in a bank statement must be visible to the user.
        logger.warning(f"Could not parse number {value!r}; treating it as 0")
        return 0.0


def parse_norwegian_date(date_str):
    """
    Convert a DD.MM.YYYY date string to a pandas Timestamp

    Args:
        date_str: Date as read from the statement

    Returns:
        pd.Timestamp: Parsed date, or pd.NaT when the value is missing.
        NaT (rather than '') keeps the resulting 'Date' column
        datetime-typed, so min()/max() work and the CSV cell stays empty.

    Exits the process with status 1 when the value is present but is not
    a valid DD.MM.YYYY date.
    """
    if pd.isna(date_str) or date_str == '':
        return pd.NaT

    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        logger.error(f"Invalid date format: {date_str}")
        # sys.exit instead of the site-provided exit(), which is not
        # guaranteed to exist (e.g. when running with ``python -S``).
        sys.exit(1)


def convert_memo(original):
    """
    Clean up a transaction description

    Leading tokens are inspected one by one until the first "real" word:
      * empty tokens and card suffixes (``*1234``) are dropped,
      * dates (``DD.MM``) are moved to the end,
      * a currency code followed by an amount is dropped when the currency
        is NOK and moved to the end otherwise.

    Args:
        original: Description text; NaN/None (empty cell) is accepted

    Returns:
        str: The cleaned description ('' for missing input)
    """
    if pd.isna(original):
        return ""

    text = str(original).replace(" Kurs: 1.0000", "")
    words = text.split(" ")

    # Tokens that are moved to the end are collected separately and never
    # re-examined; otherwise a memo consisting only of such tokens would
    # rotate forever.
    moved_to_end = []
    pos = 0
    while pos < len(words):
        word = words[pos]
        if word == "":
            pos += 1
        elif _CARD_SUFFIX_RE.match(word):
            # Last four digits of the card, ignore it
            pos += 1
        elif _MEMO_DATE_RE.match(word):
            moved_to_end.append(word)
            pos += 1
        elif (
            _CURRENCY_CODE_RE.match(word)
            and pos + 1 < len(words)
            and _CURRENCY_AMOUNT_RE.match(words[pos + 1])
        ):
            # Original currency and amount
            if word != "NOK":
                moved_to_end.extend(words[pos:pos + 2])
            pos += 2
        else:
            break

    return " ".join(words[pos:] + moved_to_end)


def parse_bank_sor(data):
    """
    Parse bank data for the "SparebankenNorge" configuration

    Columns read: Status, Valuta, Beskrivelse, Melding/KID/Fakt.nr,
    Bokført dato, Beløp ut, Beløp inn

    Only rows whose Status is "Bokført" are converted.

    Args:
        data (pd.DataFrame): Raw statement

    Returns:
        pd.DataFrame: Rows with Date, Payee, Memo, Outflow, Inflow

    Raises:
        ValueError: If a booked row is not in NOK
        KeyError: If an amount column is missing
    """
    result = []

    for _, row in data.iterrows():
        if row.get('Status') != "Bokført":
            continue

        currency = row.get('Valuta')
        if currency != 'NOK':
            raise ValueError(f"Unknown currency {currency}")

        # The sign of 'Beløp ut' is flipped for the Outflow column; a zero
        # is kept as plain 0.0 so the CSV does not contain "-0.0".
        amount_out = parse_norwegian_number(row['Beløp ut'])

        result.append({
            'Date': parse_norwegian_date(row.get('Bokført dato')),
            'Payee': convert_memo(row.get('Beskrivelse', '')),
            'Memo': convert_memo(row.get('Melding/KID/Fakt.nr', '')),
            'Outflow': -amount_out if amount_out else 0.0,
            'Inflow': parse_norwegian_number(row['Beløp inn']),
        })

    return pd.DataFrame(result)


def parse_bank_sparebank1(data):
    """
    Parse Sparebank 1 bank data

    Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto

    Args:
        data (pd.DataFrame): Raw statement

    Returns:
        pd.DataFrame: Rows with Date, Payee, Memo, Outflow, Inflow
    """
    result = []

    for _, row in data.iterrows():
        result.append({
            'Date': parse_norwegian_date(row.get('Dato', '')),
            'Payee': row.get('Beskrivelse', ''),
            'Memo': row.get('Til konto', ''),
            # Outflow is always reported as a positive number
            'Outflow': abs(parse_norwegian_number(row.get('Ut'))),
            'Inflow': parse_norwegian_number(row.get('Inn')),
        })

    return pd.DataFrame(result)


def parse_bank_norwegian(data):
    """
    Parse Norwegian bank data

    Expected columns: TransactionDate, Text, Memo, Amount

    Args:
        data (pd.DataFrame): Raw statement

    Returns:
        pd.DataFrame: Rows with Date, Payee, Memo, Outflow, Inflow
    """
    result = []

    for _, row in data.iterrows():
        amount = row.get('Amount', 0)
        inflow = amount if amount > 0 else 0
        outflow = -amount if amount < 0 else 0  # Make outflow positive

        # The output file name is built from min()/max() of the dates, so
        # the column must hold Timestamps (or NaT), never plain strings.
        raw_date = row.get('TransactionDate')
        if pd.isna(raw_date) or raw_date == '':
            date = pd.NaT
        else:
            date = pd.to_datetime(raw_date)

        result.append({
            'Date': date,
            'Payee': row.get('Text', ''),
            'Memo': row.get('Memo', ''),
            'Outflow': outflow,
            'Inflow': inflow,
        })

    return pd.DataFrame(result)


# Dictionary of banks, filename patterns, and parsing functions
BANKS = {
    "SparebankenNorge": {
        "patterns": ["Transaksjoner*.csv"],
        "encoding": "latin1",
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sor,
        "delimiter": ";"
    },
    "Sparebank1": {
        "patterns": ["OversiktKonti*.csv"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sparebank1,
        "delimiter": ";"
    },
    "Norwegian": {
        "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_norwegian
    }
    # Add more banks and patterns as needed
}


def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Process a single bank statement file

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Function to parse the specific bank format
        delimiter (str): Field delimiter (CSV files only)
        encoding (str): Text encoding (CSV files only)

    Returns:
        pd.DataFrame: Processed YNAB-compatible data; an empty frame for
        unsupported file types

    Any exception raised while reading or parsing is logged and re-raised.
    """
    file_extension = Path(file_path).suffix.lower()

    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        elif file_extension in (".xlsx", ".xls"):
            data = pd.read_excel(file_path)
        else:
            logger.warning(f"Skipping unsupported file type: {file_path}")
            return pd.DataFrame()

        # Call the appropriate bank-specific parsing function
        return parse_function(data)
    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}")
        raise


def find_bank_config(filename):
    """
    Find the appropriate bank configuration for a given filename

    Args:
        filename (str): Name of the file to match

    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    for bank_name, bank_config in BANKS.items():
        if any(fnmatch.fnmatch(filename, pattern) for pattern in bank_config["patterns"]):
            return bank_name, bank_config

    return None, None


def _collect_input_files(input_paths):
    """Expand files and directories into an ordered, duplicate-free list of files."""
    files = []

    for raw_path in input_paths:
        # Paths coming from the config file are plain strings
        path = Path(raw_path)
        if not path.exists():
            logger.warning(f"Path does not exist: {path}")
        elif path.is_file():
            files.append(path)
        elif path.is_dir():
            logger.debug(f"Looking for matching files in {path}")
            for bank_config in BANKS.values():
                for pattern in bank_config["patterns"]:
                    matching_files = sorted(glob.glob(str(path / pattern)))
                    files.extend(Path(f) for f in matching_files)

    # dict.fromkeys keeps first-seen order while dropping repeats, so a file
    # that is listed twice (or matches two patterns) is converted once.
    return list(dict.fromkeys(files))


def _unique_output_path(output_directory, base_name):
    """Return '<base_name>.csv', or '<base_name> (n).csv' if that name is taken."""
    candidate = output_directory / f"{base_name}.csv"
    retry_count = 0
    while candidate.exists():
        retry_count += 1
        candidate = output_directory / f"{base_name} ({retry_count}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_paths, output_directory):
    """
    Convert bank statements to YNAB format

    Args:
        input_paths (list): Specific files or directories to process
            (Path objects or strings). Directories are searched for files
            matching the patterns in BANKS.
        output_directory (Path or str): Directory the converted CSV files
            are written to; created if it does not exist
    """
    output_directory = Path(output_directory)

    # Create output directory if it doesn't exist
    output_directory.mkdir(exist_ok=True, parents=True)

    files_to_process = _collect_input_files(input_paths)
    files_processed = False

    logger.info(f"Processing {len(files_to_process)} file(s)...")

    for file_path in files_to_process:
        logger.debug(f"Processing {file_path}")
        if not file_path.exists():
            logger.warning(f"File not found: {file_path}")
            continue

        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            logger.warning(f"No bank configuration found for file: {file_path.name}")
            continue

        logger.info(f"Processing file: {file_path} for {bank_name}")

        ynab_data = process_bank_statement(
            str(file_path),
            bank_config["parse_function"],
            bank_config.get("delimiter", ","),
            bank_config.get("encoding", "utf-8"),
        )

        if ynab_data.empty:
            logger.warning(f"No data processed for {file_path}")
            continue

        base_name = bank_config["output_filename"].format(
            bank=bank_name,
            first_date=ynab_data['Date'].min().date(),
            last_date=ynab_data['Date'].max().date(),
        )
        output_file = _unique_output_path(output_directory, base_name)

        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        logger.info(f"Data saved to {output_file}")
        files_processed = True

    if not files_processed:
        logger.warning("No files were processed. Make sure your files match the expected patterns.")


def setup_logger(verbosity):
    """
    Configure the root logger

    Args:
        verbosity (int): 0 or less = WARNING, 1 = INFO, 2 or more = DEBUG
    """
    if verbosity <= 0:
        log_level = logging.WARNING
    elif verbosity == 1:
        log_level = logging.INFO
    else:
        log_level = logging.DEBUG

    logging.basicConfig(
        level=log_level,
        format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        # basicConfig is a no-op once the root logger has handlers; force
        # makes a second call (verbosity taken from the config file) apply.
        force=True,
    )


def main(argv=None):
    """
    Command line entry point

    Args:
        argv (list, optional): Arguments to parse; defaults to sys.argv[1:]
    """
    parser = argparse.ArgumentParser(
        prog='YNAB',
        description='Prepare bank transcripts for import to You Need A Budget',
    )
    parser.add_argument(
        'filenames',
        type=Path,
        nargs='*',
        help='The files to process',
    )
    parser.add_argument(
        '-o', '--output-dir',
        type=Path,
        default=None,
        help='The location to store the converted files',
    )
    parser.add_argument(
        '-v', '--verbose',
        default=0,
        action='count',
        help='Increase logging verbosity',
    )
    parser.add_argument(
        '-c', '--config',
        type=Path,
        help='Path to the config file',
    )
    args = parser.parse_args(argv)

    setup_logger(args.verbose)

    config_path = args.config or Path.home() / '.config/ynab/config.yaml'
    config = {}
    if config_path.exists():
        if yaml is None:
            logger.error(f"PyYAML is required to read config file {config_path}")
            sys.exit(1)
        with config_path.open('r') as config_file:
            # safe_load returns None for an empty file
            config = yaml.safe_load(config_file) or {}
        logger.debug(f"Loaded config file {config_path}")
    else:
        logger.debug(f"Could not find config file {config_path}")

    # Command line verbosity wins over the config file
    verbosity = config.get('verbosity')
    if args.verbose == 0 and verbosity:
        setup_logger(verbosity)

    current_directory = Path.cwd()

    inputs = args.filenames
    if not inputs:
        configured_inputs = config.get('default_inputs') or []
        if isinstance(configured_inputs, (str, Path)):
            # A single path instead of a list
            configured_inputs = [configured_inputs]
        inputs = [Path(p).expanduser() for p in configured_inputs]
    if not inputs:
        logger.info("Processing all files in current directory")
        inputs = [current_directory]

    output_dir = args.output_dir
    if output_dir is None and config.get('output_dir'):
        output_dir = Path(config['output_dir']).expanduser()
    if output_dir is None:
        output_dir = current_directory / "YNAB_Outputs"
        logger.debug(f"No output directory set. Defaulting to {output_dir}")

    convert_bank_statements_to_ynab(inputs, output_dir)


if __name__ == "__main__":
    main()