|
- #!/usr/bin/env python3
- """
- Bank Statement to YNAB Converter
- Converts bank statements from various formats to YNAB-compatible CSV files
- """
-
- import os
- import sys
- import glob
- import re
- import argparse
- import logging
- import yaml
- import pandas as pd
- from pathlib import Path
-
- logger = logging.getLogger()
-
def parse_norwegian_number(value):
    """Convert a Norwegian-formatted number to a float.

    Norwegian notation uses a comma as the decimal mark and a (possibly
    non-breaking) space as the thousands separator, e.g. ``"1 234,56"``.

    Args:
        value: Raw cell value; a string, a number, or a missing value.

    Returns:
        float: The parsed number, or 0.0 when the value is missing, empty,
        or cannot be parsed.
    """
    if pd.isna(value) or value == '':
        return 0.0

    # str.split() without arguments splits on every Unicode whitespace
    # character, so joining the pieces also removes non-breaking
    # thousands separators.
    text = ''.join(str(value).split())
    # Accept the typographic minus sign as well as ASCII "-", then switch
    # the decimal comma to a dot so float() understands it.
    text = text.replace('\u2212', '-').replace(',', '.')

    try:
        return float(text)
    except ValueError:
        return 0.0
-
def parse_norwegian_date(date_str):
    """Parse a ``DD.MM.YYYY`` date string into a pandas Timestamp.

    Args:
        date_str: Raw cell value holding the date.

    Returns:
        The parsed ``pd.Timestamp``, or an empty string when the value is
        missing or empty.

    Raises:
        SystemExit: With exit status 1 when the value does not match the
        ``DD.MM.YYYY`` format; the error is logged first.
    """
    if pd.isna(date_str) or date_str == '':
        return ''
    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        logger.error(f"Invalid date format: {date_str}")
        # sys.exit() rather than the bare exit() helper: the latter is
        # injected by the site module for interactive use and is not
        # guaranteed to exist in every runtime.
        sys.exit(1)
-
# Patterns recognised at the start of a transaction description.
_CARD_SUFFIX = re.compile(r'\*(\d{4})')
_SHORT_DATE = re.compile(r'\d{2}\.\d{2}')
_CURRENCY_CODE = re.compile(r'^[A-Z]{3}$')
_AMOUNT = re.compile(r'[\d]+\.[\d]+')


def convert_memo(original):
    """Tidy a bank transaction description.

    The " Kurs: 1.0000" marker is removed, then the leading words are
    inspected one by one until a word matches none of the rules:

    * empty words and the card suffix (``*1234``) are dropped,
    * a ``DD.MM`` date is moved to the end,
    * a currency code followed by an amount is dropped when the currency
      is NOK and moved to the end otherwise.

    Args:
        original: The raw description. ``None``/NaN is treated as empty and
            any other non-string value is converted with ``str()``.

    Returns:
        str: The cleaned description.
    """
    if not isinstance(original, str):
        if original is None or pd.isna(original):
            return ""
        original = str(original)

    words = original.replace(" Kurs: 1.0000", "").split(" ")

    # Moved words are collected separately and never re-examined; rotating
    # them inside the same list would loop forever once only moved words
    # are left.
    moved = []
    pos = 0
    while pos < len(words):
        word = words[pos]
        if word == "" or _CARD_SUFFIX.match(word):
            pos += 1
        elif _SHORT_DATE.match(word):
            moved.append(word)
            pos += 1
        elif (
            _CURRENCY_CODE.match(word)
            # Guard the look-ahead: a lone three-letter word has no amount.
            and pos + 1 < len(words)
            and _AMOUNT.match(words[pos + 1])
        ):
            if word != "NOK":
                moved.extend(words[pos:pos + 2])
            pos += 2
        else:
            break

    return " ".join(words[pos:] + moved)
-
def parse_bank_sor(data):
    """
    Parse Sparebanken Norge bank data ("Transaksjoner*.csv" exports)
    Columns read: Status, Valuta, Beskrivelse, Melding/KID/Fakt.nr,
    Bokført dato, Beløp ut, Beløp inn

    Only rows whose Status is "Bokført" are converted.

    Args:
        data (pd.DataFrame): The statement as read from the CSV file

    Returns:
        pd.DataFrame: Rows with Date, Payee, Memo, Outflow and Inflow

    Raises:
        ValueError: If a row uses a currency other than NOK
    """
    def text_or_empty(value):
        # Empty CSV cells are read as NaN, which is not a string.
        return '' if pd.isna(value) else value

    def amount_or_zero(value):
        # NaN is truthy, so a plain `value or '0'` would let it through.
        if pd.isna(value) or value == '':
            return 0.0
        return float(value)

    result = []

    for _, row in data.iterrows():
        if row.get('Status') != "Bokført":
            continue

        currency = row.get('Valuta')
        if currency != 'NOK':
            raise ValueError(f"Unknown currency {currency}")

        payee = convert_memo(text_or_empty(row.get('Beskrivelse', '')))
        memo = convert_memo(text_or_empty(row.get('Melding/KID/Fakt.nr', '')))

        outgoing = amount_or_zero(row['Beløp ut'])

        result.append({
            'Date': parse_norwegian_date(row.get('Bokført dato')),
            'Payee': payee,
            'Memo': memo,
            # The sign is flipped as before; a zero amount stays a plain
            # 0.0 instead of becoming -0.0.
            'Outflow': -outgoing if outgoing else 0.0,
            'Inflow': amount_or_zero(row['Beløp inn']),
        })

    return pd.DataFrame(result)
-
def parse_bank_sparebank1(data):
    """
    Convert a Sparebank 1 statement into YNAB rows
    Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto

    Args:
        data (pd.DataFrame): The statement as read from the CSV file

    Returns:
        pd.DataFrame: Rows with Date, Payee, Memo, Outflow and Inflow
    """
    def to_ynab_row(row):
        money_in = parse_norwegian_number(row.get('Inn'))
        money_out = parse_norwegian_number(row.get('Ut'))

        # Outflow is reported as a positive amount.
        money_out = -money_out if money_out < 0 else money_out

        return {
            'Date': parse_norwegian_date(row.get('Dato', '')),
            'Payee': row.get('Beskrivelse', ''),
            'Memo': row.get('Til konto', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([to_ynab_row(row) for _, row in data.iterrows()])
-
-
def parse_bank_norwegian(data):
    """
    Convert a Bank Norwegian statement into YNAB rows
    Expected columns: TransactionDate, Text, Memo, Amount

    Args:
        data (pd.DataFrame): The statement as read from the spreadsheet

    Returns:
        pd.DataFrame: Rows with Date, Payee, Memo, Outflow and Inflow
    """
    def split_amount(amount):
        # Returns (outflow, inflow); both are non-negative.
        if amount > 0:
            return 0, amount
        if amount < 0:
            return -amount, 0
        return 0, 0

    records = []
    for _, entry in data.iterrows():
        spent, received = split_amount(entry.get('Amount', 0))
        records.append({
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            'Outflow': spent,
            'Inflow': received,
        })

    return pd.DataFrame(records)
-
-
# Every bank currently names its output file the same way.
_OUTPUT_FILENAME_TEMPLATE = "YNAB-{bank}-FROM-{first_date}-TO-{last_date}"

# Registry of supported banks: the filename patterns that identify a bank's
# exports, how to read them, and the function that parses them.
BANKS = {
    "SparebankenNorge": dict(
        patterns=["Transaksjoner*.csv"],
        encoding="latin1",
        output_filename=_OUTPUT_FILENAME_TEMPLATE,
        parse_function=parse_bank_sor,
        delimiter=";",
    ),
    "Sparebank1": dict(
        patterns=["OversiktKonti*.csv"],
        output_filename=_OUTPUT_FILENAME_TEMPLATE,
        parse_function=parse_bank_sparebank1,
        delimiter=";",
    ),
    "Norwegian": dict(
        patterns=["BankNorwegian*.xlsx", "Statement*.xlsx"],
        output_filename=_OUTPUT_FILENAME_TEMPLATE,
        parse_function=parse_bank_norwegian,
    ),
    # Add more banks and patterns as needed
}
-
-
def process_bank_statement(file_path, parse_function, delimiter=",", encoding="utf-8"):
    """
    Process a single bank statement file

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Function to parse the specific bank format
        delimiter (str): Field delimiter, used for CSV files only
        encoding (str): Text encoding, used for CSV files only

    Returns:
        pd.DataFrame: Processed YNAB-compatible data; an empty frame when
        the file type is not supported

    Raises:
        Exception: Any error raised while reading or parsing the file is
        logged and re-raised unchanged
    """
    file_extension = Path(file_path).suffix.lower()

    if file_extension not in (".csv", ".xlsx", ".xls"):
        logger.warning(f"Skipping unsupported file type: {file_path}")
        return pd.DataFrame()

    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        else:
            data = pd.read_excel(file_path)

        # Call the appropriate bank-specific parsing function
        return parse_function(data)

    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}")
        # Bare raise keeps the original traceback intact.
        raise
-
-
def find_bank_config(filename):
    """
    Look up which bank a statement belongs to, based on its filename

    Banks are checked in the order they appear in BANKS and the first one
    with a matching pattern wins.

    Args:
        filename (str): Name of the file to match

    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    import fnmatch

    for name, config in BANKS.items():
        if any(fnmatch.fnmatch(filename, pattern) for pattern in config["patterns"]):
            return name, config

    return None, None
-
-
def _collect_input_files(input_paths):
    """Expand the given files and directories into a de-duplicated file list."""
    found = []
    for raw_path in input_paths:
        path = Path(raw_path)
        if not path.exists():
            logger.warning(f"Path does not exist: {path}")
        elif path.is_file():
            found.append(path)
        elif path.is_dir():
            logger.debug(f"Looking for matching files in {path}")
            for bank_config in BANKS.values():
                for pattern in bank_config["patterns"]:
                    # Path.glob applies the pattern relative to the
                    # directory, so glob metacharacters in the directory
                    # name itself are taken literally.
                    found.extend(sorted(path.glob(pattern)))

    # Drop duplicates (e.g. a file named explicitly and found again via
    # its directory) while keeping the first-seen order.
    return list(dict.fromkeys(found))


def _unique_output_path(output_directory, base_name):
    """Return a path for base_name + ".csv" that does not exist yet."""
    candidate = output_directory / f"{base_name}.csv"
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = output_directory / f"{base_name} ({attempt}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_paths, output_directory):
    """
    Convert bank statements to YNAB format

    Each recognised statement is written as a CSV file to the output
    directory. Existing files are never overwritten; a " (n)" suffix is
    added to the filename instead.

    Args:
        input_paths (list): List of specific files or directories to process
        output_directory (Path): Directory that receives the converted
            files; created if it doesn't exist
    """
    output_directory = Path(output_directory)
    output_directory.mkdir(exist_ok=True, parents=True)

    files_to_process = _collect_input_files(input_paths)
    files_processed = False

    logger.info(f"Processing {len(files_to_process)} file(s)...")
    for file_path in files_to_process:
        logger.debug(f"Processing {file_path}")

        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            logger.warning(f"No bank configuration found for file: {file_path.name}")
            continue

        logger.info(f"Processing file: {file_path} for {bank_name}")

        ynab_data = process_bank_statement(
            str(file_path),
            bank_config["parse_function"],
            bank_config.get("delimiter", ","),
            bank_config.get("encoding", "utf-8"),
        )

        if ynab_data.empty:
            logger.warning(f"No data processed for {file_path}")
            continue

        base_name = bank_config["output_filename"].format(
            bank=bank_name,
            first_date=ynab_data['Date'].min().date(),
            last_date=ynab_data['Date'].max().date(),
        )
        output_file = _unique_output_path(output_directory, base_name)

        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        logger.info(f"Data saved to {output_file}")
        files_processed = True

    if not files_processed:
        logger.warning("No files were processed. Make sure your files match the expected patterns.")
-
def setup_logger(verbosity):
    """Configure the root logger for the requested verbosity.

    Safe to call more than once: the level is applied on every call, so a
    later call can raise or lower the verbosity chosen by an earlier one.

    Args:
        verbosity: 0 or less logs warnings and above, 1 adds informational
            messages, anything higher also logs debug messages.
    """
    if verbosity <= 0:
        log_level = logging.WARNING
    elif verbosity == 1:
        log_level = logging.INFO
    else:
        log_level = logging.DEBUG

    logging.basicConfig(
        level=log_level,
        format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
    )

    # basicConfig() does nothing once the root logger has handlers, so set
    # the level explicitly to make repeated calls take effect.
    logging.getLogger().setLevel(log_level)
-
def main():
    """Command-line entry point: read arguments and config, then convert.

    Command-line arguments take precedence over the YAML config file,
    which in turn takes precedence over the built-in defaults.
    """
    parser = argparse.ArgumentParser(
        prog='YNAB',
        description='Prepare bank transcripts for import to You Need A Budget',
    )

    parser.add_argument(
        'filenames',
        type=Path,
        nargs='*',
        help='The files to process',
    )

    parser.add_argument(
        '-o', '--output-dir',
        type=Path,
        default=None,
        help='The location to store the converted files',
    )

    parser.add_argument(
        '-v', '--verbose',
        default=0,
        action='count',
        help='Increase logging verbosity',
    )

    parser.add_argument(
        '-c', '--config',
        type=Path,
        help='Path to the config file',
    )

    args = parser.parse_args()

    setup_logger(args.verbose)

    config_path = args.config or Path.home() / '.config/ynab/config.yaml'
    if config_path.exists():
        with config_path.open('r', encoding='utf-8') as config_file:
            # An empty YAML file loads as None; treat it as "no settings".
            config = yaml.safe_load(config_file) or {}
        logger.debug(f"Loaded config file {config_path}")
    else:
        logger.debug(f"Could not find config file {config_path}")
        config = {}

    verbosity = config.get('verbosity')
    if args.verbose == 0 and verbosity:
        setup_logger(verbosity)

    current_directory = Path.cwd()

    inputs = args.filenames
    if not inputs:
        configured_inputs = config.get('default_inputs') or []
        if isinstance(configured_inputs, str):
            # Allow a single path written as a plain string in the config.
            configured_inputs = [configured_inputs]
        # Config values are strings; the converter works with Path objects.
        inputs = [Path(entry).expanduser() for entry in configured_inputs]
    if not inputs:
        logger.info("Processing all files in current directory")
        inputs = [current_directory]

    configured_output = config.get('output_dir')
    if args.output_dir:
        output_dir = args.output_dir
    elif configured_output:
        output_dir = Path(configured_output).expanduser()
    else:
        output_dir = current_directory / "YNAB_Outputs"
        logger.debug(f"No output directory set. Defaulting to {output_dir}")

    convert_bank_statements_to_ynab(inputs, output_dir)


if __name__ == "__main__":
    main()
|