From fdd40de8c2c36a83c244102a734f6fc0589d5fdd Mon Sep 17 00:00:00 2001 From: Sindre Stephansen Date: Mon, 22 Sep 2025 10:21:24 +0200 Subject: [PATCH] Merge both versions of the script --- ynab-karianne.py | 251 ----------------------------------- ynab.py | 333 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 265 insertions(+), 319 deletions(-) delete mode 100644 ynab-karianne.py diff --git a/ynab-karianne.py b/ynab-karianne.py deleted file mode 100644 index a985bc3..0000000 --- a/ynab-karianne.py +++ /dev/null @@ -1,251 +0,0 @@ -#!/usr/bin/env python3 -""" -Bank Statement to YNAB Converter -Converts bank statements from various formats to YNAB-compatible CSV files -""" - -import os -import sys -import glob -import pandas as pd -from pathlib import Path - -def parse_norwegian_number(value): - """Convert Norwegian number format (comma decimal) to float""" - if pd.isna(value) or value == '': - return 0.0 - # Convert to string and replace comma with dot - str_value = str(value).replace(',', '.') - try: - return float(str_value) - except ValueError: - return 0.0 - -def parse_norwegian_date(date_str): - """Convert DD.MM.YYYY format to YYYY-MM-DD""" - if pd.isna(date_str) or date_str == '': - return '' - try: - # Parse DD.MM.YYYY and convert to date object - return pd.to_datetime(date_str, format='%d.%m.%Y') - except (ValueError, TypeError): - print(f"Invalid date format: {date_str}") - exit(1) - -def parse_bank_sparebank1(data): - """ - Parse Sparebank 1 bank data - Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto - """ - result = [] - - for _, row in data.iterrows(): - inflow = parse_norwegian_number(row.get('Inn')) - outflow = parse_norwegian_number(row.get('Ut')) - - # Convert outflow to positive if negative - if outflow < 0: - outflow = -outflow - - result.append({ - 'Date': parse_norwegian_date(row.get('Dato', '')), - 'Payee': row.get('Beskrivelse', ''), - 'Memo': row.get('Til konto', ''), - 'Outflow': outflow, - 'Inflow': inflow - }) - - return pd.DataFrame(result) - - -def parse_bank_norwegian(data): - """ - Parse Norwegian bank data - Expected columns: TransactionDate, Text, Memo, Amount - """ - result = [] - - for _, row in data.iterrows(): - amount = row.get('Amount', 0) - inflow = amount if amount > 0 else 0 - outflow = -amount if amount < 0 else 0 # Make outflow positive - - result.append({ - 'Date': row.get('TransactionDate', ''), - 'Payee': row.get('Text', ''), - 'Memo': row.get('Memo', ''), - 'Outflow': outflow, - 'Inflow': inflow - }) - - return pd.DataFrame(result) - - -# Dictionary of banks, filename patterns, and parsing functions -BANKS = { - "Sparebank1": { - "patterns": ["OversiktKonti*.csv"], - "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", - "parse_function": parse_bank_sparebank1, - "delimiter": ";" - }, - "Norwegian": { - "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"], - "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", - "parse_function": parse_bank_norwegian - } - # Add more banks and patterns as needed -} - - -def process_bank_statement(file_path, parse_function, delimiter): - """ - Process a single bank statement file - - Args: - file_path (str): Path to the bank statement file - parse_function (callable): Function to parse the specific bank format - delimiter (Optional): Field delimiter - - Returns: - pd.DataFrame: Processed YNAB-compatible data - """ - file_extension = Path(file_path).suffix.lower() - - try: - # Handle CSV files - if file_extension == ".csv": - data = pd.read_csv(file_path, delimiter=delimiter) - # Handle Excel files - elif file_extension in [".xlsx", ".xls"]: - data = pd.read_excel(file_path) - else: - print(f"Skipping unsupported file type: {file_path}") - return pd.DataFrame() - - # Call the appropriate bank-specific parsing function - ynab_data = parse_function(data) - return ynab_data - - except Exception as e: - print(f"Error processing file {file_path}: {e}") - raise e - return pd.DataFrame() - - -def find_bank_config(filename): - """ - Find the appropriate bank configuration for a given filename - - Args: - filename (str): Name of the file to match - - Returns: - tuple: (bank_name, bank_config) or (None, None) if no match - """ - import fnmatch - - for bank_name, bank_config in BANKS.items(): - for pattern in bank_config["patterns"]: - if fnmatch.fnmatch(filename, pattern): - return bank_name, bank_config - - return None, None - - -def convert_bank_statements_to_ynab(input_files=None): - """ - Convert bank statements to YNAB format - - Args: - input_files (list): Optional list of specific files to process - If None, processes all files in current directory - """ - current_directory = Path.cwd() - output_directory = current_directory / "YNAB_Outputs" - - # Create output directory if it doesn't exist - output_directory.mkdir(exist_ok=True) - - # Get list of files to process - if input_files: - print(f"Processing {len(input_files)} dragged file(s)...") - files_to_process = [Path(f) for f in input_files if Path(f).exists()] - else: - print("Processing all files in current directory...") - files_to_process = [] - # Collect all files matching any bank pattern - for bank_config in BANKS.values(): - for pattern in bank_config["patterns"]: - matching_files = glob.glob(str(current_directory / pattern)) - files_to_process.extend([Path(f) for f in matching_files]) - - files_processed = False - - # Process each file - for file_path in files_to_process: - if not file_path.exists(): - print(f"File not found: {file_path}") - continue - - # Find matching bank configuration - bank_name, bank_config = find_bank_config(file_path.name) - - if not bank_config: - print(f"No bank configuration found for file: {file_path.name}") - continue - - print(f"Processing file: {file_path} for {bank_name}") - - parse_function = bank_config["parse_function"] - delimiter = bank_config.get("delimiter", ",") - - # Process the file - ynab_data = process_bank_statement(str(file_path), parse_function, delimiter) - - if ynab_data.empty: - print(f"No data processed for {file_path}") - continue - - filename_placeholders = { - 'bank': bank_name, - 'first_date': ynab_data['Date'].min().date(), - 'last_date': ynab_data['Date'].max().date(), - } - - file_retry_count = 0 - while True: - output_filename = bank_config["output_filename"].format(**filename_placeholders) - - if file_retry_count > 0: - output_filename += f" ({file_retry_count})" - - output_filename += ".csv" - output_file = output_directory / output_filename - - if not output_file.exists(): - break - - file_retry_count += 1 - - # Export to CSV for YNAB import - ynab_data.to_csv(output_file, index=False) - print(f"Data saved to {output_file}") - files_processed = True - - if not files_processed: - print("No files were processed. Make sure your files match the expected patterns.") - - -if __name__ == "__main__": - # Check if files were dragged onto the script - if len(sys.argv) > 1: - # Files were dragged - process them - files = sys.argv[1:] - convert_bank_statements_to_ynab(files) - else: - # No files dragged - run normal directory processing - convert_bank_statements_to_ynab() - - # Keep window open on Mac so user can see results - input("\nPress Enter to close...") \ No newline at end of file diff --git a/ynab.py b/ynab.py index e98fd7d..45ca923 100755 --- a/ynab.py +++ b/ynab.py @@ -1,39 +1,49 @@ -#!/bin/python3 +#!/usr/bin/env python3 +""" +Bank Statement to YNAB Converter +Converts bank statements from various formats to YNAB-compatible CSV files +""" +import os import sys -import csv +import glob import re +import pandas as pd from pathlib import Path -whitelist_cards = ["7756"] +def parse_norwegian_number(value): + """Convert Norwegian number format (comma decimal) to float""" + if pd.isna(value) or value == '': + return 0.0 + # Convert to string and replace comma with dot + str_value = str(value).replace(',', '.') + try: + return float(str_value) + except ValueError: + return 0.0 -in_header = ['Utført dato', 'Bokført dato', 'Rentedato', 'Beskrivelse', 'Type', 'Undertype', 'Fra konto', 'Avsender', 'Til konto', 'Mottakernavn', 'Beløp inn', 'Beløp ut', 'Valuta', 'Status', 'Melding'] -out_header = ['Date', 'Payee', 'Memo', 'Outflow', 'Inflow'] - - -def usage(): - print('Usage: ynab.py ') - - -def is_reserved(row): - return row['Status'] == "Reservert" +def parse_norwegian_date(date_str): + """Convert DD.MM.YYYY format to YYYY-MM-DD""" + if pd.isna(date_str) or date_str == '': + return '' + try: + # Parse DD.MM.YYYY and convert to date object + return pd.to_datetime(date_str, format='%d.%m.%Y') + except (ValueError, TypeError): + print(f"Invalid date format: {date_str}") + exit(1) def convert_memo(original): original = original.replace(" Kurs: 1.0000", "") words = original.split(" ") - for i in range(20): + while len(words) > 0: if words[0] == "": # It's empty del words[0] elif m := re.match(r'\*(\d{4})', words[0]): - # It's the last four digits of a card - if m.groups()[0] in whitelist_cards: - # It's an expected card, ignore it - del words[0] - else: - # It's an unexpected card, move it to the end - words.append(words.pop(0)) + # It's the last four numbers of the card, ignore it + del words[0] elif m := re.match(r'\d{2}\.\d{2}', words[0]): # It's the date. Move it to the end words.append(words.pop(0)) @@ -49,71 +59,258 @@ def convert_memo(original): words.append(words.pop(0)) else: break - else: - raise Exception(f"Infinite loop while parsing \"{original}\"") - return " ".join(words) +def parse_bank_sor(data): + """ + Parse Sparebank 1 bank data + Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto + """ + result = [] + + for _, row in data.iterrows(): + if row.get('Status') != "Bokført": + continue -def convert_row(row): - if is_reserved(row): - return None + if row.get('Valuta') != 'NOK': + raise ValueError(f"Unknown currency {row['Valuta']}") - if row['Valuta'] != 'NOK': - raise ValueError(f"Unknown currency {row['Valuta']}") + payee = convert_memo(row.get('Beskrivelse', '')) + memo = convert_memo(row.get('Melding/KID/Fakt.nr', '')) - return [ - row['Bokført dato'], - row['Beskrivelse'], - convert_memo(row['Melding']), - -float(row['Beløp ut'] or '0'), - float(row['Beløp inn'] or '0'), - ] + result.append({ + 'Date': parse_norwegian_date(row.get('Bokført dato')), + 'Payee': payee, + 'Memo': memo, + 'Outflow': -float(row['Beløp ut'] or '0'), + 'Inflow': float(row['Beløp inn'] or '0'), + }) + + return pd.DataFrame(result) +def parse_bank_sparebank1(data): + """ + Parse Sparebank 1 bank data + Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto + """ + result = [] + + for _, row in data.iterrows(): + inflow = parse_norwegian_number(row.get('Inn')) + outflow = parse_norwegian_number(row.get('Ut')) + + # Convert outflow to positive if negative + if outflow < 0: + outflow = -outflow + + result.append({ + 'Date': parse_norwegian_date(row.get('Dato', '')), + 'Payee': row.get('Beskrivelse', ''), + 'Memo': row.get('Til konto', ''), + 'Outflow': outflow, + 'Inflow': inflow + }) + + return pd.DataFrame(result) -def convert(reader, writer): - writer.writerow(out_header) - # Ignore header - reader.__next__() +def parse_bank_norwegian(data): + """ + Parse Norwegian bank data + Expected columns: TransactionDate, Text, Memo, Amount + """ + result = [] + + for _, row in data.iterrows(): + amount = row.get('Amount', 0) + inflow = amount if amount > 0 else 0 + outflow = -amount if amount < 0 else 0 # Make outflow positive + + result.append({ + 'Date': row.get('TransactionDate', ''), + 'Payee': row.get('Text', ''), + 'Memo': row.get('Memo', ''), + 'Outflow': outflow, + 'Inflow': inflow + }) + + return pd.DataFrame(result) - for raw_row in reader: - # Stop when we hit an empty row - for field in raw_row: - if field: - break + +# Dictionary of banks, filename patterns, and parsing functions +BANKS = { + "SparebankenNorge": { + "patterns": ["Transaksjoner*.csv"], + "encoding": "latin1", + "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", + "parse_function": parse_bank_sor, + "delimiter": ";" + }, + "Sparebank1": { + "patterns": ["OversiktKonti*.csv"], + "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", + "parse_function": parse_bank_sparebank1, + "delimiter": ";" + }, + "Norwegian": { + "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"], + "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", + "parse_function": parse_bank_norwegian + } + # Add more banks and patterns as needed +} + + +def process_bank_statement(file_path, parse_function, delimiter, encoding): + """ + Process a single bank statement file + + Args: + file_path (str): Path to the bank statement file + parse_function (callable): Function to parse the specific bank format + delimiter (Optional): Field delimiter + + Returns: + pd.DataFrame: Processed YNAB-compatible data + """ + file_extension = Path(file_path).suffix.lower() + + try: + # Handle CSV files + if file_extension == ".csv": + data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding) + # Handle Excel files + elif file_extension in [".xlsx", ".xls"]: + data = pd.read_excel(file_path) else: - break + print(f"Skipping unsupported file type: {file_path}") + return pd.DataFrame() + + # Call the appropriate bank-specific parsing function + ynab_data = parse_function(data) + return ynab_data + + except Exception as e: + print(f"Error processing file {file_path}: {e}") + raise e + return pd.DataFrame() - # Create dictionary with proper field names - row = {x[0]: x[1] for x in zip(in_header, raw_row)} - try: - if (result := convert_row(row)) is not None: - writer.writerow(result) - except Exception as e: - print(f'Error in row {row}') - raise e +def find_bank_config(filename): + """ + Find the appropriate bank configuration for a given filename + + Args: + filename (str): Name of the file to match + + Returns: + tuple: (bank_name, bank_config) or (None, None) if no match + """ + import fnmatch + + for bank_name, bank_config in BANKS.items(): + for pattern in bank_config["patterns"]: + if fnmatch.fnmatch(filename, pattern): + return bank_name, bank_config + + return None, None -def main(): - if len(sys.argv) != 2: - usage() - exit(1) +def convert_bank_statements_to_ynab(input_files=None): + """ + Convert bank statements to YNAB format + + Args: + input_files (list): Optional list of specific files to process + If None, processes all files in current directory + """ + current_directory = Path.cwd() + output_directory = current_directory / "YNAB_Outputs" + + # Create output directory if it doesn't exist + output_directory.mkdir(exist_ok=True) + + # Get list of files to process + if input_files: + print(f"Processing {len(input_files)} dragged file(s)...") + files_to_process = [Path(f) for f in input_files if Path(f).exists()] + else: + print("Processing all files in current directory...") + files_to_process = [] + # Collect all files matching any bank pattern + for bank_config in BANKS.values(): + for pattern in bank_config["patterns"]: + matching_files = glob.glob(str(current_directory / pattern)) + files_to_process.extend([Path(f) for f in matching_files]) + + files_processed = False + + # Process each file + for file_path in files_to_process: + if not file_path.exists(): + print(f"File not found: {file_path}") + continue + + # Find matching bank configuration + bank_name, bank_config = find_bank_config(file_path.name) + + if not bank_config: + print(f"No bank configuration found for file: {file_path.name}") + continue + + print(f"Processing file: {file_path} for {bank_name}") + + parse_function = bank_config["parse_function"] + delimiter = bank_config.get("delimiter", ",") + encoding = bank_config.get("encoding", "utf-8") + + # Process the file + ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding) + + if ynab_data.empty: + print(f"No data processed for {file_path}") + continue + + filename_placeholders = { + 'bank': bank_name, + 'first_date': ynab_data['Date'].min().date(), + 'last_date': ynab_data['Date'].max().date(), + } - filepath = Path(sys.argv[1]) + file_retry_count = 0 + while True: + output_filename = bank_config["output_filename"].format(**filename_placeholders) - new_basename = f'ynab-{filepath.name}' - new_filepath = filepath.parent / new_basename + if file_retry_count > 0: + output_filename += f" ({file_retry_count})" + + output_filename += ".csv" + output_file = output_directory / output_filename - with filepath.open(mode='r', encoding='latin1') as old_file: - reader = csv.reader(old_file, delimiter=';') + if not output_file.exists(): + break - with new_filepath.open(mode='w') as new_file: - writer = csv.writer(new_file) - convert(reader, writer) + file_retry_count += 1 + + # Export to CSV for YNAB import + ynab_data.to_csv(output_file, index=False) + print(f"Data saved to {output_file}") + files_processed = True + + if not files_processed: + print("No files were processed. Make sure your files match the expected patterns.") -if __name__ == '__main__': - main() +if __name__ == "__main__": + # Check if files were dragged onto the script + if len(sys.argv) > 1: + # Files were dragged - process them + files = sys.argv[1:] + convert_bank_statements_to_ynab(files) + else: + # No files dragged - run normal directory processing + convert_bank_statements_to_ynab() + + # Keep window open on Mac so user can see results + input("\nPress Enter to close...") \ No newline at end of file