From ca1f9c3a67eaac8c289782ea1407d1480d5000c5 Mon Sep 17 00:00:00 2001 From: Sindre Stephansen Date: Mon, 22 Sep 2025 12:44:58 +0200 Subject: [PATCH] Restructure code --- src/bank_parsers/bank_norwegian.py | 23 ++ src/bank_parsers/helpers.py | 53 ++++ src/bank_parsers/sparebank1.py | 29 ++ src/bank_parsers/sparebanken_norge.py | 30 ++ src/convert.py | 171 ++++++++++++ src/main.py | 93 +++++++ ynab.py | 384 -------------------------- 7 files changed, 399 insertions(+), 384 deletions(-) create mode 100644 src/bank_parsers/bank_norwegian.py create mode 100644 src/bank_parsers/helpers.py create mode 100644 src/bank_parsers/sparebank1.py create mode 100644 src/bank_parsers/sparebanken_norge.py create mode 100644 src/convert.py create mode 100644 src/main.py delete mode 100755 ynab.py diff --git a/src/bank_parsers/bank_norwegian.py b/src/bank_parsers/bank_norwegian.py new file mode 100644 index 0000000..1ffb87a --- /dev/null +++ b/src/bank_parsers/bank_norwegian.py @@ -0,0 +1,23 @@ +import pandas as pd + +def parse_bank_norwegian(data): + """ + Parse Norwegian bank data + Expected columns: TransactionDate, Text, Memo, Amount + """ + result = [] + + for _, row in data.iterrows(): + amount = row.get('Amount', 0) + inflow = amount if amount > 0 else 0 + outflow = -amount if amount < 0 else 0 # Make outflow positive + + result.append({ + 'Date': row.get('TransactionDate', ''), + 'Payee': row.get('Text', ''), + 'Memo': row.get('Memo', ''), + 'Outflow': outflow, + 'Inflow': inflow + }) + + return pd.DataFrame(result) \ No newline at end of file diff --git a/src/bank_parsers/helpers.py b/src/bank_parsers/helpers.py new file mode 100644 index 0000000..3d49df2 --- /dev/null +++ b/src/bank_parsers/helpers.py @@ -0,0 +1,53 @@ +import re +import pandas as pd + +def parse_norwegian_number(value): + """Convert Norwegian number format (comma decimal) to float""" + if pd.isna(value) or value == '': + return 0.0 + # Convert to string and replace comma with dot + str_value = str(value).replace(',', '.') + try: + return float(str_value) + except ValueError: + return 0.0 + +def parse_norwegian_date(date_str): + """Convert DD.MM.YYYY format to YYYY-MM-DD""" + if pd.isna(date_str) or date_str == '': + return '' + try: + # Parse DD.MM.YYYY and convert to date object + return pd.to_datetime(date_str, format='%d.%m.%Y') + except (ValueError, TypeError): + logger.error(f"Invalid date format: {date_str}") + exit(1) + +def convert_memo(original): + original = original.replace(" Kurs: 1.0000", "") + words = original.split(" ") + + while len(words) > 0: + if words[0] == "": + # It's empty + del words[0] + elif m := re.match(r'\*(\d{4})', words[0]): + # It's the last four numbers of the card, ignore it + del words[0] + elif m := re.match(r'\d{2}\.\d{2}', words[0]): + # It's the date. Move it to the end + words.append(words.pop(0)) + elif (m1 := re.match(r'^[A-Z]{3}$', words[0])) and (m2 := re.match(r'[\d]+\.[\d]+', words[1])): + # It's the original currency + if words[0] == "NOK": + # It's Norwegian kroner, ignoring + del words[0] + del words[0] + else: + # It's some other currency, move it to the end + words.append(words.pop(0)) + words.append(words.pop(0)) + else: + break + + return " ".join(words) diff --git a/src/bank_parsers/sparebank1.py b/src/bank_parsers/sparebank1.py new file mode 100644 index 0000000..6e58f12 --- /dev/null +++ b/src/bank_parsers/sparebank1.py @@ -0,0 +1,29 @@ +import pandas as pd + +from bank_parsers.helpers import parse_norwegian_number, parse_norwegian_date + + +def parse_sparebank1(data): + """ + Parse Sparebank 1 bank data + Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto + """ + result = [] + + for _, row in data.iterrows(): + inflow = parse_norwegian_number(row.get('Inn')) + outflow = parse_norwegian_number(row.get('Ut')) + + # Convert outflow to positive if negative + if outflow < 0: + outflow = -outflow + + result.append({ + 'Date': parse_norwegian_date(row.get('Dato', '')), + 'Payee': row.get('Beskrivelse', ''), + 'Memo': row.get('Til konto', ''), + 'Outflow': outflow, + 'Inflow': inflow + }) + + return pd.DataFrame(result) diff --git a/src/bank_parsers/sparebanken_norge.py b/src/bank_parsers/sparebanken_norge.py new file mode 100644 index 0000000..b68860b --- /dev/null +++ b/src/bank_parsers/sparebanken_norge.py @@ -0,0 +1,30 @@ +import pandas as pd + +from bank_parsers.helpers import parse_norwegian_date, convert_memo + + +def parse_sparebanken_norge(data): + """ + Parse Sparebanken Norge bank data + """ + result = [] + + for _, row in data.iterrows(): + if row.get('Status') != "Bokført": + continue + + if row.get('Valuta') != 'NOK': + raise ValueError(f"Unknown currency {row['Valuta']}") + + payee = convert_memo(row.get('Beskrivelse', '')) + memo = convert_memo(row.get('Melding/KID/Fakt.nr', '')) + + result.append({ + 'Date': parse_norwegian_date(row.get('Bokført dato')), + 'Payee': payee, + 'Memo': memo, + 'Outflow': -float(row['Beløp ut'] or '0'), + 'Inflow': float(row['Beløp inn'] or '0'), + }) + + return pd.DataFrame(result) \ No newline at end of file diff --git a/src/convert.py b/src/convert.py new file mode 100644 index 0000000..460d90f --- /dev/null +++ b/src/convert.py @@ -0,0 +1,171 @@ +import glob +import logging +import pandas as pd +from pathlib import Path + +from bank_parsers.sparebank1 import parse_sparebank1 +from bank_parsers.bank_norwegian import parse_bank_norwegian +from bank_parsers.sparebanken_norge import parse_sparebanken_norge + + +# Dictionary of banks, filename patterns, and parsing functions +BANKS = { + "SparebankenNorge": { + "patterns": ["Transaksjoner*.csv"], + "encoding": "latin1", + "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", + "parse_function": parse_sparebanken_norge, + "delimiter": ";" + }, + "Sparebank1": { + "patterns": ["OversiktKonti*.csv"], + "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", + "parse_function": parse_sparebank1, + "delimiter": ";" + }, + "Norwegian": { + "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"], + "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", + "parse_function": parse_bank_norwegian + } + # Add more banks and patterns as needed +} + + +def find_bank_config(filename): + """ + Find the appropriate bank configuration for a given filename + + Args: + filename (str): Name of the file to match + + Returns: + tuple: (bank_name, bank_config) or (None, None) if no match + """ + import fnmatch + + for bank_name, bank_config in BANKS.items(): + for pattern in bank_config["patterns"]: + if fnmatch.fnmatch(filename, pattern): + return bank_name, bank_config + + return None, None + + +def process_bank_statement(file_path, parse_function, delimiter, encoding): + """ + Process a single bank statement file + + Args: + file_path (str): Path to the bank statement file + parse_function (callable): Function to parse the specific bank format + delimiter (Optional): Field delimiter + + Returns: + pd.DataFrame: Processed YNAB-compatible data + """ + file_extension = Path(file_path).suffix.lower() + + try: + # Handle CSV files + if file_extension == ".csv": + data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding) + # Handle Excel files + elif file_extension in [".xlsx", ".xls"]: + data = pd.read_excel(file_path) + else: + logging.warning(f"Skipping unsupported file type: {file_path}") + return pd.DataFrame() + + # Call the appropriate bank-specific parsing function + ynab_data = parse_function(data) + return ynab_data + + except Exception as e: + logging.error(f"Error processing file {file_path}: {e}") + raise e + return pd.DataFrame() + + +def convert_bank_statements_to_ynab(input_paths, output_directory): + """ + Convert bank statements to YNAB format + + Args: + input_paths (list): List of specific files or directories to process + """ + # Create output directory if it doesn't exist + output_directory.mkdir(exist_ok=True, parents=True) + + # Get list of files to process + files_to_process = [] + for path in input_paths: + if not path.exists(): + logging.warning(f"Path does not exist: {file_path}") + elif path.is_file(): + files_to_process.append(path) + elif path.is_dir(): + logging.debug(f"Looking for matching files in {path}") + for bank_config in BANKS.values(): + for pattern in bank_config["patterns"]: + matching_files = glob.glob(str(path / pattern)) + files_to_process.extend([Path(f) for f in matching_files]) + + files_processed = False + + # Process each file + logging.info(f"Processing {len(files_to_process)} file(s)...") + for file_path in files_to_process: + logging.debug(f"Processing {file_path}") + if not file_path.exists(): + logging.warning(f"File not found: {file_path}") + continue + + # Find matching bank configuration + bank_name, bank_config = find_bank_config(file_path.name) + + if not bank_config: + logging.warning(f"No bank configuration found for file: {file_path.name}") + continue + + logging.info(f"Processing file: {file_path} for {bank_name}") + + parse_function = bank_config["parse_function"] + delimiter = bank_config.get("delimiter", ",") + encoding = bank_config.get("encoding", "utf-8") + + # Process the file + ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding) + + if ynab_data.empty: + logging.warning(f"No data processed for {file_path}") + continue + + filename_placeholders = { + 'bank': bank_name, + 'first_date': ynab_data['Date'].min().date(), + 'last_date': ynab_data['Date'].max().date(), + } + + file_retry_count = 0 + while True: + output_filename = bank_config["output_filename"].format(**filename_placeholders) + + if file_retry_count > 0: + output_filename += f" ({file_retry_count})" + + output_filename += ".csv" + output_file = output_directory / output_filename + + if not output_file.exists(): + break + + file_retry_count += 1 + + # Export to CSV for YNAB import + ynab_data.to_csv(output_file, index=False) + logging.info(f"Data saved to {output_file}") + files_processed = True + + if not files_processed: + logging.warning("No files were processed. Make sure your files match the expected patterns.") \ No newline at end of file diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..7518133 --- /dev/null +++ b/src/main.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +""" +Bank Statement to YNAB Converter +Converts bank statements from various formats to YNAB-compatible CSV files +""" + +import os +import sys +import argparse +import logging +import yaml +from pathlib import Path + +from convert import convert_bank_statements_to_ynab + + +def setup_logging(verbosity): + if verbosity <= 0: + log_level = logging.WARNING + elif verbosity == 1: + log_level = logging.INFO + elif verbosity >= 2: + log_level = logging.DEBUG + + logging.basicConfig( + level=log_level, + format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='%Y-%m-%dT%H:%M:%S', + ) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + prog='YNAB', + description='Prepare bank transcripts for import to You Need A Budget', + ) + + parser.add_argument( + 'filenames', + type=Path, + nargs='*', + help='The files to process', + ) + + parser.add_argument( + '-o', '--output-dir', + type=Path, + default=None, + help='The location to store the converted files', + ) + + parser.add_argument( + '-v', '--verbose', + default=0, + action='count', + help='Increase logging verbosity', + ) + + parser.add_argument( + '-c', '--config', + type=Path, + help='Path to the config file', + ) + + args = parser.parse_args() + + setup_logging(args.verbose) + + config_path = args.config or Path.home() / '.config/ynab/config.yaml' + if config_path and config_path.exists(): + with config_path.open('r') as config_file: + config = yaml.safe_load(config_file) + logging.debug(f"Loaded config file {config_path}") + else: + logging.debug(f"Could not find config file {config_path}") + config = {} + + verbosity = config.get('verbosity') + if args.verbose == 0 and verbosity: + setup_logging(verbosity) + + current_directory = Path.cwd() + + inputs = args.filenames or config.get('default_inputs') + if not inputs: + logging.info("Processing all files in current directory") + inputs = [current_directory] + + output_dir = args.output_dir or Path(config.get('output_dir')) + if not output_dir: + output_dir = current_directory / "YNAB_Outputs" + logging.debug(f"No output directory set. Defaulting to {output_dir}") + + convert_bank_statements_to_ynab(inputs, output_dir) \ No newline at end of file diff --git a/ynab.py b/ynab.py deleted file mode 100755 index 0616012..0000000 --- a/ynab.py +++ /dev/null @@ -1,384 +0,0 @@ -#!/usr/bin/env python3 -""" -Bank Statement to YNAB Converter -Converts bank statements from various formats to YNAB-compatible CSV files -""" - -import os -import sys -import glob -import re -import argparse -import logging -import yaml -import pandas as pd -from pathlib import Path - -logger = logging.getLogger() - -def parse_norwegian_number(value): - """Convert Norwegian number format (comma decimal) to float""" - if pd.isna(value) or value == '': - return 0.0 - # Convert to string and replace comma with dot - str_value = str(value).replace(',', '.') - try: - return float(str_value) - except ValueError: - return 0.0 - -def parse_norwegian_date(date_str): - """Convert DD.MM.YYYY format to YYYY-MM-DD""" - if pd.isna(date_str) or date_str == '': - return '' - try: - # Parse DD.MM.YYYY and convert to date object - return pd.to_datetime(date_str, format='%d.%m.%Y') - except (ValueError, TypeError): - logger.error(f"Invalid date format: {date_str}") - exit(1) - -def convert_memo(original): - original = original.replace(" Kurs: 1.0000", "") - words = original.split(" ") - - while len(words) > 0: - if words[0] == "": - # It's empty - del words[0] - elif m := re.match(r'\*(\d{4})', words[0]): - # It's the last four numbers of the card, ignore it - del words[0] - elif m := re.match(r'\d{2}\.\d{2}', words[0]): - # It's the date. Move it to the end - words.append(words.pop(0)) - elif (m1 := re.match(r'^[A-Z]{3}$', words[0])) and (m2 := re.match(r'[\d]+\.[\d]+', words[1])): - # It's the original currency - if words[0] == "NOK": - # It's Norwegian kroner, ignoring - del words[0] - del words[0] - else: - # It's some other currency, move it to the end - words.append(words.pop(0)) - words.append(words.pop(0)) - else: - break - - return " ".join(words) - -def parse_bank_sor(data): - """ - Parse Sparebank 1 bank data - Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto - """ - result = [] - - for _, row in data.iterrows(): - if row.get('Status') != "Bokført": - continue - - if row.get('Valuta') != 'NOK': - raise ValueError(f"Unknown currency {row['Valuta']}") - - payee = convert_memo(row.get('Beskrivelse', '')) - memo = convert_memo(row.get('Melding/KID/Fakt.nr', '')) - - result.append({ - 'Date': parse_norwegian_date(row.get('Bokført dato')), - 'Payee': payee, - 'Memo': memo, - 'Outflow': -float(row['Beløp ut'] or '0'), - 'Inflow': float(row['Beløp inn'] or '0'), - }) - - return pd.DataFrame(result) - -def parse_bank_sparebank1(data): - """ - Parse Sparebank 1 bank data - Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto - """ - result = [] - - for _, row in data.iterrows(): - inflow = parse_norwegian_number(row.get('Inn')) - outflow = parse_norwegian_number(row.get('Ut')) - - # Convert outflow to positive if negative - if outflow < 0: - outflow = -outflow - - result.append({ - 'Date': parse_norwegian_date(row.get('Dato', '')), - 'Payee': row.get('Beskrivelse', ''), - 'Memo': row.get('Til konto', ''), - 'Outflow': outflow, - 'Inflow': inflow - }) - - return pd.DataFrame(result) - - -def parse_bank_norwegian(data): - """ - Parse Norwegian bank data - Expected columns: TransactionDate, Text, Memo, Amount - """ - result = [] - - for _, row in data.iterrows(): - amount = row.get('Amount', 0) - inflow = amount if amount > 0 else 0 - outflow = -amount if amount < 0 else 0 # Make outflow positive - - result.append({ - 'Date': row.get('TransactionDate', ''), - 'Payee': row.get('Text', ''), - 'Memo': row.get('Memo', ''), - 'Outflow': outflow, - 'Inflow': inflow - }) - - return pd.DataFrame(result) - - -# Dictionary of banks, filename patterns, and parsing functions -BANKS = { - "SparebankenNorge": { - "patterns": ["Transaksjoner*.csv"], - "encoding": "latin1", - "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", - "parse_function": parse_bank_sor, - "delimiter": ";" - }, - "Sparebank1": { - "patterns": ["OversiktKonti*.csv"], - "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", - "parse_function": parse_bank_sparebank1, - "delimiter": ";" - }, - "Norwegian": { - "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"], - "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}", - "parse_function": parse_bank_norwegian - } - # Add more banks and patterns as needed -} - - -def process_bank_statement(file_path, parse_function, delimiter, encoding): - """ - Process a single bank statement file - - Args: - file_path (str): Path to the bank statement file - parse_function (callable): Function to parse the specific bank format - delimiter (Optional): Field delimiter - - Returns: - pd.DataFrame: Processed YNAB-compatible data - """ - file_extension = Path(file_path).suffix.lower() - - try: - # Handle CSV files - if file_extension == ".csv": - data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding) - # Handle Excel files - elif file_extension in [".xlsx", ".xls"]: - data = pd.read_excel(file_path) - else: - logger.warning(f"Skipping unsupported file type: {file_path}") - return pd.DataFrame() - - # Call the appropriate bank-specific parsing function - ynab_data = parse_function(data) - return ynab_data - - except Exception as e: - logger.error(f"Error processing file {file_path}: {e}") - raise e - return pd.DataFrame() - - -def find_bank_config(filename): - """ - Find the appropriate bank configuration for a given filename - - Args: - filename (str): Name of the file to match - - Returns: - tuple: (bank_name, bank_config) or (None, None) if no match - """ - import fnmatch - - for bank_name, bank_config in BANKS.items(): - for pattern in bank_config["patterns"]: - if fnmatch.fnmatch(filename, pattern): - return bank_name, bank_config - - return None, None - - -def convert_bank_statements_to_ynab(input_paths, output_directory): - """ - Convert bank statements to YNAB format - - Args: - input_paths (list): List of specific files or directories to process - """ - # Create output directory if it doesn't exist - output_directory.mkdir(exist_ok=True, parents=True) - - # Get list of files to process - files_to_process = [] - for path in input_paths: - if not path.exists(): - logger.warning(f"Path does not exist: {file_path}") - elif path.is_file(): - files_to_process.append(path) - elif path.is_dir(): - logger.debug(f"Looking for matching files in {path}") - for bank_config in BANKS.values(): - for pattern in bank_config["patterns"]: - matching_files = glob.glob(str(path / pattern)) - files_to_process.extend([Path(f) for f in matching_files]) - - files_processed = False - - # Process each file - logger.info(f"Processing {len(files_to_process)} file(s)...") - for file_path in files_to_process: - logger.debug(f"Processing {file_path}") - if not file_path.exists(): - logger.warning(f"File not found: {file_path}") - continue - - # Find matching bank configuration - bank_name, bank_config = find_bank_config(file_path.name) - - if not bank_config: - logger.warning(f"No bank configuration found for file: {file_path.name}") - continue - - logger.info(f"Processing file: {file_path} for {bank_name}") - - parse_function = bank_config["parse_function"] - delimiter = bank_config.get("delimiter", ",") - encoding = bank_config.get("encoding", "utf-8") - - # Process the file - ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding) - - if ynab_data.empty: - logger.warning(f"No data processed for {file_path}") - continue - - filename_placeholders = { - 'bank': bank_name, - 'first_date': ynab_data['Date'].min().date(), - 'last_date': ynab_data['Date'].max().date(), - } - - file_retry_count = 0 - while True: - output_filename = bank_config["output_filename"].format(**filename_placeholders) - - if file_retry_count > 0: - output_filename += f" ({file_retry_count})" - - output_filename += ".csv" - output_file = output_directory / output_filename - - if not output_file.exists(): - break - - file_retry_count += 1 - - # Export to CSV for YNAB import - ynab_data.to_csv(output_file, index=False) - logger.info(f"Data saved to {output_file}") - files_processed = True - - if not files_processed: - logger.warning("No files were processed. Make sure your files match the expected patterns.") - -def setup_logger(verbosity): - if verbosity <= 0: - log_level = logging.WARNING - elif verbosity == 1: - log_level = logging.INFO - elif verbosity >= 2: - log_level = logging.DEBUG - - logging.basicConfig( - level=log_level, - format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', - datefmt='%Y-%m-%dT%H:%M:%S', - ) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - prog='YNAB', - description='Prepare bank transcripts for import to You Need A Budget', - ) - - parser.add_argument( - 'filenames', - type=Path, - nargs='*', - help='The files to process', - ) - - parser.add_argument( - '-o', '--output-dir', - type=Path, - default=None, - help='The location to store the converted files', - ) - - parser.add_argument( - '-v', '--verbose', - default=0, - action='count', - help='Increase logging verbosity', - ) - - parser.add_argument( - '-c', '--config', - type=Path, - help='Path to the config file', - ) - - args = parser.parse_args() - - setup_logger(args.verbose) - - config_path = args.config or Path.home() / '.config/ynab/config.yaml' - if config_path and config_path.exists(): - with config_path.open('r') as config_file: - config = yaml.safe_load(config_file) - logger.debug(f"Loaded config file {config_path}") - else: - logger.debug(f"Could not find config file {config_path}") - config = {} - - verbosity = config.get('verbosity') - if args.verbose == 0 and verbosity: - setup_logger(verbosity) - - current_directory = Path.cwd() - - inputs = args.filenames or config.get('default_inputs') - if not inputs: - logger.info("Processing all files in current directory") - inputs = [current_directory] - - output_dir = args.output_dir or Path(config.get('output_dir')) - if not output_dir: - output_dir = current_directory / "YNAB_Outputs" - logger.debug(f"No output directory set. Defaulting to {output_dir}") - - convert_bank_statements_to_ynab(inputs, output_dir) \ No newline at end of file