import fnmatch
import glob
import logging
from pathlib import Path

import pandas as pd

from bank_parsers.sparebank1 import parse_sparebank1
from bank_parsers.bank_norwegian import parse_bank_norwegian
from bank_parsers.sparebanken_norge import parse_sparebanken_norge

# Dictionary of banks, filename patterns, and parsing functions.
#
# Keys read by this module for each bank entry:
#   patterns        -- glob-style filename patterns identifying the bank's exports
#   output_filename -- str.format template; receives bank, first_date, last_date
#   parse_function  -- callable taking the raw DataFrame and returning YNAB data
#   delimiter       -- optional CSV field delimiter (defaults to "," when absent)
#   encoding        -- optional CSV encoding (defaults to "utf-8" when absent)
BANKS = {
    "SparebankenNorge": {
        "patterns": ["Transaksjoner*.csv"],
        "encoding": "latin1",
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}.csv",
        "parse_function": parse_sparebanken_norge,
        "delimiter": ";",
    },
    "Sparebank1": {
        "patterns": ["OversiktKonti*.csv"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}.csv",
        "parse_function": parse_sparebank1,
        "delimiter": ";",
    },
    "Norwegian": {
        "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}.csv",
        "parse_function": parse_bank_norwegian,
    },
    # Add more banks and patterns as needed
}


def find_bank_config(filename):
    """
    Find the appropriate bank configuration for a given filename

    The first bank (in BANKS insertion order) with a matching pattern wins.

    Args:
        filename (str): Name of the file to match

    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    for bank_name, bank_config in BANKS.items():
        if any(fnmatch.fnmatch(filename, pattern) for pattern in bank_config["patterns"]):
            return bank_name, bank_config
    return None, None


def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Process a single bank statement file

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Function to parse the specific bank format
        delimiter (str): Field delimiter, used for CSV files only
        encoding (str): Text encoding, used for CSV files only

    Returns:
        pd.DataFrame: Processed YNAB-compatible data; an empty DataFrame when
            the file extension is not supported

    Raises:
        Exception: Any error raised while reading or parsing the file is
            logged and then re-raised unchanged.
    """
    file_extension = Path(file_path).suffix.lower()

    try:
        # Handle CSV files
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        # Handle Excel files
        elif file_extension in [".xlsx", ".xls"]:
            data = pd.read_excel(file_path)
        else:
            logging.warning(f"Skipping unsupported file type: {file_path}")
            return pd.DataFrame()

        # Call the appropriate bank-specific parsing function
        return parse_function(data)
    except Exception as e:
        logging.error(f"Error processing file {file_path}: {e}")
        # Bare raise keeps the original traceback intact for the caller.
        raise


def get_unique_filename(original_path):
    """
    Return a path that does not exist yet, derived from the given path

    If the path is free it is returned as-is; otherwise "(1)", "(2)", ... is
    appended to the stem until a non-existing path is found.

    Args:
        original_path (str or Path): Desired file path

    Returns:
        Path: The original path, or the first numbered variant that does not exist
    """
    base_path = Path(original_path)
    file_retry_count = 0
    while True:
        result = base_path
        if file_retry_count > 0:
            result = base_path.with_stem(base_path.stem + f"({file_retry_count})")
        if not result.exists():
            return result
        logging.debug(f"File {result} exists. Looking for available alternative")
        file_retry_count += 1


def convert_bank_statements_to_ynab(input_paths, output_directory, archive_directory, on_success, overwrite):
    """
    Convert bank statements to YNAB format

    Args:
        input_paths (list): List of specific files or directories (Path objects)
            to process. Directories are searched for every pattern in BANKS.
        output_directory (Path): Directory for the converted CSV files; created
            if it does not exist
        archive_directory (Path): Directory source files are moved to when
            on_success is 'archive'; created on demand
        on_success (str): What to do with a source file after conversion:
            'delete', 'archive' or 'nothing'. Any other non-empty value is
            reported with a warning and the file is left in place.
        overwrite (bool): When false, output and archive names are made unique
            instead of replacing existing files
    """
    # Create output directory if it doesn't exist
    output_directory.mkdir(exist_ok=True, parents=True)

    # Get list of files to process
    files_to_process = []
    for path in input_paths:
        if not path.exists():
            # Must report `path`: `file_path` is only bound by the loop below.
            logging.warning(f"Path does not exist: {path}")
        elif path.is_file():
            files_to_process.append(path)
        elif path.is_dir():
            logging.debug(f"Looking for matching files in {path}")
            for bank_config in BANKS.values():
                for pattern in bank_config["patterns"]:
                    matching_files = glob.glob(str(path / pattern))
                    files_to_process.extend(Path(f) for f in matching_files)

    files_processed = False

    # Process each file
    logging.info(f"Processing {len(files_to_process)} file(s)...")
    for file_path in files_to_process:
        logging.debug(f"Processing {file_path}")
        # A file listed twice may already have been deleted or archived.
        if not file_path.exists():
            logging.warning(f"File not found: {file_path}")
            continue

        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            logging.warning(f"No bank configuration found for file: {file_path.name}")
            continue

        logging.info(f"Processing file: {file_path} for {bank_name}")
        parse_function = bank_config["parse_function"]
        delimiter = bank_config.get("delimiter", ",")
        encoding = bank_config.get("encoding", "utf-8")

        # Process the file
        ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
        if ynab_data.empty:
            logging.warning(f"No data processed for {file_path}")
            continue

        # NOTE(review): assumes the parsers return a datetime-like 'Date'
        # column (min()/max() must yield objects with .date()) -- confirm.
        filename_placeholders = {
            'bank': bank_name,
            'first_date': ynab_data['Date'].min().date(),
            'last_date': ynab_data['Date'].max().date(),
        }
        output_file = output_directory / Path(bank_config["output_filename"].format(**filename_placeholders))
        if not overwrite:
            output_file = get_unique_filename(output_file)

        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        logging.info(f"Data saved to {output_file}")
        files_processed = True

        if on_success == 'delete':
            logging.info(f"Deleting {file_path}")
            file_path.unlink()
        elif on_success == 'archive':
            archive_directory.mkdir(exist_ok=True, parents=True)
            file_archive_path = archive_directory / file_path.name
            if not overwrite:
                file_archive_path = get_unique_filename(file_archive_path)
            logging.debug(f"Archiving {file_path} to {file_archive_path}")
            file_path.rename(file_archive_path)
        elif on_success and on_success != 'nothing':
            # This module logs through `logging`; no `logger` object is defined.
            logging.warning(f"Invalid operation after conversion: {on_success}")

    if not files_processed:
        logging.warning("No files were processed. Make sure your files match the expected patterns.")