|
- import glob
- import logging
- import pandas as pd
- from pathlib import Path
-
- from bank_parsers.sparebank1 import parse_sparebank1
- from bank_parsers.bank_norwegian import parse_bank_norwegian
- from bank_parsers.sparebanken_norge import parse_sparebanken_norge
-
-
# Output filename template shared by every bank below. The {bank},
# {first_date} and {last_date} placeholders are filled in with str.format
# by convert_bank_statements_to_ynab.
_OUTPUT_FILENAME_TEMPLATE = "YNAB-{bank}-FROM-{first_date}-TO-{last_date}.csv"

# Registry of supported banks, keyed by bank name. Each entry holds:
#   patterns        - filename wildcards used to recognise the bank's exports
#   parse_function  - bank-specific parser applied to the loaded DataFrame
#   output_filename - template for the generated YNAB CSV name
#   encoding        - optional; CSV text encoding (callers fall back to "utf-8")
#   delimiter       - optional; CSV field delimiter (callers fall back to ",")
# Entry order matters: find_bank_config returns the first bank that matches.
BANKS = {
    "SparebankenNorge": dict(
        patterns=["Transaksjoner*.csv"],
        encoding="latin1",
        output_filename=_OUTPUT_FILENAME_TEMPLATE,
        parse_function=parse_sparebanken_norge,
        delimiter=";",
    ),
    "Sparebank1": dict(
        patterns=["OversiktKonti*.csv"],
        output_filename=_OUTPUT_FILENAME_TEMPLATE,
        parse_function=parse_sparebank1,
        delimiter=";",
    ),
    "Norwegian": dict(
        patterns=["BankNorwegian*.xlsx", "Statement*.xlsx"],
        output_filename=_OUTPUT_FILENAME_TEMPLATE,
        parse_function=parse_bank_norwegian,
    ),
    # Add more banks and patterns as needed
}
-
-
def find_bank_config(filename):
    """
    Look up the bank whose filename patterns match the given filename

    Banks are tried in the order they appear in BANKS and the first bank
    with at least one matching pattern wins.

    Args:
        filename (str): Bare file name (no directory part) to match

    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    from fnmatch import fnmatch

    candidates = (
        (name, config)
        for name, config in BANKS.items()
        if any(fnmatch(filename, wildcard) for wildcard in config["patterns"])
    )
    # The generator is lazy, so this stops at the first matching bank.
    return next(candidates, (None, None))
-
-
def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Process a single bank statement file

    The file is loaded into a DataFrame according to its extension
    (.csv via pd.read_csv, .xlsx/.xls via pd.read_excel) and then handed
    to the bank-specific parse function.

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Function to parse the specific bank
            format; called with the loaded DataFrame
        delimiter (Optional[str]): Field delimiter (used for CSV files only)
        encoding (Optional[str]): Text encoding (used for CSV files only)

    Returns:
        pd.DataFrame: The result of parse_function, or an empty DataFrame
            when the file extension is not supported

    Raises:
        Exception: Any error raised while reading or parsing the file is
            logged and then re-raised unchanged
    """
    file_extension = Path(file_path).suffix.lower()

    try:
        # Handle CSV files
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        # Handle Excel files
        elif file_extension in (".xlsx", ".xls"):
            data = pd.read_excel(file_path)
        else:
            logging.warning(f"Skipping unsupported file type: {file_path}")
            return pd.DataFrame()

        # Call the appropriate bank-specific parsing function
        return parse_function(data)

    except Exception as e:
        logging.error(f"Error processing file {file_path}: {e}")
        # Bare raise keeps the original traceback intact for the caller.
        raise
-
-
def get_unique_filename(original_path):
    """
    Return a path that does not exist yet, derived from the given path

    If the path itself is free it is returned as a Path. Otherwise a
    counter suffix is appended to the stem -- "name(1).ext", "name(2).ext",
    and so on -- until a name is found that does not exist.

    Args:
        original_path (str | Path): Desired file path

    Returns:
        Path: The first candidate for which Path.exists() is false
    """
    base = Path(original_path)
    candidate = base
    attempt = 0

    while candidate.exists():
        logging.debug(f"File {candidate} exists. Looking for available alternative")
        attempt += 1
        # Always derive from the original stem so suffixes do not stack up.
        candidate = base.with_stem(base.stem + f"({attempt})")

    return candidate
-
-
def _collect_files_to_process(input_paths):
    """Expand the given files/directories into an ordered, duplicate-free list of files."""
    files_to_process = []
    for path in input_paths:
        if not path.exists():
            logging.warning(f"Path does not exist: {path}")
        elif path.is_file():
            files_to_process.append(path)
        elif path.is_dir():
            logging.debug(f"Looking for matching files in {path}")
            for bank_config in BANKS.values():
                for pattern in bank_config["patterns"]:
                    # Path.glob applies the wildcard to the pattern only, so
                    # glob metacharacters in the directory name stay literal.
                    files_to_process.extend(sorted(path.glob(pattern)))

    # A file can be listed twice (given explicitly and found in a directory,
    # or matched by two patterns); keep the first occurrence only.
    return list(dict.fromkeys(files_to_process))


def _finalize_source_file(file_path, archive_directory, on_success, overwrite):
    """Delete or archive a successfully converted source file, as requested by on_success."""
    if on_success == 'delete':
        logging.info(f"Deleting {file_path}")
        file_path.unlink()
    elif on_success == 'archive':
        archive_directory.mkdir(exist_ok=True, parents=True)
        file_archive_path = archive_directory / file_path.name
        if not overwrite:
            file_archive_path = get_unique_filename(file_archive_path)
        logging.debug(f"Archiving {file_path} to {file_archive_path}")
        # replace() overwrites an existing target on every platform, whereas
        # rename() raises on Windows when the target already exists.
        file_path.replace(file_archive_path)
    elif on_success and on_success != 'nothing':
        logging.warning(f"Invalid operation after conversion: {on_success}")


def convert_bank_statements_to_ynab(input_paths, output_directory, archive_directory, on_success, overwrite):
    """
    Convert bank statements to YNAB format

    Each input file is matched to a bank by its file name, parsed with that
    bank's parse function and written as a CSV into output_directory.
    Files with no matching bank configuration, or for which the parser
    returns an empty DataFrame, are skipped with a warning.

    Args:
        input_paths (list): Paths (pathlib.Path) of specific files or
            directories to process; directories are searched for files
            matching any pattern in BANKS
        output_directory (Path): Directory for the generated CSV files;
            created if missing
        archive_directory (Path): Directory that source files are moved to
            when on_success is 'archive'; created on demand
        on_success (str): What to do with a source file after a successful
            conversion: 'delete', 'archive' or 'nothing' (a falsy value is
            treated like 'nothing'); anything else logs a warning
        overwrite (bool): If false, existing output/archive files are kept
            and a "(n)" suffix is added to the new file's name instead

    Raises:
        Exception: Errors raised while reading or parsing a file propagate
            from process_bank_statement and stop the run
    """
    # Create output directory if it doesn't exist
    output_directory.mkdir(exist_ok=True, parents=True)

    # Get list of files to process
    files_to_process = _collect_files_to_process(input_paths)

    files_processed = False

    # Process each file
    logging.info(f"Processing {len(files_to_process)} file(s)...")
    for file_path in files_to_process:
        logging.debug(f"Processing {file_path}")
        if not file_path.exists():
            logging.warning(f"File not found: {file_path}")
            continue

        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)

        if not bank_config:
            logging.warning(f"No bank configuration found for file: {file_path.name}")
            continue

        logging.info(f"Processing file: {file_path} for {bank_name}")

        parse_function = bank_config["parse_function"]
        delimiter = bank_config.get("delimiter", ",")
        encoding = bank_config.get("encoding", "utf-8")

        # Process the file
        ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)

        if ynab_data.empty:
            logging.warning(f"No data processed for {file_path}")
            continue

        # NOTE(review): assumes the parser returns a datetime-like 'Date'
        # column (min()/max() must yield objects with a .date() method).
        filename_placeholders = {
            'bank': bank_name,
            'first_date': ynab_data['Date'].min().date(),
            'last_date': ynab_data['Date'].max().date(),
        }

        output_file = output_directory / Path(bank_config["output_filename"].format(**filename_placeholders))
        if not overwrite:
            output_file = get_unique_filename(output_file)

        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        logging.info(f"Data saved to {output_file}")
        files_processed = True

        _finalize_source_file(file_path, archive_directory, on_success, overwrite)

    if not files_processed:
        logging.warning("No files were processed. Make sure your files match the expected patterns.")
|