|
|
|
@@ -1,39 +1,49 @@ |
|
|
|
#!/bin/python3 |
|
|
|
#!/usr/bin/env python3 |
|
|
|
""" |
|
|
|
Bank Statement to YNAB Converter |
|
|
|
Converts bank statements from various formats to YNAB-compatible CSV files |
|
|
|
""" |
|
|
|
|
|
|
|
import os |
|
|
|
import sys |
|
|
|
import csv |
|
|
|
import glob |
|
|
|
import re |
|
|
|
import pandas as pd |
|
|
|
from pathlib import Path |
|
|
|
|
|
|
|
# Last-four-digit card suffixes that are expected in memo text;
# convert_memo() drops a "*NNNN" token when NNNN is listed here.
whitelist_cards = ["7756"]
|
|
|
def parse_norwegian_number(value):
    """Convert a Norwegian-formatted number to a float.

    Norwegian notation uses a comma as the decimal separator and a space
    (often a non-breaking one) or a dot as the thousands separator, e.g.
    ``"1 234,56"`` or ``"1.234,56"``. Plain numbers and dot-decimal strings
    are accepted as well.

    Args:
        value: A string, a number, or a missing value (None, NaN or '').

    Returns:
        float: The parsed number, or 0.0 when the value is missing or cannot
        be interpreted as a number.
    """
    if pd.isna(value) or value == '':
        return 0.0

    # str.split() with no argument splits on every Unicode whitespace
    # character, so regular, non-breaking and narrow spaces used as
    # thousands separators are all removed here.
    str_value = ''.join(str(value).split())

    # Some exports use the typographic minus sign, which float() rejects.
    str_value = str_value.replace('\u2212', '-')

    if ',' in str_value:
        # The last comma is the decimal separator; dots before it can only
        # be thousands separators. Dots after it are left alone so that
        # malformed input still fails the float() conversion below.
        integer_part, _, fraction = str_value.rpartition(',')
        str_value = integer_part.replace('.', '') + '.' + fraction

    try:
        return float(str_value)
    except ValueError:
        return 0.0
|
|
|
|
|
|
|
# Column names, in order, that convert() pairs with the fields of each raw
# input row.
in_header = [
    'Utført dato',
    'Bokført dato',
    'Rentedato',
    'Beskrivelse',
    'Type',
    'Undertype',
    'Fra konto',
    'Avsender',
    'Til konto',
    'Mottakernavn',
    'Beløp inn',
    'Beløp ut',
    'Valuta',
    'Status',
    'Melding',
]

# Header row that convert() writes at the top of the output CSV.
out_header = ['Date', 'Payee', 'Memo', 'Outflow', 'Inflow']
|
|
|
|
|
|
|
|
|
|
|
def usage():
    """Print the one-line command-line usage message to stdout."""
    print('Usage: ynab.py <filename>')
|
|
|
|
|
|
|
|
|
|
|
def is_reserved(row):
    """Return True when the row's 'Status' field equals "Reservert".

    Args:
        row: Mapping of column name to value; must contain 'Status'.

    Raises:
        KeyError: If the row has no 'Status' entry.
    """
    return row['Status'] == "Reservert"
|
|
|
def parse_norwegian_date(date_str):
    """Parse a DD.MM.YYYY date into a pandas Timestamp.

    Args:
        date_str: Date text such as "31.01.2024", or a missing value
            (None, NaN or '').

    Returns:
        pandas.Timestamp for a valid date, or pandas.NaT when the value is
        missing. NaT (rather than an empty string) keeps a column built from
        these results datetime-typed, so min()/max() keep working and the
        cell is still written as an empty field by DataFrame.to_csv.

    Raises:
        SystemExit: With status 1 when the value is present but does not
            match DD.MM.YYYY; the offending value is printed first.
    """
    if pd.isna(date_str) or date_str == '':
        return pd.NaT

    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        print(f"Invalid date format: {date_str}")
        # sys.exit rather than the interactive-shell helper exit(), which is
        # only installed by the site module.
        sys.exit(1)
|
|
|
|
|
|
|
def convert_memo(original): |
|
|
|
original = original.replace(" Kurs: 1.0000", "") |
|
|
|
words = original.split(" ") |
|
|
|
|
|
|
|
for i in range(20): |
|
|
|
while len(words) > 0: |
|
|
|
if words[0] == "": |
|
|
|
# It's empty |
|
|
|
del words[0] |
|
|
|
elif m := re.match(r'\*(\d{4})', words[0]): |
|
|
|
# It's the last four digits of a card |
|
|
|
if m.groups()[0] in whitelist_cards: |
|
|
|
# It's an expected card, ignore it |
|
|
|
del words[0] |
|
|
|
else: |
|
|
|
# It's an unexpected card, move it to the end |
|
|
|
words.append(words.pop(0)) |
|
|
|
# It's the last four numbers of the card, ignore it |
|
|
|
del words[0] |
|
|
|
elif m := re.match(r'\d{2}\.\d{2}', words[0]): |
|
|
|
# It's the date. Move it to the end |
|
|
|
words.append(words.pop(0)) |
|
|
|
@@ -49,71 +59,258 @@ def convert_memo(original): |
|
|
|
words.append(words.pop(0)) |
|
|
|
else: |
|
|
|
break |
|
|
|
else: |
|
|
|
raise Exception(f"Infinite loop while parsing \"{original}\"") |
|
|
|
|
|
|
|
|
|
|
|
return " ".join(words) |
|
|
|
|
|
|
|
def _text_or_empty(value):
    """Return *value* as a string, mapping missing cells (None/NaN) to ''."""
    return '' if pd.isna(value) else str(value)


def parse_bank_sor(data):
    """
    Parse bank data for the bank registered with this function in BANKS

    Columns read: Status, Valuta, Beskrivelse, Melding/KID/Fakt.nr,
    Bokført dato, Beløp ut, Beløp inn

    Only rows whose Status is "Bokført" are converted; all other rows are
    skipped.

    Args:
        data (pd.DataFrame): Raw statement rows

    Returns:
        pd.DataFrame: One row per converted transaction with the columns
        Date, Payee, Memo, Outflow, Inflow

    Raises:
        ValueError: If a converted row's currency is not NOK
    """
    result = []

    for _, row in data.iterrows():
        if row.get('Status') != "Bokført":
            continue

        currency = row.get('Valuta')
        if currency != 'NOK':
            raise ValueError(f"Unknown currency {currency}")

        # Empty cells arrive from pandas as NaN (a float); convert_memo()
        # needs a string, so normalise missing text to '' first.
        payee = convert_memo(_text_or_empty(row.get('Beskrivelse', '')))
        memo = convert_memo(_text_or_empty(row.get('Melding/KID/Fakt.nr', '')))

        result.append({
            'Date': parse_norwegian_date(row.get('Bokført dato')),
            'Payee': payee,
            'Memo': memo,
            # The outflow amount is negated before it is stored.
            'Outflow': -float(row['Beløp ut'] or '0'),
            'Inflow': float(row['Beløp inn'] or '0'),
        })

    return pd.DataFrame(result)


def convert_row(row):
    """Convert one input row (a mapping keyed by column name) to an output row.

    NOTE(review): this looks like the row converter used by convert(), which
    builds its rows from in_header -- confirm it is still needed.

    Returns:
        list | None: [date, payee, memo, outflow, inflow], or None when the
        row is reserved (see is_reserved).

    Raises:
        ValueError: If the row's currency is not NOK
    """
    if is_reserved(row):
        return None

    if row['Valuta'] != 'NOK':
        raise ValueError(f"Unknown currency {row['Valuta']}")

    return [
        row['Bokført dato'],
        row['Beskrivelse'],
        convert_memo(row['Melding']),
        -float(row['Beløp ut'] or '0'),
        float(row['Beløp inn'] or '0'),
    ]
|
|
|
|
|
|
|
def parse_bank_sparebank1(data):
    """
    Parse Sparebank 1 bank data

    Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
    (this function reads Dato, Beskrivelse, Inn, Ut and Til konto)

    Args:
        data (pd.DataFrame): Raw statement rows

    Returns:
        pd.DataFrame: One row per input row with the columns
        Date, Payee, Memo, Outflow, Inflow
    """
    result = []

    for _, row in data.iterrows():
        inflow = parse_norwegian_number(row.get('Inn'))
        # Outflow is always stored as a non-negative amount, whatever sign
        # the export used.
        outflow = abs(parse_norwegian_number(row.get('Ut')))

        result.append({
            'Date': parse_norwegian_date(row.get('Dato', '')),
            'Payee': row.get('Beskrivelse', ''),
            'Memo': row.get('Til konto', ''),
            'Outflow': outflow,
            'Inflow': inflow,
        })

    return pd.DataFrame(result)
|
|
|
|
|
|
|
def convert(reader, writer): |
|
|
|
writer.writerow(out_header) |
|
|
|
|
|
|
|
# Ignore header |
|
|
|
reader.__next__() |
|
|
|
def parse_bank_norwegian(data):
    """
    Parse Norwegian bank data

    Expected columns: TransactionDate, Text, Memo, Amount

    The signed Amount is split into two non-negative columns: positive
    amounts become Inflow, negative amounts become Outflow.

    Args:
        data (pd.DataFrame): Raw statement rows

    Returns:
        pd.DataFrame: One row per input row with the columns
        Date, Payee, Memo, Outflow, Inflow
    """
    result = []

    for _, row in data.iterrows():
        amount = row.get('Amount', 0)
        # A missing (NaN) amount fails both comparisons, so both flows are 0.
        inflow = amount if amount > 0 else 0
        outflow = -amount if amount < 0 else 0  # Make outflow positive

        result.append({
            'Date': row.get('TransactionDate', ''),
            'Payee': row.get('Text', ''),
            'Memo': row.get('Memo', ''),
            'Outflow': outflow,
            'Inflow': inflow,
        })

    return pd.DataFrame(result)
|
|
|
|
|
|
|
for raw_row in reader: |
|
|
|
# Stop when we hit an empty row |
|
|
|
for field in raw_row: |
|
|
|
if field: |
|
|
|
break |
|
|
|
|
|
|
|
# Dictionary of banks, filename patterns, and parsing functions.
#
# Per-bank keys:
#   patterns         - filename globs that identify the bank's export files
#   parse_function   - converts the loaded DataFrame to the YNAB columns
#   output_filename  - str.format template; receives bank, first_date, last_date
#   delimiter        - optional CSV field delimiter (callers default to ",")
#   encoding         - optional CSV text encoding (callers default to "utf-8")
BANKS = {
    "SparebankenNorge": {
        "patterns": ["Transaksjoner*.csv"],
        "encoding": "latin1",
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sor,
        "delimiter": ";",
    },
    "Sparebank1": {
        "patterns": ["OversiktKonti*.csv"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sparebank1,
        "delimiter": ";",
    },
    "Norwegian": {
        "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_norwegian,
    },
    # Add more banks and patterns as needed
}
|
|
|
|
|
|
|
|
|
|
|
def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Process a single bank statement file

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Function to parse the specific bank format;
            it receives the loaded DataFrame and its return value is returned
        delimiter (Optional<str>): Field delimiter (used for CSV files only)
        encoding (Optional<str>): Text encoding (used for CSV files only)

    Returns:
        pd.DataFrame: Processed YNAB-compatible data, or an empty DataFrame
        when the file extension is not .csv, .xlsx or .xls

    Raises:
        Exception: Any error raised while reading or parsing the file is
            reported on stdout and then re-raised unchanged
    """
    file_extension = Path(file_path).suffix.lower()

    try:
        # Handle CSV files
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        # Handle Excel files
        elif file_extension in (".xlsx", ".xls"):
            data = pd.read_excel(file_path)
        else:
            print(f"Skipping unsupported file type: {file_path}")
            return pd.DataFrame()

        # Call the appropriate bank-specific parsing function
        return parse_function(data)

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        # Bare raise keeps the original traceback intact.
        raise
|
|
|
|
|
|
|
# Create dictionary with proper field names |
|
|
|
row = {x[0]: x[1] for x in zip(in_header, raw_row)} |
|
|
|
|
|
|
|
try: |
|
|
|
if (result := convert_row(row)) is not None: |
|
|
|
writer.writerow(result) |
|
|
|
except Exception as e: |
|
|
|
print(f'Error in row {row}') |
|
|
|
raise e |
|
|
|
def find_bank_config(filename):
    """
    Find the appropriate bank configuration for a given filename

    Banks are tried in the order they appear in BANKS, and the first bank
    with a matching pattern wins.

    Args:
        filename (str): Name of the file to match

    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    import fnmatch

    for bank_name, bank_config in BANKS.items():
        if any(fnmatch.fnmatch(filename, pattern) for pattern in bank_config["patterns"]):
            return bank_name, bank_config

    return None, None
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Convert the single CSV file named on the command line.

    Reads sys.argv[1] as a ';'-separated latin1 CSV and writes the converted
    rows to 'ynab-<original name>' in the same directory.

    NOTE(review): this appears to be the earlier single-file entry point that
    coexists with convert_bank_statements_to_ynab() -- confirm which one is
    meant to stay.

    Raises:
        SystemExit: With status 1 (after printing usage) unless exactly one
            argument is given.
    """
    if len(sys.argv) != 2:
        usage()
        sys.exit(1)

    filepath = Path(sys.argv[1])
    new_basename = f'ynab-{filepath.name}'
    new_filepath = filepath.parent / new_basename

    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for both readers and writers.
    with filepath.open(mode='r', encoding='latin1', newline='') as old_file:
        reader = csv.reader(old_file, delimiter=';')

        with new_filepath.open(mode='w', newline='') as new_file:
            writer = csv.writer(new_file)
            convert(reader, writer)


def convert_bank_statements_to_ynab(input_files=None):
    """
    Convert bank statements to YNAB format

    Each input file is matched to a bank via find_bank_config(), parsed with
    process_bank_statement(), and written as a CSV file into the
    "YNAB_Outputs" directory under the current working directory. An
    existing output file is never overwritten; " (N)" is appended to the
    name instead.

    Args:
        input_files (list): Optional list of specific files to process
                            If None, processes all files in current directory
                            that match a pattern in BANKS
    """
    current_directory = Path.cwd()
    output_directory = current_directory / "YNAB_Outputs"

    # Create output directory if it doesn't exist
    output_directory.mkdir(exist_ok=True)

    # Get list of files to process
    if input_files:
        print(f"Processing {len(input_files)} dragged file(s)...")
        # Missing paths stay in the list so the loop below can report them.
        files_to_process = [Path(f) for f in input_files]
    else:
        print("Processing all files in current directory...")
        matches = []
        # Collect all files matching any bank pattern. Path.glob applies the
        # pattern to file names only, so glob metacharacters in the directory
        # path itself cannot break the match.
        for bank_config in BANKS.values():
            for pattern in bank_config["patterns"]:
                matches.extend(sorted(current_directory.glob(pattern)))
        # dict.fromkeys drops files matched by more than one pattern while
        # preserving order.
        files_to_process = list(dict.fromkeys(matches))

    files_processed = False

    # Process each file
    for file_path in files_to_process:
        if not file_path.exists():
            print(f"File not found: {file_path}")
            continue

        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)

        if not bank_config:
            print(f"No bank configuration found for file: {file_path.name}")
            continue

        print(f"Processing file: {file_path} for {bank_name}")

        parse_function = bank_config["parse_function"]
        delimiter = bank_config.get("delimiter", ",")
        encoding = bank_config.get("encoding", "utf-8")

        # Process the file
        ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)

        if ynab_data.empty:
            print(f"No data processed for {file_path}")
            continue

        # Coerce to datetimes for naming only, so blank or non-datetime Date
        # values cannot make min()/max() fail; the data itself is untouched.
        dates = pd.to_datetime(ynab_data['Date'], errors='coerce').dropna()
        filename_placeholders = {
            'bank': bank_name,
            'first_date': dates.min().date() if not dates.empty else 'unknown',
            'last_date': dates.max().date() if not dates.empty else 'unknown',
        }
        base_filename = bank_config["output_filename"].format(**filename_placeholders)

        # Find the first output name that does not exist yet.
        file_retry_count = 0
        while True:
            output_filename = base_filename
            if file_retry_count > 0:
                output_filename += f" ({file_retry_count})"
            output_filename += ".csv"
            output_file = output_directory / output_filename

            if not output_file.exists():
                break

            file_retry_count += 1

        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
        files_processed = True

    if not files_processed:
        print("No files were processed. Make sure your files match the expected patterns.")
|
|
|
|
|
|
|
|
|
|
|
# NOTE(review): two entry-point guards are present. They look like the
# before/after versions of the same script; with both in place main() runs
# first and exits unless exactly one argument was given -- confirm which one
# should remain.
if __name__ == '__main__':
    main()

if __name__ == "__main__":
    # Check if files were dragged onto the script
    if len(sys.argv) > 1:
        # Files were dragged - process them
        files = sys.argv[1:]
        convert_bank_statements_to_ynab(files)
    else:
        # No files dragged - run normal directory processing
        convert_bank_statements_to_ynab()

    # Keep window open on Mac so user can see results
    try:
        input("\nPress Enter to close...")
    except EOFError:
        # stdin is not interactive (piped or closed), so there is no window
        # to hold open and nothing to wait for.
        pass