Selaa lähdekoodia

Merge both versions of the script

master
Sindre Stephansen 2 kuukautta sitten
vanhempi
commit
fdd40de8c2
Allekirjoittanut: sindre <sindre@sindrestephansen.com> GPG Key ID: B06FC67D17A46ADE
2 muutettua tiedostoa jossa 265 lisäystä ja 319 poistoa
  1. +0
    -251
      ynab-karianne.py
  2. +265
    -68
      ynab.py

+ 0
- 251
ynab-karianne.py Näytä tiedosto

@@ -1,251 +0,0 @@
#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter
Converts bank statements from various formats to YNAB-compatible CSV files
"""

import os
import sys
import glob
import pandas as pd
from pathlib import Path

def parse_norwegian_number(value):
    """Convert a Norwegian-formatted number to a float.

    Norwegian amounts use a comma as the decimal separator and may group
    thousands with a space, a non-breaking space, or a dot
    (``"1 234,50"``, ``"1.234,50"``).

    Args:
        value: Raw cell value (string, number, or a pandas missing value).

    Returns:
        float: The parsed number, or ``0.0`` when the value is missing,
        empty, or cannot be read as a number.
    """
    if pd.isna(value) or value == '':
        return 0.0
    # str.split() with no argument splits on every Unicode whitespace
    # character, so this also removes non-breaking thousands separators.
    text = ''.join(str(value).split())
    # Dots that appear before the decimal comma can only be thousands
    # separators; a dot after the comma is left alone (and fails below).
    if ',' in text and text.rfind('.') < text.rfind(','):
        text = text.replace('.', '')
    try:
        return float(text.replace(',', '.'))
    except ValueError:
        return 0.0

def parse_norwegian_date(date_str):
    """Parse a ``DD.MM.YYYY`` date string into a pandas ``Timestamp``.

    Args:
        date_str: Date text such as ``"31.12.2024"``, or a missing value.

    Returns:
        pandas.Timestamp: The parsed date. ``pandas.NaT`` is returned for a
        missing or empty value so that a column of results keeps a datetime
        dtype (an empty string would turn it into a mixed object column
        that cannot be compared with ``min()``/``max()``).

    Raises:
        SystemExit: With status 1, after printing the offending value, when
        the text does not match ``DD.MM.YYYY``.
    """
    if pd.isna(date_str) or date_str == '':
        return pd.NaT
    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        print(f"Invalid date format: {date_str}")
        # sys.exit is always available; the bare exit() builtin is only
        # injected by the site module.
        sys.exit(1)

def parse_bank_sparebank1(data):
    """
    Turn a Sparebank 1 export into YNAB rows.

    Columns read from each row: Dato, Beskrivelse, Til konto, Inn, Ut.

    Args:
        data (pd.DataFrame): The loaded bank export.
    Returns:
        pd.DataFrame: One row per input row with the columns
        Date, Payee, Memo, Outflow, Inflow.
    """
    def to_ynab(record):
        money_in = parse_norwegian_number(record.get('Inn'))
        money_out = parse_norwegian_number(record.get('Ut'))
        # YNAB expects outflow as a positive amount.
        money_out = -money_out if money_out < 0 else money_out
        return {
            'Date': parse_norwegian_date(record.get('Dato', '')),
            'Payee': record.get('Beskrivelse', ''),
            'Memo': record.get('Til konto', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([to_ynab(record) for _, record in data.iterrows()])


def parse_bank_norwegian(data):
    """
    Turn a Bank Norwegian export into YNAB rows.

    Columns read from each row: TransactionDate, Text, Memo, Amount.
    A positive Amount becomes Inflow; a negative Amount becomes a positive
    Outflow; the other side is 0.

    Args:
        data (pd.DataFrame): The loaded bank export.
    Returns:
        pd.DataFrame: One row per input row with the columns
        Date, Payee, Memo, Outflow, Inflow.
    """
    def to_ynab(entry):
        signed = entry.get('Amount', 0)
        return {
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            # Flip the sign so money spent is reported as a positive number.
            'Outflow': -signed if signed < 0 else 0,
            'Inflow': signed if signed > 0 else 0,
        }

    return pd.DataFrame([to_ynab(entry) for _, entry in data.iterrows()])


# Dictionary of banks, filename patterns, and parsing functions
# Keys are bank names; each value is a config dict with:
#   "patterns"        - file-name wildcards used to find and recognise the
#                       bank's statement files (first matching bank wins)
#   "output_filename" - str.format template for the output file name; it is
#                       filled with bank, first_date and last_date
#   "parse_function"  - callable that turns the loaded DataFrame into YNAB rows
#   "delimiter"       - optional CSV field delimiter ("," when omitted)
BANKS = {
"Sparebank1": {
"patterns": ["OversiktKonti*.csv"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_sparebank1,
"delimiter": ";"
},
"Norwegian": {
"patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_norwegian
}
# Add more banks and patterns as needed
}


def process_bank_statement(file_path, parse_function, delimiter):
    """
    Load a single bank statement file and convert it with the bank's parser.

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Receives the loaded DataFrame and returns
            the YNAB-compatible DataFrame
        delimiter (Optional[str]): Field delimiter for CSV files; not used
            for Excel files
    Returns:
        pd.DataFrame: Processed YNAB-compatible data, or an empty DataFrame
        when the file extension is not .csv, .xlsx or .xls
    Raises:
        Exception: Any error raised while reading or parsing the file is
        printed and then re-raised unchanged.
    """
    file_extension = Path(file_path).suffix.lower()
    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter)
        elif file_extension in (".xlsx", ".xls"):
            data = pd.read_excel(file_path)
        else:
            print(f"Skipping unsupported file type: {file_path}")
            return pd.DataFrame()
        # Hand the raw table to the bank-specific parser.
        return parse_function(data)
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        # Re-raise the active exception; nothing after this line can run.
        raise


def find_bank_config(filename):
    """
    Look up which bank a statement file belongs to.

    Banks are tried in the order they appear in BANKS; the first bank with
    a pattern that matches the file name wins.

    Args:
        filename (str): Name of the file to match
    Returns:
        tuple: (bank_name, bank_config), or (None, None) if no pattern matches
    """
    from fnmatch import fnmatch

    for name, config in BANKS.items():
        if any(fnmatch(filename, wildcard) for wildcard in config["patterns"]):
            return name, config
    return None, None


def _next_free_output_path(output_directory, base_name):
    """Return the first ``base_name[ (n)].csv`` path that is not on disk yet."""
    candidate = output_directory / f"{base_name}.csv"
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = output_directory / f"{base_name} ({attempt}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_files=None):
    """
    Convert bank statements to YNAB-importable CSV files.

    Each statement is matched to a bank in BANKS by its file name, parsed,
    and written to the "YNAB_Outputs" directory under the current working
    directory. Existing output files are never overwritten; a " (n)" suffix
    is added to the name instead.

    Args:
        input_files (list): Optional list of specific files to process.
            If None or empty, every file in the current directory that
            matches a bank pattern is processed.
    """
    current_directory = Path.cwd()
    output_directory = current_directory / "YNAB_Outputs"
    output_directory.mkdir(exist_ok=True)

    if input_files:
        print(f"Processing {len(input_files)} dragged file(s)...")
        # Keep missing paths in the list so the loop below reports them
        # instead of dropping them silently.
        files_to_process = [Path(f) for f in input_files]
    else:
        print("Processing all files in current directory...")
        # Path.glob only interprets the pattern, so wildcard characters in
        # the directory name itself cannot break the search.
        files_to_process = [
            match
            for bank_config in BANKS.values()
            for pattern in bank_config["patterns"]
            for match in current_directory.glob(pattern)
        ]
    # A file may match several patterns or be passed twice; convert it once.
    files_to_process = list(dict.fromkeys(files_to_process))

    files_processed = False
    for file_path in files_to_process:
        if not file_path.exists():
            print(f"File not found: {file_path}")
            continue
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            print(f"No bank configuration found for file: {file_path.name}")
            continue
        print(f"Processing file: {file_path} for {bank_name}")
        ynab_data = process_bank_statement(
            str(file_path),
            bank_config["parse_function"],
            bank_config.get("delimiter", ","),
        )
        if ynab_data.empty:
            print(f"No data processed for {file_path}")
            continue
        base_name = bank_config["output_filename"].format(
            bank=bank_name,
            first_date=ynab_data['Date'].min().date(),
            last_date=ynab_data['Date'].max().date(),
        )
        output_file = _next_free_output_path(output_directory, base_name)
        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
        files_processed = True
    if not files_processed:
        print("No files were processed. Make sure your files match the expected patterns.")


if __name__ == "__main__":
    # Files dropped onto the script arrive as extra command-line arguments.
    dragged_files = sys.argv[1:]
    if dragged_files:
        convert_bank_statements_to_ynab(dragged_files)
    else:
        # Nothing was dropped: scan the current directory instead.
        convert_bank_statements_to_ynab()
    # Pause so the window stays open and the results can be read.
    input("\nPress Enter to close...")

+ 265
- 68
ynab.py Näytä tiedosto

@@ -1,39 +1,49 @@
#!/bin/python3
#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter
Converts bank statements from various formats to YNAB-compatible CSV files
"""

import os
import sys
import csv
import glob
import re
import pandas as pd
from pathlib import Path

# Last four digits of the cards that convert_memo treats as expected and
# removes from the text.
whitelist_cards = ["7756"]
def parse_norwegian_number(value):
    """Convert a Norwegian-formatted number to a float.

    Norwegian amounts use a comma as the decimal separator and may group
    thousands with a space, a non-breaking space, or a dot
    (``"1 234,50"``, ``"1.234,50"``).

    Args:
        value: Raw cell value (string, number, or a pandas missing value).

    Returns:
        float: The parsed number, or ``0.0`` when the value is missing,
        empty, or cannot be read as a number.
    """
    if pd.isna(value) or value == '':
        return 0.0
    # str.split() with no argument splits on every Unicode whitespace
    # character, so this also removes non-breaking thousands separators.
    text = ''.join(str(value).split())
    # Dots that appear before the decimal comma can only be thousands
    # separators; a dot after the comma is left alone (and fails below).
    if ',' in text and text.rfind('.') < text.rfind(','):
        text = text.replace('.', '')
    try:
        return float(text.replace(',', '.'))
    except ValueError:
        return 0.0

# Column names of the bank export, by position; convert() zips them with each
# raw row to build a name -> value dict.
in_header = ['Utført dato', 'Bokført dato', 'Rentedato', 'Beskrivelse', 'Type', 'Undertype', 'Fra konto', 'Avsender', 'Til konto', 'Mottakernavn', 'Beløp inn', 'Beløp ut', 'Valuta', 'Status', 'Melding']
# Header row that convert() writes first to the output CSV.
out_header = ['Date', 'Payee', 'Memo', 'Outflow', 'Inflow']


def usage():
    """Print the one-line command-line usage message."""
    message = 'Usage: ynab.py <filename>'
    print(message)


def is_reserved(row):
    """Return True when the row's 'Status' field equals "Reservert"."""
    status = row['Status']
    return status == "Reservert"
def parse_norwegian_date(date_str):
    """Parse a ``DD.MM.YYYY`` date string into a pandas ``Timestamp``.

    Args:
        date_str: Date text such as ``"31.12.2024"``, or a missing value.

    Returns:
        pandas.Timestamp: The parsed date. ``pandas.NaT`` is returned for a
        missing or empty value so that a column of results keeps a datetime
        dtype (an empty string would turn it into a mixed object column
        that cannot be compared with ``min()``/``max()``).

    Raises:
        SystemExit: With status 1, after printing the offending value, when
        the text does not match ``DD.MM.YYYY``.
    """
    if pd.isna(date_str) or date_str == '':
        return pd.NaT
    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        print(f"Invalid date format: {date_str}")
        # sys.exit is always available; the bare exit() builtin is only
        # injected by the site module.
        sys.exit(1)

def convert_memo(original):
original = original.replace(" Kurs: 1.0000", "")
words = original.split(" ")

for i in range(20):
while len(words) > 0:
if words[0] == "":
# It's empty
del words[0]
elif m := re.match(r'\*(\d{4})', words[0]):
# It's the last four digits of a card
if m.groups()[0] in whitelist_cards:
# It's an expected card, ignore it
del words[0]
else:
# It's an unexpected card, move it to the end
words.append(words.pop(0))
# It's the last four numbers of the card, ignore it
del words[0]
elif m := re.match(r'\d{2}\.\d{2}', words[0]):
# It's the date. Move it to the end
words.append(words.pop(0))
@@ -49,71 +59,258 @@ def convert_memo(original):
words.append(words.pop(0))
else:
break
else:
raise Exception(f"Infinite loop while parsing \"{original}\"")


return " ".join(words)

def parse_bank_sor(data):
    """
    Parse a Sparebanken Norge transaction export.

    Columns read from each row: Status, Valuta, Beskrivelse,
    Melding/KID/Fakt.nr, Bokført dato, Beløp ut, Beløp inn.
    Only rows whose Status is "Bokført" are converted; all other rows are
    skipped.

    Args:
        data (pd.DataFrame): The loaded bank export.
    Returns:
        pd.DataFrame: One row per booked transaction with the columns
        Date, Payee, Memo, Outflow, Inflow.
    Raises:
        ValueError: If a booked row has a currency other than NOK.
    """
    def as_text(cell):
        # pandas reports empty cells as NaN (a float), which convert_memo
        # cannot process as a string.
        return '' if pd.isna(cell) else str(cell)

    def as_amount(cell):
        # Empty cells (NaN or '') count as zero instead of becoming NaN.
        return 0.0 if pd.isna(cell) or cell == '' else float(cell)

    result = []
    for _, row in data.iterrows():
        if row.get('Status') != "Bokført":
            continue

        if row.get('Valuta') != 'NOK':
            raise ValueError(f"Unknown currency {row['Valuta']}")

        result.append({
            'Date': parse_norwegian_date(row.get('Bokført dato')),
            'Payee': convert_memo(as_text(row.get('Beskrivelse', ''))),
            'Memo': convert_memo(as_text(row.get('Melding/KID/Fakt.nr', ''))),
            # Same sign flip as plain negation, but subtracting from 0.0
            # yields 0.0 rather than -0.0 for an empty amount.
            'Outflow': 0.0 - as_amount(row['Beløp ut']),
            'Inflow': as_amount(row['Beløp inn']),
        })
    return pd.DataFrame(result)

def parse_bank_sparebank1(data):
    """
    Turn a Sparebank 1 export into YNAB rows.

    Columns read from each row: Dato, Beskrivelse, Til konto, Inn, Ut.

    Args:
        data (pd.DataFrame): The loaded bank export.
    Returns:
        pd.DataFrame: One row per input row with the columns
        Date, Payee, Memo, Outflow, Inflow.
    """
    def to_ynab(record):
        money_in = parse_norwegian_number(record.get('Inn'))
        money_out = parse_norwegian_number(record.get('Ut'))
        # YNAB expects outflow as a positive amount.
        money_out = -money_out if money_out < 0 else money_out
        return {
            'Date': parse_norwegian_date(record.get('Dato', '')),
            'Payee': record.get('Beskrivelse', ''),
            'Memo': record.get('Til konto', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([to_ynab(record) for _, record in data.iterrows()])

def convert(reader, writer):
writer.writerow(out_header)

# Ignore header
reader.__next__()
def parse_bank_norwegian(data):
    """
    Turn a Bank Norwegian export into YNAB rows.

    Columns read from each row: TransactionDate, Text, Memo, Amount.
    A positive Amount becomes Inflow; a negative Amount becomes a positive
    Outflow; the other side is 0.

    Args:
        data (pd.DataFrame): The loaded bank export.
    Returns:
        pd.DataFrame: One row per input row with the columns
        Date, Payee, Memo, Outflow, Inflow.
    """
    def to_ynab(entry):
        signed = entry.get('Amount', 0)
        return {
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            # Flip the sign so money spent is reported as a positive number.
            'Outflow': -signed if signed < 0 else 0,
            'Inflow': signed if signed > 0 else 0,
        }

    return pd.DataFrame([to_ynab(entry) for _, entry in data.iterrows()])

for raw_row in reader:
# Stop when we hit an empty row
for field in raw_row:
if field:
break

# Dictionary of banks, filename patterns, and parsing functions
# Keys are bank names; each value is a config dict with:
#   "patterns"        - file-name wildcards used to find and recognise the
#                       bank's statement files (first matching bank wins)
#   "encoding"        - optional text encoding for CSV files ("utf-8" when
#                       omitted)
#   "output_filename" - str.format template for the output file name; it is
#                       filled with bank, first_date and last_date
#   "parse_function"  - callable that turns the loaded DataFrame into YNAB rows
#   "delimiter"       - optional CSV field delimiter ("," when omitted)
BANKS = {
"SparebankenNorge": {
"patterns": ["Transaksjoner*.csv"],
"encoding": "latin1",
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_sor,
"delimiter": ";"
},
"Sparebank1": {
"patterns": ["OversiktKonti*.csv"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_sparebank1,
"delimiter": ";"
},
"Norwegian": {
"patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_norwegian
}
# Add more banks and patterns as needed
}


def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Load a single bank statement file and convert it with the bank's parser.

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Receives the loaded DataFrame and returns
            the YNAB-compatible DataFrame
        delimiter (Optional[str]): Field delimiter for CSV files; not used
            for Excel files
        encoding (Optional[str]): Text encoding for CSV files; not used for
            Excel files
    Returns:
        pd.DataFrame: Processed YNAB-compatible data, or an empty DataFrame
        when the file extension is not .csv, .xlsx or .xls
    Raises:
        Exception: Any error raised while reading or parsing the file is
        printed and then re-raised unchanged.
    """
    file_extension = Path(file_path).suffix.lower()
    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        elif file_extension in (".xlsx", ".xls"):
            data = pd.read_excel(file_path)
        else:
            print(f"Skipping unsupported file type: {file_path}")
            return pd.DataFrame()
        # Hand the raw table to the bank-specific parser.
        return parse_function(data)
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        # Re-raise the active exception; nothing after this line can run.
        raise

# Create dictionary with proper field names
row = {x[0]: x[1] for x in zip(in_header, raw_row)}

try:
if (result := convert_row(row)) is not None:
writer.writerow(result)
except Exception as e:
print(f'Error in row {row}')
raise e
def find_bank_config(filename):
    """
    Look up which bank a statement file belongs to.

    Banks are tried in the order they appear in BANKS; the first bank with
    a pattern that matches the file name wins.

    Args:
        filename (str): Name of the file to match
    Returns:
        tuple: (bank_name, bank_config), or (None, None) if no pattern matches
    """
    from fnmatch import fnmatch

    for name, config in BANKS.items():
        if any(fnmatch(filename, wildcard) for wildcard in config["patterns"]):
            return name, config
    return None, None


def main():
if len(sys.argv) != 2:
usage()
exit(1)
def _next_free_output_path(output_directory, base_name):
    """Return the first ``base_name[ (n)].csv`` path that is not on disk yet."""
    candidate = output_directory / f"{base_name}.csv"
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = output_directory / f"{base_name} ({attempt}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_files=None):
    """
    Convert bank statements to YNAB-importable CSV files.

    Each statement is matched to a bank in BANKS by its file name, parsed,
    and written to the "YNAB_Outputs" directory under the current working
    directory. Existing output files are never overwritten; a " (n)" suffix
    is added to the name instead.

    Args:
        input_files (list): Optional list of specific files to process.
            If None or empty, every file in the current directory that
            matches a bank pattern is processed.
    """
    current_directory = Path.cwd()
    output_directory = current_directory / "YNAB_Outputs"
    output_directory.mkdir(exist_ok=True)

    if input_files:
        print(f"Processing {len(input_files)} dragged file(s)...")
        # Keep missing paths in the list so the loop below reports them
        # instead of dropping them silently.
        files_to_process = [Path(f) for f in input_files]
    else:
        print("Processing all files in current directory...")
        # Path.glob only interprets the pattern, so wildcard characters in
        # the directory name itself cannot break the search.
        files_to_process = [
            match
            for bank_config in BANKS.values()
            for pattern in bank_config["patterns"]
            for match in current_directory.glob(pattern)
        ]
    # A file may match several patterns or be passed twice; convert it once.
    files_to_process = list(dict.fromkeys(files_to_process))

    files_processed = False
    for file_path in files_to_process:
        if not file_path.exists():
            print(f"File not found: {file_path}")
            continue
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            print(f"No bank configuration found for file: {file_path.name}")
            continue
        print(f"Processing file: {file_path} for {bank_name}")
        ynab_data = process_bank_statement(
            str(file_path),
            bank_config["parse_function"],
            bank_config.get("delimiter", ","),
            bank_config.get("encoding", "utf-8"),
        )
        if ynab_data.empty:
            print(f"No data processed for {file_path}")
            continue
        base_name = bank_config["output_filename"].format(
            bank=bank_name,
            first_date=ynab_data['Date'].min().date(),
            last_date=ynab_data['Date'].max().date(),
        )
        output_file = _next_free_output_path(output_directory, base_name)
        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
        files_processed = True
    if not files_processed:
        print("No files were processed. Make sure your files match the expected patterns.")


if __name__ == '__main__':
main()
if __name__ == "__main__":
    # Files dropped onto the script arrive as extra command-line arguments.
    dragged_files = sys.argv[1:]
    if dragged_files:
        convert_bank_statements_to_ynab(dragged_files)
    else:
        # Nothing was dropped: scan the current directory instead.
        convert_bank_statements_to_ynab()
    # Pause so the window stays open and the results can be read.
    input("\nPress Enter to close...")

Loading…
Peruuta
Tallenna