Selaa lähdekoodia

Merge both versions of the script

master
Sindre Stephansen 2 kuukautta sitten
vanhempi
commit
fdd40de8c2
Allekirjoittanut: sindre <sindre@sindrestephansen.com> GPG Key ID: B06FC67D17A46ADE
2 muutettua tiedostoa jossa 265 lisäystä ja 319 poistoa
  1. +0
    -251
      ynab-karianne.py
  2. +265
    -68
      ynab.py

+ 0
- 251
ynab-karianne.py Näytä tiedosto

@@ -1,251 +0,0 @@
#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter
Converts bank statements from various formats to YNAB-compatible CSV files
"""

import os
import sys
import glob
import pandas as pd
from pathlib import Path

def parse_norwegian_number(value):
    """Convert Norwegian number format (comma decimal) to float.

    Besides the decimal comma ("12,50"), thousands separators are accepted:
    any whitespace (including non-breaking spaces, "1 234,56") and dots that
    appear before the decimal comma ("1.234,56"). Values that are already
    numeric are passed through ``float``.

    Args:
        value: Raw cell value (string, number, None or NaN).

    Returns:
        float: The parsed number, or 0.0 when the value is missing or cannot
        be interpreted as a number.
    """
    if pd.isna(value) or value == '':
        return 0.0
    # str.split() with no argument splits on every Unicode whitespace
    # character, so this also removes non-breaking thousands separators.
    text = ''.join(str(value).split())
    comma_at = text.find(',')
    if comma_at != -1:
        # Dots located before the decimal comma can only be thousands
        # separators. Dots after it are left alone so malformed input still
        # fails the float() conversion below instead of yielding a wrong value.
        if text.rfind('.') < comma_at:
            text = text.replace('.', '')
        text = text.replace(',', '.')
    try:
        return float(text)
    except ValueError:
        return 0.0

def parse_norwegian_date(date_str):
    """Parse a DD.MM.YYYY date into a pandas Timestamp.

    Args:
        date_str: Date text in DD.MM.YYYY form; surrounding whitespace is
            ignored. May also be None/NaN/empty for a missing date.

    Returns:
        pandas.Timestamp for a valid date, or ``pd.NaT`` when the value is
        missing. NaT (rather than an empty string) keeps the resulting column
        a datetime column, so min()/max() on it keep working; it is still
        written as an empty field by ``DataFrame.to_csv``.

    Raises:
        SystemExit: With exit status 1, after printing the offending value,
            when the text is not a valid DD.MM.YYYY date.
    """
    if isinstance(date_str, str):
        date_str = date_str.strip()
    if pd.isna(date_str) or date_str == '':
        return pd.NaT
    try:
        # Parse DD.MM.YYYY and convert to a Timestamp
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        print(f"Invalid date format: {date_str}")
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)

def parse_bank_sparebank1(data):
    """
    Turn a Sparebank 1 export into YNAB rows.

    Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto

    Args:
        data (pd.DataFrame): The statement as read from the bank file
    Returns:
        pd.DataFrame: One row per transaction with the columns
        Date, Payee, Memo, Outflow, Inflow
    """
    def _to_ynab(entry):
        money_in = parse_norwegian_number(entry.get('Inn'))
        money_out = parse_norwegian_number(entry.get('Ut'))
        return {
            'Date': parse_norwegian_date(entry.get('Dato', '')),
            'Payee': entry.get('Beskrivelse', ''),
            'Memo': entry.get('Til konto', ''),
            # The outflow column must hold a positive amount
            'Outflow': -money_out if money_out < 0 else money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([_to_ynab(entry) for _, entry in data.iterrows()])


def parse_bank_norwegian(data):
    """
    Turn a Bank Norwegian export into YNAB rows.

    Expected columns: TransactionDate, Text, Memo, Amount

    Args:
        data (pd.DataFrame): The statement as read from the bank file
    Returns:
        pd.DataFrame: One row per transaction with the columns
        Date, Payee, Memo, Outflow, Inflow
    """
    records = []
    for _, entry in data.iterrows():
        signed_amount = entry.get('Amount', 0)
        if signed_amount > 0:
            money_in, money_out = signed_amount, 0
        elif signed_amount < 0:
            # Make outflow positive
            money_in, money_out = 0, -signed_amount
        else:
            money_in = money_out = 0
        records.append({
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        })
    return pd.DataFrame(records)


# Dictionary of banks, filename patterns, and parsing functions
BANKS = {
    # Each entry maps a bank name to:
    #   patterns        - file-name globs identifying that bank's exports
    #   output_filename - template for the generated CSV name (no extension)
    #   parse_function  - converter from the raw DataFrame to YNAB rows
    #   delimiter       - optional CSV field separator
    "Sparebank1": {
        "patterns": ["OversiktKonti*.csv"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sparebank1,
        "delimiter": ";",
    },
    "Norwegian": {
        "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_norwegian,
    },
    # Add more banks and patterns as needed
}


def process_bank_statement(file_path, parse_function, delimiter):
    """
    Process a single bank statement file

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Function to parse the specific bank format;
            receives the raw DataFrame and returns the YNAB DataFrame
        delimiter (Optional<str>): Field delimiter, used for CSV files only
    Returns:
        pd.DataFrame: Processed YNAB-compatible data, or an empty DataFrame
        when the file extension is not .csv, .xlsx or .xls
    Raises:
        Exception: Any error raised while reading or parsing the file is
            reported on stdout and then re-raised unchanged
    """
    file_extension = Path(file_path).suffix.lower()
    if file_extension not in (".csv", ".xlsx", ".xls"):
        print(f"Skipping unsupported file type: {file_path}")
        return pd.DataFrame()
    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter)
        else:
            data = pd.read_excel(file_path)
        # Call the appropriate bank-specific parsing function
        return parse_function(data)
    except Exception as e:
        print(f"Error processing file {file_path}: {e}")
        # Bare raise keeps the original traceback intact
        raise


def find_bank_config(filename):
    """
    Look up which bank in BANKS a file name belongs to

    Banks are tried in the order they appear in BANKS; the first bank with
    a pattern matching the name wins.

    Args:
        filename (str): Name of the file to match
    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    import fnmatch

    for name, config in BANKS.items():
        if any(fnmatch.fnmatch(filename, candidate) for candidate in config["patterns"]):
            return name, config
    return None, None


def _collect_statement_files(directory):
    """Return the files in *directory* matching any BANKS pattern, each once."""
    matches = []
    for bank_config in BANKS.values():
        for pattern in bank_config["patterns"]:
            # Path.glob only interprets the pattern itself, so glob
            # metacharacters in the directory name cannot break the search.
            matches.extend(directory.glob(pattern))
    # dict.fromkeys drops duplicates (a file matching several patterns)
    # while keeping first-seen order.
    return list(dict.fromkeys(matches))


def _unused_output_path(output_directory, base_name):
    """Return '<base_name>.csv' or the first '<base_name> (n).csv' not yet on disk."""
    candidate = output_directory / f"{base_name}.csv"
    retry_count = 0
    while candidate.exists():
        retry_count += 1
        candidate = output_directory / f"{base_name} ({retry_count}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_files=None):
    """
    Convert bank statements to YNAB format

    Each input file is matched against BANKS by file name, parsed with the
    matching bank's parse function and written as CSV into a "YNAB_Outputs"
    directory below the current working directory. Existing output files are
    never overwritten; a " (n)" suffix is added to the name instead.

    Args:
        input_files (list): Optional list of specific files to process
            If None (or empty), processes all files in the current directory
            that match a bank pattern
    """
    current_directory = Path.cwd()
    output_directory = current_directory / "YNAB_Outputs"
    # Create output directory if it doesn't exist
    output_directory.mkdir(exist_ok=True)

    if input_files:
        print(f"Processing {len(input_files)} dragged file(s)...")
        # Missing paths are kept here on purpose so the loop below can
        # report them instead of dropping them silently.
        files_to_process = [Path(f) for f in input_files]
    else:
        print("Processing all files in current directory...")
        files_to_process = _collect_statement_files(current_directory)

    files_processed = False
    for file_path in files_to_process:
        if not file_path.exists():
            print(f"File not found: {file_path}")
            continue

        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            print(f"No bank configuration found for file: {file_path.name}")
            continue

        print(f"Processing file: {file_path} for {bank_name}")
        ynab_data = process_bank_statement(
            str(file_path),
            bank_config["parse_function"],
            bank_config.get("delimiter", ","),
        )
        if ynab_data.empty:
            print(f"No data processed for {file_path}")
            continue

        # The output name carries the bank and the covered date range
        base_name = bank_config["output_filename"].format(
            bank=bank_name,
            first_date=ynab_data['Date'].min().date(),
            last_date=ynab_data['Date'].max().date(),
        )
        output_file = _unused_output_path(output_directory, base_name)

        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        print(f"Data saved to {output_file}")
        files_processed = True

    if not files_processed:
        print("No files were processed. Make sure your files match the expected patterns.")


if __name__ == "__main__":
    # Arguments are present when files were dragged onto the script;
    # without any, every matching file in the current directory is converted.
    dragged_files = sys.argv[1:]
    if dragged_files:
        convert_bank_statements_to_ynab(dragged_files)
    else:
        convert_bank_statements_to_ynab()
    # Keep window open on Mac so user can see results
    input("\nPress Enter to close...")

+ 265
- 68
ynab.py Näytä tiedosto

@@ -1,39 +1,49 @@
#!/bin/python3
#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter
Converts bank statements from various formats to YNAB-compatible CSV files
"""


import os
import sys import sys
import csv
import glob
import re import re
import pandas as pd
from pathlib import Path from pathlib import Path


whitelist_cards = ["7756"]
def parse_norwegian_number(value):
    """Convert Norwegian number format (comma decimal) to float.

    Besides the decimal comma ("12,50"), thousands separators are accepted:
    any whitespace (including non-breaking spaces, "1 234,56") and dots that
    appear before the decimal comma ("1.234,56"). Values that are already
    numeric are passed through ``float``.

    Args:
        value: Raw cell value (string, number, None or NaN).

    Returns:
        float: The parsed number, or 0.0 when the value is missing or cannot
        be interpreted as a number.
    """
    if pd.isna(value) or value == '':
        return 0.0
    # str.split() with no argument splits on every Unicode whitespace
    # character, so this also removes non-breaking thousands separators.
    text = ''.join(str(value).split())
    comma_at = text.find(',')
    if comma_at != -1:
        # Dots located before the decimal comma can only be thousands
        # separators. Dots after it are left alone so malformed input still
        # fails the float() conversion below instead of yielding a wrong value.
        if text.rfind('.') < comma_at:
            text = text.replace('.', '')
        text = text.replace(',', '.')
    try:
        return float(text)
    except ValueError:
        return 0.0


in_header = ['Utført dato', 'Bokført dato', 'Rentedato', 'Beskrivelse', 'Type', 'Undertype', 'Fra konto', 'Avsender', 'Til konto', 'Mottakernavn', 'Beløp inn', 'Beløp ut', 'Valuta', 'Status', 'Melding']
out_header = ['Date', 'Payee', 'Memo', 'Outflow', 'Inflow']


def usage():
    """Print the one-line command-line usage hint."""
    hint = 'Usage: ynab.py <filename>'
    print(hint)


def is_reserved(row):
    """Return True when the row's 'Status' field equals "Reservert"."""
    status = row['Status']
    return status == "Reservert"
def parse_norwegian_date(date_str):
    """Parse a DD.MM.YYYY date into a pandas Timestamp.

    Args:
        date_str: Date text in DD.MM.YYYY form; surrounding whitespace is
            ignored. May also be None/NaN/empty for a missing date.

    Returns:
        pandas.Timestamp for a valid date, or ``pd.NaT`` when the value is
        missing. NaT (rather than an empty string) keeps the resulting column
        a datetime column, so min()/max() on it keep working; it is still
        written as an empty field by ``DataFrame.to_csv``.

    Raises:
        SystemExit: With exit status 1, after printing the offending value,
            when the text is not a valid DD.MM.YYYY date.
    """
    if isinstance(date_str, str):
        date_str = date_str.strip()
    if pd.isna(date_str) or date_str == '':
        return pd.NaT
    try:
        # Parse DD.MM.YYYY and convert to a Timestamp
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        print(f"Invalid date format: {date_str}")
        # sys.exit instead of the bare exit(): the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)


def convert_memo(original): def convert_memo(original):
original = original.replace(" Kurs: 1.0000", "") original = original.replace(" Kurs: 1.0000", "")
words = original.split(" ") words = original.split(" ")


for i in range(20):
while len(words) > 0:
if words[0] == "": if words[0] == "":
# It's empty # It's empty
del words[0] del words[0]
elif m := re.match(r'\*(\d{4})', words[0]): elif m := re.match(r'\*(\d{4})', words[0]):
# It's the last four digits of a card
if m.groups()[0] in whitelist_cards:
# It's an expected card, ignore it
del words[0]
else:
# It's an unexpected card, move it to the end
words.append(words.pop(0))
# It's the last four numbers of the card, ignore it
del words[0]
elif m := re.match(r'\d{2}\.\d{2}', words[0]): elif m := re.match(r'\d{2}\.\d{2}', words[0]):
# It's the date. Move it to the end # It's the date. Move it to the end
words.append(words.pop(0)) words.append(words.pop(0))
@@ -49,71 +59,258 @@ def convert_memo(original):
words.append(words.pop(0)) words.append(words.pop(0))
else: else:
break break
else:
raise Exception(f"Infinite loop while parsing \"{original}\"")



return " ".join(words) return " ".join(words)


def parse_bank_sor(data):
"""
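Parse Sparebanken Norge bank data (the "SparebankenNorge" entry in BANKS)
Expected columns: Status, Valuta, Beskrivelse, Melding/KID/Fakt.nr, Bokført dato, Beløp ut, Beløp inn
Only rows whose Status is "Bokført" are converted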
"""
result = []
for _, row in data.iterrows():
if row.get('Status') != "Bokført":
continue


def convert_row(row):
if is_reserved(row):
return None
if row.get('Valuta') != 'NOK':
raise ValueError(f"Unknown currency {row['Valuta']}")


if row['Valuta'] != 'NOK':
raise ValueError(f"Unknown currency {row['Valuta']}")
payee = convert_memo(row.get('Beskrivelse', ''))
memo = convert_memo(row.get('Melding/KID/Fakt.nr', ''))


return [
row['Bokført dato'],
row['Beskrivelse'],
convert_memo(row['Melding']),
-float(row['Beløp ut'] or '0'),
float(row['Beløp inn'] or '0'),
]
result.append({
'Date': parse_norwegian_date(row.get('Bokført dato')),
'Payee': payee,
'Memo': memo,
'Outflow': -float(row['Beløp ut'] or '0'),
'Inflow': float(row['Beløp inn'] or '0'),
})
return pd.DataFrame(result)


def parse_bank_sparebank1(data):
    """
    Turn a Sparebank 1 export into YNAB rows.

    Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto

    Args:
        data (pd.DataFrame): The statement as read from the bank file
    Returns:
        pd.DataFrame: One row per transaction with the columns
        Date, Payee, Memo, Outflow, Inflow
    """
    def _to_ynab(entry):
        money_in = parse_norwegian_number(entry.get('Inn'))
        money_out = parse_norwegian_number(entry.get('Ut'))
        return {
            'Date': parse_norwegian_date(entry.get('Dato', '')),
            'Payee': entry.get('Beskrivelse', ''),
            'Memo': entry.get('Til konto', ''),
            # The outflow column must hold a positive amount
            'Outflow': -money_out if money_out < 0 else money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([_to_ynab(entry) for _, entry in data.iterrows()])


def convert(reader, writer):
writer.writerow(out_header)


# Ignore header
reader.__next__()
def parse_bank_norwegian(data):
    """
    Turn a Bank Norwegian export into YNAB rows.

    Expected columns: TransactionDate, Text, Memo, Amount

    Args:
        data (pd.DataFrame): The statement as read from the bank file
    Returns:
        pd.DataFrame: One row per transaction with the columns
        Date, Payee, Memo, Outflow, Inflow
    """
    records = []
    for _, entry in data.iterrows():
        signed_amount = entry.get('Amount', 0)
        if signed_amount > 0:
            money_in, money_out = signed_amount, 0
        elif signed_amount < 0:
            # Make outflow positive
            money_in, money_out = 0, -signed_amount
        else:
            money_in = money_out = 0
        records.append({
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        })
    return pd.DataFrame(records)


for raw_row in reader:
# Stop when we hit an empty row
for field in raw_row:
if field:
break

# Dictionary of banks, filename patterns, and parsing functions
BANKS = {
    # Each entry maps a bank name to:
    #   patterns        - file-name globs identifying that bank's exports
    #   encoding        - optional text encoding of the CSV export
    #   output_filename - template for the generated CSV name (no extension)
    #   parse_function  - converter from the raw DataFrame to YNAB rows
    #   delimiter       - optional CSV field separator
    "SparebankenNorge": {
        "patterns": ["Transaksjoner*.csv"],
        "encoding": "latin1",
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sor,
        "delimiter": ";",
    },
    "Sparebank1": {
        "patterns": ["OversiktKonti*.csv"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sparebank1,
        "delimiter": ";",
    },
    "Norwegian": {
        "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_norwegian,
    },
    # Add more banks and patterns as needed
}


def process_bank_statement(file_path, parse_function, delimiter, encoding):
"""
Process a single bank statement file
Args:
file_path (str): Path to the bank statement file
parse_function (callable): Function to parse the specific bank format
delimiter (Optional<str>): Field delimiter
Returns:
pd.DataFrame: Processed YNAB-compatible data
"""
file_extension = Path(file_path).suffix.lower()
try:
# Handle CSV files
if file_extension == ".csv":
data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
# Handle Excel files
elif file_extension in [".xlsx", ".xls"]:
data = pd.read_excel(file_path)
else: else:
break
print(f"Skipping unsupported file type: {file_path}")
return pd.DataFrame()
# Call the appropriate bank-specific parsing function
ynab_data = parse_function(data)
return ynab_data
except Exception as e:
print(f"Error processing file {file_path}: {e}")
raise e
return pd.DataFrame()


# Create dictionary with proper field names
row = {x[0]: x[1] for x in zip(in_header, raw_row)}


try:
if (result := convert_row(row)) is not None:
writer.writerow(result)
except Exception as e:
print(f'Error in row {row}')
raise e
def find_bank_config(filename):
    """
    Look up which bank in BANKS a file name belongs to

    Banks are tried in the order they appear in BANKS; the first bank with
    a pattern matching the name wins.

    Args:
        filename (str): Name of the file to match
    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    import fnmatch

    for name, config in BANKS.items():
        if any(fnmatch.fnmatch(filename, candidate) for candidate in config["patterns"]):
            return name, config
    return None, None




def main():
if len(sys.argv) != 2:
usage()
exit(1)
def convert_bank_statements_to_ynab(input_files=None):
"""
Convert bank statements to YNAB format
Args:
input_files (list): Optional list of specific files to process
If None, processes all files in current directory
"""
current_directory = Path.cwd()
output_directory = current_directory / "YNAB_Outputs"
# Create output directory if it doesn't exist
output_directory.mkdir(exist_ok=True)
# Get list of files to process
if input_files:
print(f"Processing {len(input_files)} dragged file(s)...")
files_to_process = [Path(f) for f in input_files if Path(f).exists()]
else:
print("Processing all files in current directory...")
files_to_process = []
# Collect all files matching any bank pattern
for bank_config in BANKS.values():
for pattern in bank_config["patterns"]:
matching_files = glob.glob(str(current_directory / pattern))
files_to_process.extend([Path(f) for f in matching_files])
files_processed = False
# Process each file
for file_path in files_to_process:
if not file_path.exists():
print(f"File not found: {file_path}")
continue
# Find matching bank configuration
bank_name, bank_config = find_bank_config(file_path.name)
if not bank_config:
print(f"No bank configuration found for file: {file_path.name}")
continue
print(f"Processing file: {file_path} for {bank_name}")
parse_function = bank_config["parse_function"]
delimiter = bank_config.get("delimiter", ",")
encoding = bank_config.get("encoding", "utf-8")
# Process the file
ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
if ynab_data.empty:
print(f"No data processed for {file_path}")
continue
filename_placeholders = {
'bank': bank_name,
'first_date': ynab_data['Date'].min().date(),
'last_date': ynab_data['Date'].max().date(),
}


filepath = Path(sys.argv[1])
file_retry_count = 0
while True:
output_filename = bank_config["output_filename"].format(**filename_placeholders)


new_basename = f'ynab-{filepath.name}'
new_filepath = filepath.parent / new_basename
if file_retry_count > 0:
output_filename += f" ({file_retry_count})"
output_filename += ".csv"
output_file = output_directory / output_filename


with filepath.open(mode='r', encoding='latin1') as old_file:
reader = csv.reader(old_file, delimiter=';')
if not output_file.exists():
break


with new_filepath.open(mode='w') as new_file:
writer = csv.writer(new_file)
convert(reader, writer)
file_retry_count += 1
# Export to CSV for YNAB import
ynab_data.to_csv(output_file, index=False)
print(f"Data saved to {output_file}")
files_processed = True
if not files_processed:
print("No files were processed. Make sure your files match the expected patterns.")




if __name__ == '__main__':
main()
if __name__ == "__main__":
    # Arguments are present when files were dragged onto the script;
    # without any, every matching file in the current directory is converted.
    dragged_files = sys.argv[1:]
    if dragged_files:
        convert_bank_statements_to_ynab(dragged_files)
    else:
        convert_bank_statements_to_ynab()
    # Keep window open on Mac so user can see results
    input("\nPress Enter to close...")

Loading…
Peruuta
Tallenna