Pārlūkot izejas kodu

Restructure code

master
Sindre Stephansen pirms 2 mēnešiem
vecāks
revīzija
ca1f9c3a67
Parakstījis: sindre <sindre@sindrestephansen.com> GPG atslēgas ID: B06FC67D17A46ADE
7 mainītis faili ar 399 papildinājumiem un 384 dzēšanām
  1. +23
    -0
      src/bank_parsers/bank_norwegian.py
  2. +53
    -0
      src/bank_parsers/helpers.py
  3. +29
    -0
      src/bank_parsers/sparebank1.py
  4. +30
    -0
      src/bank_parsers/sparebanken_norge.py
  5. +171
    -0
      src/convert.py
  6. +93
    -0
      src/main.py
  7. +0
    -384
      ynab.py

+ 23
- 0
src/bank_parsers/bank_norwegian.py Parādīt failu

@@ -0,0 +1,23 @@
import pandas as pd

def parse_bank_norwegian(data):
    """
    Convert a Bank Norwegian statement into YNAB rows.

    Reads the columns TransactionDate, Text, Memo and Amount from every row
    of ``data``. A positive Amount is reported as Inflow; a negative Amount
    is sign-flipped and reported as Outflow. Missing columns fall back to an
    empty string (0 for Amount).

    Returns a DataFrame with the columns Date, Payee, Memo, Outflow, Inflow,
    or an empty DataFrame when ``data`` has no rows.
    """
    def to_ynab(entry):
        # Split the signed amount into two non-negative columns.
        signed = entry.get('Amount', 0)
        money_in = signed if signed > 0 else 0
        money_out = -signed if signed < 0 else 0
        return {
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([to_ynab(entry) for _, entry in data.iterrows()])

+ 53
- 0
src/bank_parsers/helpers.py Parādīt failu

@@ -0,0 +1,53 @@
import re
import pandas as pd

def parse_norwegian_number(value):
    """Convert a Norwegian-formatted number (comma decimal) to float.

    Accepts numbers or strings. Whitespace inside the value (regular or
    non-breaking spaces used as thousands separators) is removed, and a dot
    that appears before the decimal comma ("1.234,56") is treated as a
    thousands separator. Missing, empty or unparseable values yield 0.0.
    """
    if pd.isna(value) or value == '':
        return 0.0
    # str.split() without arguments splits on any Unicode whitespace, so this
    # also removes non-breaking spaces used as digit-group separators.
    text = ''.join(str(value).split())
    # Only drop dots when they clearly precede the decimal comma; otherwise
    # fall through and let float() reject the ambiguous value.
    if ',' in text and '.' in text and text.rfind('.') < text.rfind(','):
        text = text.replace('.', '')
    text = text.replace(',', '.')
    try:
        return float(text)
    except ValueError:
        return 0.0

def parse_norwegian_date(date_str):
    """Parse a DD.MM.YYYY date string.

    Returns a pandas Timestamp for a valid date, or '' when the value is
    missing or empty. An unparseable value is logged as an error and the
    process exits with status 1.
    """
    # Imported locally because this module imports neither logging nor sys
    # at the top; the previous `logger` reference raised NameError here.
    import logging
    import sys

    if pd.isna(date_str) or date_str == '':
        return ''
    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        logging.error(f"Invalid date format: {date_str}")
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to be defined.
        sys.exit(1)

def convert_memo(original):
    """Tidy a bank description so the descriptive text comes first.

    The text " Kurs: 1.0000" is removed, then leading space-separated tokens
    are examined one at a time until the first token that matches none of
    these rules:

    * empty tokens (from repeated spaces) are dropped;
    * a card suffix such as ``*1234`` is dropped;
    * a token starting with ``DD.MM`` (the date) is moved to the end;
    * a three-letter upper-case currency code followed by a decimal amount
      is dropped when the code is NOK, otherwise both tokens are moved to
      the end.

    Each token is examined at most once, so input made up only of movable
    tokens terminates. Missing values (None/NaN) yield an empty string;
    other non-string values are converted with ``str()``.
    """
    if not isinstance(original, str):
        # pandas represents empty cells as NaN (a float), which has no .replace
        original = "" if pd.isna(original) else str(original)

    original = original.replace(" Kurs: 1.0000", "")
    words = original.split(" ")

    moved = []  # tokens relocated to the end, in the order they were seen
    total = len(words)
    pos = 0
    while pos < total:
        word = words[pos]
        if word == "":
            # It's empty
            pos += 1
        elif re.match(r'\*(\d{4})', word):
            # It's the last four numbers of the card, ignore it
            pos += 1
        elif re.match(r'\d{2}\.\d{2}', word):
            # It's the date. Move it to the end
            moved.append(word)
            pos += 1
        elif (
            re.match(r'^[A-Z]{3}$', word)
            and pos + 1 < total  # a trailing code has no amount to inspect
            and re.match(r'[\d]+\.[\d]+', words[pos + 1])
        ):
            # It's the original currency; NOK is dropped, anything else is
            # kept but moved to the end together with its amount.
            if word != "NOK":
                moved.extend(words[pos:pos + 2])
            pos += 2
        else:
            break

    return " ".join(words[pos:] + moved)

+ 29
- 0
src/bank_parsers/sparebank1.py Parādīt failu

@@ -0,0 +1,29 @@
import pandas as pd

from bank_parsers.helpers import parse_norwegian_number, parse_norwegian_date


def parse_sparebank1(data):
    """
    Convert a Sparebank 1 export into YNAB rows.

    Columns read from ``data``: Dato (date), Beskrivelse (payee),
    Til konto (memo), Inn (inflow) and Ut (outflow). Amounts go through
    ``parse_norwegian_number`` and dates through ``parse_norwegian_date``.
    A negative Ut value is sign-flipped so Outflow is never negative.

    Returns a DataFrame with the columns Date, Payee, Memo, Outflow, Inflow.
    """
    rows = []
    for _, entry in data.iterrows():
        money_in = parse_norwegian_number(entry.get('Inn'))
        money_out = parse_norwegian_number(entry.get('Ut'))
        # The export may list withdrawals as negative numbers.
        money_out = -money_out if money_out < 0 else money_out
        ynab_row = {
            'Date': parse_norwegian_date(entry.get('Dato', '')),
            'Payee': entry.get('Beskrivelse', ''),
            'Memo': entry.get('Til konto', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }
        rows.append(ynab_row)
    return pd.DataFrame(rows)

+ 30
- 0
src/bank_parsers/sparebanken_norge.py Parādīt failu

@@ -0,0 +1,30 @@
import pandas as pd

from bank_parsers.helpers import parse_norwegian_date, convert_memo


def parse_sparebanken_norge(data):
    """
    Convert a Sparebanken Norge export into YNAB rows.

    Only rows whose Status is "Bokført" are converted; every other row is
    skipped. Columns read: Status, Valuta, Beskrivelse (payee),
    Melding/KID/Fakt.nr (memo), Bokført dato (date), Beløp ut and Beløp inn.
    The Beløp ut value is negated to form the Outflow.

    Raises:
        ValueError: if a booked row has a Valuta other than NOK.
    """
    rows = []
    for _, entry in data.iterrows():
        is_booked = entry.get('Status') == "Bokført"
        if not is_booked:
            continue

        if entry.get('Valuta') != 'NOK':
            raise ValueError(f"Unknown currency {entry['Valuta']}")

        cleaned_payee = convert_memo(entry.get('Beskrivelse', ''))
        cleaned_memo = convert_memo(entry.get('Melding/KID/Fakt.nr', ''))

        ynab_row = {
            'Date': parse_norwegian_date(entry.get('Bokført dato')),
            'Payee': cleaned_payee,
            'Memo': cleaned_memo,
            'Outflow': -float(entry['Beløp ut'] or '0'),
            'Inflow': float(entry['Beløp inn'] or '0'),
        }
        rows.append(ynab_row)
    return pd.DataFrame(rows)

+ 171
- 0
src/convert.py Parādīt failu

@@ -0,0 +1,171 @@
import glob
import logging
import pandas as pd
from pathlib import Path

from bank_parsers.sparebank1 import parse_sparebank1
from bank_parsers.bank_norwegian import parse_bank_norwegian
from bank_parsers.sparebanken_norge import parse_sparebanken_norge


# Registry of supported banks, keyed by the bank name that is substituted
# into output filenames. Each entry holds:
#   "patterns"        - filename patterns used to find and recognise statement files
#   "output_filename" - str.format template; receives bank, first_date and last_date
#   "parse_function"  - callable that turns the loaded DataFrame into YNAB rows
#   "delimiter"       - optional CSV field delimiter (readers fall back to ",")
#   "encoding"        - optional CSV text encoding (readers fall back to "utf-8")
BANKS = {
"SparebankenNorge": {
"patterns": ["Transaksjoner*.csv"],
"encoding": "latin1",
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_sparebanken_norge,
"delimiter": ";"
},
"Sparebank1": {
"patterns": ["OversiktKonti*.csv"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_sparebank1,
"delimiter": ";"
},
"Norwegian": {
"patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_norwegian
}
# Add more banks and patterns as needed
}


def find_bank_config(filename):
    """
    Look up the bank whose filename patterns match ``filename``.

    Banks are tried in the order they appear in ``BANKS``; the first bank
    with any matching pattern wins.

    Args:
        filename (str): Name of the file to match
    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    import fnmatch

    candidates = (
        (name, settings)
        for name, settings in BANKS.items()
        if any(fnmatch.fnmatch(filename, glob_pattern) for glob_pattern in settings["patterns"])
    )
    return next(candidates, (None, None))


def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Load one bank statement file and convert it with ``parse_function``.

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Receives the loaded DataFrame and returns
            the YNAB-compatible DataFrame
        delimiter (Optional[str]): Field delimiter, used for CSV files only
        encoding (Optional[str]): Text encoding, used for CSV files only
    Returns:
        pd.DataFrame: Processed YNAB-compatible data, or an empty DataFrame
        when the file extension is not .csv, .xlsx or .xls
    Raises:
        Exception: any error raised while reading or parsing is logged and
        re-raised unchanged
    """
    file_extension = Path(file_path).suffix.lower()
    if file_extension not in (".csv", ".xlsx", ".xls"):
        logging.warning(f"Skipping unsupported file type: {file_path}")
        return pd.DataFrame()

    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        else:
            data = pd.read_excel(file_path)
        # Call the appropriate bank-specific parsing function
        return parse_function(data)
    except Exception as e:
        logging.error(f"Error processing file {file_path}: {e}")
        # Bare raise re-raises the active exception as-is.
        raise


def _collect_statement_files(input_paths):
    """Expand the given files and directories into a list of statement file Paths."""
    found = []
    for path in input_paths:
        path = Path(path)  # tolerate plain strings as well as Path objects
        if not path.exists():
            logging.warning(f"Path does not exist: {path}")
        elif path.is_file():
            found.append(path)
        elif path.is_dir():
            logging.debug(f"Looking for matching files in {path}")
            for bank_config in BANKS.values():
                for pattern in bank_config["patterns"]:
                    found.extend(Path(f) for f in glob.glob(str(path / pattern)))
    return found


def _unused_output_path(output_directory, stem):
    """Return ``<stem>.csv`` in output_directory, appending " (n)" until the name is free."""
    candidate = output_directory / f"{stem}.csv"
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = output_directory / f"{stem} ({attempt}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_paths, output_directory):
    """
    Convert bank statements to YNAB format

    Every input file that matches a bank in ``BANKS`` is parsed and written
    as a CSV file into ``output_directory``. Existing output files are never
    overwritten; a numeric suffix is added to the name instead.

    Args:
        input_paths (list): Specific files or directories to process.
            Directories are searched for files matching any bank pattern.
        output_directory (Path or str): Where the converted CSV files are
            written; created if it does not exist.
    """
    # Create output directory if it doesn't exist
    output_directory = Path(output_directory)
    output_directory.mkdir(exist_ok=True, parents=True)

    files_to_process = _collect_statement_files(input_paths)

    files_processed = False
    # Process each file
    logging.info(f"Processing {len(files_to_process)} file(s)...")
    for file_path in files_to_process:
        logging.debug(f"Processing {file_path}")
        if not file_path.exists():
            logging.warning(f"File not found: {file_path}")
            continue
        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            logging.warning(f"No bank configuration found for file: {file_path.name}")
            continue
        logging.info(f"Processing file: {file_path} for {bank_name}")
        parse_function = bank_config["parse_function"]
        delimiter = bank_config.get("delimiter", ",")
        encoding = bank_config.get("encoding", "utf-8")
        # Process the file
        ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
        if ynab_data.empty:
            logging.warning(f"No data processed for {file_path}")
            continue
        filename_placeholders = {
            'bank': bank_name,
            'first_date': ynab_data['Date'].min().date(),
            'last_date': ynab_data['Date'].max().date(),
        }
        output_stem = bank_config["output_filename"].format(**filename_placeholders)
        output_file = _unused_output_path(output_directory, output_stem)
        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        logging.info(f"Data saved to {output_file}")
        files_processed = True
    if not files_processed:
        logging.warning("No files were processed. Make sure your files match the expected patterns.")

+ 93
- 0
src/main.py Parādīt failu

@@ -0,0 +1,93 @@
#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter
Converts bank statements from various formats to YNAB-compatible CSV files
"""

import os
import sys
import argparse
import logging
import yaml
from pathlib import Path

from convert import convert_bank_statements_to_ynab


def setup_logging(verbosity):
    """Configure the root logger from a verbosity count.

    A verbosity of 0 or less selects WARNING, values below 2 select INFO,
    and 2 or more selects DEBUG. The function may be called repeatedly;
    each call replaces the previous root-logger configuration.
    """
    if verbosity <= 0:
        log_level = logging.WARNING
    elif verbosity < 2:
        log_level = logging.INFO
    else:
        log_level = logging.DEBUG

    logging.basicConfig(
        level=log_level,
        format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        # Without force=True, basicConfig does nothing once the root logger
        # has a handler, so a later call could never change the level.
        force=True,
    )

# Command-line entry point: parse arguments, merge them with the optional
# YAML config file, then run the conversion. Command-line values take
# precedence over config values.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='YNAB',
        description='Prepare bank transcripts for import to You Need A Budget',
    )

    parser.add_argument(
        'filenames',
        type=Path,
        nargs='*',
        help='The files to process',
    )

    parser.add_argument(
        '-o', '--output-dir',
        type=Path,
        default=None,
        help='The location to store the converted files',
    )

    parser.add_argument(
        '-v', '--verbose',
        default=0,
        action='count',
        help='Increase logging verbosity',
    )

    parser.add_argument(
        '-c', '--config',
        type=Path,
        help='Path to the config file',
    )

    args = parser.parse_args()

    setup_logging(args.verbose)

    config_path = args.config or Path.home() / '.config/ynab/config.yaml'
    if config_path.exists():
        with config_path.open('r') as config_file:
            # safe_load returns None for an empty document
            config = yaml.safe_load(config_file) or {}
        logging.debug(f"Loaded config file {config_path}")
    else:
        logging.debug(f"Could not find config file {config_path}")
        config = {}

    # The config verbosity only applies when no -v flag was given.
    verbosity = config.get('verbosity')
    if args.verbose == 0 and verbosity:
        setup_logging(verbosity)

    current_directory = Path.cwd()

    inputs = args.filenames
    if not inputs:
        # Config values are plain strings; the converter works with Path objects.
        configured_inputs = config.get('default_inputs') or []
        if isinstance(configured_inputs, str):
            configured_inputs = [configured_inputs]
        inputs = [Path(entry) for entry in configured_inputs]
    if not inputs:
        logging.info("Processing all files in current directory")
        inputs = [current_directory]

    output_dir = args.output_dir
    if output_dir is None and config.get('output_dir'):
        # Only build a Path when the key is present; Path(None) raises TypeError.
        output_dir = Path(config['output_dir'])
    if output_dir is None:
        output_dir = current_directory / "YNAB_Outputs"
        logging.debug(f"No output directory set. Defaulting to {output_dir}")

    convert_bank_statements_to_ynab(inputs, output_dir)

+ 0
- 384
ynab.py Parādīt failu

@@ -1,384 +0,0 @@
#!/usr/bin/env python3
"""
Bank Statement to YNAB Converter
Converts bank statements from various formats to YNAB-compatible CSV files
"""

import os
import sys
import glob
import re
import argparse
import logging
import yaml
import pandas as pd
from pathlib import Path

logger = logging.getLogger()

def parse_norwegian_number(value):
    """Convert a Norwegian-formatted number (comma decimal) to float.

    Accepts numbers or strings. Whitespace inside the value (regular or
    non-breaking spaces used as thousands separators) is removed, and a dot
    that appears before the decimal comma ("1.234,56") is treated as a
    thousands separator. Missing, empty or unparseable values yield 0.0.
    """
    if pd.isna(value) or value == '':
        return 0.0
    # str.split() without arguments splits on any Unicode whitespace, so this
    # also removes non-breaking spaces used as digit-group separators.
    text = ''.join(str(value).split())
    # Only drop dots when they clearly precede the decimal comma; otherwise
    # fall through and let float() reject the ambiguous value.
    if ',' in text and '.' in text and text.rfind('.') < text.rfind(','):
        text = text.replace('.', '')
    text = text.replace(',', '.')
    try:
        return float(text)
    except ValueError:
        return 0.0

def parse_norwegian_date(date_str):
    """Parse a DD.MM.YYYY date string.

    Returns a pandas Timestamp for a valid date, or '' when the value is
    missing or empty. An unparseable value is logged as an error and the
    process exits with status 1.
    """
    if pd.isna(date_str) or date_str == '':
        return ''
    try:
        return pd.to_datetime(date_str, format='%d.%m.%Y')
    except (ValueError, TypeError):
        logger.error(f"Invalid date format: {date_str}")
        # sys.exit rather than the site-provided exit(), which is meant for
        # interactive use and is not guaranteed to be defined.
        sys.exit(1)

def convert_memo(original):
    """Tidy a bank description so the descriptive text comes first.

    The text " Kurs: 1.0000" is removed, then leading space-separated tokens
    are examined one at a time until the first token that matches none of
    these rules:

    * empty tokens (from repeated spaces) are dropped;
    * a card suffix such as ``*1234`` is dropped;
    * a token starting with ``DD.MM`` (the date) is moved to the end;
    * a three-letter upper-case currency code followed by a decimal amount
      is dropped when the code is NOK, otherwise both tokens are moved to
      the end.

    Each token is examined at most once, so input made up only of movable
    tokens terminates. Missing values (None/NaN) yield an empty string;
    other non-string values are converted with ``str()``.
    """
    if not isinstance(original, str):
        # pandas represents empty cells as NaN (a float), which has no .replace
        original = "" if pd.isna(original) else str(original)

    original = original.replace(" Kurs: 1.0000", "")
    words = original.split(" ")

    moved = []  # tokens relocated to the end, in the order they were seen
    total = len(words)
    pos = 0
    while pos < total:
        word = words[pos]
        if word == "":
            # It's empty
            pos += 1
        elif re.match(r'\*(\d{4})', word):
            # It's the last four numbers of the card, ignore it
            pos += 1
        elif re.match(r'\d{2}\.\d{2}', word):
            # It's the date. Move it to the end
            moved.append(word)
            pos += 1
        elif (
            re.match(r'^[A-Z]{3}$', word)
            and pos + 1 < total  # a trailing code has no amount to inspect
            and re.match(r'[\d]+\.[\d]+', words[pos + 1])
        ):
            # It's the original currency; NOK is dropped, anything else is
            # kept but moved to the end together with its amount.
            if word != "NOK":
                moved.extend(words[pos:pos + 2])
            pos += 2
        else:
            break

    return " ".join(words[pos:] + moved)

def parse_bank_sor(data):
    """
    Convert a bank export with Status/Valuta/Beløp columns into YNAB rows.

    Only rows whose Status is "Bokført" are converted; every other row is
    skipped. Columns read: Status, Valuta, Beskrivelse (payee),
    Melding/KID/Fakt.nr (memo), Bokført dato (date), Beløp ut and Beløp inn.
    The Beløp ut value is negated to form the Outflow.

    Raises:
        ValueError: if a booked row has a Valuta other than NOK.
    """
    rows = []
    for _, entry in data.iterrows():
        is_booked = entry.get('Status') == "Bokført"
        if not is_booked:
            continue

        if entry.get('Valuta') != 'NOK':
            raise ValueError(f"Unknown currency {entry['Valuta']}")

        cleaned_payee = convert_memo(entry.get('Beskrivelse', ''))
        cleaned_memo = convert_memo(entry.get('Melding/KID/Fakt.nr', ''))

        ynab_row = {
            'Date': parse_norwegian_date(entry.get('Bokført dato')),
            'Payee': cleaned_payee,
            'Memo': cleaned_memo,
            'Outflow': -float(entry['Beløp ut'] or '0'),
            'Inflow': float(entry['Beløp inn'] or '0'),
        }
        rows.append(ynab_row)
    return pd.DataFrame(rows)

def parse_bank_sparebank1(data):
    """
    Convert a Sparebank 1 export into YNAB rows.

    Columns read from ``data``: Dato (date), Beskrivelse (payee),
    Til konto (memo), Inn (inflow) and Ut (outflow). Amounts go through
    ``parse_norwegian_number`` and dates through ``parse_norwegian_date``.
    A negative Ut value is sign-flipped so Outflow is never negative.

    Returns a DataFrame with the columns Date, Payee, Memo, Outflow, Inflow.
    """
    rows = []
    for _, entry in data.iterrows():
        money_in = parse_norwegian_number(entry.get('Inn'))
        money_out = parse_norwegian_number(entry.get('Ut'))
        # The export may list withdrawals as negative numbers.
        money_out = -money_out if money_out < 0 else money_out
        ynab_row = {
            'Date': parse_norwegian_date(entry.get('Dato', '')),
            'Payee': entry.get('Beskrivelse', ''),
            'Memo': entry.get('Til konto', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }
        rows.append(ynab_row)
    return pd.DataFrame(rows)


def parse_bank_norwegian(data):
    """
    Convert a Bank Norwegian statement into YNAB rows.

    Reads the columns TransactionDate, Text, Memo and Amount from every row
    of ``data``. A positive Amount is reported as Inflow; a negative Amount
    is sign-flipped and reported as Outflow. Missing columns fall back to an
    empty string (0 for Amount).

    Returns a DataFrame with the columns Date, Payee, Memo, Outflow, Inflow,
    or an empty DataFrame when ``data`` has no rows.
    """
    def to_ynab(entry):
        # Split the signed amount into two non-negative columns.
        signed = entry.get('Amount', 0)
        money_in = signed if signed > 0 else 0
        money_out = -signed if signed < 0 else 0
        return {
            'Date': entry.get('TransactionDate', ''),
            'Payee': entry.get('Text', ''),
            'Memo': entry.get('Memo', ''),
            'Outflow': money_out,
            'Inflow': money_in,
        }

    return pd.DataFrame([to_ynab(entry) for _, entry in data.iterrows()])


# Registry of supported banks, keyed by the bank name that is substituted
# into output filenames. Each entry holds:
#   "patterns"        - filename patterns used to find and recognise statement files
#   "output_filename" - str.format template; receives bank, first_date and last_date
#   "parse_function"  - callable that turns the loaded DataFrame into YNAB rows
#   "delimiter"       - optional CSV field delimiter (readers fall back to ",")
#   "encoding"        - optional CSV text encoding (readers fall back to "utf-8")
BANKS = {
"SparebankenNorge": {
"patterns": ["Transaksjoner*.csv"],
"encoding": "latin1",
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_sor,
"delimiter": ";"
},
"Sparebank1": {
"patterns": ["OversiktKonti*.csv"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_sparebank1,
"delimiter": ";"
},
"Norwegian": {
"patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
"output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
"parse_function": parse_bank_norwegian
}
# Add more banks and patterns as needed
}


def process_bank_statement(file_path, parse_function, delimiter, encoding):
    """
    Load one bank statement file and convert it with ``parse_function``.

    Args:
        file_path (str): Path to the bank statement file
        parse_function (callable): Receives the loaded DataFrame and returns
            the YNAB-compatible DataFrame
        delimiter (Optional[str]): Field delimiter, used for CSV files only
        encoding (Optional[str]): Text encoding, used for CSV files only
    Returns:
        pd.DataFrame: Processed YNAB-compatible data, or an empty DataFrame
        when the file extension is not .csv, .xlsx or .xls
    Raises:
        Exception: any error raised while reading or parsing is logged and
        re-raised unchanged
    """
    file_extension = Path(file_path).suffix.lower()
    if file_extension not in (".csv", ".xlsx", ".xls"):
        logger.warning(f"Skipping unsupported file type: {file_path}")
        return pd.DataFrame()

    try:
        if file_extension == ".csv":
            data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
        else:
            data = pd.read_excel(file_path)
        # Call the appropriate bank-specific parsing function
        return parse_function(data)
    except Exception as e:
        logger.error(f"Error processing file {file_path}: {e}")
        # Bare raise re-raises the active exception as-is.
        raise


def find_bank_config(filename):
    """
    Look up the bank whose filename patterns match ``filename``.

    Banks are tried in the order they appear in ``BANKS``; the first bank
    with any matching pattern wins.

    Args:
        filename (str): Name of the file to match
    Returns:
        tuple: (bank_name, bank_config) or (None, None) if no match
    """
    import fnmatch

    candidates = (
        (name, settings)
        for name, settings in BANKS.items()
        if any(fnmatch.fnmatch(filename, glob_pattern) for glob_pattern in settings["patterns"])
    )
    return next(candidates, (None, None))


def _collect_statement_files(input_paths):
    """Expand the given files and directories into a list of statement file Paths."""
    found = []
    for path in input_paths:
        path = Path(path)  # tolerate plain strings as well as Path objects
        if not path.exists():
            logger.warning(f"Path does not exist: {path}")
        elif path.is_file():
            found.append(path)
        elif path.is_dir():
            logger.debug(f"Looking for matching files in {path}")
            for bank_config in BANKS.values():
                for pattern in bank_config["patterns"]:
                    found.extend(Path(f) for f in glob.glob(str(path / pattern)))
    return found


def _unused_output_path(output_directory, stem):
    """Return ``<stem>.csv`` in output_directory, appending " (n)" until the name is free."""
    candidate = output_directory / f"{stem}.csv"
    attempt = 0
    while candidate.exists():
        attempt += 1
        candidate = output_directory / f"{stem} ({attempt}).csv"
    return candidate


def convert_bank_statements_to_ynab(input_paths, output_directory):
    """
    Convert bank statements to YNAB format

    Every input file that matches a bank in ``BANKS`` is parsed and written
    as a CSV file into ``output_directory``. Existing output files are never
    overwritten; a numeric suffix is added to the name instead.

    Args:
        input_paths (list): Specific files or directories to process.
            Directories are searched for files matching any bank pattern.
        output_directory (Path or str): Where the converted CSV files are
            written; created if it does not exist.
    """
    # Create output directory if it doesn't exist
    output_directory = Path(output_directory)
    output_directory.mkdir(exist_ok=True, parents=True)

    files_to_process = _collect_statement_files(input_paths)

    files_processed = False
    # Process each file
    logger.info(f"Processing {len(files_to_process)} file(s)...")
    for file_path in files_to_process:
        logger.debug(f"Processing {file_path}")
        if not file_path.exists():
            logger.warning(f"File not found: {file_path}")
            continue
        # Find matching bank configuration
        bank_name, bank_config = find_bank_config(file_path.name)
        if not bank_config:
            logger.warning(f"No bank configuration found for file: {file_path.name}")
            continue
        logger.info(f"Processing file: {file_path} for {bank_name}")
        parse_function = bank_config["parse_function"]
        delimiter = bank_config.get("delimiter", ",")
        encoding = bank_config.get("encoding", "utf-8")
        # Process the file
        ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
        if ynab_data.empty:
            logger.warning(f"No data processed for {file_path}")
            continue
        filename_placeholders = {
            'bank': bank_name,
            'first_date': ynab_data['Date'].min().date(),
            'last_date': ynab_data['Date'].max().date(),
        }
        output_stem = bank_config["output_filename"].format(**filename_placeholders)
        output_file = _unused_output_path(output_directory, output_stem)
        # Export to CSV for YNAB import
        ynab_data.to_csv(output_file, index=False)
        logger.info(f"Data saved to {output_file}")
        files_processed = True
    if not files_processed:
        logger.warning("No files were processed. Make sure your files match the expected patterns.")

def setup_logger(verbosity):
    """Configure the root logger from a verbosity count.

    A verbosity of 0 or less selects WARNING, values below 2 select INFO,
    and 2 or more selects DEBUG. The function may be called repeatedly;
    each call replaces the previous root-logger configuration.
    """
    if verbosity <= 0:
        log_level = logging.WARNING
    elif verbosity < 2:
        log_level = logging.INFO
    else:
        log_level = logging.DEBUG

    logging.basicConfig(
        level=log_level,
        format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
        datefmt='%Y-%m-%dT%H:%M:%S',
        # Without force=True, basicConfig does nothing once the root logger
        # has a handler, so a later call could never change the level.
        force=True,
    )

# Command-line entry point: parse arguments, merge them with the optional
# YAML config file, then run the conversion. Command-line values take
# precedence over config values.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        prog='YNAB',
        description='Prepare bank transcripts for import to You Need A Budget',
    )

    parser.add_argument(
        'filenames',
        type=Path,
        nargs='*',
        help='The files to process',
    )

    parser.add_argument(
        '-o', '--output-dir',
        type=Path,
        default=None,
        help='The location to store the converted files',
    )

    parser.add_argument(
        '-v', '--verbose',
        default=0,
        action='count',
        help='Increase logging verbosity',
    )

    parser.add_argument(
        '-c', '--config',
        type=Path,
        help='Path to the config file',
    )

    args = parser.parse_args()

    setup_logger(args.verbose)

    config_path = args.config or Path.home() / '.config/ynab/config.yaml'
    if config_path.exists():
        with config_path.open('r') as config_file:
            # safe_load returns None for an empty document
            config = yaml.safe_load(config_file) or {}
        logger.debug(f"Loaded config file {config_path}")
    else:
        logger.debug(f"Could not find config file {config_path}")
        config = {}

    # The config verbosity only applies when no -v flag was given.
    verbosity = config.get('verbosity')
    if args.verbose == 0 and verbosity:
        setup_logger(verbosity)

    current_directory = Path.cwd()

    inputs = args.filenames
    if not inputs:
        # Config values are plain strings; the converter works with Path objects.
        configured_inputs = config.get('default_inputs') or []
        if isinstance(configured_inputs, str):
            configured_inputs = [configured_inputs]
        inputs = [Path(entry) for entry in configured_inputs]
    if not inputs:
        logger.info("Processing all files in current directory")
        inputs = [current_directory]

    output_dir = args.output_dir
    if output_dir is None and config.get('output_dir'):
        # Only build a Path when the key is present; Path(None) raises TypeError.
        output_dir = Path(config['output_dir'])
    if output_dir is None:
        output_dir = current_directory / "YNAB_Outputs"
        logger.debug(f"No output directory set. Defaulting to {output_dir}")

    convert_bank_statements_to_ynab(inputs, output_dir)

Notiek ielāde…
Atcelt
Saglabāt