A script to convert CSV exported from Sparebanken Sør to a format YNAB can import
Du kannst nicht mehr als 25 Themen auswählen Themen müssen entweder mit einem Buchstaben oder einer Ziffer beginnen. Sie können Bindestriche („-“) enthalten und bis zu 35 Zeichen lang sein.

384 Zeilen
12KB

  1. #!/usr/bin/env python3
  2. """
  3. Bank Statement to YNAB Converter
  4. Converts bank statements from various formats to YNAB-compatible CSV files
  5. """
  6. import os
  7. import sys
  8. import glob
  9. import re
  10. import argparse
  11. import logging
  12. import yaml
  13. import pandas as pd
  14. from pathlib import Path
  15. logger = logging.getLogger()
  16. def parse_norwegian_number(value):
  17. """Convert Norwegian number format (comma decimal) to float"""
  18. if pd.isna(value) or value == '':
  19. return 0.0
  20. # Convert to string and replace comma with dot
  21. str_value = str(value).replace(',', '.')
  22. try:
  23. return float(str_value)
  24. except ValueError:
  25. return 0.0
  26. def parse_norwegian_date(date_str):
  27. """Convert DD.MM.YYYY format to YYYY-MM-DD"""
  28. if pd.isna(date_str) or date_str == '':
  29. return ''
  30. try:
  31. # Parse DD.MM.YYYY and convert to date object
  32. return pd.to_datetime(date_str, format='%d.%m.%Y')
  33. except (ValueError, TypeError):
  34. logger.error(f"Invalid date format: {date_str}")
  35. exit(1)
  36. def convert_memo(original):
  37. original = original.replace(" Kurs: 1.0000", "")
  38. words = original.split(" ")
  39. while len(words) > 0:
  40. if words[0] == "":
  41. # It's empty
  42. del words[0]
  43. elif m := re.match(r'\*(\d{4})', words[0]):
  44. # It's the last four numbers of the card, ignore it
  45. del words[0]
  46. elif m := re.match(r'\d{2}\.\d{2}', words[0]):
  47. # It's the date. Move it to the end
  48. words.append(words.pop(0))
  49. elif (m1 := re.match(r'^[A-Z]{3}$', words[0])) and (m2 := re.match(r'[\d]+\.[\d]+', words[1])):
  50. # It's the original currency
  51. if words[0] == "NOK":
  52. # It's Norwegian kroner, ignoring
  53. del words[0]
  54. del words[0]
  55. else:
  56. # It's some other currency, move it to the end
  57. words.append(words.pop(0))
  58. words.append(words.pop(0))
  59. else:
  60. break
  61. return " ".join(words)
  62. def parse_bank_sor(data):
  63. """
  64. Parse Sparebank 1 bank data
  65. Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
  66. """
  67. result = []
  68. for _, row in data.iterrows():
  69. if row.get('Status') != "Bokført":
  70. continue
  71. if row.get('Valuta') != 'NOK':
  72. raise ValueError(f"Unknown currency {row['Valuta']}")
  73. payee = convert_memo(row.get('Beskrivelse', ''))
  74. memo = convert_memo(row.get('Melding/KID/Fakt.nr', ''))
  75. result.append({
  76. 'Date': parse_norwegian_date(row.get('Bokført dato')),
  77. 'Payee': payee,
  78. 'Memo': memo,
  79. 'Outflow': -float(row['Beløp ut'] or '0'),
  80. 'Inflow': float(row['Beløp inn'] or '0'),
  81. })
  82. return pd.DataFrame(result)
  83. def parse_bank_sparebank1(data):
  84. """
  85. Parse Sparebank 1 bank data
  86. Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
  87. """
  88. result = []
  89. for _, row in data.iterrows():
  90. inflow = parse_norwegian_number(row.get('Inn'))
  91. outflow = parse_norwegian_number(row.get('Ut'))
  92. # Convert outflow to positive if negative
  93. if outflow < 0:
  94. outflow = -outflow
  95. result.append({
  96. 'Date': parse_norwegian_date(row.get('Dato', '')),
  97. 'Payee': row.get('Beskrivelse', ''),
  98. 'Memo': row.get('Til konto', ''),
  99. 'Outflow': outflow,
  100. 'Inflow': inflow
  101. })
  102. return pd.DataFrame(result)
  103. def parse_bank_norwegian(data):
  104. """
  105. Parse Norwegian bank data
  106. Expected columns: TransactionDate, Text, Memo, Amount
  107. """
  108. result = []
  109. for _, row in data.iterrows():
  110. amount = row.get('Amount', 0)
  111. inflow = amount if amount > 0 else 0
  112. outflow = -amount if amount < 0 else 0 # Make outflow positive
  113. result.append({
  114. 'Date': row.get('TransactionDate', ''),
  115. 'Payee': row.get('Text', ''),
  116. 'Memo': row.get('Memo', ''),
  117. 'Outflow': outflow,
  118. 'Inflow': inflow
  119. })
  120. return pd.DataFrame(result)
  121. # Dictionary of banks, filename patterns, and parsing functions
  122. BANKS = {
  123. "SparebankenNorge": {
  124. "patterns": ["Transaksjoner*.csv"],
  125. "encoding": "latin1",
  126. "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
  127. "parse_function": parse_bank_sor,
  128. "delimiter": ";"
  129. },
  130. "Sparebank1": {
  131. "patterns": ["OversiktKonti*.csv"],
  132. "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
  133. "parse_function": parse_bank_sparebank1,
  134. "delimiter": ";"
  135. },
  136. "Norwegian": {
  137. "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
  138. "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
  139. "parse_function": parse_bank_norwegian
  140. }
  141. # Add more banks and patterns as needed
  142. }
  143. def process_bank_statement(file_path, parse_function, delimiter, encoding):
  144. """
  145. Process a single bank statement file
  146. Args:
  147. file_path (str): Path to the bank statement file
  148. parse_function (callable): Function to parse the specific bank format
  149. delimiter (Optional<str>): Field delimiter
  150. Returns:
  151. pd.DataFrame: Processed YNAB-compatible data
  152. """
  153. file_extension = Path(file_path).suffix.lower()
  154. try:
  155. # Handle CSV files
  156. if file_extension == ".csv":
  157. data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
  158. # Handle Excel files
  159. elif file_extension in [".xlsx", ".xls"]:
  160. data = pd.read_excel(file_path)
  161. else:
  162. logger.warning(f"Skipping unsupported file type: {file_path}")
  163. return pd.DataFrame()
  164. # Call the appropriate bank-specific parsing function
  165. ynab_data = parse_function(data)
  166. return ynab_data
  167. except Exception as e:
  168. logger.error(f"Error processing file {file_path}: {e}")
  169. raise e
  170. return pd.DataFrame()
  171. def find_bank_config(filename):
  172. """
  173. Find the appropriate bank configuration for a given filename
  174. Args:
  175. filename (str): Name of the file to match
  176. Returns:
  177. tuple: (bank_name, bank_config) or (None, None) if no match
  178. """
  179. import fnmatch
  180. for bank_name, bank_config in BANKS.items():
  181. for pattern in bank_config["patterns"]:
  182. if fnmatch.fnmatch(filename, pattern):
  183. return bank_name, bank_config
  184. return None, None
  185. def convert_bank_statements_to_ynab(input_paths, output_directory):
  186. """
  187. Convert bank statements to YNAB format
  188. Args:
  189. input_paths (list): List of specific files or directories to process
  190. """
  191. # Create output directory if it doesn't exist
  192. output_directory.mkdir(exist_ok=True, parents=True)
  193. # Get list of files to process
  194. files_to_process = []
  195. for path in input_paths:
  196. if not path.exists():
  197. logger.warning(f"Path does not exist: {file_path}")
  198. elif path.is_file():
  199. files_to_process.append(path)
  200. elif path.is_dir():
  201. logger.debug(f"Looking for matching files in {path}")
  202. for bank_config in BANKS.values():
  203. for pattern in bank_config["patterns"]:
  204. matching_files = glob.glob(str(path / pattern))
  205. files_to_process.extend([Path(f) for f in matching_files])
  206. files_processed = False
  207. # Process each file
  208. logger.info(f"Processing {len(files_to_process)} file(s)...")
  209. for file_path in files_to_process:
  210. logger.debug(f"Processing {file_path}")
  211. if not file_path.exists():
  212. logger.warning(f"File not found: {file_path}")
  213. continue
  214. # Find matching bank configuration
  215. bank_name, bank_config = find_bank_config(file_path.name)
  216. if not bank_config:
  217. logger.warning(f"No bank configuration found for file: {file_path.name}")
  218. continue
  219. logger.info(f"Processing file: {file_path} for {bank_name}")
  220. parse_function = bank_config["parse_function"]
  221. delimiter = bank_config.get("delimiter", ",")
  222. encoding = bank_config.get("encoding", "utf-8")
  223. # Process the file
  224. ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
  225. if ynab_data.empty:
  226. logger.warning(f"No data processed for {file_path}")
  227. continue
  228. filename_placeholders = {
  229. 'bank': bank_name,
  230. 'first_date': ynab_data['Date'].min().date(),
  231. 'last_date': ynab_data['Date'].max().date(),
  232. }
  233. file_retry_count = 0
  234. while True:
  235. output_filename = bank_config["output_filename"].format(**filename_placeholders)
  236. if file_retry_count > 0:
  237. output_filename += f" ({file_retry_count})"
  238. output_filename += ".csv"
  239. output_file = output_directory / output_filename
  240. if not output_file.exists():
  241. break
  242. file_retry_count += 1
  243. # Export to CSV for YNAB import
  244. ynab_data.to_csv(output_file, index=False)
  245. logger.info(f"Data saved to {output_file}")
  246. files_processed = True
  247. if not files_processed:
  248. logger.warning("No files were processed. Make sure your files match the expected patterns.")
  249. def setup_logger(verbosity):
  250. if verbosity <= 0:
  251. log_level = logging.WARNING
  252. elif verbosity == 1:
  253. log_level = logging.INFO
  254. elif verbosity >= 2:
  255. log_level = logging.DEBUG
  256. logging.basicConfig(
  257. level=log_level,
  258. format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
  259. datefmt='%Y-%m-%dT%H:%M:%S',
  260. )
  261. if __name__ == "__main__":
  262. parser = argparse.ArgumentParser(
  263. prog='YNAB',
  264. description='Prepare bank transcripts for import to You Need A Budget',
  265. )
  266. parser.add_argument(
  267. 'filenames',
  268. type=Path,
  269. nargs='*',
  270. help='The files to process',
  271. )
  272. parser.add_argument(
  273. '-o', '--output-dir',
  274. type=Path,
  275. default=None,
  276. help='The location to store the converted files',
  277. )
  278. parser.add_argument(
  279. '-v', '--verbose',
  280. default=0,
  281. action='count',
  282. help='Increase logging verbosity',
  283. )
  284. parser.add_argument(
  285. '-c', '--config',
  286. type=Path,
  287. help='Path to the config file',
  288. )
  289. args = parser.parse_args()
  290. setup_logger(args.verbose)
  291. config_path = args.config or Path.home() / '.config/ynab/config.yaml'
  292. if config_path and config_path.exists():
  293. with config_path.open('r') as config_file:
  294. config = yaml.safe_load(config_file)
  295. logger.debug(f"Loaded config file {config_path}")
  296. else:
  297. logger.debug(f"Could not find config file {config_path}")
  298. config = {}
  299. verbosity = config.get('verbosity')
  300. if args.verbose == 0 and verbosity:
  301. setup_logger(verbosity)
  302. current_directory = Path.cwd()
  303. inputs = args.filenames or config.get('default_inputs')
  304. if not inputs:
  305. logger.info("Processing all files in current directory")
  306. inputs = [current_directory]
  307. output_dir = args.output_dir or Path(config.get('output_dir'))
  308. if not output_dir:
  309. output_dir = current_directory / "YNAB_Outputs"
  310. logger.debug(f"No output directory set. Defaulting to {output_dir}")
  311. convert_bank_statements_to_ynab(inputs, output_dir)