A script to convert CSV exported from Sparebanken Sør to a format YNAB can import
No puede seleccionar más de 25 temas Los temas deben comenzar con una letra o número, pueden incluir guiones ('-') y pueden tener hasta 35 caracteres de largo.

351 líneas
11KB

  1. #!/usr/bin/env python3
  2. """
  3. Bank Statement to YNAB Converter
  4. Converts bank statements from various formats to YNAB-compatible CSV files
  5. """
  6. import os
  7. import sys
  8. import glob
  9. import re
  10. import argparse
  11. import logging
  12. import pandas as pd
  13. from pathlib import Path
  14. logger = logging.getLogger(__name__)
  15. def parse_norwegian_number(value):
  16. """Convert Norwegian number format (comma decimal) to float"""
  17. if pd.isna(value) or value == '':
  18. return 0.0
  19. # Convert to string and replace comma with dot
  20. str_value = str(value).replace(',', '.')
  21. try:
  22. return float(str_value)
  23. except ValueError:
  24. return 0.0
  25. def parse_norwegian_date(date_str):
  26. """Convert DD.MM.YYYY format to YYYY-MM-DD"""
  27. if pd.isna(date_str) or date_str == '':
  28. return ''
  29. try:
  30. # Parse DD.MM.YYYY and convert to date object
  31. return pd.to_datetime(date_str, format='%d.%m.%Y')
  32. except (ValueError, TypeError):
  33. logger.error(f"Invalid date format: {date_str}")
  34. exit(1)
  35. def convert_memo(original):
  36. original = original.replace(" Kurs: 1.0000", "")
  37. words = original.split(" ")
  38. while len(words) > 0:
  39. if words[0] == "":
  40. # It's empty
  41. del words[0]
  42. elif m := re.match(r'\*(\d{4})', words[0]):
  43. # It's the last four numbers of the card, ignore it
  44. del words[0]
  45. elif m := re.match(r'\d{2}\.\d{2}', words[0]):
  46. # It's the date. Move it to the end
  47. words.append(words.pop(0))
  48. elif (m1 := re.match(r'^[A-Z]{3}$', words[0])) and (m2 := re.match(r'[\d]+\.[\d]+', words[1])):
  49. # It's the original currency
  50. if words[0] == "NOK":
  51. # It's Norwegian kroner, ignoring
  52. del words[0]
  53. del words[0]
  54. else:
  55. # It's some other currency, move it to the end
  56. words.append(words.pop(0))
  57. words.append(words.pop(0))
  58. else:
  59. break
  60. return " ".join(words)
  61. def parse_bank_sor(data):
  62. """
  63. Parse Sparebank 1 bank data
  64. Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
  65. """
  66. result = []
  67. for _, row in data.iterrows():
  68. if row.get('Status') != "Bokført":
  69. continue
  70. if row.get('Valuta') != 'NOK':
  71. raise ValueError(f"Unknown currency {row['Valuta']}")
  72. payee = convert_memo(row.get('Beskrivelse', ''))
  73. memo = convert_memo(row.get('Melding/KID/Fakt.nr', ''))
  74. result.append({
  75. 'Date': parse_norwegian_date(row.get('Bokført dato')),
  76. 'Payee': payee,
  77. 'Memo': memo,
  78. 'Outflow': -float(row['Beløp ut'] or '0'),
  79. 'Inflow': float(row['Beløp inn'] or '0'),
  80. })
  81. return pd.DataFrame(result)
  82. def parse_bank_sparebank1(data):
  83. """
  84. Parse Sparebank 1 bank data
  85. Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
  86. """
  87. result = []
  88. for _, row in data.iterrows():
  89. inflow = parse_norwegian_number(row.get('Inn'))
  90. outflow = parse_norwegian_number(row.get('Ut'))
  91. # Convert outflow to positive if negative
  92. if outflow < 0:
  93. outflow = -outflow
  94. result.append({
  95. 'Date': parse_norwegian_date(row.get('Dato', '')),
  96. 'Payee': row.get('Beskrivelse', ''),
  97. 'Memo': row.get('Til konto', ''),
  98. 'Outflow': outflow,
  99. 'Inflow': inflow
  100. })
  101. return pd.DataFrame(result)
  102. def parse_bank_norwegian(data):
  103. """
  104. Parse Norwegian bank data
  105. Expected columns: TransactionDate, Text, Memo, Amount
  106. """
  107. result = []
  108. for _, row in data.iterrows():
  109. amount = row.get('Amount', 0)
  110. inflow = amount if amount > 0 else 0
  111. outflow = -amount if amount < 0 else 0 # Make outflow positive
  112. result.append({
  113. 'Date': row.get('TransactionDate', ''),
  114. 'Payee': row.get('Text', ''),
  115. 'Memo': row.get('Memo', ''),
  116. 'Outflow': outflow,
  117. 'Inflow': inflow
  118. })
  119. return pd.DataFrame(result)
# Dictionary of banks, filename patterns, and parsing functions.
#
# Each entry maps a bank name (used for the {bank} placeholder of the output
# filename) to its settings:
#   "patterns"        - fnmatch/glob-style filename patterns identifying the
#                       bank's export files
#   "output_filename" - format template for the output file name, without the
#                       ".csv" extension
#   "parse_function"  - callable that turns the loaded DataFrame into YNAB rows
#   "delimiter"       - optional CSV field delimiter (callers default to ",")
#   "encoding"        - optional CSV text encoding (callers default to "utf-8")
#
# Entries are checked in insertion order and the first matching pattern wins.
BANKS = {
    "SparebankenNorge": {
        "patterns": ["Transaksjoner*.csv"],
        "encoding": "latin1",
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sor,
        "delimiter": ";"
    },
    "Sparebank1": {
        "patterns": ["OversiktKonti*.csv"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_sparebank1,
        "delimiter": ";"
    },
    "Norwegian": {
        "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
        "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
        "parse_function": parse_bank_norwegian
    }
    # Add more banks and patterns as needed
}
  142. def process_bank_statement(file_path, parse_function, delimiter, encoding):
  143. """
  144. Process a single bank statement file
  145. Args:
  146. file_path (str): Path to the bank statement file
  147. parse_function (callable): Function to parse the specific bank format
  148. delimiter (Optional<str>): Field delimiter
  149. Returns:
  150. pd.DataFrame: Processed YNAB-compatible data
  151. """
  152. file_extension = Path(file_path).suffix.lower()
  153. try:
  154. # Handle CSV files
  155. if file_extension == ".csv":
  156. data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
  157. # Handle Excel files
  158. elif file_extension in [".xlsx", ".xls"]:
  159. data = pd.read_excel(file_path)
  160. else:
  161. logger.warning(f"Skipping unsupported file type: {file_path}")
  162. return pd.DataFrame()
  163. # Call the appropriate bank-specific parsing function
  164. ynab_data = parse_function(data)
  165. return ynab_data
  166. except Exception as e:
  167. logger.error(f"Error processing file {file_path}: {e}")
  168. raise e
  169. return pd.DataFrame()
  170. def find_bank_config(filename):
  171. """
  172. Find the appropriate bank configuration for a given filename
  173. Args:
  174. filename (str): Name of the file to match
  175. Returns:
  176. tuple: (bank_name, bank_config) or (None, None) if no match
  177. """
  178. import fnmatch
  179. for bank_name, bank_config in BANKS.items():
  180. for pattern in bank_config["patterns"]:
  181. if fnmatch.fnmatch(filename, pattern):
  182. return bank_name, bank_config
  183. return None, None
  184. def convert_bank_statements_to_ynab(input_files, output_directory):
  185. """
  186. Convert bank statements to YNAB format
  187. Args:
  188. input_files (list): List of specific files to process
  189. If empty, processes all files in current directory
  190. """
  191. current_directory = Path.cwd()
  192. if not output_directory:
  193. output_directory = current_directory / "YNAB_Outputs"
  194. logger.debug(f"No output directory set. Defaulting to {output_directory}")
  195. # Create output directory if it doesn't exist
  196. output_directory.mkdir(exist_ok=True, parents=True)
  197. # Get list of files to process
  198. if input_files:
  199. logger.info(f"Processing {len(input_files)} dragged file(s)...")
  200. files_to_process = [Path(f) for f in input_files if Path(f).exists()]
  201. else:
  202. logger.info("Processing all files in current directory...")
  203. logger.debug(f"Current directory is {current_directory}")
  204. files_to_process = []
  205. # Collect all files matching any bank pattern
  206. for bank_config in BANKS.values():
  207. for pattern in bank_config["patterns"]:
  208. matching_files = glob.glob(str(current_directory / pattern))
  209. files_to_process.extend([Path(f) for f in matching_files])
  210. files_processed = False
  211. # Process each file
  212. for file_path in files_to_process:
  213. if not file_path.exists():
  214. logger.warning(f"File not found: {file_path}")
  215. continue
  216. # Find matching bank configuration
  217. bank_name, bank_config = find_bank_config(file_path.name)
  218. if not bank_config:
  219. logger.warning(f"No bank configuration found for file: {file_path.name}")
  220. continue
  221. logger.info(f"Processing file: {file_path} for {bank_name}")
  222. parse_function = bank_config["parse_function"]
  223. delimiter = bank_config.get("delimiter", ",")
  224. encoding = bank_config.get("encoding", "utf-8")
  225. # Process the file
  226. ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
  227. if ynab_data.empty:
  228. logger.warning(f"No data processed for {file_path}")
  229. continue
  230. filename_placeholders = {
  231. 'bank': bank_name,
  232. 'first_date': ynab_data['Date'].min().date(),
  233. 'last_date': ynab_data['Date'].max().date(),
  234. }
  235. file_retry_count = 0
  236. while True:
  237. output_filename = bank_config["output_filename"].format(**filename_placeholders)
  238. if file_retry_count > 0:
  239. output_filename += f" ({file_retry_count})"
  240. output_filename += ".csv"
  241. output_file = output_directory / output_filename
  242. if not output_file.exists():
  243. break
  244. file_retry_count += 1
  245. # Export to CSV for YNAB import
  246. ynab_data.to_csv(output_file, index=False)
  247. logger.info(f"Data saved to {output_file}")
  248. files_processed = True
  249. if not files_processed:
  250. logger.warning("No files were processed. Make sure your files match the expected patterns.")
  251. if __name__ == "__main__":
  252. parser = argparse.ArgumentParser(
  253. prog='YNAB',
  254. description='Prepare bank transcripts for import to You Need A Budget',
  255. )
  256. parser.add_argument(
  257. 'filenames',
  258. type=Path,
  259. nargs='*',
  260. help='The files to process',
  261. )
  262. parser.add_argument(
  263. '-o', '--output-dir',
  264. type=Path,
  265. default=None,
  266. help='The location to store the converted files',
  267. )
  268. parser.add_argument(
  269. '-v', '--verbose',
  270. default=0,
  271. action='count',
  272. help='Increase logging verbosity',
  273. )
  274. args = parser.parse_args()
  275. if args.verbose <= 0:
  276. log_level = logging.WARNING
  277. elif args.verbose == 1:
  278. log_level = logging.INFO
  279. elif args.verbose >= 2:
  280. log_level = logging.DEBUG
  281. logging.basicConfig(level=log_level)
  282. convert_bank_statements_to_ynab(args.filenames, args.output_dir)