A script to convert CSV exported from Sparebanken Sør to a format YNAB can import
25개 이상의 토픽을 선택하실 수 없습니다. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

316 lines
9.8KB

  1. #!/usr/bin/env python3
  2. """
  3. Bank Statement to YNAB Converter
  4. Converts bank statements from various formats to YNAB-compatible CSV files
  5. """
  6. import os
  7. import sys
  8. import glob
  9. import re
  10. import pandas as pd
  11. from pathlib import Path
  12. def parse_norwegian_number(value):
  13. """Convert Norwegian number format (comma decimal) to float"""
  14. if pd.isna(value) or value == '':
  15. return 0.0
  16. # Convert to string and replace comma with dot
  17. str_value = str(value).replace(',', '.')
  18. try:
  19. return float(str_value)
  20. except ValueError:
  21. return 0.0
  22. def parse_norwegian_date(date_str):
  23. """Convert DD.MM.YYYY format to YYYY-MM-DD"""
  24. if pd.isna(date_str) or date_str == '':
  25. return ''
  26. try:
  27. # Parse DD.MM.YYYY and convert to date object
  28. return pd.to_datetime(date_str, format='%d.%m.%Y')
  29. except (ValueError, TypeError):
  30. print(f"Invalid date format: {date_str}")
  31. exit(1)
  32. def convert_memo(original):
  33. original = original.replace(" Kurs: 1.0000", "")
  34. words = original.split(" ")
  35. while len(words) > 0:
  36. if words[0] == "":
  37. # It's empty
  38. del words[0]
  39. elif m := re.match(r'\*(\d{4})', words[0]):
  40. # It's the last four numbers of the card, ignore it
  41. del words[0]
  42. elif m := re.match(r'\d{2}\.\d{2}', words[0]):
  43. # It's the date. Move it to the end
  44. words.append(words.pop(0))
  45. elif (m1 := re.match(r'^[A-Z]{3}$', words[0])) and (m2 := re.match(r'[\d]+\.[\d]+', words[1])):
  46. # It's the original currency
  47. if words[0] == "NOK":
  48. # It's Norwegian kroner, ignoring
  49. del words[0]
  50. del words[0]
  51. else:
  52. # It's some other currency, move it to the end
  53. words.append(words.pop(0))
  54. words.append(words.pop(0))
  55. else:
  56. break
  57. return " ".join(words)
  58. def parse_bank_sor(data):
  59. """
  60. Parse Sparebank 1 bank data
  61. Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
  62. """
  63. result = []
  64. for _, row in data.iterrows():
  65. if row.get('Status') != "Bokført":
  66. continue
  67. if row.get('Valuta') != 'NOK':
  68. raise ValueError(f"Unknown currency {row['Valuta']}")
  69. payee = convert_memo(row.get('Beskrivelse', ''))
  70. memo = convert_memo(row.get('Melding/KID/Fakt.nr', ''))
  71. result.append({
  72. 'Date': parse_norwegian_date(row.get('Bokført dato')),
  73. 'Payee': payee,
  74. 'Memo': memo,
  75. 'Outflow': -float(row['Beløp ut'] or '0'),
  76. 'Inflow': float(row['Beløp inn'] or '0'),
  77. })
  78. return pd.DataFrame(result)
  79. def parse_bank_sparebank1(data):
  80. """
  81. Parse Sparebank 1 bank data
  82. Expected columns: Dato, Beskrivelse, Rentedato, Inn, Ut, Til konto, Fra konto
  83. """
  84. result = []
  85. for _, row in data.iterrows():
  86. inflow = parse_norwegian_number(row.get('Inn'))
  87. outflow = parse_norwegian_number(row.get('Ut'))
  88. # Convert outflow to positive if negative
  89. if outflow < 0:
  90. outflow = -outflow
  91. result.append({
  92. 'Date': parse_norwegian_date(row.get('Dato', '')),
  93. 'Payee': row.get('Beskrivelse', ''),
  94. 'Memo': row.get('Til konto', ''),
  95. 'Outflow': outflow,
  96. 'Inflow': inflow
  97. })
  98. return pd.DataFrame(result)
  99. def parse_bank_norwegian(data):
  100. """
  101. Parse Norwegian bank data
  102. Expected columns: TransactionDate, Text, Memo, Amount
  103. """
  104. result = []
  105. for _, row in data.iterrows():
  106. amount = row.get('Amount', 0)
  107. inflow = amount if amount > 0 else 0
  108. outflow = -amount if amount < 0 else 0 # Make outflow positive
  109. result.append({
  110. 'Date': row.get('TransactionDate', ''),
  111. 'Payee': row.get('Text', ''),
  112. 'Memo': row.get('Memo', ''),
  113. 'Outflow': outflow,
  114. 'Inflow': inflow
  115. })
  116. return pd.DataFrame(result)
  117. # Dictionary of banks, filename patterns, and parsing functions
  118. BANKS = {
  119. "SparebankenNorge": {
  120. "patterns": ["Transaksjoner*.csv"],
  121. "encoding": "latin1",
  122. "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
  123. "parse_function": parse_bank_sor,
  124. "delimiter": ";"
  125. },
  126. "Sparebank1": {
  127. "patterns": ["OversiktKonti*.csv"],
  128. "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
  129. "parse_function": parse_bank_sparebank1,
  130. "delimiter": ";"
  131. },
  132. "Norwegian": {
  133. "patterns": ["BankNorwegian*.xlsx", "Statement*.xlsx"],
  134. "output_filename": "YNAB-{bank}-FROM-{first_date}-TO-{last_date}",
  135. "parse_function": parse_bank_norwegian
  136. }
  137. # Add more banks and patterns as needed
  138. }
  139. def process_bank_statement(file_path, parse_function, delimiter, encoding):
  140. """
  141. Process a single bank statement file
  142. Args:
  143. file_path (str): Path to the bank statement file
  144. parse_function (callable): Function to parse the specific bank format
  145. delimiter (Optional<str>): Field delimiter
  146. Returns:
  147. pd.DataFrame: Processed YNAB-compatible data
  148. """
  149. file_extension = Path(file_path).suffix.lower()
  150. try:
  151. # Handle CSV files
  152. if file_extension == ".csv":
  153. data = pd.read_csv(file_path, delimiter=delimiter, encoding=encoding)
  154. # Handle Excel files
  155. elif file_extension in [".xlsx", ".xls"]:
  156. data = pd.read_excel(file_path)
  157. else:
  158. print(f"Skipping unsupported file type: {file_path}")
  159. return pd.DataFrame()
  160. # Call the appropriate bank-specific parsing function
  161. ynab_data = parse_function(data)
  162. return ynab_data
  163. except Exception as e:
  164. print(f"Error processing file {file_path}: {e}")
  165. raise e
  166. return pd.DataFrame()
  167. def find_bank_config(filename):
  168. """
  169. Find the appropriate bank configuration for a given filename
  170. Args:
  171. filename (str): Name of the file to match
  172. Returns:
  173. tuple: (bank_name, bank_config) or (None, None) if no match
  174. """
  175. import fnmatch
  176. for bank_name, bank_config in BANKS.items():
  177. for pattern in bank_config["patterns"]:
  178. if fnmatch.fnmatch(filename, pattern):
  179. return bank_name, bank_config
  180. return None, None
  181. def convert_bank_statements_to_ynab(input_files=None):
  182. """
  183. Convert bank statements to YNAB format
  184. Args:
  185. input_files (list): Optional list of specific files to process
  186. If None, processes all files in current directory
  187. """
  188. current_directory = Path.cwd()
  189. output_directory = current_directory / "YNAB_Outputs"
  190. # Create output directory if it doesn't exist
  191. output_directory.mkdir(exist_ok=True)
  192. # Get list of files to process
  193. if input_files:
  194. print(f"Processing {len(input_files)} dragged file(s)...")
  195. files_to_process = [Path(f) for f in input_files if Path(f).exists()]
  196. else:
  197. print("Processing all files in current directory...")
  198. files_to_process = []
  199. # Collect all files matching any bank pattern
  200. for bank_config in BANKS.values():
  201. for pattern in bank_config["patterns"]:
  202. matching_files = glob.glob(str(current_directory / pattern))
  203. files_to_process.extend([Path(f) for f in matching_files])
  204. files_processed = False
  205. # Process each file
  206. for file_path in files_to_process:
  207. if not file_path.exists():
  208. print(f"File not found: {file_path}")
  209. continue
  210. # Find matching bank configuration
  211. bank_name, bank_config = find_bank_config(file_path.name)
  212. if not bank_config:
  213. print(f"No bank configuration found for file: {file_path.name}")
  214. continue
  215. print(f"Processing file: {file_path} for {bank_name}")
  216. parse_function = bank_config["parse_function"]
  217. delimiter = bank_config.get("delimiter", ",")
  218. encoding = bank_config.get("encoding", "utf-8")
  219. # Process the file
  220. ynab_data = process_bank_statement(str(file_path), parse_function, delimiter, encoding)
  221. if ynab_data.empty:
  222. print(f"No data processed for {file_path}")
  223. continue
  224. filename_placeholders = {
  225. 'bank': bank_name,
  226. 'first_date': ynab_data['Date'].min().date(),
  227. 'last_date': ynab_data['Date'].max().date(),
  228. }
  229. file_retry_count = 0
  230. while True:
  231. output_filename = bank_config["output_filename"].format(**filename_placeholders)
  232. if file_retry_count > 0:
  233. output_filename += f" ({file_retry_count})"
  234. output_filename += ".csv"
  235. output_file = output_directory / output_filename
  236. if not output_file.exists():
  237. break
  238. file_retry_count += 1
  239. # Export to CSV for YNAB import
  240. ynab_data.to_csv(output_file, index=False)
  241. print(f"Data saved to {output_file}")
  242. files_processed = True
  243. if not files_processed:
  244. print("No files were processed. Make sure your files match the expected patterns.")
  245. if __name__ == "__main__":
  246. # Check if files were dragged onto the script
  247. if len(sys.argv) > 1:
  248. # Files were dragged - process them
  249. files = sys.argv[1:]
  250. convert_bank_statements_to_ynab(files)
  251. else:
  252. # No files dragged - run normal directory processing
  253. convert_bank_statements_to_ynab()
  254. # Keep window open on Mac so user can see results
  255. input("\nPress Enter to close...")