-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
94b7953
commit 6ed6de6
Showing
7 changed files
with
479 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
House payments | ||
Utilities | ||
Food | ||
Transport | ||
Health | ||
Subscriptions | ||
Insurance | ||
Entertainment | ||
Other | ||
Unknown |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,10 @@ | ||
""" | ||
Generic config variables | ||
""" | ||
|
||
from pathlib import Path | ||
|
||
# Absolute path of the directory that contains this config module.
PROJECT_FOLDER = Path(__file__).parent.resolve()

# Generated artefacts are written to <project>/output; the folder is created
# at import time when it does not exist yet.
OUTPUT_FOLDER = PROJECT_FOLDER.joinpath("output")
OUTPUT_FOLDER.mkdir(exist_ok=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,62 +1,35 @@ | ||
import pandas as pd | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
|
||
# Load data
df = pd.read_csv("data/NL38INGB0001546874_01-01-2024_18-08-2024.csv",
                 sep=";")
df = df[["Date", "Name / Description", "Amount (EUR)", "Transaction type", "Notifications"]]

# Rename
df = df.rename(columns={"Date": "date",
                        "Name / Description": "description",
                        "Amount (EUR)": "amount",
                        "Transaction type": "category",
                        "Notifications": "extra"})

# Set date format
df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
# To numeric (the export uses a decimal comma)
df["amount"] = df["amount"].str.replace(",", ".")
df["amount"] = pd.to_numeric(df["amount"])

# Get unique transaction categories (in order of first appearance)
categories = df["category"].unique().tolist()
print(categories)

# Split
# groupby() sorts its keys, whereas unique() keeps order of appearance, so the
# labels must come from the grouped result itself to match the wedge sizes.
category_totals = df.groupby("category")["amount"].sum()
plt.pie(category_totals, labels=category_totals.index, autopct='%.0f%%')
plt.show()

# Categorize
"""
Online Banking = Moving between my accounts | Manual payments to other accounts
Batch payment = Retour. Only 1 instance
Cash machine = Cash withdrawal (ATM)
Deposit = Cash deposit (ATM)
Transfer = Rounding feature from savings | Retour
SEPA direct debit = Automatic debit payments
Various = ING account payments | Credit card repayment
Payment terminal = Payments at card machine
iDEAL = Payments via iDEAL
____________________________________________________________________
Drop:
- Batch payment
- Deposit
- Transfer
Split:
- Online banking --> Remove if extra contains 'From Oranje spaarrekening'
Keep:
- Online banking
- SEPA direct debit
- Various
- Payment terminal
- iDEAL
- Cash machine
"""
import json | ||
|
||
from config import PROJECT_FOLDER, OUTPUT_FOLDER | ||
from main.load import ing_loader | ||
from transformers import pipeline | ||
|
||
# Load and preprocess data
df = ing_loader(
    PROJECT_FOLDER / "data" / "NL38INGB0001546874_01-01-2024_18-08-2024.csv"
)

# Set up zero-shot classifier
# NOTE(review): device=0 presumably pins the model to the first GPU; confirm
# the intended behaviour on machines without one.
zeroshot_classifier = pipeline(
    "zero-shot-classification",
    model="MoritzLaurer/deberta-v3-large-zeroshot-v2.0",
    device=0,
)

# Read budget categories from categories.txt (one label per line).
# Blank lines are skipped so that no empty string ends up as a candidate label.
with open(PROJECT_FOLDER / "categories.txt", "r", encoding="utf-8") as f:
    budget_categories = [line.strip() for line in f if line.strip()]

# Set up json file to store transaction classifications
json_file_path = OUTPUT_FOLDER / "transaction_classifications.json"

# Initialize an empty dictionary to store classifications
transaction_classifications = {}

# Load existing classifications if the file exists, so earlier results are reused
if json_file_path.exists():
    with json_file_path.open("r", encoding="utf-8") as json_file:
        transaction_classifications = json.load(json_file)

# Iterate over each row
...
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
""" | ||
Contains loaders for different data sources (file only) | ||
Output will result in unified dataframe: | ||
- date | ||
- account_name | ||
- account_number | ||
- amount | ||
- description | ||
- category (if available) | ||
""" | ||
|
||
import pandas as pd | ||
|
||
|
||
def ing_loader(file_path) -> pd.DataFrame:
    """Load an ING bank transaction export into the unified dataframe layout.

    Meaning of the ING "Transaction type" values (kept in ``category``):

    Online Banking = Moving between accounts | Manual payments to other accounts
    Batch payment = Retour. Only 1 instance
    Cash machine = Cash withdrawal (ATM)
    Deposit = Cash deposit (ATM)
    Transfer = Rounding feature from savings | Retour
    SEPA direct debit = Automatic debit payments
    Various = ING account payments | Credit card repayment
    Payment terminal = Payments at card machine
    iDEAL = Payments via iDEAL
    ____________________________________________________________________
    Drop:
    - Batch payment
    - Deposit
    - Transfer
    Split:
    - Online banking --> Remove if description contains 'Oranje spaarrekening'

    Args:
        file_path: Path or file-like object of the semicolon-separated export;
            it is passed straight to ``pandas.read_csv``.

    Returns:
        DataFrame with the columns ``date``, ``account_name``,
        ``account_number``, ``amount``, ``category`` and ``description``.
        Filtered rows are removed; the original row index is kept.

    Raises:
        KeyError: If one of the expected ING columns is missing from the file.
    """
    # Load and preprocess data
    df = pd.read_csv(file_path, sep=";")
    df = df[
        [
            "Date",
            "Name / Description",
            "Account",
            "Amount (EUR)",
            "Transaction type",
            "Notifications",
        ]
    ]

    # Rename columns for consistency and clarity
    df = df.rename(
        columns={
            "Date": "date",
            "Name / Description": "account_name",
            "Account": "account_number",
            "Amount (EUR)": "amount",
            "Transaction type": "category",
            "Notifications": "description",
        }
    )

    # Convert date and amount columns
    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    # Amounts use a decimal comma. Cast to str first: when no value contains a
    # comma pandas already parsed the column as numeric and `.str` would fail.
    df["amount"] = pd.to_numeric(
        df["amount"].astype(str).str.replace(",", ".", regex=False)
    )

    # Filter transactions
    df = df[~df["category"].isin(["Batch payment", "Deposit", "Transfer"])]
    # "Notifications" was renamed to "description" above, so the savings-account
    # check has to read that column (the old "extra" name no longer exists).
    mask = (df["category"] == "Online Banking") & (
        df["description"].str.contains("Oranje spaarrekening", case=False, na=False)
    )
    df = df[~mask]

    return df
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.