Skip to content
Snippets Groups Projects
Commit 0c09f08f authored by benzinab's avatar benzinab
Browse files

Edit number of actions and actions graphics

- Add normal distribution to actions to be executed
- Add plt to display likelihood of source / added actions
parent c5b1b66d
No related branches found
No related tags found
No related merge requests found
......@@ -57,7 +57,11 @@ class CitizenDataGenerator:
citizen['credit_score'] = np.clip(citizen['credit_score'], 0, 1030)
# Add the action to the citizen's history
citizen['applied_actions'].append(action['name'])
applied_action = {
"criteria_name": action['name'],
"subcategory_name": action['subcategory_name']
}
citizen['applied_actions'].append(applied_action)
# Update the citizen's classification
for classification, boundaries in classifications.items():
......@@ -91,12 +95,13 @@ class CitizenDataGenerator:
return chosen_action
# Define the main function that generates synthetic data for a sample of citizens
def generate_synthetic_data(self, n_citizens, max_actions):
def generate_synthetic_data(self, n_citizens):
# Initialize a list of citizens with a credit score of 1000 and classification "A"
citizens = [{"credit_score": 1000, "applied_actions": [], "classification": "A"} for _ in range(n_citizens)]
# Apply random actions to each citizen
# The number of actions per citizen is drawn from a normal distribution (mean 30, standard deviation 10)
for citizen in citizens:
n_actions = random.randint(1, max_actions)
n_actions = int(np.random.normal(loc=30, scale=10))
for _ in range(n_actions):
self.apply_action(citizen)
# Return the list of citizens with updated credit scores, applied actions, and classifications
......
import json
import matplotlib.pyplot as plt
from collections import Counter
def create_table(file_path):
with open(file_path) as json_file:
data = json.load(json_file)
class CategoriesVisualizer:
table = []
for category in data:
for subcategory in category["subcategories"]:
subcategory_name = subcategory["name"]
print(subcategory_name)
return table
def __init__(self):
    """Load the action categories and the subcategory likelihoods from JSON.

    Both paths are relative, so they are resolved against the current
    working directory of the running process.
    """
    # Pass the encoding explicitly: without it open() falls back to the
    # platform's locale encoding, which can mis-decode non-ASCII text in
    # UTF-8 JSON files.
    file_path = "../data/raw/actions/converted_actions.json"
    with open(file_path, encoding="utf-8") as json_file:
        self.data = json.load(json_file)

    # Load the subcategory likelihoods from json file
    with open("../data/raw/likelihood/subcategories_likelihood_example.json", encoding="utf-8") as f:
        self.subcategory_likelihoods = json.load(f)
table = create_table('../../data/raw/actions/converted_actions.json')
def create_table(self):
for row in table:
print(row)
table = []
for category in self.data:
for subcategory in category["subcategories"]:
subcategory_name = subcategory["name"]
print(subcategory_name)
return table
print("Number of categories: ", len(set([x[0] for x in table])))
print("Number of subcategories: ", len(table))
def print_categories_and_subcategories(self):
    """Print every row returned by ``create_table`` and two summary counts.

    The category count is the number of distinct first elements among the
    rows; the subcategory count is the number of rows.
    """
    rows = self.create_table()
    for entry in rows:
        print(entry)

    distinct_first_items = {entry[0] for entry in rows}
    print("Number of categories: ", len(distinct_first_items))
    print("Number of subcategories: ", len(rows))
def plot_original_subcategory_likelihood(self):
    """Draw the bar chart for the values held in ``self.subcategory_likelihoods``."""
    chart_title = "Subcategories Likelihood"
    self.plot_subcategory_likelihood(chart_title, self.subcategory_likelihoods)
def plot_subcategory_likelihood(self, title, subcategory_likelihoods):
    """Show a horizontal bar chart of the 20 highest-valued subcategories.

    Args:
        title: Title shown above the chart.
        subcategory_likelihoods: Passed to ``Counter``; entries are ranked
            by their resulting value in descending order.
    """
    ranked = sorted(
        Counter(subcategory_likelihoods).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )
    top_entries = ranked[:20]
    labels = [name for name, _ in top_entries]
    values = [value for _, value in top_entries]

    plt.barh(labels, values)
    plt.xlabel('Likelihood')
    plt.ylabel('Subcategory')
    plt.title(title)
    plt.tight_layout()
    plt.show()
def plot_subcategory_likelihoods(self, title, initial, result):
    """Plot initial likelihoods and observed frequencies as stacked bars.

    Args:
        title: Title shown above the chart.
        initial: Passed to ``Counter``; supplies the first value of each
            bar (labelled "Initial Likelihood").
        result: Sized collection of subcategory names; each name's count is
            divided by ``len(result)`` to give the second value of each bar
            (labelled "Likelihood After Generation").

    Only subcategories present in both inputs are drawn, limited to the
    first 40 after sorting by the (initial, observed) pair in descending
    order.
    """
    max_label_length = 30

    initial_counts = Counter(initial)
    observed_share = {
        name: count / len(result)
        for name, count in Counter(result).items()
    }

    # Keep only the names that appear in both inputs.
    paired = {
        name: (initial_counts[name], observed_share[name])
        for name in initial_counts.keys() & observed_share.keys()
    }
    top_entries = sorted(paired.items(), key=lambda item: item[1], reverse=True)[:40]

    # Shorten only labels that really exceed the limit. The earlier check
    # compared against 20 but sliced at 30, so names of 21-30 characters
    # received a trailing "..." although nothing had been cut off.
    labels = [
        name if len(name) <= max_label_length else name[:max_label_length] + '...'
        for name, _ in top_entries
    ]
    initial_values = [pair[0] for _, pair in top_entries]
    observed_values = [pair[1] for _, pair in top_entries]

    fig, ax = plt.subplots()
    ax.barh(labels, initial_values, label='Initial Likelihood', zorder=2)
    # left=initial_values starts each second bar where the first one ends.
    ax.barh(labels, observed_values, label='Likelihood After Generation', left=initial_values, zorder=1)
    ax.set_xlabel('Likelihood')
    ax.set_ylabel('Subcategory')
    ax.set_title(title)
    ax.legend()
    plt.tight_layout()
    plt.show()
def plot_subcategory_likelihood_pie_chart(self):
    """Show a pie chart of the 20 highest-valued entries of ``self.subcategory_likelihoods``."""
    ranked = sorted(
        Counter(self.subcategory_likelihoods).items(),
        key=lambda pair: pair[1],
        reverse=True,
    )

    labels, sizes = [], []
    for name, value in ranked[:20]:
        labels.append(name)
        sizes.append(value)

    plt.pie(sizes, labels=labels, textprops={'fontsize': 7})
    plt.title('Subcategory Likelihood')
    plt.show()
import collections
import json
import matplotlib.pyplot as plt
import pandas as pd
from faker import Faker
from src.data_processing.citizen_data_generator import CitizenDataGenerator
from src.data_statistics.categories_visualizer import CategoriesVisualizer
class CitizenDataVisualizer:
def __init__(self, citizens):
    """Store the citizens to visualise and load the data the charts need.

    Args:
        citizens: Citizen records to visualise;
            ``plot_subcategory_likelihoods`` reads each record's
            ``'applied_actions'`` list.
    """
    self.citizen_data_generator = CitizenDataGenerator()
    # The citizens are supplied by the caller; nothing is generated here.
    self.citizens = citizens
    # Load the subcategory likelihoods from json file. The encoding is
    # passed explicitly so the read does not depend on the platform's
    # locale default.
    with open("../data/raw/likelihood/subcategories_likelihood_example.json", encoding="utf-8") as f:
        self.subcategory_likelihoods = json.load(f)
    self.categoriesVisualizer = CategoriesVisualizer()
def print_data_frame(self):
# Create a DataFrame from the list of citizens
......@@ -22,7 +28,7 @@ class CitizenDataVisualizer:
citizens_df["name"] = [fake.name() for _ in range(len(citizens_df))]
# set number of rows to max
# pd.options.display.max_rows = len(citizens_df)
pd.options.display.max_rows = len(citizens_df)
# Print the DataFrame
print(citizens_df[["name", "classification", "credit_score"]])
......@@ -48,3 +54,10 @@ class CitizenDataVisualizer:
plt.ylabel('Count')
plt.title('Bar chart of classifications')
plt.show()
def plot_subcategory_likelihoods(self):
    """Plot the original likelihoods against the subcategories actually applied.

    Collects the ``'subcategory_name'`` of every applied action of every
    citizen and hands that list, together with the visualizer's own
    ``subcategory_likelihoods``, to the categories visualizer.
    """
    # Flatten all applied actions first, then extract the subcategory names.
    every_action = []
    for citizen in self.citizens:
        every_action.extend(citizen['applied_actions'])

    visualizer = self.categoriesVisualizer
    original_likelihoods = visualizer.subcategory_likelihoods
    applied_subcategories = [entry['subcategory_name'] for entry in every_action]
    visualizer.plot_subcategory_likelihoods(
        "Original/Applied Likelihood",
        original_likelihoods,
        applied_subcategories,
    )
\ No newline at end of file
from src.data_processing.citizen_data_generator import CitizenDataGenerator
from src.data_statistics.categories_visualizer import CategoriesVisualizer
from src.data_statistics.citizen_data_visualizer import CitizenDataVisualizer
# Driver script: generate a synthetic population and show its statistics.
data_generator = CitizenDataGenerator()

# Generate the citizens exactly once. The former two-argument call
# generate_synthetic_data(100, 50) was dropped: its result was overwritten
# immediately without being used, and it does not match the one-parameter
# signature generate_synthetic_data(self, n_citizens).
citizens = data_generator.generate_synthetic_data(100)

data_visualizer = CitizenDataVisualizer(citizens)
categories_visualizer = CategoriesVisualizer()

# categories_visualizer.plot_subcategory_likelihood_pie_chart()
# categories_visualizer.plot_original_subcategory_likelihood()

data_visualizer.print_data_frame()
data_visualizer.display_classification_bar()
data_visualizer.display_credit_score_histogram()
data_visualizer.plot_subcategory_likelihoods()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment