Skip to content
Snippets Groups Projects
Commit 1e6ed859 authored by niklasfranz's avatar niklasfranz
Browse files

Merge branch 'master' into 19-logging

parents 860f0443 66788a30
No related branches found
No related tags found
No related merge requests found
<mxfile host="Electron" modified="2023-07-12T16:27:44.192Z" agent="5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/15.8.7 Chrome/91.0.4472.164 Electron/13.6.2 Safari/537.36" etag="cKdbLBWRQm2rR1pnQ_-2" version="15.8.7" type="device"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">7ZtRc5s4EIB/jWfubsYZAzZxHkPSpNf6btKm196bRwEZ1AhEJRGb/PpbYYFNAIckdrhJmEmn1iIktN8uWu3aA+ssXF1yFAd/MQ/TgTnyVgPrfGCahjk24D8lSdeSqWWvBT4nnu60EVyTe6yFIy1NiIdFqaNkjEoSl4UuiyLsypIMcc6W5W4LRsuzxsjHFcG1i2hV+oN4MtCrMI838o+Y+EE+s2GfrK+EKO+sVyIC5LHllsj6MLDOOGNy/SlcnWGqlJfr5cef6Q86u7UvP30Rv9A/zudvf38frge7eMotxRI4juSzh76/XVx8/D7++W98dfFlObo8vfk01LeM7hBNtL6uKJMSc71kmeZ6FEsSUhRBy1mwSF7rK6AFB1HiR/DZhceDOy3nDnNJAMGpviBZDFI3INSboZQlahFCIvc2bzkB4+QehkUULhkggMtcamsy7VKPa3UniEcg5VhAn6tcM0YhmiEhdR+XUYpiQW6yB1ZdQsR9EjmwUBbmA7Ek8rCnWwXqrCE5uy2MR93fkofmprSBV1vWqPlcYhZiyVPooq9aGkeau5BuLzeGa+V+FWwZrTk2tcNoZ/GLoYvZvoJzocgHHRTTVeYbV+cz7Lr57PJ0iAL3CEnsKC2KbTOED1sr3Ygy46w3VP/rcvht4juzz8tfwQStHOv79dCsGCrMY52C6BxJJLAcqIdS2snxbNkuqF9u2SnFC9lopSJGLon8WdbnfLyRfNXrVyIG9y5oZiEB8TwcZRYk4UnWRqaeImYkkpmCJg78gRrPRkeTwQQe6AzaxqYNf6o7l2csAmNDJLMqDBa8xMqKnQY7Kptgs39X7S0tc3zUvuxm8yqBfipVq0p1saYao8hD4kjBXXAU4h7v0/FOzNfDW/t44wreCjtKsi0lkOHmzf/wdfsI2BAQqeFykt8U6POhUaFtVWlbNWQpusH0igkiCVPj83XfB8TbQW1HsNUeoaEeT9tBnR6I6aSGqaXit0TAbgo77zyGCOK339dufMdgsLfpr/tHO+36dWzUsHXUUoHo3COgDHKTKK8oMY6pPLogfsKxUuW6fbrC4o1y3/972jBe8UVdC/54F3gXAjsfom9gNL9BfO4GEJr38PcF32r5Qj8Y/Glr+Gyl/vW+v0/89rhj/Cdt8QewAfTs98n+pO35/mAbfjX/swVfAH042PfM93nkNroO8sYVVtjzcZ7PA5UEzGcRoh820gfZsU2fGcvOWuqo9RNLmeqEHUokKx/q1nOqiRqVuSuFJljCXfx4rC0R9/EuOMZxPR2OKZLkrvx0e1e9Wc13BDCJOWJwCK6jMlNH0bImH022FsfhLBHa6B3nu4xcJ+P1zYMiBb7Nq9m4GvObw9GRYZn6BJmWZmwNQA9+pRaz1YUtFlkS8ACZx2oW4493Amtq29N9wtKjFNvOK8CrpiuM9wAP2I2NcYndcF/wzAPA2/GyLsErkv0PEDZUp0Z9deqxrbV9dWp8XC4XWZN2kYx1bD5qZDXVqcnkf1mdqrXUPLG9XcgoKhcN9Yw3FjDvMFH9Th3V2uZuz2+dO33NUla9CVRjuwrj917seDrWtiWsPVQ76g/Idg1VdUCGY8bcZeENEPXmKFoRLFM4MDOO4aj81ny7MXppSbwRr915AqQp8634EjGPEHeJEEQo2jTPgjiM0R5yW8jdl7Wa6lo
aMvhxjCVRQXFP+bmUuy9iGU2FDIW5CMeKTKb3hoOxw1HuvFplNNUrFGUBOoH9GJw4CaMt0teYkz5J/QTMnVelzKbKhMKcRORXgjXmedZF5LSzZEXPuSXnzitQ5q69WVcfUyCdgC57h3426FctO9Wfj5u+OBZzHHPmYiGyTVpg+WCLvnjDW/QhMiH5l8G726KbUEuOkZyzRFKCuXhHmA/h0XbnkVjd13sfePQySGOK0h71i1CfdB2N5We7Cuq5x1kM8RhWtFG2U6uYrPftFwG3zK7DMqMum62AcxyClucRi5Dr4lgCgbn6kV4P/EXAx4cLz6C5+Y3iuuC1+aWn9eE/</diagram></mxfile>
\ No newline at end of file
docs/class_diagram.png

65.3 KiB

...@@ -138,7 +138,7 @@ class Dataset: ...@@ -138,7 +138,7 @@ class Dataset:
is_competitive_col = self.get_is_competitive_col(dataframe) is_competitive_col = self.get_is_competitive_col(dataframe)
return is_competitive_col return is_competitive_col
def treat_outliers(self, df, colname): def treat_outliers(self, df, colname) -> pd.DataFrame:
q = df[colname].quantile(0.99) q = df[colname].quantile(0.99)
return df[df[colname] < q] return df[df[colname] < q]
......
...@@ -10,11 +10,32 @@ class Plotter: ...@@ -10,11 +10,32 @@ class Plotter:
self.ds = dataset self.ds = dataset
self.df = dataset.get_dataframe() self.df = dataset.get_dataframe()
def customize_plot(self, fig, ax, styling_params): def customize_plot(self, fig, ax, styling_params) -> None:
""" customize_plot
Args:
fig (plt.figure.Figure),
ax (plt.axes.Axes),
styling_params (dict)
Returns:
None
"""
if styling_params.get("title"): if styling_params.get("title"):
ax.set_title(styling_params["title"]) ax.set_title(styling_params["title"])
def distribution_plot(self, target): def distribution_plot(self, target) -> None:
""" plot a distribution plot.
Args:
target (str, must be present as a column in the dataset)
Returns:
None
"""
grouped_data = self.df.groupby(target).size() grouped_data = self.df.groupby(target).size()
plt.barh(grouped_data.index, grouped_data.values) plt.barh(grouped_data.index, grouped_data.values)
print( print(
...@@ -28,7 +49,18 @@ class Plotter: ...@@ -28,7 +49,18 @@ class Plotter:
def plot_categorical_bar_chart( def plot_categorical_bar_chart(
self, category1, category2, styling_params={} self, category1, category2, styling_params={}
): ) -> None:
""" plot a categorical bar chart.
Args:
category1 (str, must be present as a column in the dataset),
category2 (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
ct = pd.crosstab(self.df[category1], self.df[category2]) ct = pd.crosstab(self.df[category1], self.df[category2])
# Calculate percentages by row # Calculate percentages by row
ct_percent = ct.apply(lambda r: r / r.sum() * 100, axis=0) ct_percent = ct.apply(lambda r: r / r.sum() * 100, axis=0)
...@@ -36,14 +68,39 @@ class Plotter: ...@@ -36,14 +68,39 @@ class Plotter:
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
ct_percent.plot(kind="bar", ax=ax) ct_percent.plot(kind="bar", ax=ax)
def plot_categorical_boxplot(self, target, category, styling_params={}): def plot_categorical_boxplot(
self, target, category, styling_params={}
) -> None:
""" plot a categorical boxplot.
Args:
target (str, must be present as a column in the dataset),
category (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
fig, ax = plt.subplots() fig, ax = plt.subplots()
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
sns.boxplot(x=category, y=target, data=self.df, palette="rainbow") sns.boxplot(x=category, y=target, data=self.df, palette="rainbow")
def plot_categorical_histplot( def plot_categorical_histplot(
self, target, category, styling_params={}, bins=30 self, target, category, styling_params={}, bins=30
): ) -> None:
""" plot a categorical histplot.
Args:
target (str, must be present as a column in the dataset),
category (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
uniques = self.ds.get_unique_column_values(category) uniques = self.ds.get_unique_column_values(category)
fig, ax = plt.subplots() fig, ax = plt.subplots()
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
...@@ -57,7 +114,18 @@ class Plotter: ...@@ -57,7 +114,18 @@ class Plotter:
alpha=0.5, alpha=0.5,
) )
def plot_scatterplot(self, target1, target2, styling_params={}): def plot_scatterplot(self, target1, target2, styling_params={}) -> None:
""" plot a scatterplot.
Args:
target1 (str, must be present as a column in the dataset),
target2 (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
fig, ax = plt.subplots() fig, ax = plt.subplots()
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
ax.scatter(self.df[target1], self.df[target2]) ax.scatter(self.df[target1], self.df[target2])
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment