Skip to content
Snippets Groups Projects
Commit 7d9087c3 authored by Alexander Shervud's avatar Alexander Shervud
Browse files

Merge branch 'master' of...

parents a6c55cd8 66788a30
No related branches found
No related tags found
No related merge requests found
<mxfile host="Electron" modified="2023-07-12T16:27:44.192Z" agent="5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/15.8.7 Chrome/91.0.4472.164 Electron/13.6.2 Safari/537.36" etag="cKdbLBWRQm2rR1pnQ_-2" version="15.8.7" type="device"><diagram id="C5RBs43oDa-KdzZeNtuy" name="Page-1">7ZtRc5s4EIB/jWfubsYZAzZxHkPSpNf6btKm196bRwEZ1AhEJRGb/PpbYYFNAIckdrhJmEmn1iIktN8uWu3aA+ssXF1yFAd/MQ/TgTnyVgPrfGCahjk24D8lSdeSqWWvBT4nnu60EVyTe6yFIy1NiIdFqaNkjEoSl4UuiyLsypIMcc6W5W4LRsuzxsjHFcG1i2hV+oN4MtCrMI838o+Y+EE+s2GfrK+EKO+sVyIC5LHllsj6MLDOOGNy/SlcnWGqlJfr5cef6Q86u7UvP30Rv9A/zudvf38frge7eMotxRI4juSzh76/XVx8/D7++W98dfFlObo8vfk01LeM7hBNtL6uKJMSc71kmeZ6FEsSUhRBy1mwSF7rK6AFB1HiR/DZhceDOy3nDnNJAMGpviBZDFI3INSboZQlahFCIvc2bzkB4+QehkUULhkggMtcamsy7VKPa3UniEcg5VhAn6tcM0YhmiEhdR+XUYpiQW6yB1ZdQsR9EjmwUBbmA7Ek8rCnWwXqrCE5uy2MR93fkofmprSBV1vWqPlcYhZiyVPooq9aGkeau5BuLzeGa+V+FWwZrTk2tcNoZ/GLoYvZvoJzocgHHRTTVeYbV+cz7Lr57PJ0iAL3CEnsKC2KbTOED1sr3Ygy46w3VP/rcvht4juzz8tfwQStHOv79dCsGCrMY52C6BxJJLAcqIdS2snxbNkuqF9u2SnFC9lopSJGLon8WdbnfLyRfNXrVyIG9y5oZiEB8TwcZRYk4UnWRqaeImYkkpmCJg78gRrPRkeTwQQe6AzaxqYNf6o7l2csAmNDJLMqDBa8xMqKnQY7Kptgs39X7S0tc3zUvuxm8yqBfipVq0p1saYao8hD4kjBXXAU4h7v0/FOzNfDW/t44wreCjtKsi0lkOHmzf/wdfsI2BAQqeFykt8U6POhUaFtVWlbNWQpusH0igkiCVPj83XfB8TbQW1HsNUeoaEeT9tBnR6I6aSGqaXit0TAbgo77zyGCOK339dufMdgsLfpr/tHO+36dWzUsHXUUoHo3COgDHKTKK8oMY6pPLogfsKxUuW6fbrC4o1y3/972jBe8UVdC/54F3gXAjsfom9gNL9BfO4GEJr38PcF32r5Qj8Y/Glr+Gyl/vW+v0/89rhj/Cdt8QewAfTs98n+pO35/mAbfjX/swVfAH042PfM93nkNroO8sYVVtjzcZ7PA5UEzGcRoh820gfZsU2fGcvOWuqo9RNLmeqEHUokKx/q1nOqiRqVuSuFJljCXfx4rC0R9/EuOMZxPR2OKZLkrvx0e1e9Wc13BDCJOWJwCK6jMlNH0bImH022FsfhLBHa6B3nu4xcJ+P1zYMiBb7Nq9m4GvObw9GRYZn6BJmWZmwNQA9+pRaz1YUtFlkS8ACZx2oW4493Amtq29N9wtKjFNvOK8CrpiuM9wAP2I2NcYndcF/wzAPA2/GyLsErkv0PEDZUp0Z9deqxrbV9dWp8XC4XWZN2kYx1bD5qZDXVqcnkf1mdqrXUPLG9XcgoKhcN9Yw3FjDvMFH9Th3V2uZuz2+dO33NUla9CVRjuwrj917seDrWtiWsPVQ76g/Idg1VdUCGY8bcZeENEPXmKFoRLFM4MDOO4aj81ny7MXppSbwRr915AqQp8634EjGPEHeJEEQo2jTPgjiM0R5yW8jdl7Wa6lo
aMvhxjCVRQXFP+bmUuy9iGU2FDIW5CMeKTKb3hoOxw1HuvFplNNUrFGUBOoH9GJw4CaMt0teYkz5J/QTMnVelzKbKhMKcRORXgjXmedZF5LSzZEXPuSXnzitQ5q69WVcfUyCdgC57h3426FctO9Wfj5u+OBZzHHPmYiGyTVpg+WCLvnjDW/QhMiH5l8G726KbUEuOkZyzRFKCuXhHmA/h0XbnkVjd13sfePQySGOK0h71i1CfdB2N5We7Cuq5x1kM8RhWtFG2U6uYrPftFwG3zK7DMqMum62AcxyClucRi5Dr4lgCgbn6kV4P/EXAx4cLz6C5+Y3iuuC1+aWn9eE/</diagram></mxfile>
\ No newline at end of file
docs/class_diagram.png

65.3 KiB

...@@ -122,7 +122,7 @@ class Dataset: ...@@ -122,7 +122,7 @@ class Dataset:
is_competitive_col = self.get_is_competitive_col(dataframe) is_competitive_col = self.get_is_competitive_col(dataframe)
return is_competitive_col return is_competitive_col
def treat_outliers(self, df, colname): def treat_outliers(self, df, colname) -> pd.DataFrame:
q = df[colname].quantile(0.99) q = df[colname].quantile(0.99)
return df[df[colname] < q] return df[df[colname] < q]
......
...@@ -10,13 +10,35 @@ class Plotter: ...@@ -10,13 +10,35 @@ class Plotter:
self.ds = dataset self.ds = dataset
self.df = dataset.get_dataframe() self.df = dataset.get_dataframe()
def customize_plot(self, fig, ax, styling_params): def customize_plot(self, fig, ax, styling_params) -> None:
""" customize_plot
Args:
fig (plt.figure.Figure),
ax (plt.axes.Axes),
styling_params (dict)
Returns:
None
"""
if styling_params.get("title"): if styling_params.get("title"):
ax.set_title(styling_params["title"]) ax.set_title(styling_params["title"])
def plot_categorical_bar_chart( def plot_categorical_bar_chart(
self, category1, category2, styling_params={} self, category1, category2, styling_params={}
): ) -> None:
""" plot a categorical bar chart.
Args:
category1 (str, must be present as a column in the dataset),
category2 (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
ct = pd.crosstab(self.df[category1], self.df[category2]) ct = pd.crosstab(self.df[category1], self.df[category2])
# Calculate percentages by row # Calculate percentages by row
ct_percent = ct.apply(lambda r: r / r.sum() * 100, axis=0) ct_percent = ct.apply(lambda r: r / r.sum() * 100, axis=0)
...@@ -24,14 +46,39 @@ class Plotter: ...@@ -24,14 +46,39 @@ class Plotter:
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
ct_percent.plot(kind="bar", ax=ax) ct_percent.plot(kind="bar", ax=ax)
def plot_categorical_boxplot(self, target, category, styling_params={}): def plot_categorical_boxplot(
self, target, category, styling_params={}
) -> None:
""" plot a categorical boxplot.
Args:
target (str, must be present as a column in the dataset),
category (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
fig, ax = plt.subplots() fig, ax = plt.subplots()
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
sns.boxplot(x=category, y=target, data=self.df, palette="rainbow") sns.boxplot(x=category, y=target, data=self.df, palette="rainbow")
def plot_categorical_histplot( def plot_categorical_histplot(
self, target, category, styling_params={}, bins=30 self, target, category, styling_params={}, bins=30
): ) -> None:
""" plot a categorical histplot.
Args:
target (str, must be present as a column in the dataset),
category (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
uniques = self.ds.get_unique_column_values(category) uniques = self.ds.get_unique_column_values(category)
fig, ax = plt.subplots() fig, ax = plt.subplots()
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
...@@ -45,20 +92,29 @@ class Plotter: ...@@ -45,20 +92,29 @@ class Plotter:
alpha=0.5, alpha=0.5,
) )
def plot_scatterplot(self, target1, target2, styling_params={}): def plot_scatterplot(self, target1, target2, styling_params={}) -> None:
""" plot a scatterplot.
Args:
target1 (str, must be present as a column in the dataset),
target2 (str, must be present as a column in the dataset),
styling_params (dict)
Returns:
None
"""
fig, ax = plt.subplots() fig, ax = plt.subplots()
self.customize_plot(fig, ax, styling_params) self.customize_plot(fig, ax, styling_params)
ax.scatter(self.df[target1], self.df[target2]) ax.scatter(self.df[target1], self.df[target2])
def distribution_plot(self, target):
def distribution_plot(self, target):
fig, ax = plt.subplots()
grouped_data = self.df.groupby(target).size() grouped_data = self.df.groupby(target).size()
ax.barh(grouped_data.index, grouped_data.values) plt.barh(grouped_data.index, grouped_data.values)
print( print(
str(grouped_data), str(grouped_data),
str(grouped_data.index), str(grouped_data.index),
str(grouped_data.values), str(grouped_data.values),
) )
ax.set_xlabel("Size") plt.xlabel("Size")
ax.set_ylabel(target) plt.ylabel(target)
ax.set_title(f"Distribution of {target}") plt.title(f"Distribution of {target}")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment