From 1e804ebf86c383383262f10f9ee2baa0c1671814 Mon Sep 17 00:00:00 2001
From: niklasfranz <nf.app@icloud.com>
Date: Wed, 12 Jul 2023 19:17:33 +0200
Subject: [PATCH] refactor: added logging and error handling

---
 src/Plotter.py | 131 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 129 insertions(+), 2 deletions(-)

diff --git a/src/Plotter.py b/src/Plotter.py
index 4137491..c8d9051 100644
--- a/src/Plotter.py
+++ b/src/Plotter.py
@@ -3,10 +3,14 @@ import matplotlib.pyplot as plt
 import pandas as pd
 import seaborn as sns
 from .Dataset import Dataset
-
+import logging
 
 class Plotter:
     def __init__(self, dataset: Dataset):
+        if not isinstance(dataset, Dataset):  # Dataset subclasses are fine
+            logging.error("dataset parameter is not of type Dataset")
+            raise ValueError(f"{dataset} is not of type Dataset")
+
         self.ds = dataset
         self.df = dataset.get_dataframe()
 
@@ -25,7 +29,7 @@ class Plotter:
         if styling_params.get("title"):
             ax.set_title(styling_params["title"])
 
-    def distribution_plot(self, target) -> None:
+    def distribution_plot(self, target, styling_params=None) -> None:
         """ plot a distribution plot.
 
         Args:
@@ -36,6 +40,24 @@ class Plotter:
         Returns:
             None
         """
+        # Validate arguments up front: each failure is logged, then raised.
+        # A None styling_params (the default) stands for "no styling".
+        if not isinstance(target, str):
+            logging.error("parameter target should be a string.")
+            raise ValueError("parameter target should be a string.")
+
+        if target not in self.df.columns:
+            logging.error("parameter target cannot be found in the dataset.")
+            raise ValueError(
+                "parameter target cannot be found in the dataset."
+            )
+        styling_params = {} if styling_params is None else styling_params
+        if not isinstance(styling_params, dict):
+            logging.error("parameter styling params should be a dict.")
+            raise ValueError("parameter styling params should be a dict.")
+
+        # Draw the plot.
+
         grouped_data = self.df.groupby(target).size()
         plt.barh(grouped_data.index, grouped_data.values)
         print(
@@ -61,6 +83,32 @@ class Plotter:
         Returns:
             None
         """
+        # Validate arguments up front: each failure is logged, then raised.
+        if not isinstance(category1, str):
+            logging.error("parameter category1 should be a string.")
+            raise ValueError("parameter category1 should be a string.")
+
+        if category1 not in self.df.columns:
+            logging.error("parameter category1 cannot be found in the dataset.")
+            raise ValueError(
+                "parameter category1 cannot be found in the dataset."
+            )
+
+        if not isinstance(category2, str):
+            logging.error("parameter category2 should be a string.")
+            raise ValueError("parameter category2 should be a string.")
+
+        if category2 not in self.df.columns:
+            logging.error("parameter category2 cannot be found in the dataset.")
+            raise ValueError(
+                "parameter category2 cannot be found in the dataset."
+            )
+
+        if not isinstance(styling_params, dict):
+            logging.error("parameter styling params should be a dict.")
+            raise ValueError("parameter styling params should be a dict.")
+
+        # Draw the plot.
         ct = pd.crosstab(self.df[category1], self.df[category2])
         # Calculate percentages by row
         ct_percent = ct.apply(lambda r: r / r.sum() * 100, axis=0)
@@ -82,7 +130,32 @@ class Plotter:
         Returns:
             None
         """
+        # Validate arguments up front: each failure is logged, then raised.
+        if not isinstance(target, str):
+            logging.error("parameter target should be a string.")
+            raise ValueError("parameter target should be a string.")
+
+        if target not in self.df.columns:
+            logging.error("parameter target cannot be found in the dataset.")
+            raise ValueError(
+                "parameter target cannot be found in the dataset."
+            )
+
+        if not isinstance(category, str):
+            logging.error("parameter category should be a string.")
+            raise ValueError("parameter category should be a string.")
+
+        if category not in self.df.columns:
+            logging.error("parameter category cannot be found in the dataset.")
+            raise ValueError(
+                "parameter category cannot be found in the dataset."
+            )
+
+        if not isinstance(styling_params, dict):
+            logging.error("parameter styling params should be a dict.")
+            raise ValueError("parameter styling params should be a dict.")
         
+        # plotting the plot
         fig, ax = plt.subplots()
         self.customize_plot(fig, ax, styling_params)
         sns.boxplot(x=category, y=target, data=self.df, palette="rainbow")
@@ -101,6 +174,33 @@ class Plotter:
         Returns:
             None
         """
+        # Validate arguments up front: each failure is logged, then raised.
+        if not isinstance(target, str):
+            logging.error("parameter target should be a string.")
+            raise ValueError("parameter target should be a string.")
+
+        if target not in self.df.columns:
+            logging.error("parameter target cannot be found in the dataset.")
+            raise ValueError(
+                "parameter target cannot be found in the dataset."
+            )
+
+        if not isinstance(category, str):
+            logging.error("parameter category should be a string.")
+            raise ValueError("parameter category should be a string.")
+
+        if category not in self.df.columns:
+            logging.error("parameter category cannot be found in the dataset.")
+            raise ValueError(
+                "parameter category cannot be found in the dataset."
+            )
+
+        if not isinstance(styling_params, dict):
+            logging.error("parameter styling params should be a dict.")
+            raise ValueError("parameter styling params should be a dict.")
+
+        # Draw the plot.
+
         uniques = self.ds.get_unique_column_values(category)
         fig, ax = plt.subplots()
         self.customize_plot(fig, ax, styling_params)
@@ -126,6 +226,33 @@ class Plotter:
         Returns:
             None
         """
+
+        # Validate arguments up front: each failure is logged, then raised.
+        if not isinstance(target1, str):
+            logging.error("parameter target1 should be a string.")
+            raise ValueError("parameter target1 should be a string.")
+
+        if target1 not in self.df.columns:
+            logging.error("parameter target1 cannot be found in the dataset.")
+            raise ValueError(
+                "parameter target1 cannot be found in the dataset."
+            )
+
+        if not isinstance(target2, str):
+            logging.error("parameter target2 should be a string.")
+            raise ValueError("parameter target2 should be a string.")
+
+        if target2 not in self.df.columns:
+            logging.error("parameter target2 cannot be found in the dataset.")
+            raise ValueError(
+                "parameter target2 cannot be found in the dataset."
+            )
+
+        if not isinstance(styling_params, dict):
+            logging.error("parameter styling params should be a dict.")
+            raise ValueError("parameter styling params should be a dict.")
+
+        # Draw the plot.
         fig, ax = plt.subplots()
         self.customize_plot(fig, ax, styling_params)
         ax.scatter(self.df[target1], self.df[target2])
\ No newline at end of file
-- 
GitLab