refactor: extensive logging, add param type hints

bc6fcc14 · niklasfranz · e853bbf7 · bc6fcc14
Commit bc6fcc14 authored 1 year ago by niklasfranz
--- a/src/Plotter.py
+++ b/src/Plotter.py
@@ -20,6 +20,9 @@ class Plotter:
            logging.error("dataset parameter is not of type Dataset")
            raise ValueError(f"{dataset} is not of type Dataset")

+        logging.debug("Plotter.__init__(): all params valid, dataset \
+                      initialized")
+
        self.ds = dataset
        self.df = dataset.get_dataframe()

@@ -45,7 +48,8 @@ class Plotter:
            fig.set_figheight(parameters["plotter_fig_height"])
            fig.set_figwidth(parameters["plotter_fig_width"])

-    def distribution_plot(self, target, styling_params={}) -> None:
+    def distribution_plot(self, target: str,
+                          styling_params: dict = {}) -> None:
        """plot a distribution plot.

        Args:
@@ -70,6 +74,11 @@ class Plotter:
            logging.error("parameter styling params should be a dict.")
            raise ValueError("parameter styling params should be a dict.")

+        logging.debug("Plotter.distribution_plot(): all params valid,\
+                plotting plot_categorical_bar_chart(): \n \
+                target: {target} \n \
+                styling_params: {styling_params}")
+
        # plotting the plot
        fig, ax = plt.subplots()
        grouped_data = self.df.groupby(target).size()
@@ -84,7 +93,7 @@ class Plotter:
        self.customize_plot(fig, ax, styling_params)

    def plot_categorical_bar_chart(
-        self, category1, category2, styling_params={}
+        self, category1: str, category2: str, styling_params: str = {}
    ) -> None:
        """plot a categorical bar chart.

@@ -99,33 +108,44 @@ class Plotter:
        """
        # implementing sensible logging and error catching
        if type(category1) != str:
-            logging.error("parameter category1 should be a string.")
+            logging.error("Plotter.plot_categorical_bar_chart(): parameter \
+                          category1 should be a string.")
            raise ValueError("parameter category1 should be a string.")

        if not (category1 in self.df.columns):
            logging.error(
-                "parameter category1 cannot be found in the dataset."
+                "Plotter.plot_categorical_bar_chart(): parameter category1 \
+                cannot be found in the dataset."
            )
            raise KeyError(
                "parameter category1 cannot be found in the dataset."
            )

        if type(category2) != str:
-            logging.error("parameter category2 should be a string.")
+            logging.error("Plotter.plot_categorical_bar_chart(): parameter \
+                          category2 should be a string.")
            raise ValueError("parameter category2 should be a string.")

        if not (category2 in self.df.columns):
            logging.error(
-                "parameter category2 cannot be found in the dataset."
+                "Plotter.plot_categorical_bar_chart(): \
+                parameter category2 cannot be found in the dataset."
            )
            raise KeyError(
                "parameter category2 cannot be found in the dataset."
            )

        if type(styling_params) != dict:
-            logging.error("parameter styling params should be a dict.")
+            logging.error("Plotter.plot_categorical_bar_chart(): \
+                          parameter styling params should be a dict.")
            raise ValueError("parameter styling params should be a dict.")

+        logging.debug("Plotter.plot_categorical_bar_chart(): all params valid,\
+                      plotting plot_categorical_bar_chart(): \n \
+                      category1: {category1} \n \
+                      category2: {category2} \n \
+                      styling_params: {styling_params}")
+
        # plotting the plot
        ct = pd.crosstab(self.df[category1], self.df[category2])
        # Calculate percentages by row
@@ -136,7 +156,7 @@ class Plotter:
        ct_percent.plot(kind="bar", ax=ax)

    def plot_categorical_boxplot(
-        self, target, category, styling_params={}
+        self, target: str, category: str, styling_params: dict = {}
    ) -> None:
        """plot a categorical boxplot.

@@ -149,6 +169,39 @@ class Plotter:
        Returns:
            None
        """
+        # implementing sensible logging and error catching
+        if type(target) != str:
+            logging.error("Plotter.plot_categorical_boxplot(): parameter \
+                           target should be a string.")
+            raise ValueError("parameter target should be a string.")
+
+        if not (target in self.df.columns):
+            logging.error("Plotter.plot_categorical_boxplot(): parameter \
+                           target cannot be found in the dataset.")
+            raise KeyError("parameter target cannot be found in the dataset.")
+
+        if type(category) != str:
+            logging.error("Plotter.plot_categorical_boxplot(): parameter \
+                          category should be a string.")
+            raise ValueError("parameter category should be a string.")
+
+        if not (category in self.df.columns):
+            logging.error("Plotter.plot_categorical_boxplot(): parameter \
+                          category cannot be found in the dataset.")
+            raise KeyError(
+                "parameter category cannot be found in the dataset."
+            )
+
+        if type(styling_params) != dict:
+            logging.error("Plotter.plot_categorical_boxplot(): parameter \
+                          styling params should be a dict.")
+            raise ValueError("parameter styling params should be a dict.")
+
+        logging.debug("Plotter.plot_categorical_boxplot(): all params valid, \
+                      plotting plot_categorical_boxplot(): \n \
+                      target: {target} \n \
+                      category: {category} \n \
+                      styling_params: {styling_params}")

        fig, ax = plt.subplots()
        ax.set_ylabel("Percent")
@@ -156,8 +209,8 @@ class Plotter:
        sns.boxplot(x=category, y=target, data=self.df, palette="rainbow")

    def plot_categorical_histplot(
-        self, target, category, styling_params={}, bins=30
-    ) -> None:
+        self, target: str, category: str,
+            styling_params: dict = {}, bins: int = 30) -> None:
        """plot a categorical hisplot.

        Args:
@@ -171,27 +224,38 @@ class Plotter:
        """
        # implementing sensible logging and error catching
        if type(target) != str:
-            logging.error("parameter target should be a string.")
+            logging.error("Plotter.plot_categorical_histplot(): parameter \
+                           target should be a string.")
            raise ValueError("parameter target should be a string.")

        if not (target in self.df.columns):
-            logging.error("parameter target cannot be found in the dataset.")
+            logging.error("Plotter.plot_categorical_histplot(): parameter \
+                           target cannot be found in the dataset.")
            raise KeyError("parameter target cannot be found in the dataset.")

        if type(category) != str:
-            logging.error("parameter category should be a string.")
+            logging.error("Plotter.plot_categorical_histplot(): parameter \
+                          category should be a string.")
            raise ValueError("parameter category should be a string.")

        if not (category in self.df.columns):
-            logging.error("parameter category cannot be found in the dataset.")
+            logging.error("Plotter.plot_categorical_histplot(): parameter \
+                          category cannot be found in the dataset.")
            raise KeyError(
                "parameter category cannot be found in the dataset."
            )

        if type(styling_params) != dict:
-            logging.error("parameter styling params should be a dict.")
+            logging.error("Plotter.plot_categorical_histplot(): parameter \
+                          styling params should be a dict.")
            raise ValueError("parameter styling params should be a dict.")

+        logging.debug("Plotter.plot_categorical_histplot(): all params valid, \
+                      plotting plot_categorical_histplot(): \n \
+                      target: {target} \n \
+                      category: {category} \n \
+                      styling_params: {styling_params}")
+
        # plotting the plot

        uniques = self.ds.get_unique_column_values(category)
@@ -213,7 +277,8 @@ class Plotter:
            label_list.append(val)
        ax.legend(labels=label_list)

-    def plot_scatterplot(self, target1, target2, styling_params={}) -> None:
+    def plot_scatterplot(self, target1: str, target2: str,
+                         styling_params: dict = {}) -> None:
        """plot a scatterplot.

        Args:
@@ -228,25 +293,36 @@ class Plotter:

        # implementing sensible logging and error catching
        if type(target1) != str:
-            logging.error("parameter target1 should be a string.")
+            logging.error("Plotter.plot_scatterplot(): parameter target1 \
+                          should be a string.")
            raise ValueError("parameter target1 should be a string.")

        if not (target1 in self.df.columns):
-            logging.error("parameter target1 cannot be found in the dataset.")
+            logging.error("Plotter.plot_scatterplot(): parameter target1 \
+                          cannot be found in the dataset.")
            raise KeyError("parameter target1 cannot be found in the dataset.")

        if type(target2) != str:
-            logging.error("parameter target2 should be a string.")
+            logging.error("Plotter.plot_scatterplot(): parameter target2 \
+                          should be a string.")
            raise ValueError("parameter target2 should be a string.")

        if not (target2 in self.df.columns):
-            logging.error("parameter target2 cannot be found in the dataset.")
+            logging.error("Plotter.plot_scatterplot(): parameter target2 \
+                          cannot be found in the dataset.")
            raise KeyError("parameter target2 cannot be found in the dataset.")

        if type(styling_params) != dict:
-            logging.error("parameter styling params should be a dict.")
+            logging.error("Plotter.plot_scatterplot(): parameter styling \
+                          params should be a dict.")
            raise ValueError("parameter styling params should be a dict.")

+        logging.debug("Plotter.plot_scatterplot(): all params valid, plotting\
+                      scatterplot using given parameters: \n \
+                      target1: {target1} \n \
+                      target2: {target2} \n \
+                      styling_params: {styling_params}")
+
        # plotting the plot
        fig, ax = plt.subplots()
        ax.set_xlabel(target1)