diff --git a/.gitignore b/.gitignore
index 8abee9d6d4cb733781f0d0cd2e1de4af266f56e7..9ad8d9892c69bbd9d6c184efe8f0778e81dacd0a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ pyproject.toml
 
 # for testing
 dump.ipynb
+Bullshit.py
 
 DUMP_ds.py
 
diff --git a/Notebook.ipynb b/Notebook.ipynb
index 6e4fbbdfc18dedca2e0bca2c9a185f47024a4210..05308c16ccd1a69645c41d5a5e7ce67118e1e13c 100644
--- a/Notebook.ipynb
+++ b/Notebook.ipynb
@@ -8,9 +8,11 @@
     "\n",
     "# Overview \n",
     "\n",
-    "In this project we decided to analyze anxiety in Gamers. We picked the dataset from kaggle because it intersected our personal interests. The data can be found [here](https://www.kaggle.com/datasets/divyansh22/online-gaming-anxiety-data)\n",
+    "In this project we decided to analyze anxiety in Gamers. We picked the dataset from kaggle because it intersected our personal interests. The data and survey can be found [here](https://www.kaggle.com/datasets/divyansh22/online-gaming-anxiety-data)\n",
     "\n",
-    "The data was acquired by a survey published and shared online. This way everyone could participate. For us that also means analyzing and di"
+    "The data was acquired by a survey published and shared online. This way everyone could participate. For us that also means taking into account that the distribution and answers can be skewed. \n",
+    "\n",
+    "## Motivation - "
    ]
   },
   {
@@ -19,10 +21,14 @@
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<src.Dataset.Dataset object at 0x000001CA32F35BD0>\n"
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'src.Dataset'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[1], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39msrc\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mDataset\u001b[39;00m \u001b[39mimport\u001b[39;00m Dataset \n\u001b[0;32m      3\u001b[0m dataset \u001b[39m=\u001b[39m Dataset(\u001b[39m\"\u001b[39m\u001b[39mdata\u001b[39m\u001b[39m\\\u001b[39m\u001b[39mGamingStudy_data.csv\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m      4\u001b[0m \u001b[39mprint\u001b[39m(dataset)\n",
+      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'src.Dataset'"
      ]
     }
    ],
@@ -30,7 +36,7 @@
     "from src.Dataset import Dataset \n",
     "\n",
     "dataset = Dataset(\"data\\GamingStudy_data.csv\")\n",
-    "print(dataset)"
+    "print(dataset)\n"
    ]
   },
   {
@@ -39,14 +45,14 @@
    "source": [
     "# Data Exploration\n",
     "\n",
-    "\n",
-    "\n",
     "Because the data was accumulated in a semi-professional way for a pre-study we had to clean it up and make some changes. \n",
     "\n",
     "Some columns could be answered with an open text field. Naturally the answeres in those columns are very diversified and hard to analyze. \n",
-    "+ Example\n",
-    "+ Example\n",
-    "+ Example\n",
+    "\n",
+    "#### Affected Columns\n",
+    "+ Whyplay\n",
+    "+ Earnings \n",
+    "+ League\n",
     "\n",
     "In the following we will explain if and how we used these columns. \n",
     "\n",
@@ -56,7 +62,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -132,8 +138,7 @@
     "A more detailed interpretation can be found [here](http://labs.psychology.illinois.edu/~ediener/Documents/Understanding%20SWLS%20Scores.pdf).\n",
     "\n",
     "Residents of developed nations (e.g. DE) usually score 20-24.\n",
-    "### Take it yourself\n",
-    "\n",
+    "#### Questions \n",
     "____ In most ways my life is close to my ideal.<br>\n",
     "____ The conditions of my life are excellent.<br>\n",
     "____ I am satisfied with my life.<br>\n",
@@ -148,13 +153,24 @@
    "metadata": {},
    "source": [
     "# Analysis\n",
-    "Explained new columns and why we did that (\"Is_narcissist, \"Anxiety_score\")\n",
-    "## Normalizing the Data \n"
+    "\n",
+    "## Preprocessing \n",
+    "* Explained new columns and why we did that *\n",
+    "\n",
+    "Some columns gave the option to write individual responses. Naturally those are not useful in data analysis. In some cases we cleaned the columns and changed the unusual cases to \"Other\"/\"NA\"\n",
+    "### Cleaned Columns\n",
+    "+ \"Whyplay\" \n",
+    "+ Accept \n",
+    "## Normalizing the Data \n",
+    "\n",
+    "### \"Is_narcissist\"\n",
+    "### \"Anxiety_score\"\n",
+    "### \"Is_competitive\""
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -171,10 +187,10 @@
        "13461    0.125210\n",
        "13462    0.591783\n",
        "13463    0.243231\n",
-       "Length: 13464, dtype: float64"
+       "Length: 13050, dtype: float64"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 23,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -196,18 +212,25 @@
     "\n",
     "### Women vs Men \n",
     "\n",
-    "Explanation "
+    "Explanation \n",
+    "![Example Plot](https://cdn.discordapp.com/attachments/806128836332879924/1127988009627832320/Unbenannt.png)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
+    "\n",
     "# SIDE BY SIDE PLOTS \n",
     "# LEFT = LINE Graph distribution of Anxiety Score Related to Group\n",
-    "# RIGHT = Stacked Bars comparing the GROUP with = 1. 2. 3. "
+    "# RIGHT = Stacked Bars comparing the GROUP with = \n",
+    "# 1.[Work] - 4 Bars\n",
+    "# 2.[Degree] - 5 Bars\n",
+    "# 3.[Whyplay ] - 4 Bars (Everything until \"All of them\")\n",
+    "\n",
+    "#"
    ]
   },
   {
@@ -226,26 +249,10 @@
    "source": [
     "# SIDE BY SIDE PLOTS \n",
     "# LEFT = LINE Graph distribution of Anxiety Score Related to Group\n",
-    "# RIGHT = Stacked Bars comparing the GROUP with = 1. 2. 3. "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### High Education vs Lower Education\n",
-    "Explanation"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# SIDE BY SIDE PLOTS \n",
-    "# LEFT = LINE Graph distribution of Anxiety Score Related to Group\n",
-    "# RIGHT = Stacked Bars comparing the GROUP with = 1. 2. 3. "
+    "# RIGHT = Stacked Bars comparing the GROUP with = \n",
+    "# 1.[Work] - 4 Bars\n",
+    "# 2.[Degree] - 5 Bars\n",
+    "# 3.[Whyplay ] - 4 Bars (Everything until \"All of them\")"
    ]
   },
   {
@@ -263,14 +270,17 @@
    "source": [
     "# SIDE BY SIDE PLOTS \n",
     "# LEFT = LINE Graph distribution of Anxiety Score Related to Group\n",
-    "# RIGHT = Stacked Bars comparing the GROUP with = 1. 2. 3. "
+    "# RIGHT = Stacked Bars comparing the GROUP with = \n",
+    "# 1.[Work] - 4 Bars\n",
+    "# 2.[Degree] - 5 Bars\n",
+    "# 3.[Whyplay ] - 4 Bars (Everything until \"All of them\")"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Q2 - Correlations betweeet played hours and ones well being. "
+    "## Q2 - Correlations between played hours and one's well being. "
    ]
   },
   {
@@ -291,7 +301,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -308,7 +318,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -335,6 +345,13 @@
     "2. Is the amount of educated players similar "
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "![Scatter](https://cdn.discordapp.com/attachments/1127973734884581386/1127973829344493679/image.png)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -344,11 +361,11 @@
     "#### Analyze the countries amounting to Top 7 or 90% of the survey. \n",
     "\n",
     "\n",
-    "#Q4. MAP PLOT = Most played game per country (Dont do it if its League everywhere. )\n",
+    "#Q4.MAP PLOT = Most played game per country (Dont do it if its League everywhere. )\n",
     "#Q4 MAP PLOT = Heat Map with redder areas for more Anxiety in the country. \n",
     "#Q1.2 Grouped Bar Chart with the top game next to the \"Anxiety Score\"\n",
     "\n",
-    "#2 Grouped Bars with Education per Country IF POSSIBLE grouped with a bar for the anxiety "
+    "#2 Scatter PLot like in the example "
    ]
   }
  ],
diff --git a/src/Dataset.py b/src/Dataset.py
index 271450048e73f9ade1810db20e389d95cfd6831a..1ef5004c17ad22e716577b69f376bb3f7168a799 100644
--- a/src/Dataset.py
+++ b/src/Dataset.py
@@ -5,7 +5,7 @@ import pandas as pd
 class Dataset:
     def __init__(self, dataset_filename: str) -> None:
         """A wrapper class for the pandas dataframe.
-        Loads the dataset
+        Loads and preprocesses the dataset, then stores it in self.dataframe.
 
         Args:
             dataset_filename (str): the path of the dataset,
@@ -20,16 +20,99 @@ class Dataset:
         Args:
             raw_dataframe (pd.DataFrame):
                 raw dataframe as read from pd.read_csv().
+                This dataframe is discarded afterwards.
 
         Returns:
             pd.DataFrame: resulting preprocessed dataframe.
         """
-        dataframe = raw_dataframe.drop(["League"], axis="columns")
+        dataframe = self._drop_unnecessary_columns(
+            raw_dataframe
+        )  # for convenience
+        dataframe = self.remove_nonaccepting_rows(dataframe)
+
         dataframe["Anxiety_score"] = self.get_combined_anxiety_score(dataframe)
         dataframe["Is_narcissist"] = self.get_is_narcissist_col(dataframe)
+
+        dataframe["Is_competitive"] = self.preprocess_whyplay(dataframe)
         # more preprocessing goes here
         return dataframe
 
+    def get_is_competitive_col(self, dataframe: pd.DataFrame):
+        is_competitive_col = np.zeros(shape=len(dataframe))
+        is_competitive_col[
+            (dataframe["whyplay"] == "improving")
+            | (dataframe["whyplay"] == "winning")
+            | (dataframe["whyplay"] == "all of the above")
+        ] = True
+        is_competitive_col[
+            (dataframe["whyplay"] == "having fun")
+            | (dataframe["whyplay"] == "relaxing")
+        ] = False
+        is_competitive_col[(dataframe["whyplay"] == "other")] = None
+
+        return is_competitive_col
+
+    def _drop_unnecessary_columns(
+        self, dataframe: pd.DataFrame
+    ) -> pd.DataFrame:
+        """Drop unnecessary columns from the dataset.
+
+        Args:
+            dataframe (pd.DataFrame): the dataframe.
+
+        Returns:
+            pd.DataFrame: the dataframe.
+        """
+        rows_to_drop = [
+            "League",
+            "S. No.",
+            "Timestamp",
+            "highestleague",
+            "earnings",
+        ]
+        return dataframe.drop(rows_to_drop, axis="columns")
+
+    def remove_nonaccepting_rows(
+        self, dataframe: pd.DataFrame
+    ) -> pd.DataFrame:
+        """Removes rows where participants did not consent to data processing.
+
+        Args:
+            dataframe (pd.DataFrame): the dataframe.
+
+        Returns:
+            pd.DataFrame: the dataframe.
+        """
+        # drop rows where users did not accept to having their data used
+        dataframe = dataframe.drop(
+            dataframe[dataframe["accept"] != "Accept"].index,
+        )
+        dataframe = dataframe.drop(["accept"], axis=1)
+        return dataframe
+
+    def preprocess_whyplay(self, dataframe: pd.DataFrame) -> pd.Series:
+        """Preprocesses the whyplay column, and returns an Is_competitive col.
+
+        Args:
+            dataframe (pd.DataFrame): the dataframe.
+
+        Returns:
+            pd.Series: the Is_competitive column.
+        """
+        dataframe["whyplay"] = dataframe["whyplay"].str.lower()
+        most_common_whyplay_reasons = list(
+            dataframe.groupby("whyplay")
+            .size()
+            .sort_values(ascending=False)
+            .head(5)
+            .index
+        )
+        dataframe.loc[
+            ~dataframe["whyplay"].isin(most_common_whyplay_reasons), "whyplay"
+        ] = "other"
+        is_competitive_col = self.get_is_competitive_col(dataframe)
+        return is_competitive_col
+
     def get_combined_anxiety_score(self, dataframe: pd.DataFrame) -> pd.Series:
         """Get the combined anxiety score, as a column.
         This score is based on the GAN, SPIN and SWL metrics.
@@ -88,3 +171,35 @@ class Dataset:
             pd.Series: The sorted column.
         """
         return self.dataframe[colname].sort_values(ascending=ascending)
+
+    def get_unique_column_values(self, colname: str):
+        """Returns the unique values present in a column of the dataset.
+
+        Args:
+            colname (str): the column name.
+
+        Returns:
+            string array: an array of strings containing the unique values
+            present in the column
+        """
+        return self.dataframe[colname].explode().unique()
+
+    def get_category_counts(
+        self, colname: str, ascending: bool = None
+    ) -> pd.Series:
+        """Returns a count of categorical values in the dataset.
+
+        Args:
+            colname (str): the column name.
+            ascending (bool, optional): Direction to sort results.
+              If set to None, the results are not sorted. Defaults to None.
+
+        Returns:
+            pd.Series: the counted categories.
+        """
+        grouped_size = self.dataframe.groupby(colname).size()
+        return (
+            grouped_size
+            if ascending is None
+            else grouped_size.sort_values(ascending=ascending)
+        )
diff --git a/src/Plotter.py b/src/Plotter.py
new file mode 100644
index 0000000000000000000000000000000000000000..c16adfe01c294d076f9904c8aa01695792d013b4
--- /dev/null
+++ b/src/Plotter.py
@@ -0,0 +1,46 @@
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib
+import pandas as pd
+import seaborn as sns
+from .Dataset import Dataset
+
+class Plotter:
+    def __init__(self, dataset: Dataset):
+        self.ds = dataset
+        self.df = dataset.get_dataframe()
+
+    
+    def customize_plot(self, fig, ax, styling_params):
+        if styling_params.get('title'):
+            ax.set_title(styling_params["title"])
+
+    
+    def plot_categorical_bar_chart(self, category1, category2, styling_params = {}):
+        ct = pd.crosstab(self.df[category1], self.df[category2])
+        # Calculate percentages by column (axis=0 normalizes each column to 100%)
+        ct_percent = ct.apply(lambda r: r/r.sum() * 100, axis=0)                
+        fig, ax = plt.subplots()
+        self.customize_plot(fig, ax, styling_params)
+        ct_percent.plot(kind='bar', ax=ax)
+
+
+    def plot_categorical_boxplot(self, target, category, styling_params = {}):
+        fig, ax = plt.subplots()
+        self.customize_plot(fig, ax, styling_params)
+        sns.boxplot(x=category,y=target,data=self.df, palette='rainbow')
+
+
+    def plot_categorical_histplot(self, target, category, styling_params = {}, bins= 30):
+        uniques = self.ds.get_unique_column_values(category)
+        fig, ax = plt.subplots()
+        self.customize_plot(fig, ax, styling_params)
+        for val in uniques:
+            anx_score = self.df[self.df[category] == val][target]
+            anx_score_weights = np.ones(len(anx_score)) / len(anx_score)
+            ax.hist(
+                anx_score,
+                weights=anx_score_weights,
+                bins = bins,
+                alpha=0.5,
+            )
\ No newline at end of file