diff --git a/Editor_Notebook.ipynb b/Editor_Notebook.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8bd7904e5a9d903661e5de53d883304d7fb3f3d2 --- /dev/null +++ b/Editor_Notebook.ipynb @@ -0,0 +1,31 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Notebook for Coders" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Notebook.ipynb b/Notebook.ipynb index 975e65d6bc413385dd66bcd87057ce0c370f79b5..1960c5f05081b544405840850d4cf6693e0f636f 100644 --- a/Notebook.ipynb +++ b/Notebook.ipynb @@ -17,24 +17,18 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "<src.Dataset.Dataset object at 0x000001CD53BDA250>\n" - ] - } - ], + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", - "from src.Dataset import Dataset \n", + "from src.Dataset import Dataset\n", + "from src.Plotter import Plotter\n", "\n", "dataset = Dataset(\"data\\GamingStudy_data.csv\")\n", "dataframe = dataset.get_dataframe()\n", - "print(dataset)\n" + "print(dataset)\n", + "plotter = Plotter(dataset)" ] }, { @@ -58,20 +52,22 @@ "\n" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Distribution of Participants \n", + "### Gender" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "\"\"\"\n", - "4 Plots/Way to shows the distribution of \n", - "Gender, \n", - "Platform (where they found the survey)\n", - "Games Top 5 \n", - "and Console\n", - "\n", - "\"\"\"" + "plotter.distribution_plot(\"Gender\")\n", + "pass" ] }, { @@ -266,7 +262,17 @@ "metadata": {}, 
"source": [ "## Q2 - Correlations between played hours and one's well being.\n", - "**Maybe we can even add if hours watching Streams effect it**" + "**Maybe we can even add if hours watching Streams affect it**\n", + "\n", + "For research question two we wanted to know if there is a correlation \n", + "between played hours and the player's well-being. We went into the question\n", + "with the expectation that players who play longer hours are more anxiety-prone\n", + "and less satisfied with life than those who play less. If that were the \n", + "case, a positive correlation of hours played and our combined anxiety score \n", + "variable would be expected. We want to take a look at the data using a\n", + "scatterplot, showing the correlation of both variables of interest, using the\n", + "plot_scatterplot() function of our Plotter class:\n", + "code below: plotter.plot_scatterplot(\"Hours\", \"Anxiety_score\")" ] }, { @@ -275,10 +281,9 @@ "metadata": {}, "outputs": [], "source": [ - " \"\"\"\n", - " \n", - " \n", - " \"\"\"" + "plotter.plot_scatterplot(\"Hours\", \"Anxiety_score\") \n", + "\n", + "#Still needs to be prettier" ] }, { @@ -304,37 +309,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Category distribution:\n", - "whyplay\n", - "having fun 5105\n", - "improving 4661\n", - "winning 1977\n", - "relaxing 623\n", - "other 424\n", - "all of the above 48\n", - "dtype: int64\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "<Figure size 576x432 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "\"\"\" Horizontal bar chart, one row for every reason for with top width\n", "# Anxiety colored in for the amount of anxiety in that group \n", @@ -444,4 +421,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at
end of file +} diff --git a/src/Plotter.py b/src/Plotter.py index c8d9051a3b7e2319a463cf91aba90b003908e7a9..5b0c4271991ac2a2a66867b6fc69f6e2f8cb59dd 100644 --- a/src/Plotter.py +++ b/src/Plotter.py @@ -15,7 +15,7 @@ class Plotter: self.df = dataset.get_dataframe() def customize_plot(self, fig, ax, styling_params) -> None: - """ customize_plot + """customize_plot Args: fig (plt.figure.Figure), @@ -72,7 +72,7 @@ class Plotter: def plot_categorical_bar_chart( self, category1, category2, styling_params={} ) -> None: - """ plot a categorical bar chart. + """plot a categorical bar chart. Args: category1 (str, must be present as a column in the dataset), @@ -119,7 +119,7 @@ class Plotter: def plot_categorical_boxplot( self, target, category, styling_params={} ) -> None: - """ plot a categorical boxplot. + """plot a categorical boxplot. Args: target (str, must be present as a column in the dataset), @@ -130,32 +130,7 @@ class Plotter: Returns: None """ - # implementing sensible logging and error catching - if (type(target) != str): - logging.error("parameter target should be a string.") - raise ValueError("parameter target should be a string.") - - if not (target in self.df.columns): - logging.error("parameter target cannot be found in the dataset.") - raise ValueError( - "parameter target cannot be found in the dataset." - ) - - if (type(category) != str): - logging.error("parameter category should be a string.") - raise ValueError("parameter category should be a string.") - - if not (category in self.df.columns): - logging.error("parameter category cannot be found in the dataset.") - raise ValueError( - "parameter category cannot be found in the dataset." 
-            )
-
-        if (type(styling_params) != dict):
-            logging.error("parameter styling params should be a dict.")
-            raise ValueError("parameter styling params should be a dict.")
-
         # plotting the plot
         fig, ax = plt.subplots()
         self.customize_plot(fig, ax, styling_params)
         sns.boxplot(x=category, y=target, data=self.df, palette="rainbow")
@@ -163,7 +138,7 @@ class Plotter:
     def plot_categorical_histplot(
         self, target, category, styling_params={}, bins=30
     ) -> None:
-        """ plot a categorical hisplot.
+        """plot a categorical histplot.
 
         Args:
             target (str, must be present as a column in the dataset),
@@ -215,7 +190,7 @@ class Plotter:
         )
 
     def plot_scatterplot(self, target1, target2, styling_params={}) -> None:
-        """ plot a scatterplot.
+        """plot a scatterplot.
 
         Args:
             target1 (str, must be present as a column in the dataset),
@@ -255,4 +230,42 @@ class Plotter:
         # plotting the plot
         fig, ax = plt.subplots()
         self.customize_plot(fig, ax, styling_params)
-        ax.scatter(self.df[target1], self.df[target2])
\ No newline at end of file
+        ax.scatter(self.df[target1], self.df[target2])
+
+    def distribution_plot(self, target: str) -> None:
+        """plot the distribution of a categorical column as a bar chart.
+
+        Counts the dataset rows per distinct value of target, prints the
+        counts in descending order and draws them as a horizontal bar
+        chart on a new figure.
+
+        Args:
+            target (str, must be present as a column in the dataset)
+
+        Raises:
+            ValueError: if target is not a str or not a dataset column.
+
+        Returns:
+            None
+        """
+        # fail early with a clear message instead of a KeyError from groupby
+        if not isinstance(target, str):
+            logging.error("parameter target should be a string.")
+            raise ValueError("parameter target should be a string.")
+
+        if target not in self.df.columns:
+            logging.error("parameter target cannot be found in the dataset.")
+            raise ValueError(
+                "parameter target cannot be found in the dataset."
+            )
+
+        grouped_data = self.df.groupby(target).size()
+        print(grouped_data.sort_values(ascending=False))
+
+        # draw on a dedicated figure so repeated calls do not pile their
+        # bars onto whichever figure pyplot currently has active
+        _, ax = plt.subplots()
+        ax.barh(grouped_data.index, grouped_data.values)
+        ax.set_xlabel("Size")
+        ax.set_ylabel(target)
+        ax.set_title(f"Distribution of {target}")