Skip to content
Snippets Groups Projects
Commit 64b16115 authored by Sortofamudkip's avatar Sortofamudkip
Browse files

refactored Dataset class columns

parent dc023d15
No related branches found
No related tags found
No related merge requests found
......@@ -20,27 +20,21 @@ class Dataset:
Args:
raw_dataframe (pd.DataFrame):
raw dataframe as read from pd.read_csv().
This dataframe is discarded afterwards.
Returns:
pd.DataFrame: resulting preprocessed dataframe.
"""
dataframe = raw_dataframe # for convenience
rows_to_drop = [
"League",
"S. No.",
"Timestamp",
"highestleague",
"earnings",
]
dataframe = self._drop_unnecessary_columns(
raw_dataframe
) # for convenience
# rows_to_drop = []
dataframe = raw_dataframe.drop(rows_to_drop, axis="columns")
dataframe = self.remove_nonaccepting_rows(dataframe)
dataframe["Anxiety_score"] = self.get_combined_anxiety_score(dataframe)
dataframe["Is_narcissist"] = self.get_is_narcissist_col(dataframe)
self.preprocess_whyplay(dataframe)
dataframe["Is_competitive"] = self.get_is_competitive_col(dataframe)
dataframe["Is_competitive"] = self.preprocess_whyplay(dataframe)
# more preprocessing goes here
return dataframe
......@@ -60,6 +54,16 @@ class Dataset:
return is_competitive_col
def _drop_unnecessary_columns(self, dataframe: pd.DataFrame):
    """Return ``dataframe`` without the columns that are not used later.

    Args:
        dataframe (pd.DataFrame):
            dataframe that contains every column named in the list below.
    Returns:
        pd.DataFrame: the result of ``DataFrame.drop``; the input
        dataframe itself is not modified in place.
    """
    # These are column labels (not row labels), hence the ``columns=`` form.
    unused_columns = [
        "League",
        "S. No.",
        "Timestamp",
        "highestleague",
        "earnings",
    ]
    return dataframe.drop(columns=unused_columns)
def remove_nonaccepting_rows(self, dataframe: pd.DataFrame):
# drop rows where users did not accept to having their data used
dataframe = dataframe.drop(
......@@ -80,7 +84,8 @@ class Dataset:
dataframe.loc[
~dataframe["whyplay"].isin(most_common_whyplay_reasons), "whyplay"
] = "other"
return most_common_whyplay_reasons
is_competitive_col = self.get_is_competitive_col(dataframe)
return is_competitive_col
def get_combined_anxiety_score(self, dataframe: pd.DataFrame) -> pd.Series:
"""Get the combined anxiety score, as a column.
......@@ -140,10 +145,8 @@ class Dataset:
pd.Series: The sorted column.
"""
return self.dataframe[colname].sort_values(ascending=ascending)
def get_unique_column_values(
self, colname: str):
def get_unique_column_values(self, colname: str):
"""Returns a count of categorical values in the dataset.
Args:
......@@ -155,7 +158,6 @@ class Dataset:
"""
return self.dataframe[colname].explode().unique()
def get_category_counts(
self, colname: str, ascending: bool = None
) -> pd.Series:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment