Skip to content
Snippets Groups Projects
Commit 6a682ba8 authored by Sortofamudkip's avatar Sortofamudkip
Browse files

preprocess_whyplay

parent 8805070c
No related branches found
No related tags found
No related merge requests found
...@@ -27,9 +27,25 @@ class Dataset: ...@@ -27,9 +27,25 @@ class Dataset:
dataframe = raw_dataframe.drop(["League"], axis="columns") dataframe = raw_dataframe.drop(["League"], axis="columns")
dataframe["Anxiety_score"] = self.get_combined_anxiety_score(dataframe) dataframe["Anxiety_score"] = self.get_combined_anxiety_score(dataframe)
dataframe["Is_narcissist"] = self.get_is_narcissist_col(dataframe) dataframe["Is_narcissist"] = self.get_is_narcissist_col(dataframe)
self.preprocess_whyplay(dataframe)
# more preprocessing goes here # more preprocessing goes here
return dataframe return dataframe
def preprocess_whyplay(self, dataframe: pd.DataFrame) -> list:
    """Normalise the "whyplay" column and collapse rare answers into "other".

    The column is lower-cased, the 5 most frequent answers are kept as-is
    and every other answer is replaced by the string "other".

    The dataframe is modified in place (only its "whyplay" column changes).

    Args:
        dataframe: the dataframe to preprocess; must have a "whyplay" column
            of strings.

    Returns:
        The (up to) 5 most common lower-cased "whyplay" answers, most
        frequent first.
    """
    dataframe["whyplay"] = dataframe["whyplay"].str.lower()
    most_common_whyplay_reasons = list(
        dataframe.groupby("whyplay")
        .size()
        .sort_values(ascending=False)
        .head(5)
        .index
    )
    # Missing answers are never among the groupby keys, so they are
    # treated as rare and end up as "other" too.
    is_rare_reason = ~dataframe["whyplay"].isin(most_common_whyplay_reasons)
    # Restrict the assignment to the "whyplay" column: indexing with the
    # boolean mask alone would overwrite *every* column of the matching rows.
    dataframe.loc[is_rare_reason, "whyplay"] = "other"
    return most_common_whyplay_reasons
def get_combined_anxiety_score(self, dataframe: pd.DataFrame) -> pd.Series: def get_combined_anxiety_score(self, dataframe: pd.DataFrame) -> pd.Series:
"""Get the combined anxiety score, as a column. """Get the combined anxiety score, as a column.
This score is based on the GAN, SPIN and SWL metrics. This score is based on the GAN, SPIN and SWL metrics.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment