Skip to content
Snippets Groups Projects
Commit 7bdb14cf authored by Sortofamudkip's avatar Sortofamudkip
Browse files

basic preprocess_dataset() and get_sorted_column()

parent 5129400f
No related branches found
No related tags found
1 merge request: !5 Resolve "Basic Dataset class"
......@@ -3,12 +3,30 @@ import pandas as pd
class Dataset:
def __init__(self, dataset_filename: str) -> None:
# Reads the CSV file at `dataset_filename` (decoded as windows-1254), runs it
# through preprocess_dataset(), and stores the result on the instance.
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# and indentation stripped; the first pair of statements (`self.dataset`)
# is presumably the pre-commit version and the second pair
# (`self.dataframe`) its replacement — confirm against the repository.
raw_dataset = pd.read_csv(dataset_filename, encoding="windows-1254")
self.dataset = self.preprocess_dataset(raw_dataset)
raw_dataframe = pd.read_csv(dataset_filename, encoding="windows-1254")
self.dataframe = self.preprocess_dataset(raw_dataframe)
def preprocess_dataset(self, raw_dataframe):
# Placeholder version: hands the dataframe back unchanged.
# NOTE(review): a second preprocess_dataset definition follows immediately;
# presumably this one is the removed side of the diff — confirm.
# preprocessing goes here
return raw_dataframe
def preprocess_dataset(self, raw_dataframe: pd.DataFrame) -> pd.DataFrame:
    """Clean up a freshly loaded dataframe before it is stored.

    The only step so far is removing the "League" column. The input
    dataframe is not modified; a new dataframe is returned.

    Args:
        raw_dataframe (pd.DataFrame):
            dataframe exactly as returned by pd.read_csv().

    Returns:
        pd.DataFrame: a copy of the input without the "League" column.
    """
    columns_to_drop = ["League"]
    cleaned = raw_dataframe.drop(columns=columns_to_drop)
    # more preprocessing goes here
    return cleaned
def get_dataframe(self) -> pd.DataFrame:
    """Return the dataframe held by this object.

    The stored object itself is returned, not a copy.

    Returns:
        pd.DataFrame: the value of ``self.dataframe``.
    """
    stored = self.dataframe
    return stored
def draw_histogram(self):
    """Stub for histogram drawing; not implemented yet.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
......@@ -18,3 +36,15 @@ class Dataset:
def get_plottable_columns(self) -> list:
    """Stub for listing the plottable columns; not implemented yet.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError()
def get_sorted_column(self, colname: str, ascending=True) -> pd.Series:
    """Return one column of the dataframe with its values sorted.

    The stored dataframe is not reordered; the sorted values come back
    as a new Series that keeps each value's original index label.

    Args:
        colname (str): name of the column to fetch from the dataframe.
        ascending (bool, optional): sort smallest-first when True,
            largest-first when False. Defaults to True.

    Returns:
        pd.Series: the requested column, sorted.
    """
    column = self.dataframe[colname]
    return column.sort_values(ascending=ascending)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment