import pandas as pd


class Dataset:
    """Wrapper around a CSV-backed pandas DataFrame.

    The CSV file is read once at construction time, passed through
    ``preprocess_dataset()``, and the result is kept in ``self.dataframe``.
    """

    def __init__(self, dataset_filename: str) -> None:
        """Load the CSV file and store its preprocessed contents.

        Args:
            dataset_filename (str): path of the CSV file to load. The file
                is decoded as windows-1254.
        """
        raw_dataframe = pd.read_csv(dataset_filename, encoding="windows-1254")
        self.dataframe = self.preprocess_dataset(raw_dataframe)

    def preprocess_dataset(self, raw_dataframe: pd.DataFrame) -> pd.DataFrame:
        """Preprocess the dataframe immediately after loading it.

        Currently this only removes the "League" column. The input
        dataframe is not modified; a new dataframe is returned.

        Args:
            raw_dataframe (pd.DataFrame): raw dataframe as read from
                pd.read_csv().

        Returns:
            pd.DataFrame: resulting preprocessed dataframe.

        Raises:
            KeyError: if the raw dataframe has no "League" column.
        """
        dataframe = raw_dataframe.drop(["League"], axis="columns")
        # more preprocessing goes here
        return dataframe

    def get_dataframe(self) -> pd.DataFrame:
        """A getter function for the dataframe.

        Returns:
            pd.DataFrame: the dataset (the stored object itself, not a copy).
        """
        return self.dataframe

    def draw_histogram(self):
        """Draw a histogram of the dataset. Not implemented yet.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def get_dataset_columns(self) -> list:
        """Return the column names of the preprocessed dataset.

        These are the names accepted by get_sorted_column().

        Returns:
            list: the column labels, in dataframe order.
        """
        return self.dataframe.columns.tolist()

    def get_plottable_columns(self) -> list:
        """Return the columns that can be plotted. Not implemented yet.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    def get_sorted_column(self, colname: str, ascending: bool = True) -> pd.Series:
        """Returns a single column, sorted either ascending or descending.

        The original row index is preserved on the returned series, so
        each value can still be traced back to its row.

        Args:
            colname (str): the column name (see get_dataset_columns()).
            ascending (bool, optional): Sorting order. Defaults to True.

        Returns:
            pd.Series: The sorted column.

        Raises:
            KeyError: if ``colname`` is not a column of the dataset.
        """
        return self.dataframe[colname].sort_values(ascending=ascending)