Skip to content
Snippets Groups Projects
Commit b07769a6 authored by Alexander Shervud's avatar Alexander Shervud
Browse files

Added functionality and requirements

parent 337498d2
No related branches found
No related tags found
No related merge requests found
......@@ -9,10 +9,9 @@
- Users should be able to get an overview of the dataset at hand:
- how many rows are there?
- what columns are there?
- what relations do these columns have?
- Users should be able to get metrics, such as the mean, of a single column of a dataset
- Users should be able to extract a few rows based on a selection criterion (for example, only rows with a specific value in a column)
- Users should be able to drop specific rows and columns in their data
- Users should be able to extract a few rows that match a selected criterion.
- Users should be able to treat outliers in a dataset. That way they can filter out fake/joke answers and prevent a plot from blowing up.
#### Plotting
......
......@@ -340,3 +340,70 @@ class Dataset:
if ascending is None
else grouped_size.sort_values(ascending=ascending)
)
def get_column_count(self) -> int:
    """
    Count the columns of the wrapped dataframe.

    Returns:
        int: number of columns in ``self.dataframe``
    """
    # shape is (rows, columns); only the second entry is of interest here.
    _, column_count = self.dataframe.shape
    return column_count
def get_row_count(self) -> int:
    """
    Count the rows of the wrapped dataframe.

    Returns:
        int: number of rows in ``self.dataframe``
    """
    row_labels = self.dataframe.index
    return len(row_labels)
def get_column_mean(self, colname: str) -> float:
    """
    get_column_mean returns the mean value of all entries in one column.

    Args:
        colname (str): Name of the column in the dataframe.
            (Column names can be retrieved by calling get_columns()).

    Raises:
        ValueError: if `colname` is not a string.
        KeyError: if `colname` is not a column of the dataframe.

    Returns:
        float: column_mean (for numeric columns; the value is whatever
            pandas' ``Series.mean()`` yields for the column)
    """
    # isinstance instead of an exact type comparison, so that subclasses of
    # str are accepted as column names as well.
    if not isinstance(colname, str):
        logging.error("parameter `colname` is not a string")
        raise ValueError(f"{colname} is not a string")
    if colname not in self.dataframe.columns:
        logging.error("column requested not in dataframe")
        raise KeyError(f"{colname} is not a column in dataframe")
    return self.dataframe[colname].mean()
def filtered_rows(self, colname: str, criteria: object) -> pd.DataFrame:
    """
    filtered_rows returns the rows whose value in one column equals `criteria`.

    Args:
        colname (str): Column including the value you want to filter by.
        criteria: Value that the entry in `colname` must be equal to for a
            row to be kept. It is compared with ``==``, so it is not limited
            to strings (e.g. a number for a numeric column).

    Raises:
        ValueError: if `colname` is not a string.
        KeyError: if `colname` is not a column of the dataframe.

    Returns:
        pd.DataFrame: the matching rows with all columns; empty if no row
            matches.
    """
    # isinstance instead of an exact type comparison, so that subclasses of
    # str are accepted as column names as well.
    if not isinstance(colname, str):
        logging.error("parameter `colname` is not a string")
        raise ValueError(f"{colname} is not a string")
    if colname not in self.dataframe.columns:
        logging.error("column requested not in dataframe")
        raise KeyError(f"{colname} is not a column in dataframe")
    # Boolean mask: True for every row whose entry equals the criteria.
    matches = self.dataframe[colname] == criteria
    return self.dataframe[matches]
def get_columns(self) -> pd.Index:
    """
    get_columns returns all column headers/indexes

    Returns:
        pd.Index: the column labels of the dataframe (the dataframe's
            ``columns`` attribute, returned as-is rather than as a copy)
    """
    return self.dataframe.columns
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment