Added functionality and requirements

b07769a6 · Alexander Shervud · 337498d2 · b07769a6 · b07769a6
Commit b07769a6 authored 1 year ago by Alexander Shervud
--- a/docs/requirements.md
+++ b/docs/requirements.md
@@ -9,10 +9,9 @@
 - Users should be able to get an overview of the dataset at hand:
  - how many rows are there?
  - what columns are there?
-  - what relations do these columns have?
+
 - Users should be able to get metrics like mean of a single column of a dataset
- Users should be able to extract a few rows based on a selective criteria (for example only rows with specific value in a column)
- Users should be able to be able to drop specific rows and columns in my data
+- Users should be able to extract a few rows that match a selected criterion.
 - Users should be able to be able to treat outliers in a set. That way you can circumvent fake/joke answers and prevent a plot from blowing up.

 #### Plotting

--- a/src/Dataset.py
+++ b/src/Dataset.py
@@ -340,3 +340,70 @@ class Dataset:
            if ascending is None
            else grouped_size.sort_values(ascending=ascending)
        )
+
+    def get_column_count(self) -> int: 
+        """
+        Return the number of columns in the dataframe.
+
+        Returns:
+            int: Length of `self.dataframe.columns`.
+        """
+        return len(self.dataframe.columns)
+    
+    def get_row_count(self) -> int:
+        """
+        Return the number of rows in the dataframe.
+
+        Returns:
+            int: Value of `len(self.dataframe)`.
+        """
+        return len(self.dataframe)
+    
+    def get_column_mean(self, colname:str) -> int: 
+        """
+        Return the mean of the values in one column of the dataframe.
+
+        Args:
+            colname (str): Label of a column in the dataframe
+                (the labels are returned by get_columns()).
+
+        Returns:
+            The result of `self.dataframe[colname].mean()`.
+        """
+        if type(colname) != str:  # exact type match (str subclasses fail too); raises ValueError
+            logging.error("parameter `colname` is not a string")
+            raise ValueError(f"{colname} is not a string")
+        if colname not in self.dataframe.columns:  # unknown column label raises KeyError
+            logging.error("column requested not in dataframe")
+            raise KeyError(f"{colname} is not a column in dataframe")
+        
+        return self.dataframe[colname].mean()  # NOTE(review): annotated int, but a mean is likely a float - confirm
+    
+    def filtered_rows(self, colname: str, criteria: str) -> pd.DataFrame:
+        """
+        Return the rows whose value in column `colname` equals `criteria`.
+
+        Args:
+            colname (str): Label of the column whose values are compared.
+            criteria (str): Value each entry of the column is compared to with `==`.
+
+        Returns:
+            pd.DataFrame: The rows of `self.dataframe` for which the comparison is true.
+        """
+        if type(colname) != str:  # exact type match (str subclasses fail too); raises ValueError
+            logging.error("parameter `colname` is not a string")
+            raise ValueError(f"{colname} is not a string")
+        if colname not in self.dataframe.columns:  # unknown column label raises KeyError
+            logging.error("column requested not in dataframe")
+            raise KeyError(f"{colname} is not a column in dataframe")
+    
+        return self.dataframe[self.dataframe[colname] == criteria]  # boolean-mask row selection
+    
+    def get_columns(self) -> pd.Series: 
+        """
+        Return the column labels of the dataframe.
+
+        Returns:
+            The `columns` attribute of `self.dataframe`, returned as-is.
+        """
+        return self.dataframe.columns  # NOTE(review): pandas documents this attribute as an Index, not a Series - confirm annotation
\ No newline at end of file