diff --git a/opencsp/common/lib/file/CsvColumns.py b/opencsp/common/lib/file/CsvColumns.py index 4cb7a6f9..bd71c1d1 100644 --- a/opencsp/common/lib/file/CsvColumns.py +++ b/opencsp/common/lib/file/CsvColumns.py @@ -7,11 +7,26 @@ class CsvColumns: + """ + A class to help parse CSV files with a tentative structure by finding column name matches. + + This class allows for the definition of expected column names and their aliases, + and provides methods to parse the header and data rows of a CSV file. + """ + + # "ChatGPT 4o" assisted with generating this docstring. def __init__(self, columns: dict[str, list[str | re.Pattern]]): - """Helps to parse csv files that have a tentative structure to them by finding column name matches. + """ + Initializes the CsvColumns instance with the provided column definitions. + Helps to parse csv files that have a tentative structure to them by finding column name matches. - Example:: + Parameters + ---------- + columns : dict[str, list[str | re.Pattern]] + The anticipated column names and their corresponding aliases or regex patterns. + Example + ------- cols = cc.CsvColumns({ 'latitude': ['lat'], 'datetime': ['UTC', 'localtime', re.compile(r"^dt")] @@ -20,21 +35,50 @@ def __init__(self, columns: dict[str, list[str | re.Pattern]]): cols.parse_header(rows[0]) lat = float(rows[1][cols['latitude']]) dt = datetime.fromisoformat(rows[1][cols['datetime']]) - - Args: - columns (dict[str,list[str | re.Pattern]]): The anticipated column names to patterns to match those column names. """ + # "ChatGPT 4o" assisted with generating this docstring. self.columns = {k: _ColumnHeader(k, columns[k], -1) for k in columns} @classmethod def SimpleColumns(cls, header_row: list[str]): - """Simple constructor that creates columns and column names from the header.""" + """ + Creates a CsvColumns instance from a simple header row. + + This method initializes the columns using the header row as both the names and aliases. + + Parameters + ---------- + header_row : list[str] + A list of column names from the CSV header. + + Returns + ------- + CsvColumns + An instance of CsvColumns initialized with the provided header row. + """ + # "ChatGPT 4o" assisted with generating this docstring. columns = {v: [v] for v in header_row} ret = cls(columns) ret.parse_header(header_row) return ret def parse_data_row(self, data_row: list[str], row_idx=-1): + """ + Parses a data row and extracts values based on the matched column indices. + + Parameters + ---------- + data_row : list[str] + A list of values from a single row of the CSV file. + row_idx : int, optional + The index of the row being parsed, used for logging. Defaults to -1. + + Returns + ------- + dict[str, str] + A dictionary mapping column names to their corresponding values from the data row. + """ + # "ChatGPT 4o" assisted with generating this docstring. ret: dict[str, str] = {} last_matched_idx = -1 @@ -61,6 +105,24 @@ def parse_header( ok_if_not_found: list[str] = None, alternatives: dict[str, list[str]] = None, ): + """ + Parses the header row to find matches for the defined columns. + + This method updates the column indices based on the header row and checks for + any missing columns, logging warnings or raising errors as specified. + + Parameters + ---------- + header_row : list[str] + A list of column names from the CSV header. + error_on_not_found : bool | list[str], optional + If True, raises an error for any missing columns. If a list, raises an error for columns in that list. Defaults to True. + ok_if_not_found : list[str], optional + A list of column names that are acceptable to be missing. Defaults to None. + alternatives : dict[str, list[str]], optional + A dictionary mapping column names to lists of alternative names. Defaults to None. + """ + # "ChatGPT 4o" assisted with generating this docstring. # add reverse values for the alternatives, if any if alternatives != None: ks = list(alternatives.keys()) diff --git a/opencsp/common/lib/file/SimpleCsv.py b/opencsp/common/lib/file/SimpleCsv.py index 8762899c..70159f78 100644 --- a/opencsp/common/lib/file/SimpleCsv.py +++ b/opencsp/common/lib/file/SimpleCsv.py @@ -4,21 +4,42 @@ class SimpleCsv: + """ + A class for simple parsing of CSV files. + + This class allows for reading a CSV file and provides methods to access + the header, columns, and rows of the file in a structured manner. + """ + + # "ChatGPT 4o" assisted with generating this docstring. def __init__(self, description: str, file_path: str, file_name_ext: str): - """Allows for simple CSV file parsing. + """ + Initializes the SimpleCsv instance and parses the CSV file. + + Parameters + ---------- + description : str + A description of the file to be processed, or None to suppress output to stdout. + file_path : str + The path to the CSV file to be processed. + file_name_ext : str + The name and extension of the CSV file to be processed. - Example:: + Raises + ------ + FileNotFoundError + If the specified CSV file does not exist. + ValueError + If the CSV file is empty or improperly formatted. + + Example + ------- parser = scsv.SimpleCsv("example file", file_path, file_name_ext) for row_dict in parser: print(row_dict) - - Parameters: - ----------- - - description (str): A description of the file to be processed, or None to not print to stdout. - - file_path (str): Path to the file to be processed. - - file_name_ext (str): Name and extension of the file to be processed. """ + # "ChatGPT 4o" assisted with generating this docstring. self.description = description self.file_path = file_path self.file_name_ext = file_name_ext @@ -32,12 +53,41 @@ def __init__(self, description: str, file_path: str, file_name_ext: str): self.rows.append(self.cols.parse_data_row(row)) def get_header(self): + """ + Returns the header of the CSV file as a comma-separated string. + + Returns + ------- + str + A string representation of the header row of the CSV file. + """ + # "ChatGPT 4o" assisted with generating this docstring. return ",".join(self.get_columns()) def get_columns(self): + """ + Returns a list of column names from the CSV file. + + Returns + ------- + list[str] + A list of column names extracted from the CSV header. + """ + # "ChatGPT 4o" assisted with generating this docstring. return [col.name for col in self.cols.columns.values] def get_rows(self): + """ + Returns the rows of the CSV file as a list of dictionaries. + + Each dictionary corresponds to a row in the CSV file, with column names as keys. + + Returns + ------- + list[dict[str, str]] + A list of dictionaries representing the rows of the CSV file. + """ + # "ChatGPT 4o" assisted with generating this docstring. return self.rows def __iter__(self):