Skip to content

Commit

Permalink
common/lib/file: Test docs
Browse files Browse the repository at this point in the history
  • Loading branch information
e10harvey committed Jan 17, 2025
1 parent a1b8056 commit 7cad4ae
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 14 deletions.
74 changes: 68 additions & 6 deletions opencsp/common/lib/file/CsvColumns.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,26 @@


class CsvColumns:
"""
A class to help parse CSV files with a tentative structure by finding column name matches.
This class allows for the definition of expected column names and their aliases,
and provides methods to parse the header and data rows of a CSV file.
"""

# "ChatGPT 4o" assisted with generating this docstring.
def __init__(self, columns: dict[str, list[str | re.Pattern]]):
"""Helps to parse csv files that have a tentative structure to them by finding column name matches.
"""
Initializes the CsvColumns instance with the provided column definitions.
Helps to parse csv files that have a tentative structure to them by finding column name matches.
Example::
Parameters
----------
columns : dict[str, list[str | re.Pattern]]
The anticipated column names and their corresponding aliases or regex patterns.
Example
-------
cols = cc.CsvColumns({
'latitude': ['lat'],
'datetime': ['UTC', 'localtime', re.compile(r"^dt")]
Expand All @@ -20,21 +35,50 @@ def __init__(self, columns: dict[str, list[str | re.Pattern]]):
cols.parse_header(rows[0])
lat = float(rows[1][cols['latitude']])
dt = datetime.fromisoformat(rows[1][cols['datetime']])
Args:
columns (dict[str,list[str | re.Pattern]]): The anticipated column names to patterns to match those column names.
"""
# "ChatGPT 4o" assisted with generating this docstring.
self.columns = {k: _ColumnHeader(k, columns[k], -1) for k in columns}

@classmethod
def SimpleColumns(cls, header_row: list[str]):
    """
    Build an instance whose columns are taken verbatim from a header row.

    Each entry in the header row becomes a column name whose only alias is
    the entry itself. The header row is then parsed right away, so the
    returned instance has already been matched against it.

    Parameters
    ----------
    header_row : list[str]
        The column names from the CSV header.

    Returns
    -------
    CsvColumns
        A new instance created from, and already parsed against, the header row.
    """
    # "ChatGPT 4o" assisted with generating this docstring.
    # Each header name doubles as its own (single) alias.
    self_named = dict((name, [name]) for name in header_row)
    instance = cls(self_named)
    instance.parse_header(header_row)
    return instance

def parse_data_row(self, data_row: list[str], row_idx=-1):
"""
Parses a data row and extracts values based on the matched column indices.
Parameters
----------
data_row : list[str]
A list of values from a single row of the CSV file.
row_idx : int, optional
The index of the row being parsed, used for logging. Defaults to -1.
Returns
-------
dict[str, str]
A dictionary mapping column names to their corresponding values from the data row.
"""
# "ChatGPT 4o" assisted with generating this docstring.
ret: dict[str, str] = {}
last_matched_idx = -1

Expand All @@ -61,6 +105,24 @@ def parse_header(
ok_if_not_found: list[str] = None,
alternatives: dict[str, list[str]] = None,
):
"""
Parses the header row to find matches for the defined columns.
This method updates the column indices based on the header row and checks for
any missing columns, logging warnings or raising errors as specified.
Parameters
----------
header_row : list[str]
A list of column names from the CSV header.
error_on_not_found : bool | list[str], optional
If True, raises an error for any missing columns. If a list, raises an error for columns in that list. Defaults to True.
ok_if_not_found : list[str], optional
A list of column names that are acceptable to be missing. Defaults to None.
alternatives : dict[str, list[str]], optional
A dictionary mapping column names to lists of alternative names. Defaults to None.
"""
# "ChatGPT 4o" assisted with generating this docstring.
# add reverse values for the alternatives, if any
if alternatives != None:
ks = list(alternatives.keys())
Expand Down
66 changes: 58 additions & 8 deletions opencsp/common/lib/file/SimpleCsv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,42 @@


class SimpleCsv:
"""
A class for simple parsing of CSV files.
This class allows for reading a CSV file and provides methods to access
the header, columns, and rows of the file in a structured manner.
"""

# "ChatGPT 4o" assisted with generating this docstring.
def __init__(self, description: str, file_path: str, file_name_ext: str):
"""Allows for simple CSV file parsing.
"""
Initializes the SimpleCsv instance and parses the CSV file.
Parameters
----------
description : str
A description of the file to be processed, or None to suppress output to stdout.
file_path : str
The path to the CSV file to be processed.
file_name_ext : str
The name and extension of the CSV file to be processed.
Example::
Raises
------
FileNotFoundError
If the specified CSV file does not exist.
ValueError
If the CSV file is empty or improperly formatted.
Example
-------
parser = scsv.SimpleCsv("example file", file_path, file_name_ext)
for row_dict in parser:
print(row_dict)
Parameters:
-----------
- description (str): A description of the file to be processed, or None to not print to stdout.
- file_path (str): Path to the file to be processed.
- file_name_ext (str): Name and extension of the file to be processed.
"""
# "ChatGPT 4o" assisted with generating this docstring.
self.description = description
self.file_path = file_path
self.file_name_ext = file_name_ext
Expand All @@ -32,12 +53,41 @@ def __init__(self, description: str, file_path: str, file_name_ext: str):
self.rows.append(self.cols.parse_data_row(row))

def get_header(self):
    """
    Build the header line of the CSV file.

    Returns
    -------
    str
        The column names, as reported by get_columns(), joined with commas.
    """
    # "ChatGPT 4o" assisted with generating this docstring.
    column_names = self.get_columns()
    return ",".join(column_names)

def get_columns(self):
    """
    Returns a list of column names from the CSV file.

    Returns
    -------
    list[str]
        The ``name`` of every column header held by ``self.cols``, in the
        order the columns are stored.
    """
    # "ChatGPT 4o" assisted with generating this docstring.
    # ``columns`` is a dict, so ``values`` must be *called*; iterating the
    # bound method itself raises TypeError.
    return [col.name for col in self.cols.columns.values()]

def get_rows(self):
    """
    Give access to the parsed data rows of the CSV file.

    Returns
    -------
    list[dict[str, str]]
        The list stored in ``self.rows``. It is returned as-is (not copied),
        so changes made by the caller are visible on this instance.
    """
    # "ChatGPT 4o" assisted with generating this docstring.
    parsed_rows = self.rows
    return parsed_rows

def __iter__(self):
Expand Down

0 comments on commit 7cad4ae

Please sign in to comment.