Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cache CSV stream schema to avoid regenerating it since the 'schema' p…
Browse files Browse the repository at this point in the history
…roperty is accessed multiple times per record.
atl-ggregson committed Jan 16, 2025
1 parent dfd07ea commit ff37910
Showing 2 changed files with 10 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "tap-csv"
version = "1.1.0"
version = "1.2.0"
description = "Singer tap for CSV, built with the Meltano SDK for Singer Taps."
authors = ["Pat Nadolny"]
keywords = [
11 changes: 9 additions & 2 deletions tap_csv/client.py
Original file line number Diff line number Diff line change
@@ -28,6 +28,7 @@ def __init__(self, *args, **kwargs):
"""Init CSVStream."""
# Cache file_config so we don't need to iterate over the config list again later.
self.file_config = kwargs.pop("file_config")
self.stream_schema = None
super().__init__(*args, **kwargs)

def get_records(self, context: Context | None) -> t.Iterable[dict]:
@@ -126,8 +127,13 @@ def schema(self) -> dict:
"""Return dictionary of record schema.
Dynamically detect the json schema for the stream.
This is evaluated prior to any records being retrieved.
This property is accessed multiple times for each record,
so it's important to cache the schema.
"""
if self.stream_schema:
return self.stream_schema

properties: list[th.Property] = []
self.primary_keys = self.file_config.get("keys", [])

@@ -156,4 +162,5 @@ def schema(self) -> dict:
# Cache header for future use
self.header = header

return th.PropertiesList(*properties).to_dict()
self.stream_schema = th.PropertiesList(*properties).to_dict()
return self.stream_schema

0 comments on commit ff37910

Please sign in to comment.