Skip to content

Commit

Permalink
Remove outdated config properties
Browse files (browse the repository at this point in the history)
  • Loading branch information
janheinrichmerker committed Aug 17, 2023
1 parent 50d377f commit c8f5883
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 180 deletions.
4 changes: 2 additions & 2 deletions archive_query_log/cli/archives.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ def archive_it(

echo("Load Archive-It collections.")
collections_api_url = urljoin(api_url, "/api/collection")
- response = config.http_session.get(
+ response = config.http.session.get(
collections_api_url,
params={"limit": 0, "format": "json"})
num_collections = int(response.headers["Total-Row-Count"])
Expand All @@ -159,7 +159,7 @@ def archive_it(
unit="archives", disable=not auto_merge and not no_merge)
offset_range = range(0, num_collections, page_size)
for offset in offset_range:
- response = config.http_session.get(
+ response = config.http.session.get(
collections_api_url,
params={"limit": page_size, "offset": offset, "format": "json"})
response_list = response.json()
Expand Down
2 changes: 1 addition & 1 deletion archive_query_log/cli/captures.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _iter_captures(
) -> Iterator[Capture]:
cdx_api = CdxApi(
api_url=source.archive.cdx_api_url,
- session=config.http_session,
+ session=config.http.session,
)
url = f"https://{source.provider.domain}"
url = urljoin(url, source.provider.url_path_prefix)
Expand Down
28 changes: 9 additions & 19 deletions archive_query_log/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,6 @@
from archive_query_log import __version__ as version


@dataclass(frozen=True)
class EsIndex(DataClassJsonMixin):
name: str
mapping: dict
settings: dict


@dataclass(frozen=True)
class EsConfig(DataClassJsonMixin):
host: str
Expand Down Expand Up @@ -74,20 +67,11 @@ def parallel_bulk(self, actions, *args, **kwargs):


@dataclass(frozen=True)
class Config(DataClassJsonMixin):
es: EsConfig
es_index_serps: EsIndex
es_index_results: EsIndex
es_index_url_query_parsers: EsIndex
es_index_url_page_parsers: EsIndex
es_index_url_offset_parsers: EsIndex
es_index_url_language_parsers: EsIndex
es_index_serp_query_parsers: EsIndex
es_index_serp_snippets_parsers: EsIndex
es_index_serp_direct_answer_parsers: EsIndex
class HttpConfig(DataClassJsonMixin):
max_retries: int = 5

@cached_property
- def http_session(self) -> Session:
+ def session(self) -> Session:
session = Session()
session.headers.update({
"User-Agent": f"AQL/{version} (Webis group)",
Expand All @@ -108,3 +92,9 @@ def http_session(self) -> Session:
session.mount("http://", _adapter)
session.mount("https://", _adapter)
return session


@dataclass(frozen=True)
class Config(DataClassJsonMixin):
es: EsConfig
http: HttpConfig = HttpConfig()
158 changes: 0 additions & 158 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,161 +3,3 @@ es:
port: 9200
username: null
password: null
es_index_serps:
name: aql_serps
settings:
number_of_shards: 10
number_of_replicas: 2
mapping: {} # TODO
es_index_results:
name: aql_results
settings:
number_of_shards: 20
number_of_replicas: 2
mapping: {} # TODO
es_index_url_query_parsers:
name: aql_url_query_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
parameter:
type: keyword
segment:
type: integer
replacements:
type: object
properties:
search:
type: keyword
replace:
type: keyword
es_index_url_page_parsers:
name: aql_url_page_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
parameter:
type: keyword
segment:
type: integer
replacements:
type: object
properties:
search:
type: keyword
replace:
type: keyword
es_index_url_offset_parsers:
name: aql_url_offset_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
parameter:
type: keyword
segment:
type: integer
replacements:
type: object
properties:
search:
type: keyword
replace:
type: keyword
es_index_url_language_parsers:
name: aql_url_language_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
parameter:
type: keyword
segment:
type: integer
replacements:
type: object
properties:
search:
type: keyword
replace:
type: keyword
es_index_serp_query_parsers:
name: aql_serp_query_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
html_selector:
type: keyword
html_attribute:
type: keyword
html_text:
type: boolean
replacements:
type: object
properties:
search:
type: keyword
replace:
type: keyword
es_index_serp_snippets_parsers:
name: aql_serp_snippets_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
html_selector:
type: keyword
html_selector_url:
type: keyword
html_attribute_url:
type: keyword
html_selector_title:
type: keyword
html_selector_text:
type: keyword
es_index_serp_direct_answer_parsers:
name: aql_serp_direct_answer_parsers
settings:
number_of_shards: 1
number_of_replicas: 2
mapping:
properties:
url_filter_regex:
type: keyword
parser_type:
type: keyword
html_selector:
type: keyword

0 comments on commit c8f5883

Please sign in to comment.