diff --git a/epo_ops/api.py b/epo_ops/api.py index 785c8c2..042b12c 100644 --- a/epo_ops/api.py +++ b/epo_ops/api.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- import logging +import warnings from base64 import b64encode +from typing import List, Optional, Union from xml.etree import ElementTree as ET import requests @@ -8,7 +10,14 @@ from . import exceptions from .middlewares import Throttler -from .models import NETWORK_TIMEOUT, AccessToken, Request +from .models import ( + NETWORK_TIMEOUT, + AccessToken, + Docdb, + Epodoc, + Original, + Request, +) log = logging.getLogger(__name__) @@ -35,23 +44,125 @@ def __init__(self, key, secret, accept_type="xml", middlewares=None): self.secret = secret self._access_token = None - def family(self, reference_type, input, endpoint=None, constituents=None): + def family( + self, + reference_type: str, + input: Union[Docdb, Epodoc], + endpoint=None, + constituents: Optional[List[str]] = None, + ) -> requests.Response: + """ + Retrieves the patent numbers of the extended patent family related to the input (INPADOC family). + + Args: + reference_type (str): Any of "publication", "application", or "priority". + input (Epodoc or Docdb): The document number. Cannot be Original. + endpoint (optional): None. Not applicable for family service. + constituents (List[str], optional): List of "biblio", "legal" or both. + Defaults to None. + + Returns: + requests.Response: a requests.Response object. 
+ + Examples: + >>> response = client.family("publication", epo_ops.models.Epodoc("EP1000000")) + >>> response + + >>> len(response.text) + 8790 + + >>> response_with_constituents = client.family("publication", epo_ops.models.Epodoc("EP1000000"), None, ["biblio", "legal"]) + >>> response_with_constituents + + >>> len(response_with_constituents.text) + 160206 + """ + if endpoint is not None: + warnings.warn( + "The `endpoint` argument is not used in this context and will be removed.", + DeprecationWarning, + stacklevel=2, + ) + url = self._make_request_url( dict( service=self.__family_path__, reference_type=reference_type, input=input, - endpoint=endpoint, + endpoint=None, constituents=constituents, use_get=True, ) ) return self._make_request(url, None, params=input.as_api_input(), use_get=True) - def image(self, path, range=1, document_format="application/tiff"): + def image( + self, path: str, range: int = 1, document_format: str = "application/tiff" + ) -> requests.Response: + """ + Retrieve the image page for a given path, one page at a time. + The path needs to be retrieved from the xml resulting from a prior inquiry using + the published_data() service with the 'endpoint="images"' argument. + + Args: + path (str): contained in the 'link' attribute of the document instance element (inquiry xml). + range (int, optional): the number of the image page to be fetched. Defaults to 1. + document_format (str, optional): depends on the inquiry response. Defaults to "application/tiff". + + Returns: + requests.Response: a requests.Response object. + """ return self._image_request(path, range, document_format) - def number(self, reference_type, input, output_format): + def number( + self, + reference_type: str, + input: Union[Original, Docdb, Epodoc], + output_format: str, + ) -> requests.Response: + """ + This service converts a patent number from one input format into another format. + + Args: + reference_type (str): Any of "publication", "application", or "priority". 
+ input (Original, Epodoc or Docdb): The document number as a data object. + output_format (str): Any of "original", "epodoc" or "docdb". + + Returns: + requests.Response: a requests.Response object. + + + Examples: + # from JP original to docdb + >>> response = client.number( + "application", + Original(number="2006-147056", country_code="JP", kind_code="A", date="20060526"), + "docdb", + ) + + # from US original to epodoc + >>> response = client.number( + "application", + Original("08/921,321", "US", "A", "19970829"), + "epodoc", + ) + + # from PCT original to docdb + >>> response = client.number( + "application", + Original("PCT/GB02/04635", date="19970829"), + "docdb", + ) + + Use-cases: + Given that other OPS services use only the Epodoc or Docdb format, + the general use-case of this method is to convert the Original format + into either the Docdb or the Epodoc format. + + Note: + It is especially important to include the date of publication in the input + whenever possible because number formatting may vary depending on the date. + """ possible_conversions = { "docdb": ["original", "epodoc"], "epodoc": ["original"], @@ -73,8 +184,32 @@ def number(self, reference_type, input, output_format): ) def published_data( - self, reference_type, input, endpoint="biblio", constituents=None - ): + self, + reference_type: str, + input: Union[Docdb, Epodoc], + endpoint="biblio", + constituents: Optional[List[str]] = None, + ) -> requests.Response: + """ + Retrieval service for published data. + + Args: + reference_type (str): Any of "publication", "application", or "priority". + input (Epodoc or Docdb): The document number as an Epodoc or Docdb data object. + endpoint (str, optional): "biblio", "equivalents", "abstract", "claims", "description", + "fulltext", "images". Defaults to "biblio". + constituents (list[str], optional): List of "biblio", "abstract", "images", "full-cycle". + + Returns: + requests.Response: a requests.Response object. 
+ + Note: + 1) input cannot be a models.Original + 2) only the endpoints "biblio" or "equivalents" use the constituents parameter. + 3) the images and fulltext retrieval require a two-step process: inquiry, then retrieval, e.g. + - client.published_data(..., endpoint='images',...) to retrieve the image path, then + - client.image(path=...) + """ return self._service_request( dict( service=self.__published_data_path__, @@ -86,8 +221,16 @@ def published_data( ) def published_data_search( - self, cql, range_begin=1, range_end=25, constituents=None - ): + self, + cql: str, + range_begin: int = 1, + range_end: int = 25, + constituents: Optional[List[str]] = None, + ) -> requests.Response: + """ + Performs a bibliographic search using common query language (CQL) to retrieve the data. + Possible constituents: "abstract", "biblio" and/or "full-cycle". + """ range = dict(key="X-OPS-Range", begin=range_begin, end=range_end) return self._search_request( dict( @@ -97,7 +240,24 @@ def published_data_search( range, ) - def register(self, reference_type, input, constituents=None): + def register( + self, + reference_type: str, + input: Epodoc, + constituents: Optional[List[str]] = None, + ) -> requests.Response: + """ + Provides the interface for the European Patent Register online service for retrieving all + the publicly available information on published European patent applications and + international PCT applications designating the EPO as they pass through the grant procedure. + + Possible constituents: "biblio", "events", "procedural-steps" or "upp". + + Notes: + 1) Only the Epodoc input format is supported + 2) the default behaviour of the register retrieval is biblio, so you don't have to add the + biblio constituent if you want to retrieve only bibliographic data. 
+ """ # TODO: input can only be Epodoc, not Docdb constituents = constituents or ["biblio"] return self._service_request( @@ -109,7 +269,18 @@ def register(self, reference_type, input, constituents=None): ) ) - def register_search(self, cql, range_begin=1, range_end=25): + def register_search( + self, cql: str, range_begin: int = 1, range_end: int = 25 + ) -> requests.Response: + """ + Use this service to find specific register data + that is part of the public aspect of the patent lifecycle. + + Example: + >>> response = client.register_search(cql="pa=IBM", range_begin=1, range_end=25) + >>> print(response.text) + + """ range = dict(key="Range", begin=range_begin, end=range_end) return self._search_request( {"service": self.__register_search_path__}, cql, range