From 333e5c243ae4e09269b8d389b55b64fe24a1a7b0 Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Tue, 25 Oct 2022 17:02:46 +0200 Subject: [PATCH 01/10] Implemented data serialization/deserialization with marshmallow --- requirements.txt | 1 + src/missing_prices.py | 25 +++++++++++++++--------- src/utils.py | 45 ++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2a23744..38caed7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,3 +15,4 @@ psycopg2-binary>=2.9.3 SQLAlchemy==1.4.41 pandas==1.5.0 click==8.1.3 +marshmallow==3.18.0 diff --git a/src/missing_prices.py b/src/missing_prices.py index ee0fc4c..adca72d 100644 --- a/src/missing_prices.py +++ b/src/missing_prices.py @@ -9,17 +9,20 @@ from dune_client.client import DuneClient from dune_client.query import Query from dune_client.types import DuneRecord +from marshmallow import fields from duneapi.api import DuneAPI from duneapi.types import Address, DuneQuery, Network from duneapi.util import open_query +from src.utils import TokenSchema, CoinSchema, CoinsSchema + DuneTokenPriceRow = tuple[str, str, str, str, int] # TODO - remove the Anys here: https://github.com/cowprotocol/data-misc/issues/20 def load_coins() -> dict[str, dict[str, Any]]: - """ " + """ Loads and returns coin dictionaries from Coin Paprika via their API. Excludes, inactive, new and non "token" types """ @@ -48,7 +51,10 @@ def load_coins() -> dict[str, dict[str, Any]]: # print(f"Error with {err}, excluding entry {entry}") print(f"Excluded address for {missed} entries out of {len(entries)}") - return coin_dict + # return coin_dict + return CoinsSchema(keys=fields.Str(), values=fields.Nested(CoinSchema)).load( + coin_dict + ) def write_results(results: list[DuneTokenPriceRow], path: str, filename: str) -> None: @@ -103,7 +109,8 @@ def as_dune_repr(self, coin_id: str) -> dict[str, Any]: def load_tokens(dune: DuneClient) -> list[DuneRecord]: """Loads Tokens with missing prices from Dune""" - return dune.refresh(Query(query_id=1317238, name="Tokens with Missing Prices")) + results = dune.refresh(Query(query_id=1317238, name="Tokens with Missing Prices")) + return [TokenSchema().load(r) for r in results] def fetch_tokens_without_prices(dune: DuneAPI) -> list[CoinPaprikaToken]: @@ -128,14 +135,14 @@ def run_missing_prices() -> None: print(f"Fetched {len(tokens)} traded tokens from Dune without prices") found, res = 0, [] for token in tokens: - if token["address"].lower() in coins: - paprika_data = coins[token["address"].lower()] + if token["address"] in coins: + paprika_data = coins[token["address"]] dune_row = ( - str(paprika_data["id"]), + paprika_data["id"], "ethereum", - str(paprika_data["symbol"]), - str(paprika_data["address"].lower()), - int(token["decimals"]), + paprika_data["symbol"], + paprika_data["address"], + token["decimals"], ) res.append(dune_row) found += 1 diff --git a/src/utils.py b/src/utils.py index 5ab491e..dc2c566 100644 --- a/src/utils.py +++ b/src/utils.py @@ -3,9 +3,10 @@ import os from datetime import datetime from enum import Enum -from typing import Any +from typing import Any, Mapping from duneapi.types import Network as LegacyDuneNetwork +from marshmallow import fields, Schema def partition_array(arr: list[Any], size: int) -> list[list[Any]]: @@ -79,3 +80,45 @@ def chain_id(self) -> int: Aligned with https://chainlist.org/ """ return {Network.MAINNET: 1, Network.GNOSIS: 100}[self] + + +class LoweredString(fields.String): + def _deserialize(self, value, *args, **kwargs): + if hasattr(value, "lower"): + value = value.lower() + return super()._deserialize(value, *args, **kwargs) + + def _serialize(self, value, attr, obj, **kwargs): + if value is None: + return "" + return str(value).lower() + + +class TokenSchema(Schema): + address = LoweredString(required=True) + decimals = fields.Int(required=True) + popularity = fields.Int() + symbol = fields.String() + + +class CoinSchema(Schema): + id = fields.String(required=True) + name = fields.String() + symbol = fields.String(required=True) + rank = fields.Int() + is_new = fields.Bool() + is_active = fields.Bool() + type = fields.String() + address = LoweredString(required=True) + + +class CoinsSchema(fields.Dict): + @staticmethod + def _get_obj(obj, _attr, _default): + return obj + + def dump(self, obj: Any): + return self.serialize("", obj, accessor=self._get_obj) + + def load(self, data: Mapping[str, Any]): + return self.deserialize(data) From 5307a525984b7c04420119aa33ee80728609cc04 Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Tue, 25 Oct 2022 17:09:53 +0200 Subject: [PATCH 02/10] Added docstrings. --- src/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/utils.py b/src/utils.py index dc2c566..135ca0e 100644 --- a/src/utils.py +++ b/src/utils.py @@ -83,6 +83,7 @@ def chain_id(self) -> int: class LoweredString(fields.String): + """Custom marshmallow String field for lowered string""" def _deserialize(self, value, *args, **kwargs): if hasattr(value, "lower"): value = value.lower() @@ -95,6 +96,7 @@ def _serialize(self, value, attr, obj, **kwargs): class TokenSchema(Schema): + """TokenSchema CoinSchema for serializing/deserializing token data""" address = LoweredString(required=True) decimals = fields.Int(required=True) popularity = fields.Int() @@ -102,6 +104,7 @@ class TokenSchema(Schema): class CoinSchema(Schema): + """CoinSchema for serializing/deserializing coin data""" id = fields.String(required=True) name = fields.String() symbol = fields.String(required=True) @@ -113,6 +116,7 @@ class CoinSchema(Schema): class CoinsSchema(fields.Dict): + """CoinsSchema for containing multiple coinschema-s""" @staticmethod def _get_obj(obj, _attr, _default): return obj From d21109f09928f77de5cd11bec19f1ba19127030b Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Tue, 25 Oct 2022 18:13:47 +0200 Subject: [PATCH 03/10] Linting checks --- src/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/utils.py b/src/utils.py index 135ca0e..d85916b 100644 --- a/src/utils.py +++ b/src/utils.py @@ -119,10 +119,13 @@ class CoinsSchema(fields.Dict): """CoinsSchema for containing multiple coinschema-s""" @staticmethod def _get_obj(obj, _attr, _default): + """Accessor for the dump method""" return obj def dump(self, obj: Any): + """Serializes data""" return self.serialize("", obj, accessor=self._get_obj) def load(self, data: Mapping[str, Any]): + """Loads data into mapping""" return self.deserialize(data) From dad16b23f4b80bef879342cf2cc4581fed4f00fc Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Tue, 25 Oct 2022 18:15:58 +0200 Subject: [PATCH 04/10] Added black formatting --- src/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/utils.py b/src/utils.py index d85916b..65d4d57 100644 --- a/src/utils.py +++ b/src/utils.py @@ -84,6 +84,7 @@ def chain_id(self) -> int: class LoweredString(fields.String): """Custom marshmallow String field for lowered string""" + def _deserialize(self, value, *args, **kwargs): if hasattr(value, "lower"): value = value.lower() @@ -97,6 +98,7 @@ def _serialize(self, value, attr, obj, **kwargs): class TokenSchema(Schema): """TokenSchema CoinSchema for serializing/deserializing token data""" + address = LoweredString(required=True) decimals = fields.Int(required=True) popularity = fields.Int() @@ -105,6 +107,7 @@ class TokenSchema(Schema): class CoinSchema(Schema): """CoinSchema for serializing/deserializing coin data""" + id = fields.String(required=True) name = fields.String() symbol = fields.String(required=True) @@ -117,6 +120,7 @@ class CoinSchema(Schema): class CoinsSchema(fields.Dict): """CoinsSchema for containing multiple coinschema-s""" + @staticmethod def _get_obj(obj, _attr, _default): """Accessor for the dump method""" From cba86cc2c26d4bf95d5fbd2c9087fff36e32846d Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Wed, 26 Oct 2022 13:26:25 +0200 Subject: [PATCH 05/10] Added review feedback --- src/missing_prices.py | 31 ++++++++++--------- src/utils.py | 70 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 73 insertions(+), 28 deletions(-) diff --git a/src/missing_prices.py b/src/missing_prices.py index adca72d..bf60b13 100644 --- a/src/missing_prices.py +++ b/src/missing_prices.py @@ -8,20 +8,19 @@ from dotenv import load_dotenv from dune_client.client import DuneClient from dune_client.query import Query -from dune_client.types import DuneRecord from marshmallow import fields from duneapi.api import DuneAPI from duneapi.types import Address, DuneQuery, Network from duneapi.util import open_query -from src.utils import TokenSchema, CoinSchema, CoinsSchema +from src.utils import TokenSchema, CoinSchema, CoinsSchema, Token, EthereumAddress, Coin -DuneTokenPriceRow = tuple[str, str, str, str, int] +DuneTokenPriceRow = tuple[str, str, str, Address, int] # TODO - remove the Anys here: https://github.com/cowprotocol/data-misc/issues/20 -def load_coins() -> dict[str, dict[str, Any]]: +def load_coins() -> dict[Address, Coin]: """ Loads and returns coin dictionaries from Coin Paprika via their API. Excludes, inactive, new and non "token" types @@ -45,14 +44,18 @@ def load_coins() -> dict[str, dict[str, Any]]: # only include ethereum tokens try: entry["address"] = contract_dict[entry["id"]].lower() - coin_dict[entry["address"]] = entry + try: + # coin_dict[Address(entry["address"])] = entry + coin_dict[entry["address"]] = entry + except ValueError as e: + print(f"{entry['address']} is not a valid ethereum address") + continue except KeyError: missed += 1 # print(f"Error with {err}, excluding entry {entry}") print(f"Excluded address for {missed} entries out of {len(entries)}") - # return coin_dict - return CoinsSchema(keys=fields.Str(), values=fields.Nested(CoinSchema)).load( + return CoinsSchema(keys=EthereumAddress, values=fields.Nested(CoinSchema)).load( coin_dict ) @@ -107,7 +110,7 @@ def as_dune_repr(self, coin_id: str) -> dict[str, Any]: } -def load_tokens(dune: DuneClient) -> list[DuneRecord]: +def load_tokens(dune: DuneClient) -> list[Token]: """Loads Tokens with missing prices from Dune""" results = dune.refresh(Query(query_id=1317238, name="Tokens with Missing Prices")) return [TokenSchema().load(r) for r in results] @@ -135,14 +138,14 @@ def run_missing_prices() -> None: print(f"Fetched {len(tokens)} traded tokens from Dune without prices") found, res = 0, [] for token in tokens: - if token["address"] in coins: - paprika_data = coins[token["address"]] + if token.address in coins: + paprika_data = coins[token.address] dune_row = ( - paprika_data["id"], + paprika_data.id, "ethereum", - paprika_data["symbol"], - paprika_data["address"], - token["decimals"], + paprika_data.symbol, + paprika_data.address, + token.decimals, ) res.append(dune_row) found += 1 diff --git a/src/utils.py b/src/utils.py index 65d4d57..a719c75 100644 --- a/src/utils.py +++ b/src/utils.py @@ -4,9 +4,11 @@ from datetime import datetime from enum import Enum from typing import Any, Mapping +from dataclasses import dataclass from duneapi.types import Network as LegacyDuneNetwork -from marshmallow import fields, Schema +from marshmallow import fields, Schema, post_load, ValidationError +from duneapi.types import Address def partition_array(arr: list[Any], size: int) -> list[list[Any]]: @@ -82,28 +84,55 @@ def chain_id(self) -> int: return {Network.MAINNET: 1, Network.GNOSIS: 100}[self] -class LoweredString(fields.String): - """Custom marshmallow String field for lowered string""" - - def _deserialize(self, value, *args, **kwargs): - if hasattr(value, "lower"): - value = value.lower() - return super()._deserialize(value, *args, **kwargs) +class EthereumAddress(fields.Field): + """Field that serializes to a string of numbers and deserializes + to a list of numbers. + """ def _serialize(self, value, attr, obj, **kwargs): if value is None: return "" - return str(value).lower() + return f"{value}".lower() + + def _deserialize(self, value, attr, data, **kwargs): + try: + return Address(value) + except ValueError as error: + raise ValidationError("Not a valid address") from error + + +@dataclass +class Token: + address: Address + decimals: int + symbol: str + popularity: int + + +@dataclass +class Coin: + id: str + name: str + symbol: str + rank: int + is_new: bool + is_active: bool + type: str + address: Address class TokenSchema(Schema): """TokenSchema CoinSchema for serializing/deserializing token data""" - address = LoweredString(required=True) + address = EthereumAddress(required=True) decimals = fields.Int(required=True) popularity = fields.Int() symbol = fields.String() + @post_load + def make_user(self, data, **kwargs): + return Token(**data) + class CoinSchema(Schema): """CoinSchema for serializing/deserializing coin data""" @@ -115,11 +144,21 @@ class CoinSchema(Schema): is_new = fields.Bool() is_active = fields.Bool() type = fields.String() - address = LoweredString(required=True) + address = EthereumAddress(required=True) + + def load(self, *args, **kwargs): + try: + return super().load(*args, **kwargs) + except ValidationError as e: + return e.valid_data + + @post_load + def make_user(self, data, **kwargs): + return Coin(**data) class CoinsSchema(fields.Dict): - """CoinsSchema for containing multiple coinschema-s""" + """CoinsSchema for containing multiple Coinschema-s""" @staticmethod def _get_obj(obj, _attr, _default): @@ -130,6 +169,9 @@ def dump(self, obj: Any): """Serializes data""" return self.serialize("", obj, accessor=self._get_obj) - def load(self, data: Mapping[str, Any]): + def load(self, data: dict[Address, Any]): """Loads data into mapping""" - return self.deserialize(data) + try: + return self.deserialize(data) + except ValidationError as e: + return e.valid_data From ca9a0dd612ef47f50f93e2980d0483d1948561d0 Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Wed, 26 Oct 2022 13:38:37 +0200 Subject: [PATCH 06/10] Linting feedback --- src/missing_prices.py | 2 +- src/utils.py | 12 ++++++++---- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/missing_prices.py b/src/missing_prices.py index bf60b13..05da301 100644 --- a/src/missing_prices.py +++ b/src/missing_prices.py @@ -47,7 +47,7 @@ def load_coins() -> dict[Address, Coin]: try: # coin_dict[Address(entry["address"])] = entry coin_dict[entry["address"]] = entry - except ValueError as e: + except ValueError: print(f"{entry['address']} is not a valid ethereum address") continue except KeyError: diff --git a/src/utils.py b/src/utils.py index a719c75..c3bbc1c 100644 --- a/src/utils.py +++ b/src/utils.py @@ -3,12 +3,12 @@ import os from datetime import datetime from enum import Enum -from typing import Any, Mapping +from typing import Any from dataclasses import dataclass from duneapi.types import Network as LegacyDuneNetwork -from marshmallow import fields, Schema, post_load, ValidationError from duneapi.types import Address +from marshmallow import fields, Schema, post_load, ValidationError def partition_array(arr: list[Any], size: int) -> list[list[Any]]: @@ -103,6 +103,7 @@ def _deserialize(self, value, attr, data, **kwargs): @dataclass class Token: + """Dataclass for holding Token data""" address: Address decimals: int symbol: str @@ -111,6 +112,7 @@ class Token: @dataclass class Coin: + """Dataclass for holding Coin data""" id: str name: str symbol: str @@ -130,7 +132,8 @@ class TokenSchema(Schema): symbol = fields.String() @post_load - def make_user(self, data, **kwargs): + def make_token(self, data, **_kwargs): + """Turns Token data into Token instance""" return Token(**data) @@ -153,7 +156,8 @@ def load(self, *args, **kwargs): return e.valid_data @post_load - def make_user(self, data, **kwargs): + def make_coin(self, data, **_kwargs): + """Turns Coin data into Coin instance""" return Coin(**data) From 1411585ca99eabed5a059a70ba5ffeb35647245e Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Wed, 26 Oct 2022 13:42:45 +0200 Subject: [PATCH 07/10] Pylint feedback --- src/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/utils.py b/src/utils.py index c3bbc1c..63ebb26 100644 --- a/src/utils.py +++ b/src/utils.py @@ -140,6 +140,9 @@ def make_token(self, data, **_kwargs): class CoinSchema(Schema): """CoinSchema for serializing/deserializing coin data""" + # pylint: disable=too-many-instance-attributes + # Eight are passed from the API + id = fields.String(required=True) name = fields.String() symbol = fields.String(required=True) From 33936086af3be46bd43dd2628574a1513ee9130d Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Wed, 26 Oct 2022 14:58:37 +0200 Subject: [PATCH 08/10] Added review feedback --- .pylintrc | 2 +- src/missing_prices.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.pylintrc b/.pylintrc index 13e7d73..93754f1 100644 --- a/.pylintrc +++ b/.pylintrc @@ -1,2 +1,2 @@ [MASTER] -disable=fixme,logging-fstring-interpolation,missing-module-docstring,invalid-name \ No newline at end of file +disable=fixme,logging-fstring-interpolation,missing-module-docstring,invalid-name,too-many-instance-attributes \ No newline at end of file diff --git a/src/missing_prices.py b/src/missing_prices.py index 05da301..8cf801e 100644 --- a/src/missing_prices.py +++ b/src/missing_prices.py @@ -19,7 +19,6 @@ DuneTokenPriceRow = tuple[str, str, str, Address, int] -# TODO - remove the Anys here: https://github.com/cowprotocol/data-misc/issues/20 def load_coins() -> dict[Address, Coin]: """ Loads and returns coin dictionaries from Coin Paprika via their API. @@ -52,7 +51,6 @@ def load_coins() -> dict[Address, Coin]: continue except KeyError: missed += 1 - # print(f"Error with {err}, excluding entry {entry}") print(f"Excluded address for {missed} entries out of {len(entries)}") return CoinsSchema(keys=EthereumAddress, values=fields.Nested(CoinSchema)).load( @@ -66,7 +64,7 @@ def write_results(results: list[DuneTokenPriceRow], path: str, filename: str) -> os.makedirs(path) with open(os.path.join(path, filename), "w", encoding="utf-8") as file: for row in results: - file.write(str(row) + ",\n") + file.write(f"('{row[0]}', '{row[1]}', '{row[2]}', '{row[3]}', {row[4]}),\n") print(f"Results written to {filename}") From af67cdae56750befa5b5e4df205849353c2b9334 Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Wed, 26 Oct 2022 15:01:14 +0200 Subject: [PATCH 09/10] Added black formatting --- src/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/utils.py b/src/utils.py index 63ebb26..bcc1f9f 100644 --- a/src/utils.py +++ b/src/utils.py @@ -104,6 +104,7 @@ def _deserialize(self, value, attr, data, **kwargs): @dataclass class Token: """Dataclass for holding Token data""" + address: Address decimals: int symbol: str @@ -113,6 +114,7 @@ class Token: @dataclass class Coin: """Dataclass for holding Coin data""" + id: str name: str symbol: str From ecb7b1650a28c6dc753042b3fcac938eb5d703ce Mon Sep 17 00:00:00 2001 From: Gent Rexha Date: Wed, 26 Oct 2022 15:54:37 +0200 Subject: [PATCH 10/10] Added todo from review feedback --- src/missing_prices.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/missing_prices.py b/src/missing_prices.py index 8cf801e..65bc74b 100644 --- a/src/missing_prices.py +++ b/src/missing_prices.py @@ -64,6 +64,7 @@ def write_results(results: list[DuneTokenPriceRow], path: str, filename: str) -> os.makedirs(path) with open(os.path.join(path, filename), "w", encoding="utf-8") as file: for row in results: + # TODO: [duneapi#68] Fix __repr__ of duneapi.types.Address file.write(f"('{row[0]}', '{row[1]}', '{row[2]}', '{row[3]}', {row[4]}),\n") print(f"Results written to {filename}")