This repository has been archived by the owner on Dec 20, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Introduce new logic for syncing raw batch data #140
Open
harisang
wants to merge
26
commits into
dev
Choose a base branch
from
update_batch_rewards_job
base: dev
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 22 commits
Commits
Show all changes
26 commits
Select commit
Hold shift + click to select a range
a555571
Introduce new logic for syncing raw batch data
harisang fc2c654
fix typo
harisang 2b6b5e6
remove redundant code
harisang 8111a6e
fix another typo
harisang d7f84dd
small fixes
harisang 7900e35
bug fix
harisang 02f5089
small adaptations and fixes
harisang 3685c38
small fixes
harisang 63e0435
hardcode gnosis chain caps for testing purposes
harisang 45550a4
add short log
harisang 22a8c27
various small changes
harisang 0b94a49
fix some pylint issues
harisang 9b2cea7
use old way of setting db url if network variable is missing
harisang 986f286
fix more pylint issues
harisang fe26674
more pylint fixes
harisang 31e1824
final pylint fix
harisang 14586bb
mypy fixes
harisang 0d8ef86
properly disable too many locals pylint error
harisang 17c69f2
remove redundant whitespaces
harisang 616918b
mypy fix
harisang 05e9191
mypy fix
harisang 51982c4
small edits in env.sample
harisang 65ab782
bug fixing
harisang 78194c2
add new line
harisang 00d80ee
small fix
harisang bced3e5
remove redundant type cast
harisang File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,3 +7,4 @@ ndjson>=0.3.1 | |
py-multiformats-cid>=0.4.4 | ||
boto3>=1.26.12 | ||
SQLAlchemy>=2.0,<3.0 | ||
web3==6.20.3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
log = set_log(__name__) | ||
|
||
MAX_PROCESSING_DELAY = 10 | ||
BUCKET_SIZE = {"mainnet": 10000, "xdai": 30000, "arbitrum-one": 1000000} | ||
|
||
|
||
class OrderbookEnv(Enum): | ||
|
@@ -45,7 +46,14 @@ class OrderbookFetcher: | |
def _pg_engine(db_env: OrderbookEnv) -> Engine:
    """Build a SQLAlchemy engine for the Postgres database of ``db_env``.

    The connection target is read from the ``{db_env}_DB_URL`` environment
    variable after ``.env`` has been loaded. When ``NETWORK`` is set, its
    value is appended as a trailing ``/<network>`` segment; when it is not
    set, the URL is used unchanged, which keeps the older configuration
    style working.

    Raises:
        KeyError: if the ``{db_env}_DB_URL`` variable is not defined.
    """
    load_dotenv()
    base_url = os.environ[f"{db_env}_DB_URL"]
    network = os.environ.get("NETWORK")
    # Only append the network segment when NETWORK is explicitly configured.
    target = base_url if network is None else f"{base_url}/{network}"
    return create_engine(f"postgresql+psycopg2://{target}")
|
||
|
@@ -166,6 +174,83 @@ def get_batch_rewards(cls, block_range: BlockRange) -> DataFrame: | |
return barn.copy() | ||
return pd.DataFrame() | ||
|
||
@classmethod
def run_batch_data_sql(cls, block_range: BlockRange) -> DataFrame:
    """
    Run the batch data query on prod and barn for a single block range.

    The ``orderbook/batch_data.sql`` template is rendered once per
    environment, both queries are executed via ``_query_both_dbs``, and a
    warning is logged when a solver shows up in both result sets.

    Returns the concatenation of the prod and barn rows (prod first), a copy
    of whichever side is non-empty, or an empty DataFrame when neither
    environment returned rows.
    """
    # TODO(review): these caps are hard-coded; per PR review the value of the
    # cap needs to be adapted for Gnosis and Arbitrum One.
    epsilon_lower = "10000000000000000"  # lower ETH cap for payment (in WEI)
    epsilon_upper = "12000000000000000"  # upper ETH cap for payment (in WEI)

    def render_query(env: str) -> str:
        """Fill the SQL template placeholders for the given environment."""
        substitutions = {
            "{{start_block}}": str(block_range.block_from),
            "{{end_block}}": str(block_range.block_to),
            "{{EPSILON_LOWER}}": epsilon_lower,
            "{{EPSILON_UPPER}}": epsilon_upper,
            "{{env}}": env,
        }
        query = open_query("orderbook/batch_data.sql")
        for placeholder, value in substitutions.items():
            query = query.replace(placeholder, value)
        return query

    batch_data_query_prod = render_query("prod")
    batch_data_query_barn = render_query("barn")

    # Capitalised "Int64" is the nullable integer dtype
    # (see https://stackoverflow.com/a/11548224); it appears to work here.
    data_types = {
        "block_number": "Int64",
        "block_deadline": "int64",
    }
    barn, prod = cls._query_both_dbs(
        batch_data_query_prod, batch_data_query_barn, data_types
    )

    # Warn if a solver appears in both environments.
    shared_solvers = set(prod.solver).intersection(set(barn.solver))
    if shared_solvers:
        log.warning(
            f"solver overlap in {block_range}: solvers "
            f"{shared_solvers} part of both prod and barn"
        )

    non_empty = [frame for frame in (prod, barn) if not frame.empty]
    if len(non_empty) == 2:
        return pd.concat(non_empty)
    if non_empty:
        return non_empty[0].copy()
    return pd.DataFrame()
|
||
@classmethod
def get_batch_data(cls, block_range: BlockRange) -> DataFrame:
    """
    Fetch batch data for ``block_range`` in network-sized buckets.

    The range is split into consecutive sub-ranges of at most
    ``BUCKET_SIZE[network]`` blocks, where ``network`` is the ``NETWORK``
    environment variable (default ``"mainnet"``), so that each batch data
    query runs fast enough. Every sub-range is fetched with
    ``run_batch_data_sql`` and the results are concatenated in block order.

    Returns an empty DataFrame when the range contains no blocks
    (``block_from >= block_to``).

    Raises:
        KeyError: if ``NETWORK`` names a network missing from ``BUCKET_SIZE``.
    """
    load_dotenv()
    bucket_size = BUCKET_SIZE[os.environ.get("NETWORK", "mainnet")]
    start = block_range.block_from
    end = block_range.block_to
    frames = []
    # NOTE(review): consecutive buckets share their boundary block number;
    # confirm the SQL treats one end of the range as exclusive.
    while start < end:
        stop = start + min(end - start, bucket_size)
        log.info(f"About to process block range ({start}, {stop})")
        frames.append(
            cls.run_batch_data_sql(BlockRange(block_from=start, block_to=stop))
        )
        start = stop
    if not frames:
        # pd.concat raises ValueError on an empty list; mirror the empty
        # result convention of run_batch_data_sql instead.
        return pd.DataFrame()
    return pd.concat(frames)
|
||
@classmethod | ||
def get_app_hashes(cls) -> DataFrame: | ||
""" | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This PR changes the meaning of the `PROD_DB_URL` and `BARN_DB_URL` env variables, and this `else` statement just ensures that it defaults to the "old" way, so that the tests won't fail. This should be removed once the secrets in the repo are edited.