Restructure payouts file (#435)

This PR is an attempt at implementing #427. The PR does not change the values of final results (up to rounding of floats). It changes the structure of the code and intermediate representations. As a first step, it separates the computation of different parts of the accounting into different functions. ```python # compute individual components of payments solver_info = compute_solver_info( reward_target_df, service_fee_df, config, ) rewards = compute_rewards( batch_data, quote_rewards_df, exchange_rate_native_to_cow, config.reward_config, ) protocol_fees = compute_protocol_fees(batch_data) partner_fees = compute_partner_fees(batch_data, config.protocol_fee_config) buffer_accounting = compute_buffer_accounting(batch_data, slippage_df) ``` Those functions are implemented in separate files. The results of these steps are converted into data frames for solver payments and for protocol and partner fee payments. ```python # combine into solver payouts and partner payouts solver_payouts = compute_solver_payouts( solver_info, rewards, protocol_fees, buffer_accounting ) partner_payouts = ( partner_fees # no additional computation required here at the moment ) ``` Payout data on transfers and overdrafts is then computed from solver and partner payouts data. ```python payouts = prepare_payouts(solver_payouts, partner_payouts, dune.period, config) ``` The code can be tested to produce the same transfer files as the old code. Tests have been adapted and cover essentially what was tested before. There is a bit more strictness in the testing of the separate computations of rewards, protocol fees, partner fees, etc. There is no end-to-end test for payments yet. This should be added at some point. Future changes could remove data frames from intermediate results. This would make it easier to have correct types and detect and handle missing data. Data for the different parts of the accounting can be changed to use intermediate tables generated by `src/data_sync/sync_data.py`. 
--------- Co-authored-by: Haris Angelidakis <[email protected]>
cowprotocol · Jan 27, 2025 · 58fdf14 · 58fdf14
1 parent 97cd3d4
commit 58fdf14
Show file tree

Hide file tree

Showing 12 changed files with 2,179 additions and 1,000 deletions.
diff --git a/src/fetch/buffer_accounting.py b/src/fetch/buffer_accounting.py
@@ -0,0 +1,94 @@
+"""Functionality for buffer accounting."""
+
+import pandas as pd
+from pandas import DataFrame
+
+BATCH_DATA_COLUMNS = ["solver", "network_fee_eth"]  # required columns of the batch data input
+SLIPPAGE_COLUMNS = [  # required columns of the slippage data input
+    "solver",
+    "eth_slippage_wei",
+]
+
+BUFFER_ACCOUNTING_COLUMNS = [  # exact columns of the buffer accounting result
+    "solver",
+    "network_fee_eth",
+    "slippage_eth",
+]
+
+
+def compute_buffer_accounting(
+    batch_data: DataFrame, slippage_data: DataFrame
+) -> DataFrame:
+    """Compute buffer accounting per solver.
+
+    Parameters
+    ----------
+    batch_data : DataFrame
+        Batch rewards data.
+        The columns have to contain BATCH_REWARDS_COLUMNS:
+        - solver : str
+            "0x"-prefixed hex representation of the submission address of a solver.
+        - network_fee_eth : int
+            Network fees in wei of a solver for settling batches.
+
+    slippage_data : DataFrame
+        Slippage data.
+        The columns have to contain SLIPPAGE_COLUMNS:
+        - solver : str
+            "0x"-prefixed hex representation of the submission address of a solver.
+        - eth_slippage_wei : int
+            Slippage in wei accrued by a solver in settling batches.
+
+    Returns
+    -------
+    buffer_accounting : DataFrame
+        Data frame containing rewards per solver.
+        The columns are REWARDS_COLUMNS:
+        - solver : str
+            "0x"-prefixed hex representation of the submission address of a solver.
+        - network_fee_eth : int
+            Network fees in wei of a solver for settling batches.
+        - slippage_eth : int
+            Slippage in wei accrued by a solver in settling batches.
+
+    Raises
+    ------
+    AssertionError
+        If input dataframes do not contain required columns or if the result does not have correct
+        columns.
+
+    Notes
+    -----
+    All data frames are set to have data type `object`. Otherwise, implicit conversion to int64 can
+    lead to overflows.
+    """
+
+    # validate batch data and slippage data columns
+    assert set(BATCH_DATA_COLUMNS).issubset(set(batch_data.columns))
+    assert set(SLIPPAGE_COLUMNS).issubset(set(slippage_data.columns))
+
+    with pd.option_context(
+        "future.no_silent_downcasting", True
+    ):  # remove this after Future warning disappears. We do not depend on down-casting,
+        # as we will work with object and int explicitly.
+        buffer_accounting = (
+            (
+                batch_data[BATCH_DATA_COLUMNS]
+                .astype(object)
+                .merge(
+                    slippage_data[SLIPPAGE_COLUMNS].astype(object),
+                    how="outer",
+                    on="solver",
+                    validate="one_to_one",
+                )
+            )
+            .fillna(0)
+            .astype(object)
+        )
+    buffer_accounting = buffer_accounting.rename(
+        columns={"eth_slippage_wei": "slippage_eth"}
+    )
+
+    assert set(buffer_accounting.columns) == set(BUFFER_ACCOUNTING_COLUMNS)
+
+    return buffer_accounting
diff --git a/src/fetch/partner_fees.py b/src/fetch/partner_fees.py
@@ -0,0 +1,136 @@
+"""Functionality for partner fees."""
+
+from collections import defaultdict
+
+import numpy as np
+import pandas as pd
+from pandas import DataFrame
+
+from src.config import ProtocolFeeConfig
+
+BATCH_DATA_COLUMNS = ["partner_list", "partner_fee_eth"]  # required input columns
+
+PARTNER_FEES_COLUMNS = ["partner", "partner_fee_eth", "partner_fee_tax"]  # result columns
+
+
+def compute_partner_fees(batch_data: DataFrame, config: ProtocolFeeConfig) -> DataFrame:
+    """Compute partner fees per partner.
+
+    Parameters
+    ----------
+    batch_data : DataFrame
+        Batch rewards data.
+        The columns have to contain BATCH_DATA_COLUMNS:
+        - partner_list : list[str]
+            List of "0x"-prefixed hex representations of the partner addresses. Partner fees are
+            paid to these addresses.
+        - partner_fee_eth : list[int]
+            List of partner fees in wei a solver owes to a partner for settling batches. The list is
+            aligned with the respective partners in partner_list.
+
+    config : ProtocolFeeConfig
+        Protocol fee configuration.
+
+    Returns
+    -------
+    partner_fees : DataFrame
+        Data frame containing partner fees per partner.
+        The columns are PARTNER_FEES_COLUMNS:
+        - partner : str
+            "0x"-prefixed hex representation of the address of a partner. Partner fees are paid to
+            these addresses.
+        - partner_fee_eth : int
+            Total partner fee in wei of a partner. This amount is reduced by partner_fee_tax before
+            payment.
+        - partner_fee_tax : Fraction
+            The fraction of partner fees which need to be paid to the CoW DAO.
+
+    Raises
+    ------
+    AssertionError
+        If input dataframe does not contain required columns or if the result does not have correct
+        columns.
+    """
+
+    # validate batch data columns
+    assert set(BATCH_DATA_COLUMNS).issubset(set(batch_data.columns))
+
+    partner_fee_lists = batch_data[BATCH_DATA_COLUMNS]
+
+    partner_fees = compute_partner_fees_per_partner(partner_fee_lists, config)
+
+    assert set(partner_fees.columns) == set(PARTNER_FEES_COLUMNS)
+
+    return partner_fees
+
+
+def compute_partner_fees_per_partner(
+    partner_fee_lists: DataFrame, config: ProtocolFeeConfig
+) -> DataFrame:
+    """Compute partner fees per partner.
+
+    This is the main computation step for partner fees. It has the same input and output format as
+    `compute_partner_fees`.
+
+    Parameters
+    ----------
+    partner_fee_lists : DataFrame
+        Batch rewards data.
+        The columns are BATCH_DATA_COLUMNS:
+        - partner_list : list[str]
+            List of "0x"-prefixed hex representations of the partner addresses. Partner fees are
+            paid to these addresses.
+        - partner_fee_eth : list[int]
+            List of partner fees in wei a solver owes to a partner for settling batches. The list is
+            aligned with the respective partners in partner_list.
+
+    config : ProtocolFeeConfig
+        Protocol fee configuration.
+
+    Returns
+    -------
+    partner_fees_df : DataFrame
+        Data frame containing partner fees per partner.
+        The columns are PARTNER_FEES_COLUMNS:
+        - partner : str
+            "0x"-prefixed hex representation of the address of a partner. Partner fees are paid to
+            these addresses.
+        - partner_fee_eth : int
+            Total partner fee in wei of a partner. This amount is reduced by partner_fee_tax before
+            payment.
+        - partner_fee_tax : Fraction
+            The fraction of partner fees which need to be paid to the CoW DAO.
+
+    Notes
+    -----
+    All data frames are set to have data type `object`. Otherwise, implicit conversion to int64 can
+    lead to overflows.
+    """
+
+    partner_fees_dict: defaultdict[str, int] = defaultdict(int)
+    for _, row in partner_fee_lists.iterrows():
+        if row["partner_list"] is None:
+            continue
+
+        # We assume the two lists used below, i.e.,
+        # partner_list and partner_fee_eth,
+        # are "aligned".
+
+        for partner, partner_fee in zip(row["partner_list"], row["partner_fee_eth"]):
+            partner_fees_dict[partner] += int(partner_fee)
+
+    partner_fees_df = pd.DataFrame(
+        list(partner_fees_dict.items()),
+        columns=["partner", "partner_fee_eth"],
+    )
+
+    partner_fees_df["partner_fee_tax"] = np.where(
+        partner_fees_df["partner"] == config.reduced_cut_address,
+        config.partner_fee_reduced_cut,
+        config.partner_fee_cut,
+    )
+
+    # change all types to object to use native python types
+    partner_fees_df = partner_fees_df.astype(object)
+
+    return partner_fees_df