diff --git a/.github/workflows/actions/test-monitor-process-results/action.yml b/.github/workflows/actions/test-monitor-process-results/action.yml index ea62e2f9c20..7565e6ab20e 100644 --- a/.github/workflows/actions/test-monitor-process-results/action.yml +++ b/.github/workflows/actions/test-monitor-process-results/action.yml @@ -42,13 +42,13 @@ runs: uses: 'google-github-actions/setup-gcloud@v2' - name: Upload results to BigQuery (skipped tests) - uses: nick-fields/retry@v2 + uses: nick-fields/retry@v3 with: timeout_minutes: 1 max_attempts: 3 command: bq load --source_format=NEWLINE_DELIMITED_JSON $BIGQUERY_DATASET.$BIGQUERY_TABLE $SKIPPED_TESTS_FILE tools/test_monitor/schemas/skipped_tests_schema.json - name: Upload results to BigQuery (test run) - uses: nick-fields/retry@v2 + uses: nick-fields/retry@v3 with: timeout_minutes: 2 max_attempts: 3 diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml index e6932b42a15..29420e2a3a8 100644 --- a/.github/workflows/builds.yml +++ b/.github/workflows/builds.yml @@ -98,12 +98,12 @@ jobs: steps: - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: ref: ${{ inputs.tag }} diff --git a/.github/workflows/cd.yml b/.github/workflows/cd.yml index 8d11337f2c9..d9b29547058 100644 --- a/.github/workflows/cd.yml +++ b/.github/workflows/cd.yml @@ -24,12 +24,12 @@ jobs: environment: Production Docker Registry steps: - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} - name: Checkout repo - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: ref: ${{ inputs.tag }} # Provide Google Service Account credentials to Github Action, allowing interaction with the Google Container Registry diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a5dd0450e55..204731e8f3c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,9 +35,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -46,21 +46,19 @@ jobs: run: go generate ./... working-directory: ${{ matrix.dir }} - name: Run golangci-lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v6 with: # Required: the version of golangci-lint is required and must be specified without patch version: we always use the latest patch version. version: v1.63 args: -v working-directory: ${{ matrix.dir }} - # https://github.com/golangci/golangci-lint-action/issues/244 - skip-cache: true tidy: name: Tidy runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup private build environment if: ${{ vars.PRIVATE_BUILDS_SUPPORTED == 'true' }} @@ -69,7 +67,7 @@ jobs: cadence_deploy_key: ${{ secrets.CADENCE_DEPLOY_KEY }} - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. 
sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -86,9 +84,9 @@ jobs: dynamic-matrix: ${{ steps.set-test-matrix.outputs.dynamicMatrix }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -104,9 +102,9 @@ jobs: dynamic-matrix: ${{ steps.set-test-matrix.outputs.dynamicMatrix }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -122,9 +120,9 @@ jobs: dynamic-matrix: ${{ steps.set-test-matrix.outputs.dynamicMatrix }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -144,7 +142,7 @@ jobs: runs-on: ${{ matrix.targets.runner }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup private build environment if: ${{ vars.PRIVATE_BUILDS_SUPPORTED == 'true' }} @@ -153,7 +151,7 @@ jobs: cadence_deploy_key: ${{ secrets.CADENCE_DEPLOY_KEY }} - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -161,7 +159,7 @@ jobs: - name: Setup tests (${{ matrix.targets.name }}) run: VERBOSE=1 make -e GO_TEST_PACKAGES="${{ matrix.targets.packages }}" install-tools - name: Run tests (${{ matrix.targets.name }}) - uses: nick-fields/retry@v2 + uses: nick-fields/retry@v3 with: timeout_minutes: 35 max_attempts: 5 @@ -170,7 +168,7 @@ jobs: #env: # RACE_DETECTOR: 1 - name: Upload coverage report - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 timeout-minutes: 1 continue-on-error: true with: @@ -190,7 +188,7 @@ jobs: runs-on: ${{ matrix.targets.runner }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup private build environment if: ${{ vars.PRIVATE_BUILDS_SUPPORTED == 'true' }} @@ -199,7 +197,7 @@ jobs: cadence_deploy_key: ${{ secrets.CADENCE_DEPLOY_KEY }} - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. 
sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -207,7 +205,7 @@ jobs: - name: Setup tests (${{ matrix.targets.name }}) run: VERBOSE=1 make -e GO_TEST_PACKAGES="${{ matrix.targets.packages }}" install-tools - name: Run tests (${{ matrix.targets.name }}) - uses: nick-fields/retry@v2 + uses: nick-fields/retry@v3 with: timeout_minutes: 35 max_attempts: 5 @@ -216,7 +214,7 @@ jobs: #env: # RACE_DETECTOR: 1 - name: Upload coverage report - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 timeout-minutes: 1 continue-on-error: true with: @@ -232,7 +230,7 @@ jobs: CADENCE_DEPLOY_KEY: ${{ secrets.CADENCE_DEPLOY_KEY }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # all tags are needed for integration tests fetch-depth: 0 @@ -244,7 +242,7 @@ jobs: cadence_deploy_key: ${{ secrets.CADENCE_DEPLOY_KEY }} - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -267,7 +265,7 @@ jobs: gcr.io/flow-container-registry/execution-corrupted:latest \ gcr.io/flow-container-registry/verification-corrupted:latest > flow-docker-images.tar - name: Cache Docker images - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: flow-docker-images.tar # use the workflow run id as part of the cache key to ensure these docker images will only be used for a single workflow run @@ -285,7 +283,7 @@ jobs: steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup private build environment if: ${{ vars.PRIVATE_BUILDS_SUPPORTED == 'true' }} @@ -294,7 +292,7 @@ jobs: cadence_deploy_key: ${{ secrets.CADENCE_DEPLOY_KEY }} - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} @@ -302,7 +300,7 @@ jobs: - name: Setup tests (${{ matrix.targets.name }}) run: VERBOSE=1 make -e GO_TEST_PACKAGES="${{ matrix.targets.packages }}" install-tools - name: Run tests (${{ matrix.targets.name }}) - uses: nick-fields/retry@v2 + uses: nick-fields/retry@v3 with: timeout_minutes: 35 max_attempts: 5 @@ -311,7 +309,7 @@ jobs: #env: # RACE_DETECTOR: 1 - name: Upload coverage report - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 timeout-minutes: 1 continue-on-error: true with: @@ -382,7 +380,7 @@ jobs: runs-on: ${{ matrix.runner }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # all tags are needed for integration tests fetch-depth: 0 @@ -394,13 +392,13 @@ jobs: cadence_deploy_key: ${{ secrets.CADENCE_DEPLOY_KEY }} - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 timeout-minutes: 10 # fail fast. sometimes this step takes an extremely long time with: go-version: ${{ env.GO_VERSION }} cache: true - name: Load cached Docker images - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: flow-docker-images.tar # use the same cache key as the docker-build job @@ -411,7 +409,7 @@ jobs: # TODO(rbtz): re-enable when we fix exisiting races. 
#env: # RACE_DETECTOR: 1 - uses: nick-fields/retry@v2 + uses: nick-fields/retry@v3 with: timeout_minutes: 35 max_attempts: 5 diff --git a/.github/workflows/flaky-test-monitor.yml b/.github/workflows/flaky-test-monitor.yml index e6ee0a4585f..96d1d6482fd 100644 --- a/.github/workflows/flaky-test-monitor.yml +++ b/.github/workflows/flaky-test-monitor.yml @@ -37,9 +37,9 @@ jobs: dynamic-matrix: ${{ steps.set-test-matrix.outputs.dynamicMatrix }} steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} cache: true @@ -58,9 +58,9 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} cache: true @@ -100,9 +100,9 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} cache: true @@ -160,12 +160,12 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # all tags are needed for integration tests fetch-depth: 0 - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} cache: true diff --git a/.github/workflows/tools.yml b/.github/workflows/tools.yml index 1a7327c3ea7..c8042af3d68 100644 --- a/.github/workflows/tools.yml +++ b/.github/workflows/tools.yml @@ -32,11 +32,11 @@ jobs: with: project_id: flow - name: Setup Go - uses: actions/setup-go@v4 + uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} - name: Checkout repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: # to accurately get the version tag fetch-depth: 0 diff --git a/cmd/util/cmd/checkpoint-collect-stats/account_stats.go b/cmd/util/cmd/checkpoint-collect-stats/account_stats.go new file mode 100644 index 00000000000..f3349d8b5e5 --- /dev/null +++ b/cmd/util/cmd/checkpoint-collect-stats/account_stats.go @@ -0,0 +1,108 @@ +package checkpoint_collect_stats + +import ( + "cmp" + "slices" + + "github.com/rs/zerolog/log" + + "github.com/onflow/flow-go/fvm/systemcontracts" + "github.com/onflow/flow-go/model/flow" +) + +type accountFormat uint8 + +const ( + accountFormatUnknown accountFormat = iota + accountFormatV1 + accountFormatV2 +) + +func (format accountFormat) MarshalJSON() ([]byte, error) { + switch format { + case accountFormatV1: + return []byte("\"v1\""), nil + + case accountFormatV2: + return []byte("\"v2\""), nil + + default: + return []byte("\"unknown\""), nil + } +} + +type AccountStats struct { + stats + FormatV1Count int `json:"account_format_v1_count"` + FormatV2Count int `json:"account_format_v2_count"` + AccountsInFormatV2 []string `json:"accounts_in_format_v2,omitempty"` + ServiceAccount *AccountInfo `json:"service_account,omitempty"` + EVMAccount *AccountInfo `json:"evm_account,omitempty"` + TopN []*AccountInfo `json:"largest_accounts"` +} + +type AccountInfo struct { + Address string `json:"address"` + Format accountFormat `json:"account_format"` + PayloadCount uint64 `json:"payload_count"` + PayloadSize uint64 `json:"payload_size"` +} + +func getAccountStatus( + chainID flow.ChainID, + accounts map[string]*AccountInfo, +) AccountStats { + accountsSlice := make([]*AccountInfo, 0, len(accounts)) 
+    accountSizesSlice := make([]float64, 0, len(accounts))
+
+    var accountsInFormatV2 []string
+    var accountFormatV1Count, accountFormatV2Count int
+
+    for _, acct := range accounts {
+        accountsSlice = append(accountsSlice, acct)
+        accountSizesSlice = append(accountSizesSlice, float64(acct.PayloadSize))
+
+        switch acct.Format {
+        case accountFormatV1:
+            accountFormatV1Count++
+
+        case accountFormatV2:
+            accountFormatV2Count++
+            accountsInFormatV2 = append(accountsInFormatV2, acct.Address)
+
+        default:
+            if acct.Address != "" {
+                log.Info().Msgf("found account with neither an account register nor a domain register: %x", acct.Address)
+            }
+        }
+    }
+
+    // Sort accounts by payload size in descending order
+    slices.SortFunc(accountsSlice, func(a, b *AccountInfo) int {
+        return cmp.Compare(b.PayloadSize, a.PayloadSize)
+    })
+
+    // Sort accounts in format v2
+    slices.SortFunc(accountsInFormatV2, cmp.Compare)
+
+    stats := getValueStats(accountSizesSlice, percentiles)
+
+    evmAccountAddress := systemcontracts.SystemContractsForChain(chainID).EVMStorage.Address
+
+    serviceAccountAddress := serviceAccountAddressForChain(chainID)
+
+    return AccountStats{
+        stats:              stats,
+        FormatV1Count:      accountFormatV1Count,
+        FormatV2Count:      accountFormatV2Count,
+        AccountsInFormatV2: accountsInFormatV2,
+        ServiceAccount:     accounts[string(serviceAccountAddress[:])],
+        EVMAccount:         accounts[string(evmAccountAddress[:])],
+        TopN:               accountsSlice[:flagTopN],
+    }
+}
+
+func serviceAccountAddressForChain(chainID flow.ChainID) flow.Address {
+    sc := systemcontracts.SystemContractsForChain(chainID)
+    return sc.FlowServiceAccount.Address
+}
diff --git a/cmd/util/cmd/checkpoint-collect-stats/cmd.go b/cmd/util/cmd/checkpoint-collect-stats/cmd.go
index 30dd72dcf27..19038d63bc1 100644
--- a/cmd/util/cmd/checkpoint-collect-stats/cmd.go
+++ b/cmd/util/cmd/checkpoint-collect-stats/cmd.go
@@ -8,7 +8,6 @@ import (
     "strings"
     "sync"
 
-    "github.com/montanaflynn/stats"
     "github.com/onflow/cadence/common"
     "github.com/pkg/profile"
     "github.com/rs/zerolog"
@@ -22,7 +21,6 @@ import (
     "github.com/onflow/flow-go/cmd/util/ledger/util"
     "github.com/onflow/flow-go/fvm/evm/emulator/state"
     "github.com/onflow/flow-go/fvm/evm/handler"
-    "github.com/onflow/flow-go/fvm/systemcontracts"
     "github.com/onflow/flow-go/ledger"
     "github.com/onflow/flow-go/ledger/common/pathfinder"
     "github.com/onflow/flow-go/ledger/complete"
@@ -48,6 +46,12 @@ const (
     accountStatsReportName = "account-stats"
 )
 
+const (
+    // NOTE: this constant is defined in github.com/onflow/cadence/runtime/storage.go
+    // Use this constant directly from the cadence runtime package after the dependency is updated.
+    AccountStorageKey = "stored"
+)
+
 const (
     domainTypePrefix         = "domain "
     payloadChannelBufferSize = 100_000
@@ -60,6 +64,10 @@ const (
     blockHashListBucketKeyPrefix = "BlockHashListBucket"
 )
 
+// percentiles are Tukey's seven-number summary (without
+// the 0th and 100th percentiles, because min and max are always included).
+var percentiles = []float64{12.5, 25.0, 50.0, 75.0, 87.5}
+
 var Cmd = &cobra.Command{
     Use:   "checkpoint-collect-stats",
     Short: "collects stats on tries stored in a checkpoint, or payloads from a payloads file",
     Run:   run,
 }
@@ -96,7 +104,7 @@ func init() {
         "Enable memory profiling")
 }
 
-type Stats struct {
+type LedgerStats struct {
     LedgerStats  *complete.LedgerStats `json:",omitempty"`
     PayloadStats *PayloadStats
 }
@@ -110,17 +118,9 @@ type PayloadStats struct {
 }
 
 type RegisterStatsByTypes struct {
-    Type                    string  `json:"type"`
-    Counts                  uint64  `json:"counts"`
-    ValueSizeTotal          float64 `json:"value_size_total"`
-    ValueSizeMin            float64 `json:"value_size_min"`
-    ValueSize25thPercentile float64 `json:"value_size_25th_percentile"`
-    ValueSizeMedian         float64 `json:"value_size_median"`
-    ValueSize75thPercentile float64 `json:"value_size_75th_percentile"`
-    ValueSize95thPercentile float64 `json:"value_size_95th_percentile"`
-    ValueSize99thPercentile float64 `json:"value_size_99th_percentile"`
-    ValueSizeMax            float64 `json:"value_size_max"`
-    SubTypes                []RegisterStatsByTypes `json:"subtypes,omitempty"`
+    Type string `json:"type"`
+    stats
+    SubTypes []RegisterStatsByTypes `json:"subtypes,omitempty"`
 }
 
 type PayloadInfo struct {
@@ -130,26 +130,6 @@ type PayloadInfo struct {
     Size    uint64 `json:"size"`
 }
 
-type AccountStats struct {
-    AccountCount              uint64  `json:"total_account_count"`
-    AccountSizeMin            float64 `json:"account_size_min"`
-    AccountSize25thPercentile float64 `json:"account_size_25th_percentile"`
-    AccountSizeMedian         float64 `json:"account_size_median"`
-    AccountSize75thPercentile float64 `json:"account_size_75th_percentile"`
-    AccountSize95thPercentile float64 `json:"account_size_95th_percentile"`
-    AccountSize99thPercentile float64 `json:"account_size_99th_percentile"`
-    AccountSizeMax            float64 `json:"account_size_max"`
-    ServiceAccount            *AccountInfo   `json:"service_account,omitempty"`
-    EVMAccount                *AccountInfo   `json:"evm_account,omitempty"`
-    TopN                      []*AccountInfo `json:"largest_accounts"`
-}
-
-type AccountInfo struct {
-    Address      string `json:"address"`
-    PayloadCount uint64 `json:"payload_count"`
-    PayloadSize  uint64 `json:"payload_size"`
-}
-
 type sizesByType map[string][]float64
 
 func run(*cobra.Command, []string) {
@@ -211,7 +191,7 @@ func run(*cobra.Command, []string) {
             totalPayloadCount++
 
             // Update payload sizes by type
-            typ := getType(key)
+            typ := getRegisterType(key)
             valueSizesByType[typ] = append(valueSizesByType[typ], float64(valueSize))
 
             // Update top N largest payloads
@@ -233,6 +213,21 @@
             }
             account.PayloadCount++
             account.PayloadSize += uint64(size)
+
+            // Update account format
+            if isAccountRegister(key) {
+                if account.Format == accountFormatV1 {
+                    log.Error().Msgf("found an account register while a domain register exists for %x", address)
+                } else {
+                    account.Format = accountFormatV2
+                }
+            } else if isDomainRegister(key) {
+                if account.Format == accountFormatV2 {
+                    log.Error().Msgf("found a domain register while an account register exists for %x", address)
+                } else {
+                    account.Format = accountFormatV1
+                }
+            }
         }
 
         // At this point, all payload are processed.
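For reference, a minimal sketch of the seven-number summary these percentiles produce, mirroring the `statslib.Describe` call made in the new `stats_utils.go` below (the sample values and the standalone `main` package are illustrative assumptions, not part of the change):

```go
package main

import (
	"fmt"

	statslib "github.com/montanaflynn/stats"
)

func main() {
	// Illustrative payload sizes; the real caller passes per-type value sizes.
	values := []float64{8, 16, 32, 64, 128, 256, 1024, 4096}

	// Tukey's seven-number summary: these five interior percentiles,
	// plus the min and max that Describe reports directly.
	percentiles := []float64{12.5, 25.0, 50.0, 75.0, 87.5}

	describe, err := statslib.Describe(values, true, &percentiles)
	if err != nil {
		panic(err)
	}

	fmt.Printf("min=%v max=%v\n", describe.Min, describe.Max)
	for _, pv := range describe.DescriptionPercentiles {
		fmt.Printf("p%v = %v\n", pv.Percentile, pv.Value)
	}
}
```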
@@ -246,14 +241,14 @@ func run(*cobra.Command, []string) { go func() { defer wg.Done() - statsByTypes := getStats(valueSizesByType) + statsByTypes := getRegisterStats(valueSizesByType) // Sort top N largest payloads by payload size in descending order slices.SortFunc(largestPayloads.Tree, func(a, b PayloadInfo) int { return cmp.Compare(b.Size, a.Size) }) - stats := &Stats{ + stats := &LedgerStats{ LedgerStats: ledgerStats, PayloadStats: &PayloadStats{ TotalPayloadCount: totalPayloadCount, @@ -271,38 +266,7 @@ func run(*cobra.Command, []string) { go func() { defer wg.Done() - accountsSlice := make([]*AccountInfo, 0, len(accounts)) - accountSizesSlice := make([]float64, 0, len(accounts)) - - for _, acct := range accounts { - accountsSlice = append(accountsSlice, acct) - accountSizesSlice = append(accountSizesSlice, float64(acct.PayloadSize)) - } - - // Sort accounts by payload size in descending order - slices.SortFunc(accountsSlice, func(a, b *AccountInfo) int { - return cmp.Compare(b.PayloadSize, a.PayloadSize) - }) - - stats := getTypeStats("", accountSizesSlice) - - evmAccountAddress := systemcontracts.SystemContractsForChain(chainID).EVMStorage.Address - - serviceAccountAddress := serviceAccountAddressForChain(chainID) - - acctStats := &AccountStats{ - AccountCount: uint64(len(accountsSlice)), - ServiceAccount: accounts[string(serviceAccountAddress[:])], - EVMAccount: accounts[string(evmAccountAddress[:])], - TopN: accountsSlice[:flagTopN], - AccountSizeMin: stats.ValueSizeMin, - AccountSize25thPercentile: stats.ValueSize25thPercentile, - AccountSizeMedian: stats.ValueSizeMedian, - AccountSize75thPercentile: stats.ValueSize75thPercentile, - AccountSize95thPercentile: stats.ValueSize95thPercentile, - AccountSize99thPercentile: stats.ValueSize99thPercentile, - AccountSizeMax: stats.ValueSizeMax, - } + acctStats := getAccountStatus(chainID, accounts) writeStats(accountStatsReportName, acctStats) }() @@ -405,69 +369,17 @@ func getPayloadStatsFromCheckpoint(payloadCallBack func(payload *ledger.Payload) return ledgerStats } -func getTypeStats(t string, values []float64) RegisterStatsByTypes { - sum, err := stats.Sum(values) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the sum of values") - } - - min, err := stats.Min(values) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the min of values") - } - - percentile25, err := stats.Percentile(values, 25) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the 25th percentile of values") - } - - median, err := stats.Median(values) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the median of values") - } - - percentile75, err := stats.Percentile(values, 75) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the 75th percentile of values") - } - - percentile95, err := stats.Percentile(values, 95) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the 95th percentile of values") - } - - percentile99, err := stats.Percentile(values, 99) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the 99th percentile of values") - } - - max, err := stats.Max(values) - if err != nil { - log.Fatal().Err(err).Msg("cannot compute the max of values") - } - - return RegisterStatsByTypes{ - Type: t, - Counts: uint64(len(values)), - ValueSizeTotal: sum, - ValueSizeMin: min, - ValueSize25thPercentile: percentile25, - ValueSizeMedian: median, - ValueSize75thPercentile: percentile75, - ValueSize95thPercentile: percentile95, - ValueSize99thPercentile: percentile99, - ValueSizeMax: max, - } -} - 
-func getStats(valueSizesByType sizesByType) []RegisterStatsByTypes {
+func getRegisterStats(valueSizesByType sizesByType) []RegisterStatsByTypes {
     domainStats := make([]RegisterStatsByTypes, 0, len(common.AllStorageDomains))
     var allDomainSizes []float64
 
     statsByTypes := make([]RegisterStatsByTypes, 0, len(valueSizesByType))
     for t, values := range valueSizesByType {
-        stats := getTypeStats(t, values)
+        stats := RegisterStatsByTypes{
+            Type:  t,
+            stats: getValueStats(values, percentiles),
+        }
 
         if isDomainType(t) {
             domainStats = append(domainStats, stats)
@@ -477,19 +389,22 @@
         }
     }
 
-    allDomainStats := getTypeStats("domain", allDomainSizes)
-    allDomainStats.SubTypes = domainStats
+    allDomainStats := RegisterStatsByTypes{
+        Type:     "domain",
+        stats:    getValueStats(allDomainSizes, percentiles),
+        SubTypes: domainStats,
+    }
     statsByTypes = append(statsByTypes, allDomainStats)
 
     // Sort domain stats by payload count in descending order
     slices.SortFunc(allDomainStats.SubTypes, func(a, b RegisterStatsByTypes) int {
-        return cmp.Compare(b.Counts, a.Counts)
+        return cmp.Compare(b.Count, a.Count)
     })
 
     // Sort stats by payload count in descending order
     slices.SortFunc(statsByTypes, func(a, b RegisterStatsByTypes) int {
-        return cmp.Compare(b.Counts, a.Counts)
+        return cmp.Compare(b.Count, a.Count)
     })
 
     return statsByTypes
@@ -507,7 +422,24 @@
 }
 
 func isDomainType(typ string) bool {
     return strings.HasPrefix(typ, domainTypePrefix)
 }
 
-func getType(key ledger.Key) string {
+func isDomainRegister(key ledger.Key) bool {
+    k := key.KeyParts[1].Value
+    kstr := string(k)
+    for _, storageDomain := range common.AllStorageDomains {
+        if storageDomain.Identifier() == kstr {
+            return true
+        }
+    }
+    return false
+}
+
+func isAccountRegister(key ledger.Key) bool {
+    k := key.KeyParts[1].Value
+    kstr := string(k)
+    return kstr == AccountStorageKey
+}
+
+func getRegisterType(key ledger.Key) string {
     k := key.KeyParts[1].Value
     kstr := string(k)
 
@@ -521,6 +453,8 @@
     }
 
     switch kstr {
+    case AccountStorageKey:
+        return "account"
     case flow.ContractNamesKey:
         return "contract names"
     case flow.AccountStatusKey:
@@ -560,8 +494,3 @@
     }
 
     return "others"
 }
-
-func serviceAccountAddressForChain(chainID flow.ChainID) flow.Address {
-    sc := systemcontracts.SystemContractsForChain(chainID)
-    return sc.FlowServiceAccount.Address
-}
diff --git a/cmd/util/cmd/checkpoint-collect-stats/stats_utils.go b/cmd/util/cmd/checkpoint-collect-stats/stats_utils.go
new file mode 100644
index 00000000000..a45bea98219
--- /dev/null
+++ b/cmd/util/cmd/checkpoint-collect-stats/stats_utils.go
@@ -0,0 +1,58 @@
+package checkpoint_collect_stats
+
+import (
+    "cmp"
+    "slices"
+
+    statslib "github.com/montanaflynn/stats"
+    "github.com/rs/zerolog/log"
+)
+
+type percentileValue struct {
+    Percentile float64 `json:"percentile"`
+    Value      float64 `json:"value"`
+}
+
+type stats struct {
+    Count       uint64            `json:"count"`
+    Sum         float64           `json:"sum"`
+    Min         float64           `json:"min"`
+    Percentiles []percentileValue `json:"percentiles"`
+    Max         float64           `json:"max"`
+}
+
+func getValueStats(values []float64, percentiles []float64) stats {
+    if len(values) == 0 {
+        return stats{}
+    }
+
+    describe, err := statslib.Describe(values, true, &percentiles)
+    if err != nil {
+        log.Fatal().Err(err).Msg("cannot describe values")
+    }
+
+    sum, err := statslib.Sum(values)
+    if err != nil {
+        log.Fatal().Err(err).Msg("cannot compute sum of values")
+    }
+
+    percentileValues := make([]percentileValue, len(describe.DescriptionPercentiles))
+    for i, pv := range describe.DescriptionPercentiles {
+        percentileValues[i] = percentileValue{
+            Percentile: pv.Percentile,
+            Value:      pv.Value,
+        }
+    }
+
+    slices.SortFunc(percentileValues, func(a, b percentileValue) int {
+        return cmp.Compare(a.Percentile, b.Percentile)
+    })
+
+    return stats{
+        Count:       uint64(len(values)),
+        Sum:         sum,
+        Min:         describe.Min,
+        Max:         describe.Max,
+        Percentiles: percentileValues,
+    }
+}
diff --git a/engine/testutil/nodes.go b/engine/testutil/nodes.go
index a9e536ef079..137d1c94207 100644
--- a/engine/testutil/nodes.go
+++ b/engine/testutil/nodes.go
@@ -1016,7 +1016,7 @@ func VerificationNode(t testing.TB,
 
     chunkVerifier := chunks.NewChunkVerifier(vm, vmCtx, node.Log)
 
-    approvalStorage := storage.NewResultApprovals(node.Metrics, node.PublicDB)
+    approvalStorage := store.NewResultApprovals(node.Metrics, badgerimpl.ToDB(node.PublicDB))
 
     node.VerifierEngine, err = verifier.New(node.Log,
         collector,
diff --git a/fvm/evm/emulator/emulator.go b/fvm/evm/emulator/emulator.go
index 9aa39df69ff..7bf11e22f91 100644
--- a/fvm/evm/emulator/emulator.go
+++ b/fvm/evm/emulator/emulator.go
@@ -127,6 +127,14 @@ func (bl *BlockView) DirectCall(call *types.DirectCall) (res *types.Result, err
                 err == nil && res != nil {
                 proc.evm.Config.Tracer.OnTxEnd(res.Receipt(), res.ValidationError)
             }
+
+            // call OnLog tracer hook upon successful call result
+            if proc.evm.Config.Tracer.OnLog != nil &&
+                err == nil && res != nil {
+                for _, log := range res.Logs {
+                    proc.evm.Config.Tracer.OnLog(log)
+                }
+            }
         }()
     }
 
@@ -191,6 +199,15 @@ func (bl *BlockView) RunTransaction(
         proc.evm.Config.Tracer.OnTxEnd(res.Receipt(), res.ValidationError)
     }
 
+    // call OnLog tracer hook upon successful tx result
+    if proc.evm.Config.Tracer != nil &&
+        proc.evm.Config.Tracer.OnLog != nil &&
+        res != nil {
+        for _, log := range res.Logs {
+            proc.evm.Config.Tracer.OnLog(log)
+        }
+    }
+
     return res, nil
 }
 
@@ -243,6 +260,15 @@
             res != nil {
             proc.evm.Config.Tracer.OnTxEnd(res.Receipt(), res.ValidationError)
         }
+
+        // call OnLog tracer hook upon successful tx result
+        if proc.evm.Config.Tracer != nil &&
+            proc.evm.Config.Tracer.OnLog != nil &&
+            res != nil {
+            for _, log := range res.Logs {
+                proc.evm.Config.Tracer.OnLog(log)
+            }
+        }
     }
 
     // finalize after all the batch transactions are executed to save resources
diff --git a/go.mod b/go.mod
index 1da27c5159d..c8971c27bc8 100644
--- a/go.mod
+++ b/go.mod
@@ -43,7 +43,7 @@ require (
     github.com/libp2p/go-libp2p-kad-dht v0.25.2
     github.com/libp2p/go-libp2p-kbucket v0.6.3
     github.com/libp2p/go-libp2p-pubsub v0.10.0
-    github.com/montanaflynn/stats v0.7.0
+    github.com/montanaflynn/stats v0.7.1
     github.com/multiformats/go-multiaddr v0.12.2
     github.com/multiformats/go-multiaddr-dns v0.3.1
     github.com/multiformats/go-multihash v0.2.3
diff --git a/go.sum b/go.sum
index 775f632189f..d80e0e5bbaf 100644
--- a/go.sum
+++ b/go.sum
@@ -840,8 +840,8 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
 github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
 github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
 github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
-github.com/montanaflynn/stats v0.7.0 h1:r3y12KyNxj/Sb/iOE46ws+3mS1+MZca1wlHQFPsY/JU=
-github.com/montanaflynn/stats v0.7.0/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
+github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
+github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
 github.com/mr-tron/base58 v1.1.0/go.mod h1:xcD2VGqlgYjBdcBLw+TuYLr8afG+Hj8g2eTVqeSzSU8=
 github.com/mr-tron/base58 v1.1.2/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
 github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
diff --git a/storage/operation/approvals.go b/storage/operation/approvals.go
new file mode 100644
index 00000000000..df38135677b
--- /dev/null
+++ b/storage/operation/approvals.go
@@ -0,0 +1,48 @@
+package operation
+
+import (
+    "github.com/onflow/flow-go/model/flow"
+    "github.com/onflow/flow-go/storage"
+)
+
+// InsertResultApproval inserts a ResultApproval by ID.
+// The same key (`approval.ID()`) necessitates that the value (full `approval`) is
+// also identical (otherwise, we would have a successful pre-image attack on our
+// cryptographic hash function). Therefore, concurrent calls to this function are safe.
+func InsertResultApproval(w storage.Writer, approval *flow.ResultApproval) error {
+    return UpsertByKey(w, MakePrefix(codeResultApproval, approval.ID()), approval)
+}
+
+// RetrieveResultApproval retrieves an approval by ID.
+// Returns `storage.ErrNotFound` if no Approval with the given ID has been stored.
+func RetrieveResultApproval(r storage.Reader, approvalID flow.Identifier, approval *flow.ResultApproval) error {
+    return RetrieveByKey(r, MakePrefix(codeResultApproval, approvalID), approval)
+}
+
+// UnsafeIndexResultApproval inserts a ResultApproval ID keyed by ExecutionResult ID
+// and chunk index.
+// Unsafe means that it does not check if a different approval is indexed for the same
+// chunk, and will overwrite the existing index.
+// CAUTION:
+//   - In general, the Flow protocol requires multiple approvals for the same chunk from different
+//     verification nodes. In other words, there are multiple different approvals for the same chunk.
+//     Therefore, this index Executed Chunk ➜ ResultApproval ID is *only safe* to be used by
+//     Verification Nodes for tracking their own approvals (for the same ExecutionResult, a Verifier
+//     will always produce the same approval)
+//   - In order to make sure only one approval is indexed for the chunk, _all calls_ to
+//     `UnsafeIndexResultApproval` must be synchronized by the higher-level logic. Currently, we have the
+//     convention that `store.ResultApprovals` is the only place that is allowed to call this method.
+func UnsafeIndexResultApproval(w storage.Writer, resultID flow.Identifier, chunkIndex uint64, approvalID flow.Identifier) error {
+    return UpsertByKey(w, MakePrefix(codeIndexResultApprovalByChunk, resultID, chunkIndex), approvalID)
+}
+
+// LookupResultApproval finds a ResultApproval by result ID and chunk index.
+// Returns `storage.ErrNotFound` if no Approval for the given key (resultID, chunkIndex) has been stored.
+//
+// NOTE that the Flow protocol requires multiple approvals for the same chunk from different verification
+// nodes. In other words, there are multiple different approvals for the same chunk. Therefore, the index
+// Executed Chunk ➜ ResultApproval ID (queried here) is *only safe* to be used by Verification Nodes
+// for tracking their own approvals (for the same ExecutionResult, a Verifier will always produce the same approval)
+func LookupResultApproval(r storage.Reader, resultID flow.Identifier, chunkIndex uint64, approvalID *flow.Identifier) error {
+    return RetrieveByKey(r, MakePrefix(codeIndexResultApprovalByChunk, resultID, chunkIndex), approvalID)
+}
diff --git a/storage/store/approvals.go b/storage/store/approvals.go
new file mode 100644
index 00000000000..c68a83219c4
--- /dev/null
+++ b/storage/store/approvals.go
@@ -0,0 +1,129 @@
+package store
+
+import (
+    "errors"
+    "fmt"
+    "sync"
+
+    "github.com/onflow/flow-go/model/flow"
+    "github.com/onflow/flow-go/module"
+    "github.com/onflow/flow-go/module/metrics"
+    "github.com/onflow/flow-go/storage"
+    "github.com/onflow/flow-go/storage/operation"
+)
+
+// ResultApprovals implements persistent storage for result approvals.
+//
+// CAUTION: suitable only for _Verification Nodes_ for persisting their _own_ approvals!
+//   - In general, the Flow protocol requires multiple approvals for the same chunk from different
+//     verification nodes. In other words, there are multiple different approvals for the same chunk.
+//   - Internally, ResultApprovals populates an index from Executed Chunk ➜ ResultApproval. This is
+//     *only safe* for Verification Nodes when tracking their own approvals (for the same ExecutionResult,
+//     a Verifier will always produce the same approval)
+type ResultApprovals struct {
+    db       storage.DB
+    cache    *Cache[flow.Identifier, *flow.ResultApproval]
+    indexing *sync.Mutex // preventing concurrent indexing of approvals
+}
+
+var _ storage.ResultApprovals = (*ResultApprovals)(nil)
+
+func NewResultApprovals(collector module.CacheMetrics, db storage.DB) *ResultApprovals {
+    store := func(rw storage.ReaderBatchWriter, key flow.Identifier, val *flow.ResultApproval) error {
+        return operation.InsertResultApproval(rw.Writer(), val)
+    }
+
+    retrieve := func(r storage.Reader, approvalID flow.Identifier) (*flow.ResultApproval, error) {
+        var approval flow.ResultApproval
+        err := operation.RetrieveResultApproval(r, approvalID, &approval)
+        return &approval, err
+    }
+
+    return &ResultApprovals{
+        db: db,
+        cache: newCache(collector, metrics.ResourceResultApprovals,
+            withLimit[flow.Identifier, *flow.ResultApproval](flow.DefaultTransactionExpiry+100),
+            withStore(store),
+            withRetrieve(retrieve)),
+        indexing: new(sync.Mutex),
+    }
+}
+
+// Store stores a ResultApproval
+func (r *ResultApprovals) Store(approval *flow.ResultApproval) error {
+    return r.db.WithReaderBatchWriter(func(rw storage.ReaderBatchWriter) error {
+        return r.cache.PutTx(rw, approval.ID(), approval)
+    })
+}
+
+// Index indexes a ResultApproval by chunk (ResultID + chunk index).
+// This operation is idempotent (repeated calls with the same value are equivalent to
+// just calling the method once; still the method succeeds on each call).
+//
+// CAUTION: the Flow protocol requires multiple approvals for the same chunk from different verification
+// nodes. In other words, there are multiple different approvals for the same chunk. Therefore, the index
+// Executed Chunk ➜ ResultApproval ID (populated here) is *only safe* to be used by Verification Nodes
+// for tracking their own approvals.
+func (r *ResultApprovals) Index(resultID flow.Identifier, chunkIndex uint64, approvalID flow.Identifier) error {
+    // For the same ExecutionResult, a correct Verifier will always produce the same approval. In other words,
+    // if we have already indexed an approval for the pair (resultID, chunkIndex) we should never overwrite it
+    // with a _different_ approval. We explicitly enforce that here to prevent state corruption.
+    // The lock guarantees that no other thread can concurrently update the index. This way, checking that no
+    // value is already stored for the given key (resultID, chunkIndex) and then updating the index (or
+    // aborting) happen as one atomic operation.
+    r.indexing.Lock()
+    defer r.indexing.Unlock()
+
+    err := r.db.WithReaderBatchWriter(func(rw storage.ReaderBatchWriter) error {
+        var storedApprovalID flow.Identifier
+        err := operation.LookupResultApproval(rw.GlobalReader(), resultID, chunkIndex, &storedApprovalID)
+        if err != nil {
+            if !errors.Is(err, storage.ErrNotFound) {
+                return fmt.Errorf("could not lookup result approval ID: %w", err)
+            }
+
+            // no approval found, index the approval
+
+            return operation.UnsafeIndexResultApproval(rw.Writer(), resultID, chunkIndex, approvalID)
+        }
+
+        // An approval is already indexed; double-check that it is the same.
+        // We don't allow indexing multiple approvals per chunk because the
+        // store is only used within Verification nodes, and it is impossible
+        // for a Verification node to compute different approvals for the same
+        // chunk.
+
+        if storedApprovalID != approvalID {
+            return fmt.Errorf("attempting to store conflicting approval (result: %v, chunk index: %d): storing: %v, stored: %v. %w",
+                resultID, chunkIndex, approvalID, storedApprovalID, storage.ErrDataMismatch)
+        }
+
+        return nil
+    })
+
+    if err != nil {
+        return fmt.Errorf("could not index result approval: %w", err)
+    }
+    return nil
+}
+
+// ByID retrieves a ResultApproval by its ID
+func (r *ResultApprovals) ByID(approvalID flow.Identifier) (*flow.ResultApproval, error) {
+    val, err := r.cache.Get(r.db.Reader(), approvalID)
+    if err != nil {
+        return nil, err
+    }
+    return val, nil
+}
+
+// ByChunk retrieves a ResultApproval by result ID and chunk index. The
+// ResultApprovals store is only used within a verification node, where it is
+// assumed that there is never more than one approval per chunk.
+func (r *ResultApprovals) ByChunk(resultID flow.Identifier, chunkIndex uint64) (*flow.ResultApproval, error) { + var approvalID flow.Identifier + err := operation.LookupResultApproval(r.db.Reader(), resultID, chunkIndex, &approvalID) + if err != nil { + return nil, fmt.Errorf("could not lookup result approval ID: %w", err) + } + return r.ByID(approvalID) +} diff --git a/storage/store/approvals_test.go b/storage/store/approvals_test.go new file mode 100644 index 00000000000..05050eda3ff --- /dev/null +++ b/storage/store/approvals_test.go @@ -0,0 +1,131 @@ +package store_test + +import ( + "errors" + "sync" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/onflow/flow-go/module/metrics" + "github.com/onflow/flow-go/storage" + "github.com/onflow/flow-go/storage/operation/dbtest" + "github.com/onflow/flow-go/storage/store" + "github.com/onflow/flow-go/utils/unittest" +) + +func TestApprovalStoreAndRetrieve(t *testing.T) { + dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) { + metrics := metrics.NewNoopCollector() + store := store.NewResultApprovals(metrics, db) + + approval := unittest.ResultApprovalFixture() + err := store.Store(approval) + require.NoError(t, err) + + err = store.Index(approval.Body.ExecutionResultID, approval.Body.ChunkIndex, approval.ID()) + require.NoError(t, err) + + byID, err := store.ByID(approval.ID()) + require.NoError(t, err) + require.Equal(t, approval, byID) + + byChunk, err := store.ByChunk(approval.Body.ExecutionResultID, approval.Body.ChunkIndex) + require.NoError(t, err) + require.Equal(t, approval, byChunk) + }) +} + +func TestApprovalStoreTwice(t *testing.T) { + dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) { + metrics := metrics.NewNoopCollector() + store := store.NewResultApprovals(metrics, db) + + approval := unittest.ResultApprovalFixture() + err := store.Store(approval) + require.NoError(t, err) + + err = store.Index(approval.Body.ExecutionResultID, approval.Body.ChunkIndex, approval.ID()) + require.NoError(t, err) + + err = store.Store(approval) + require.NoError(t, err) + + err = store.Index(approval.Body.ExecutionResultID, approval.Body.ChunkIndex, approval.ID()) + require.NoError(t, err) + }) +} + +func TestApprovalStoreTwoDifferentApprovalsShouldFail(t *testing.T) { + dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) { + metrics := metrics.NewNoopCollector() + store := store.NewResultApprovals(metrics, db) + + approval1 := unittest.ResultApprovalFixture() + approval2 := unittest.ResultApprovalFixture() + + err := store.Store(approval1) + require.NoError(t, err) + + err = store.Index(approval1.Body.ExecutionResultID, approval1.Body.ChunkIndex, approval1.ID()) + require.NoError(t, err) + + // we can store a different approval, but we can't index a different + // approval for the same chunk. 
+        err = store.Store(approval2)
+        require.NoError(t, err)
+
+        err = store.Index(approval1.Body.ExecutionResultID, approval1.Body.ChunkIndex, approval2.ID())
+        require.Error(t, err)
+        require.True(t, errors.Is(err, storage.ErrDataMismatch))
+    })
+}
+
+// Verify that storing and indexing two conflicting approvals concurrently cannot both succeed;
+// we expect that one operation succeeds and the other one fails
+func TestApprovalStoreTwoDifferentApprovalsConcurrently(t *testing.T) {
+    dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) {
+        metrics := metrics.NewNoopCollector()
+        store := store.NewResultApprovals(metrics, db)
+
+        approval1 := unittest.ResultApprovalFixture()
+        approval2 := unittest.ResultApprovalFixture()
+
+        var wg sync.WaitGroup
+        wg.Add(2)
+
+        var firstIndexErr, secondIndexErr error
+
+        // First goroutine stores and indexes the first approval.
+        go func() {
+            defer wg.Done()
+
+            err := store.Store(approval1)
+            require.NoError(t, err)
+
+            firstIndexErr = store.Index(approval1.Body.ExecutionResultID, approval1.Body.ChunkIndex, approval1.ID())
+        }()
+
+        // Second goroutine stores and tries to index the second approval for the same chunk.
+        go func() {
+            defer wg.Done()
+
+            err := store.Store(approval2)
+            require.NoError(t, err)
+
+            secondIndexErr = store.Index(approval1.Body.ExecutionResultID, approval1.Body.ChunkIndex, approval2.ID())
+        }()
+
+        // Wait for both goroutines to finish
+        wg.Wait()
+
+        // Check that one of the Index operations succeeded and the other failed
+        if firstIndexErr == nil {
+            require.Error(t, secondIndexErr)
+            require.True(t, errors.Is(secondIndexErr, storage.ErrDataMismatch))
+        } else {
+            require.NoError(t, secondIndexErr)
+            require.True(t, errors.Is(firstIndexErr, storage.ErrDataMismatch))
+        }
+    })
+}
diff --git a/storage/store/cache_test.go b/storage/store/cache_test.go
new file mode 100644
index 00000000000..1dcee041e11
--- /dev/null
+++ b/storage/store/cache_test.go
@@ -0,0 +1,174 @@
+package store
+
+import (
+    "fmt"
+    "testing"
+
+    "github.com/stretchr/testify/assert"
+    "github.com/stretchr/testify/require"
+    "go.uber.org/atomic"
+
+    "github.com/onflow/flow-go/model/flow"
+    "github.com/onflow/flow-go/module/metrics"
+    "github.com/onflow/flow-go/storage"
+    "github.com/onflow/flow-go/storage/operation"
+    "github.com/onflow/flow-go/storage/operation/dbtest"
+    "github.com/onflow/flow-go/utils/unittest"
+)
+
+// TestCache_Exists tests checking the existence of items in the cache.
+func TestCache_Exists(t *testing.T) {
+    cache := newCache[flow.Identifier, any](metrics.NewNoopCollector(), "test")
+
+    t.Run("non-existent", func(t *testing.T) {
+        key := unittest.IdentifierFixture()
+        exists := cache.IsCached(key)
+        assert.False(t, exists)
+    })
+
+    t.Run("existent", func(t *testing.T) {
+        key := unittest.IdentifierFixture()
+        cache.Insert(key, unittest.RandomBytes(128))
+
+        exists := cache.IsCached(key)
+        assert.True(t, exists)
+    })
+
+    t.Run("removed", func(t *testing.T) {
+        key := unittest.IdentifierFixture()
+        // insert, then remove the item
+        cache.Insert(key, unittest.RandomBytes(128))
+        cache.Remove(key)
+
+        exists := cache.IsCached(key)
+        assert.False(t, exists)
+    })
+}
+
+// Test that storing an item caches it, and that on a cache hit
+// the retrieve function is not called
+func TestCache_CachedHit(t *testing.T) {
+    dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) {
+        retrieved := atomic.NewUint64(0)
+
+        store := func(rw storage.ReaderBatchWriter, key flow.Identifier, val []byte) error {
+            return operation.UpsertByKey(rw.Writer(), key[:], val)
+        }
+        retrieve := func(r storage.Reader, key flow.Identifier) ([]byte, error) {
+            retrieved.Inc()
+            var val []byte
+            err := operation.RetrieveByKey(r, key[:], &val)
+            if err != nil {
+                return nil, err
+            }
+            return val, nil
+        }
+
+        cache := newCache(metrics.NewNoopCollector(), "test",
+            withStore(store),
+            withRetrieve(retrieve),
+        )
+
+        key := unittest.IdentifierFixture()
+        val := unittest.RandomBytes(128)
+
+        // storing the item will cache it
+        require.NoError(t, db.WithReaderBatchWriter(func(rw storage.ReaderBatchWriter) error {
+            return cache.PutTx(rw, key, val)
+        }))
+
+        // retrieving the stored item should hit the cache; no db op is performed
+        cached, err := cache.Get(db.Reader(), key)
+        require.NoError(t, err)
+        require.Equal(t, val, cached)
+        require.Equal(t, uint64(0), retrieved.Load()) // no db op
+
+        // removing the cached item
+        cache.Remove(key)
+
+        // Get the same item; the cache misses and the value is retrieved from the db, so a db op is performed
+        cached, err = cache.Get(db.Reader(), key)
+        require.NoError(t, err)
+        require.Equal(t, val, cached)
+        require.Equal(t, uint64(1), retrieved.Load()) // hit db
+
+        // Get the same item again, hit cache
+        _, err = cache.Get(db.Reader(), key)
+        require.NoError(t, err)
+        require.Equal(t, uint64(1), retrieved.Load()) // cache hit
+
+        // Querying another key will hit the db
+        _, err = cache.Get(db.Reader(), unittest.IdentifierFixture())
+        require.ErrorIs(t, err, storage.ErrNotFound)
+    })
+}
+
+// Test that storage.ErrNotFound is returned when the item is missing,
+// and that the not-found result is not cached
+func TestCache_NotFoundReturned(t *testing.T) {
+    dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) {
+        retrieved := atomic.NewUint64(0)
+        retrieve := func(r storage.Reader, key flow.Identifier) ([]byte, error) {
+            retrieved.Inc()
+            return nil, storage.ErrNotFound
+        }
+
+        cache := newCache(metrics.NewNoopCollector(), "test",
+            withRetrieve(retrieve),
+        )
+
+        // Create a random identifier to use as a key
+        notExist := unittest.IdentifierFixture()
+
+        // Try to get the non-existent item from the cache
+        // Assert that the error is storage.ErrNotFound
+        _, err := cache.Get(db.Reader(), notExist)
+        require.ErrorIs(t, err, storage.ErrNotFound)
+
+        // Get the item again; the miss was not cached, so the retrieve function runs again
+        _, err = cache.Get(db.Reader(), notExist)
+        require.ErrorIs(t, err, storage.ErrNotFound)
+        require.Equal(t, uint64(2), retrieved.Load()) // retrieved from DB 2 times
+    })
+}
+
+// Test that when the store function returns an exception, the key-value pair is not cached
+func TestCache_ExceptionNotCached(t *testing.T) {
+    dbtest.RunWithDB(t, func(t *testing.T, db storage.DB) {
+        storeException := fmt.Errorf("storing exception")
+        stored, retrieved := atomic.NewUint64(0), atomic.NewUint64(0)
+
+        store := func(rw storage.ReaderBatchWriter, key flow.Identifier, val []byte) error {
+            stored.Inc()
+            return storeException
+        }
+        retrieve := func(r storage.Reader, key flow.Identifier) ([]byte, error) {
+            retrieved.Inc()
+            var val []byte
+            err := operation.RetrieveByKey(r, key[:], &val)
+            if err != nil {
+                return nil, err
+            }
+            return val, nil
+        }
+
+        cache := newCache(metrics.NewNoopCollector(), "test",
+            withStore(store),
+            withRetrieve(retrieve),
+        )
+
+        key := unittest.IdentifierFixture()
+        val := unittest.RandomBytes(128)
+
+        // store returns an exception error
+        err := db.WithReaderBatchWriter(func(rw storage.ReaderBatchWriter) error {
+            return cache.PutTx(rw, key, val)
+        })
+
+        require.ErrorIs(t, err, storeException)
+
+        // assert the key-value pair is not cached
+        _, err = cache.Get(db.Reader(), key)
+        require.ErrorIs(t, err, storage.ErrNotFound)
+    })
+}
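To round out the new approvals store, a minimal sketch of the intended calling pattern for a Verification Node persisting its own approval; the `indexOwnApproval` helper and its package name are hypothetical, but it relies only on APIs introduced in this diff (Store, Index, and the storage.ErrDataMismatch sentinel):

```go
package approvalsusage

import (
	"errors"
	"fmt"

	"github.com/onflow/flow-go/model/flow"
	"github.com/onflow/flow-go/storage"
)

// indexOwnApproval stores a verifier's own approval and indexes it by
// (execution result ID, chunk index). Index is idempotent for the same
// approval ID; a *different* approval ID for an already-indexed chunk
// surfaces as storage.ErrDataMismatch, which signals state corruption
// rather than a transient failure.
func indexOwnApproval(approvals storage.ResultApprovals, approval *flow.ResultApproval) error {
	if err := approvals.Store(approval); err != nil {
		return fmt.Errorf("could not store approval: %w", err)
	}

	err := approvals.Index(approval.Body.ExecutionResultID, approval.Body.ChunkIndex, approval.ID())
	if errors.Is(err, storage.ErrDataMismatch) {
		return fmt.Errorf("conflicting approval indexed for chunk %d: %w", approval.Body.ChunkIndex, err)
	}
	return err
}
```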