diff --git a/tapdb/universe_perf_test.go b/tapdb/universe_perf_test.go index 7bbcab0d0..f1c1eecaf 100644 --- a/tapdb/universe_perf_test.go +++ b/tapdb/universe_perf_test.go @@ -2,6 +2,7 @@ package tapdb import ( "context" + "database/sql" "fmt" "sort" "testing" @@ -11,110 +12,15 @@ import ( "github.com/stretchr/testify/require" ) - type dbSizeStats struct { - tableName string - rowCount int64 - dataSize int64 // Size without indices - indexSize int64 // Total size of all indices - totalSize int64 // dataSize + indexSize -} - -// measureDBSize gets table and index sizes from SQLite -func measureDBSize(t *testing.T, db *BaseDB) map[string]*dbSizeStats { - stats := make(map[string]*dbSizeStats) - - // First get list of all tables - rows, err := db.Query(` - SELECT DISTINCT - tbl_name, - type - FROM sqlite_master - WHERE type='table' - `) - require.NoError(t, err) - defer rows.Close() - - tables := make([]string, 0) - for rows.Next() { - var name, tblType string - err := rows.Scan(&name, &tblType) - require.NoError(t, err) - tables = append(tables, name) - - stats[name] = &dbSizeStats{ - tableName: name, - } - } - - // For each table, get its stats - for _, tableName := range tables { - // Get row count using COUNT(*) - var rowCount int64 - err = db.QueryRow(fmt.Sprintf(`SELECT COUNT(*) FROM main.%q`, tableName)).Scan(&rowCount) - if err != nil { - t.Logf("Skipping row count for %s: %v", tableName, err) - continue - } - stats[tableName].rowCount = rowCount - - // Get table size - var pageCount int64 - err = db.QueryRow(` - SELECT COUNT(*) - FROM dbstat - WHERE name = ? - `, tableName).Scan(&pageCount) - if err != nil { - t.Logf("Skipping size stats for %s: %v", tableName, err) - continue - } - - // Get page size (constant for the database) - var pageSize int64 - err = db.QueryRow(`PRAGMA page_size`).Scan(&pageSize) - require.NoError(t, err) - - stats[tableName].dataSize = pageCount * pageSize - } - - // Get list of indices and their sizes - rows, err = db.Query(` - SELECT - m.tbl_name as table_name, - m.name as index_name, - (SELECT COUNT(*) FROM dbstat WHERE name = m.name) as page_count, - (SELECT page_size FROM pragma_page_size) as page_size - FROM sqlite_master m - WHERE m.type = 'index' - `) - require.NoError(t, err) - defer rows.Close() - - for rows.Next() { - var ( - tableName string - indexName string - pageCount int64 - pageSize int64 - ) - err := rows.Scan(&tableName, &indexName, &pageCount, &pageSize) - if err != nil { - t.Logf("Skipping index stat: %v", err) - continue - } - - if stat, ok := stats[tableName]; ok { - indexSize := pageCount * pageSize - stat.indexSize += indexSize - stat.totalSize = stat.dataSize + stat.indexSize - } - } - - return stats + tableName string + rowCount int64 + dataSize int64 // Size without indices + indexSize int64 // Total size of all indices + totalSize int64 // dataSize + indexSize } -// prettyPrintSizeStats formats the size statistics nicely +// prettyPrintSizeStats formats and logs database size statistics. func prettyPrintSizeStats(t *testing.T, stats map[string]*dbSizeStats) { var totalData, totalIndex int64 @@ -126,21 +32,24 @@ func prettyPrintSizeStats(t *testing.T, stats map[string]*dbSizeStats) { for table := range stats { tables = append(tables, table) } - sort.Strings(tables) + sort.Strings(tables) // Sort tables alphabetically for consistency for _, table := range tables { stat := stats[table] + + // If dataSize is zero, mark values as unavailable if stat.dataSize == 0 { t.Logf("%-20s %8d %12s %12s %14s", stat.tableName, stat.rowCount, - "-", - "-", - "-", + "-", // Data size unavailable + "-", // Index size unavailable + "-", // Overhead unavailable ) continue } + // Calculate index overhead percentage overhead := float64(stat.indexSize) / float64(stat.dataSize) * 100 t.Logf("%-20s %8d %12s %12s %14.1f%%", stat.tableName, @@ -149,16 +58,18 @@ func prettyPrintSizeStats(t *testing.T, stats map[string]*dbSizeStats) { formatSize(stat.indexSize), overhead, ) - + + // Add to totals totalData += stat.dataSize totalIndex += stat.indexSize } + // Print total statistics if data is available t.Log("----------------------------------------------------------------------") if totalData > 0 { totalOverhead := float64(totalIndex) / float64(totalData) * 100 t.Logf("%-20s %8s %12s %12s %14.1f%%", - "TOTAL", "-", + "TOTAL", "-", // No row count for TOTAL formatSize(totalData), formatSize(totalIndex), totalOverhead, @@ -166,7 +77,7 @@ func prettyPrintSizeStats(t *testing.T, stats map[string]*dbSizeStats) { } } -// formatSize returns human-readable file sizes +// formatSize returns a human-readable file size. func formatSize(bytes int64) string { const unit = 1024 if bytes < unit { @@ -181,6 +92,87 @@ func formatSize(bytes int64) string { float64(bytes)/float64(div), "KMGTPE"[exp]) } + +// measureDBSize gets table and index sizes from SQLite and PostgreSQL. +func measureDBSize(t *testing.T, db *BaseDB, isPostgres bool) map[string]*dbSizeStats { + stats := make(map[string]*dbSizeStats) + var rows *sql.Rows + var err error + + if isPostgres { + // PostgreSQL: Get list of all tables + rows, err = db.Query(` + SELECT tablename + FROM pg_catalog.pg_tables + WHERE schemaname != 'pg_catalog' AND schemaname != 'information_schema' + `) + } else { + // SQLite: Get list of all tables + rows, err = db.Query(` + SELECT DISTINCT tbl_name + FROM sqlite_master + WHERE type='table' + `) + } + require.NoError(t, err) + defer rows.Close() + + tables := make([]string, 0) + for rows.Next() { + var name string + err := rows.Scan(&name) + require.NoError(t, err) + tables = append(tables, name) + + stats[name] = &dbSizeStats{ + tableName: name, + } + } + + for _, tableName := range tables { + var rowCount int64 + err = db.QueryRow(fmt.Sprintf(`SELECT COUNT(*) FROM %s`, tableName)).Scan(&rowCount) + if err != nil { + t.Logf("Skipping row count for %s: %v", tableName, err) + continue + } + stats[tableName].rowCount = rowCount + + if isPostgres { + // PostgreSQL: Get data size and index size separately + var dataSize, indexSize int64 + err = db.QueryRow(`SELECT pg_table_size($1)`, tableName).Scan(&dataSize) + if err != nil { + t.Logf("Skipping data size for %s: %v", tableName, err) + continue + } + err = db.QueryRow(`SELECT pg_indexes_size($1)`, tableName).Scan(&indexSize) + if err != nil { + t.Logf("Skipping index size for %s: %v", tableName, err) + continue + } + stats[tableName].dataSize = dataSize + stats[tableName].indexSize = indexSize + stats[tableName].totalSize = dataSize + indexSize + } else { + // SQLite: Calculate size based on page count and page size + var pageCount, pageSize int64 + err = db.QueryRow(`SELECT COUNT(*) FROM dbstat WHERE name = ?`, tableName).Scan(&pageCount) + if err != nil { + t.Logf("Skipping size stats for %s: %v", tableName, err) + continue + } + + err = db.QueryRow(`PRAGMA page_size`).Scan(&pageSize) + require.NoError(t, err) + + stats[tableName].dataSize = pageCount * pageSize + } + } + + return stats +} + // TestUniverseIndexPerformance tests that our new indices improve query // performance by comparing performance with and without indices. func TestUniverseIndexPerformance(t *testing.T) { @@ -190,12 +182,23 @@ func TestUniverseIndexPerformance(t *testing.T) { t.Parallel() + // Determine the database type once + db := NewTestDB(t) + var isPostgres bool + err := db.BaseDB.QueryRow(`SELECT 1 FROM pg_catalog.pg_tables LIMIT 1`).Scan(new(int)) + if err == nil { + isPostgres = true + } else { + err = db.BaseDB.QueryRow(`SELECT 1 FROM sqlite_master LIMIT 1`).Scan(new(int)) + require.NoError(t, err, "Failed to determine database type") + } + const ( numAssets = 25 numLeavesPerTree = 10 numEventsPerAsset = 15 - numQueries = 10 - batchSize = 5 + numQueries = 10 + batchSize = 5 ) type queryStats struct { @@ -210,17 +213,14 @@ func TestUniverseIndexPerformance(t *testing.T) { runTest := func(withIndices bool) { t.Run(fmt.Sprintf("indices=%v", withIndices), func(t *testing.T) { ctx, cancel := context.WithTimeout( - context.Background(), time.Minute, + context.Background(), 2*time.Minute, ) defer cancel() - db := NewTestDB(t) - // Drop indices only if we're testing without them if !withIndices { t.Log("Dropping indices...") - sqlDB := db.BaseDB - _, err := sqlDB.Exec(` + _, err := db.BaseDB.Exec(` DROP INDEX IF EXISTS idx_universe_roots_namespace; DROP INDEX IF EXISTS idx_universe_roots_issuance; DROP INDEX IF EXISTS idx_universe_leaves_lookup; @@ -244,7 +244,6 @@ func TestUniverseIndexPerformance(t *testing.T) { // Create test data in batches for i := 0; i < numAssets; i++ { - // Create leaves in batches for j := 0; j < numLeavesPerTree; j += batchSize { end := j + batchSize if end > numLeavesPerTree { @@ -257,8 +256,6 @@ func TestUniverseIndexPerformance(t *testing.T) { require.NoError(t, err) } } - - // Create events in batches for j := 0; j < numEventsPerAsset; j += batchSize { end := j + batchSize if end > numEventsPerAsset { @@ -269,30 +266,26 @@ func TestUniverseIndexPerformance(t *testing.T) { h.logSyncEventByIndex(i) } } - if (i+1)%10 == 0 { t.Logf("Processed %d/%d assets", i+1, numAssets) } } - t.Logf("Test data creation took: %v", time.Since(dataStart)) - // Measure size after data creation t.Log("Measuring initial database size...") - initialSizes := measureDBSize(t, db.BaseDB) + initialSizes := measureDBSize(t, db.BaseDB, isPostgres) prettyPrintSizeStats(t, initialSizes) if withIndices { t.Log("Analyzing tables...") - sqlDB := db.BaseDB - _, err := sqlDB.Exec("ANALYZE;") + _, err := db.BaseDB.Exec("ANALYZE;") require.NoError(t, err) } testQueries := []struct { name string fn func() time.Duration - }{ + }{ { name: "universe root namespace", fn: func() time.Duration { @@ -318,12 +311,19 @@ func TestUniverseIndexPerformance(t *testing.T) { start := time.Now() err := stats.db.ExecTx(ctx, &readTx, func(db UniverseStatsStore) error { + if isPostgres { + _, err := db.QueryAssetStatsPerDayPostgres( + ctx, AssetStatsPerDayQueryPg{ + StartTime: testClock.Now().Add(-24 * time.Hour).Unix(), + EndTime: testClock.Now().Unix(), + }, + ) + return err + } _, err := db.QueryAssetStatsPerDaySqlite( ctx, AssetStatsPerDayQuery{ - StartTime: testClock.Now().Add( - -24 * time.Hour, - ).Unix(), - EndTime: testClock.Now().Unix(), + StartTime: testClock.Now().Add(-24 * time.Hour).Unix(), + EndTime: testClock.Now().Unix(), }, ) return err @@ -387,7 +387,7 @@ func TestUniverseIndexPerformance(t *testing.T) { // Print final comparison t.Log("\n=== Performance Comparison ===") - + var testNames []string for name := range testResults { testNames = append(testNames, name) @@ -402,4 +402,4 @@ func TestUniverseIndexPerformance(t *testing.T) { t.Logf(" Without indices: %v", result.withoutIndices) t.Logf(" Improvement: %.2fx", improvement) } -} \ No newline at end of file +}