Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions internal/libs/clickhouse.go
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@ func queryBlocksByBlockNumbers(chainId uint64, nums []uint64) ([]common.Block, e
return nil, nil
}
q := fmt.Sprintf(
"SELECT %s FROM %s.blocks FINAL WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number",
"SELECT %s FROM %s.blocks WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number",
strings.Join(defaultBlockFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand All @@ -283,7 +283,7 @@ func queryTransactionsByBlockNumbers(chainId uint64, nums []uint64) ([]common.Tr
return nil, nil
}
q := fmt.Sprintf(
"SELECT %s FROM %s.transactions FINAL WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number, transaction_index",
"SELECT %s FROM %s.transactions WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number, transaction_index",
strings.Join(defaultTransactionFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand All @@ -297,7 +297,7 @@ func queryLogsByBlockNumbers(chainId uint64, nums []uint64) ([]common.Log, error
return nil, nil
}
q := fmt.Sprintf(
"SELECT %s FROM %s.logs FINAL WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number, log_index",
"SELECT %s FROM %s.logs WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number, log_index",
strings.Join(defaultLogFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand All @@ -311,7 +311,7 @@ func queryTracesByBlockNumbers(chainId uint64, nums []uint64) ([]common.Trace, e
return nil, nil
}
q := fmt.Sprintf(
"SELECT %s FROM %s.traces FINAL WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number, transaction_index",
"SELECT %s FROM %s.traces WHERE chain_id = %d AND block_number IN (%s) ORDER BY block_number, transaction_index",
strings.Join(defaultTraceFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand Down Expand Up @@ -415,7 +415,7 @@ func GetTransactionMismatchRangeFromClickHouseV2(chainId uint64, startBlockNumbe

// Aggregate transaction counts per block from the transactions table.
query := fmt.Sprintf(
"SELECT block_number, count() AS tx_count FROM %s.transactions FINAL WHERE chain_id = %d AND block_number BETWEEN %d AND %d GROUP BY block_number ORDER BY block_number",
"SELECT block_number, count() AS tx_count FROM %s.transactions WHERE chain_id = %d AND block_number BETWEEN %d AND %d GROUP BY block_number ORDER BY block_number",
config.Cfg.CommitterClickhouseDatabase,
chainId,
startBlockNumber,
Expand Down Expand Up @@ -492,7 +492,7 @@ func GetLogsMismatchRangeFromClickHouseV2(chainId uint64, startBlockNumber uint6

// Aggregate log counts and max log_index per block from the logs table.
query := fmt.Sprintf(
"SELECT block_number, count() AS log_count, max(log_index) AS max_log_index FROM %s.logs FINAL WHERE chain_id = %d AND block_number BETWEEN %d AND %d GROUP BY block_number ORDER BY block_number",
"SELECT block_number, count() AS log_count, max(log_index) AS max_log_index FROM %s.logs WHERE chain_id = %d AND block_number BETWEEN %d AND %d GROUP BY block_number ORDER BY block_number",
config.Cfg.CommitterClickhouseDatabase,
chainId,
startBlockNumber,
Expand Down Expand Up @@ -560,7 +560,7 @@ func getBlocksFromV2(chainId uint64, startBlockNumber uint64, endBlockNumber uin
length := endBlockNumber - startBlockNumber + 1
blocksRaw := make([]common.Block, length)

query := fmt.Sprintf("SELECT %s FROM %s.blocks FINAL WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number",
query := fmt.Sprintf("SELECT %s FROM %s.blocks WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Duplicate-key probe for the blocks, transactions, logs and traces tables.
# Strict mode: exit on any failing command, on expansion of an unset variable,
# and on a failure in any pipeline stage. Because the query below expands
# CH_DB, CHAIN_ID, START_BLOCK and END_BLOCK, leaving any of them unset aborts
# the script instead of sending a malformed query.
set -euo pipefail

# In a ClickHouse-connected environment, verify duplicate-key presence in target ranges.
# Required env vars: CH_DB, CHAIN_ID, START_BLOCK, END_BLOCK
#
# The single query returns one row per table: a label and the number of keys
# that appear more than once for the given chain_id with block_number between
# START_BLOCK and END_BLOCK (inclusive). A count of 0 means no duplicate keys
# were found in that range.
# Grouping keys checked per table:
#   blocks       -> block_number
#   transactions -> block_number, transaction_index
#   logs         -> block_number, transaction_index, log_index
#   traces       -> block_number, transaction_index, trace_address
clickhouse-client --query "
SELECT 'blocks' AS tbl, count() FROM (
  SELECT block_number FROM ${CH_DB}.blocks
  WHERE chain_id=${CHAIN_ID} AND block_number BETWEEN ${START_BLOCK} AND ${END_BLOCK}
  GROUP BY block_number HAVING count() > 1
)
UNION ALL
SELECT 'transactions', count() FROM (
  SELECT block_number, transaction_index FROM ${CH_DB}.transactions
  WHERE chain_id=${CHAIN_ID} AND block_number BETWEEN ${START_BLOCK} AND ${END_BLOCK}
  GROUP BY block_number, transaction_index HAVING count() > 1
)
UNION ALL
SELECT 'logs', count() FROM (
  SELECT block_number, transaction_index, log_index FROM ${CH_DB}.logs
  WHERE chain_id=${CHAIN_ID} AND block_number BETWEEN ${START_BLOCK} AND ${END_BLOCK}
  GROUP BY block_number, transaction_index, log_index HAVING count() > 1
)
UNION ALL
SELECT 'traces', count() FROM (
  SELECT block_number, transaction_index, trace_address FROM ${CH_DB}.traces
  WHERE chain_id=${CHAIN_ID} AND block_number BETWEEN ${START_BLOCK} AND ${END_BLOCK}
  GROUP BY block_number, transaction_index, trace_address HAVING count() > 1
);
"

Repository: thirdweb-dev/insight

Length of output: 106


🏁 Script executed:

# First, let's look at the clickhouse.go file around the specified lines
wc -l internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 95


🏁 Script executed:

# Read the relevant sections around lines 563, 592, 621, 650
sed -n '560,565p' internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 367


🏁 Script executed:

# Read all four locations mentioned
sed -n '560,565p;589,594p;618,623p;647,652p' internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 1331


🏁 Script executed:

# Search for FINAL keyword in the file to see if it's present anywhere
grep -n "FINAL" internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 544


🏁 Script executed:

# Search for getBlocksFromV2 function
rg -n "getBlocksFromV2" --type go

Repository: thirdweb-dev/insight

Length of output: 534


🏁 Script executed:

# Search for tx/log/trace loader functions
rg -n "append.*row|overwrite|last-seen" --type go internal/libs/

Repository: thirdweb-dev/insight

Length of output: 46


🏁 Script executed:

# Read the function around line 558 to see the full structure
sed -n '558,680p' internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 3915


🏁 Script executed:

# Let's see the full context - what functions use FINAL queries
sed -n '260,330p' internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 2356


🏁 Script executed:

# Let's understand the git history - was FINAL removed recently?
git log --oneline -n 20 -- internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 108


🏁 Script executed:

# Check if there's a pattern - let's see all the V2 functions and their current state
grep -n "func.*FromV2\|FINAL" internal/libs/clickhouse.go | head -20

Repository: thirdweb-dev/insight

Length of output: 1018


🏁 Script executed:

# Let's see what the commit changed
git show a92cf92 --stat

Repository: thirdweb-dev/insight

Length of output: 4326


🏁 Script executed:

# Let's see the actual diff for the changed lines
git show a92cf92 -- internal/libs/clickhouse.go | head -100

Repository: thirdweb-dev/insight

Length of output: 3891


🏁 Script executed:

# Check how the data flows - where are getBlocksFromV2 and friends called?
rg -n "getBlocksFromV2|getTransactionsFromV2|getLogsFromV2|getTracesFromV2" --type go

Repository: thirdweb-dev/insight

Length of output: 1309


🏁 Script executed:

# Let's check the data flow - see where these functions are called from
sed -n '200,250p' internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 1433


🏁 Script executed:

# Check the structure of the data - are blocks supposed to be unique per number?
sed -n '400,450p' internal/libs/clickhouse.go

Repository: thirdweb-dev/insight

Length of output: 1867


🏁 Script executed:

# Let's see if there's any comment explaining why FINAL was removed
git show a92cf92 -- internal/libs/clickhouse.go | grep -A 5 -B 5 "FINAL\|getBlocksFromV2\|getTransactionsFromV2"

Repository: thirdweb-dev/insight

Length of output: 4648


Range loaders lack a consistent dedup strategy after the removal of FINAL.

Commit a92cf92 removed FINAL from getBlocksFromV2, getTransactionsFromV2, getLogsFromV2, and getTracesFromV2 (lines 563, 592, 621, 650). This creates inconsistent behavior: blocks use index-based overwrite (implicitly deduping to the last row), while transactions, logs, and traces use append (accumulating all rows). Without explicit dedup logic, outputs become dependent on merge state—if tables contain duplicates from out-of-order inserts, results vary between loaders.

Meanwhile, queryBlocksByBlockNumbers and related functions retain FINAL, which adds a further inconsistency. Establish an explicit, deterministic dedup policy across all loaders: either add FINAL back, filter for the latest versions, or document the reliance on merge order.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@internal/libs/clickhouse.go` at line 563, getBlocksFromV2,
getTransactionsFromV2, getLogsFromV2, and getTracesFromV2 now behave
inconsistently after removing FINAL (while queryBlocksByBlockNumbers still uses
FINAL), so restore a deterministic dedup policy across all loaders: either
re-add FINAL to the SELECT queries in
getBlocksFromV2/getTransactionsFromV2/getLogsFromV2/getTracesFromV2 to match
queryBlocksByBlockNumbers, or replace FINAL with an explicit dedup expression
(e.g., select latest row per key via argMax/anyLast/ROW_NUMBER filter) in each
function so blocks, transactions, logs and traces all dedupe the same way;
update all four functions (getBlocksFromV2, getTransactionsFromV2,
getLogsFromV2, getTracesFromV2) consistently.

strings.Join(defaultBlockFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand Down Expand Up @@ -589,7 +589,7 @@ func getTransactionsFromV2(chainId uint64, startBlockNumber uint64, endBlockNumb
length := endBlockNumber - startBlockNumber + 1
transactionsRaw := make([][]common.Transaction, length)

query := fmt.Sprintf("SELECT %s FROM %s.transactions FINAL WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number, transaction_index",
query := fmt.Sprintf("SELECT %s FROM %s.transactions WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number, transaction_index",
strings.Join(defaultTransactionFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand Down Expand Up @@ -618,7 +618,7 @@ func getLogsFromV2(chainId uint64, startBlockNumber uint64, endBlockNumber uint6
length := endBlockNumber - startBlockNumber + 1
logsRaw := make([][]common.Log, length)

query := fmt.Sprintf("SELECT %s FROM %s.logs FINAL WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number, log_index",
query := fmt.Sprintf("SELECT %s FROM %s.logs WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number, log_index",
strings.Join(defaultLogFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand Down Expand Up @@ -647,7 +647,7 @@ func getTracesFromV2(chainId uint64, startBlockNumber uint64, endBlockNumber uin
length := endBlockNumber - startBlockNumber + 1
tracesRaw := make([][]common.Trace, length)

query := fmt.Sprintf("SELECT %s FROM %s.traces FINAL WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number",
query := fmt.Sprintf("SELECT %s FROM %s.traces WHERE chain_id = %d AND block_number BETWEEN %d AND %d order by block_number",
strings.Join(defaultTraceFields, ", "),
config.Cfg.CommitterClickhouseDatabase,
chainId,
Expand Down
Loading