From 65fc6f1e7e301203998e23409392fddb39db5009 Mon Sep 17 00:00:00 2001 From: Kirill Logachev Date: Thu, 18 Jun 2026 21:08:13 +0000 Subject: [PATCH] chore(bigquery-jdbc): update perf client to run custom queries --- .../tools/client/JDBCClient.java | 18 ++++++++- java-bigquery-jdbc/tools/client/Makefile | 12 +++++- java-bigquery-jdbc/tools/perf/Makefile | 15 ++++++- java-bigquery-jdbc/tools/perf/README.md | 31 +++++++++++---- java-bigquery-jdbc/tools/perf/run_perf.py | 39 +++++++++++++------ 5 files changed, 90 insertions(+), 25 deletions(-) diff --git a/java-bigquery-jdbc/tools/client/JDBCClient.java b/java-bigquery-jdbc/tools/client/JDBCClient.java index 007b2be06c97..8bf331c561bd 100644 --- a/java-bigquery-jdbc/tools/client/JDBCClient.java +++ b/java-bigquery-jdbc/tools/client/JDBCClient.java @@ -23,6 +23,7 @@ public static void main(String[] args) throws Exception { String driverClass = "com.google.cloud.bigquery.jdbc.BigQueryDriver"; String action = null; String query = null; + String queryFile = null; boolean noOutput = false; int generateRows = 0; int generateCols = 5; @@ -53,6 +54,7 @@ public static void main(String[] args) throws Exception { case "driver-class": driverClass = val; break; case "action": action = val; break; case "query": query = val; break; + case "query-file": queryFile = val; break; case "no-output": noOutput = true; break; case "generate-rows": generateRows = Integer.parseInt(val); break; case "generate-cols": generateCols = Integer.parseInt(val); break; @@ -88,11 +90,19 @@ public static void main(String[] args) throws Exception { System.out.println("Connection successful.\n"); if ("query".equals(action)) { - if (generateRows > 0) { + if (query == null && queryFile != null) { + try { + query = readQueryFromFile(queryFile); + } catch (Exception e) { + System.err.println("Error reading query from file: " + e.getMessage()); + System.exit(1); + } + } + if (query == null && generateRows > 0) { query = generateDataQuery(generateRows, 
generateCols); } if (query == null) { - System.err.println("Error: --query or --generate-rows is required when action is 'query'"); + System.err.println("Error: --query, --query-file, or --generate-rows is required when action is 'query'"); System.exit(1); } warmup(conn); @@ -122,6 +132,10 @@ private static void warmup(Connection conn) { System.out.println("Warmup complete.\n"); } + private static String readQueryFromFile(String path) throws Exception { + return new String(java.nio.file.Files.readAllBytes(java.nio.file.Paths.get(path)), java.nio.charset.StandardCharsets.UTF_8); + } + private static String generateDataQuery(int rows, int cols) { int N = (int) Math.ceil(Math.sqrt(rows)); String idxExpr = "(i - 1) * " + N + " + j"; diff --git a/java-bigquery-jdbc/tools/client/Makefile b/java-bigquery-jdbc/tools/client/Makefile index 16460397b1ad..15184e938897 100644 --- a/java-bigquery-jdbc/tools/client/Makefile +++ b/java-bigquery-jdbc/tools/client/Makefile @@ -27,12 +27,20 @@ PARAMS = ProjectId=bigquery-devtools-drivers;OAuthType=0;OAuthServiceAcctEmail=; # Additional connection parameters EXTRA_PARAMS ?= +METHOD ?= getTables + ROWS ?= 10 COLS ?= 5 -METHOD ?= getTables OUTPUT ?= false QUERY ?= SELECT 1 +QUERY_FILE ?= + +ifneq ($(QUERY_FILE),) + QUERY_FLAG = --query-file "$(QUERY_FILE)" +else + QUERY_FLAG = --query "$(QUERY)" +endif ifeq ($(OUTPUT),false) OUTPUT_FLAG = --no-output @@ -53,7 +61,7 @@ COMMON_FLAGS = --url "$(URL);$(DEFAULT_PARAMS);$(PARAMS);$(EXTRA_PARAMS)" \ --driver-class "$(DRIVER_CLASS)" query: classes - $(J) $(JFR_FLAGS) -cp .:$(DRIVER_JAR) JDBCClient --action query $(COMMON_FLAGS) --query "$(QUERY)" $(OUTPUT_FLAG) $(EXTRA_ARGS) + $(J) $(JFR_FLAGS) -cp .:$(DRIVER_JAR) JDBCClient --action query $(COMMON_FLAGS) $(QUERY_FLAG) $(OUTPUT_FLAG) $(EXTRA_ARGS) query-generated: classes $(J) $(JFR_FLAGS) -cp .:$(DRIVER_JAR) JDBCClient --action query $(COMMON_FLAGS) $(OUTPUT_FLAG) --generate-rows $(ROWS) --generate-cols $(COLS) $(EXTRA_ARGS) diff --git 
a/java-bigquery-jdbc/tools/perf/Makefile b/java-bigquery-jdbc/tools/perf/Makefile index 2f985d3c5857..e4b8a4dbf430 100644 --- a/java-bigquery-jdbc/tools/perf/Makefile +++ b/java-bigquery-jdbc/tools/perf/Makefile @@ -2,6 +2,8 @@ # Defaults ITERATIONS ?= 5 +QUERY ?= +QUERY_FILE ?= ROWS ?= 1000 COLS ?= 5 VERSION ?= $(shell sed -n 's/.*<version>\([^<]*\)<\/version>.*/\1/p' ../../pom.xml | head -n 1) @@ -25,7 +27,18 @@ HTAPI_OPTS = EnableHighThroughputAPI=1;HighThroughputActivationRatio=0;HighThrou RUN_PERF = python3 run_perf.py # Common flags for run_perf.py -COMMON_FLAGS = -n $(ITERATIONS) --generate-rows $(ROWS) --generate-cols $(COLS) --jar1 $(JAR1) --class1 $(CLASS1) +COMMON_FLAGS = -n $(ITERATIONS) --jar1 $(JAR1) --class1 $(CLASS1) + +ifeq ($(QUERY)$(QUERY_FILE),) + COMMON_FLAGS += --generate-rows $(ROWS) --generate-cols $(COLS) +else + ifneq ($(QUERY),) + COMMON_FLAGS += --query "$(QUERY)" + endif + ifneq ($(QUERY_FILE),) + COMMON_FLAGS += --query-file "$(QUERY_FILE)" + endif +endif ifneq ($(JAR2),) COMMON_FLAGS += --jar2 $(JAR2) --class2 $(CLASS2) diff --git a/java-bigquery-jdbc/tools/perf/README.md b/java-bigquery-jdbc/tools/perf/README.md index 243d06fc0367..59f50ce4fa87 100644 --- a/java-bigquery-jdbc/tools/perf/README.md +++ b/java-bigquery-jdbc/tools/perf/README.md @@ -24,24 +24,32 @@ The easiest way to run tests is using the provided `Makefile`. 
It defines target The Makefile uses the following defaults which can be overridden: - `ITERATIONS`: 5 -- `ROWS`: 1000 -- `COLS`: 5 +- `ROWS`: 1000 (used only if neither `QUERY` nor `QUERY_FILE` is set) +- `COLS`: 5 (used only if neither `QUERY` nor `QUERY_FILE` is set) +- `QUERY`: Optional custom query to run +- `QUERY_FILE`: Optional path to a SQL file containing the query to run - `JAR1`: `../../drivers/google-cloud-bigquery-jdbc-0.9.0-all.jar` - `PROJECT_ID`: `bigquery-devtools-drivers` - `CREDENTIALS`: Value of `$GOOGLE_APPLICATION_CREDENTIALS` ### Examples -#### Run REST API tests with defaults +#### Run REST API tests with defaults (generates 1000 rows, 5 columns) ```bash make run-rest ``` -#### Run HTAPI tests with custom iterations and rows +#### Run REST API tests with custom generated data size ```bash -make run-htapi ITERATIONS=3 ROWS=50000 +make run-rest ROWS=50000 COLS=10 +``` + +#### Run HTAPI tests with custom iterations and query + +```bash +make run-htapi ITERATIONS=3 QUERY="SELECT * FROM my_dataset.my_table LIMIT 50000" ``` #### Compare two drivers @@ -64,14 +72,21 @@ For more control, you can run `run_perf.py` directly. - `--class1`: Class name for the first driver (default: `com.google.cloud.bigquery.jdbc.BigQueryDriver`). - `--class2`: Class name for the second driver (default: `com.google.cloud.bigquery.jdbc.BigQueryDriver`). - `-n`, `--iterations`: Number of iterations to run (default: 5). -- `--generate-rows`: Number of rows to generate via query (default: 0). -- `--generate-cols`: Number of columns to generate via query (default: 5). -- `--query`: A specific query to run (if not using generated data). +- `--query`: The query to run (takes precedence over `--query-file` if both are given). +- `--query-file`: Path to a SQL file containing the query to run. +- `--generate-rows`: Number of rows to generate; used only if neither `--query` nor `--query-file` is given (if unset or 0 in that case, 1000 rows and 5 columns are generated). +- `--generate-cols`: Number of columns to generate (default: 5). - `--output-md`: Append results as a markdown table to this file. 
- `--filter-metrics`: Comma-separated list of metrics to include in the markdown table. ### Examples +#### Run a single driver with a custom query + +```bash +python3 run_perf.py --url "jdbc:bigquery://https://www.googleapis.com/bigquery/v2:443;ProjectId=my-project;OAuthType=3" --jar1 path/to/driver.jar --query "SELECT * FROM my_dataset.my_table LIMIT 1000" -n 3 +``` + #### Run a single driver with generated data ```bash diff --git a/java-bigquery-jdbc/tools/perf/run_perf.py b/java-bigquery-jdbc/tools/perf/run_perf.py index dd05423ce462..c7eab95bfed8 100644 --- a/java-bigquery-jdbc/tools/perf/run_perf.py +++ b/java-bigquery-jdbc/tools/perf/run_perf.py @@ -21,7 +21,7 @@ # Base directory of the script BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -def run_test(url, driver_jar, driver_class, query=None, generate_rows=0, generate_cols=5, no_output=True): +def run_test(url, driver_jar, driver_class, query=None, query_file=None, generate_rows=0, generate_cols=5, no_output=True): # Base client folder is tools/client. Relative to tools/perf it is ../client. 
client_dir = os.path.join(os.path.dirname(BASE_DIR), "client") @@ -41,7 +41,9 @@ def run_test(url, driver_jar, driver_class, query=None, generate_rows=0, generat if query: cmd.extend(["--query", query]) - if generate_rows > 0: + elif query_file: + cmd.extend(["--query-file", query_file]) + elif generate_rows > 0: cmd.extend(["--generate-rows", str(generate_rows)]) cmd.extend(["--generate-cols", str(generate_cols)]) if no_output: @@ -227,14 +229,24 @@ def main(): parser.add_argument("--class1", default="com.google.cloud.bigquery.jdbc.BigQueryDriver", help="Class name for first driver") parser.add_argument("--class2", default="com.google.cloud.bigquery.jdbc.BigQueryDriver", help="Class name for second driver") parser.add_argument("-n", "--iterations", type=int, default=5, help="Number of iterations to run (default 5)") + parser.add_argument("--query", help="Query to run") + parser.add_argument("--query-file", help="Path to a SQL file containing the query to run") parser.add_argument("--generate-rows", type=int, default=0, help="Number of rows to generate") parser.add_argument("--generate-cols", type=int, default=5, help="Number of columns to generate") - parser.add_argument("--query", help="Query to run (if not using generated data)") parser.add_argument("--output-md", help="Append markdown table to this file containing the results") parser.add_argument("--filter-metrics", help="Comma-separated list of metrics to include in markdown tables") args = parser.parse_args() + query = args.query + query_file = args.query_file + generate_rows = args.generate_rows + generate_cols = args.generate_cols + + if not query and not query_file and generate_rows == 0: + generate_rows = 1000 + generate_cols = 5 + print("=" * 70) print(f"JDBC Performance Runner") print(f"URL : {args.url}") @@ -242,11 +254,13 @@ def main(): print(f"Jar 1 : {args.jar1} ({args.class1})") if args.jar2: print(f"Jar 2 : {args.jar2} ({args.class2})") - if args.generate_rows > 0: - print(f"Generate Rows: 
{args.generate_rows}") - print(f"Generate Cols: {args.generate_cols}") - elif args.query: - print(f"Query : {args.query}") + if query: + print(f"Query : {query}") + elif query_file: + print(f"Query File : {query_file}") + elif generate_rows > 0: + print(f"Generate Rows: {generate_rows}") + print(f"Generate Cols: {generate_cols}") print("=" * 70) driver_results = {} @@ -270,9 +284,10 @@ def main(): url=args.url, driver_jar=driver_jar, driver_class=driver_class, - query=args.query, - generate_rows=args.generate_rows, - generate_cols=args.generate_cols, + query=query, + query_file=query_file, + generate_rows=generate_rows, + generate_cols=generate_cols, no_output=True ) if res: @@ -284,7 +299,7 @@ def main(): base_label=base_label, new_label=new_label, diff_label=diff_label, - spec_name=f"Rows: {args.generate_rows}, Cols: {args.generate_cols}" if args.generate_rows > 0 else args.query, + spec_name=query if query else (f"File: {query_file}" if query_file else f"Rows: {generate_rows}, Cols: {generate_cols}"), output_md=args.output_md, filter_metrics=args.filter_metrics )