Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 63abe04

Browse files
committed
Add use-hot-data option to taxi benchmark
1 parent 1176e24 commit 63abe04

File tree

1 file changed

+36 / -35 lines changed

1 file changed

+36
-35
lines changed

omniscidb/Benchmarks/taxi/taxi_full_bench.cpp

Lines changed: 36 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -13,18 +13,17 @@ boost::filesystem::path g_data_path;
1313
size_t num_threads = 15;
1414
size_t g_fragment_size = 160000000 / num_threads;
1515
bool g_use_parquet{false};
16+
bool g_use_hot_data{false};
1617
ExecutorDeviceType g_device_type{ExecutorDeviceType::GPU};
1718

1819
using namespace TestHelpers::ArrowSQLRunner;
1920

20-
// #define USE_HOT_DATA
21+
#define USE_HOT_DATA
2122
#define PARALLEL_IMPORT_ENABLED
2223

2324
// when we want to measure storage latencies, read the csv files before starting the
2425
// benchmark
25-
#ifndef USE_HOT_DATA
2626
std::vector<std::shared_ptr<arrow::Table>> g_taxi_data_files;
27-
#endif
2827

2928
std::istream& operator>>(std::istream& in, ExecutorDeviceType& device_type) {
3029
std::string token;
@@ -286,67 +285,66 @@ T v(const TargetValue& r) {
286285

287286
static void table_count(benchmark::State& state) {
288287
for (auto _ : state) {
289-
#ifndef USE_HOT_DATA
290-
createTaxiTable();
291-
populateTaxiTable();
292-
#endif
288+
if (!g_use_hot_data) {
289+
createTaxiTable();
290+
populateTaxiTable();
291+
}
293292

294-
auto res = v<int64_t>(run_simple_agg("select count(*) from trips", g_device_type));
293+
auto res = v<int64_t>(run_simple_agg("select count(*) from trips;", g_device_type));
295294
std::cout << "Number of loaded tuples: " << res << std::endl;
296295
}
297296
}
298297

299298
static void taxi_q1(benchmark::State& state) {
300299
for (auto _ : state) {
301-
#ifndef USE_HOT_DATA
302-
createTaxiTable();
303-
populateTaxiTable();
304-
#endif
300+
if (!g_use_hot_data) {
301+
createTaxiTable();
302+
populateTaxiTable();
303+
}
305304

306-
run_multiple_agg("select cab_type, count(*) from trips group by cab_type",
305+
run_multiple_agg("select cab_type, count(*) from trips group by cab_type;",
307306
g_device_type);
308307
}
309308
}
310309

311310
static void taxi_q2(benchmark::State& state) {
312311
for (auto _ : state) {
313-
#ifndef USE_HOT_DATA
314-
createTaxiTable();
315-
populateTaxiTable();
316-
#endif
312+
if (!g_use_hot_data) {
313+
createTaxiTable();
314+
populateTaxiTable();
315+
}
317316

318317
run_multiple_agg(
319-
"SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count",
318+
"SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count;",
320319
g_device_type);
321320
}
322321
}
323322

324323
static void taxi_q3(benchmark::State& state) {
325324
for (auto _ : state) {
326-
#ifndef USE_HOT_DATA
327-
createTaxiTable();
328-
populateTaxiTable();
329-
#endif
330-
325+
if (!g_use_hot_data) {
326+
createTaxiTable();
327+
populateTaxiTable();
328+
}
331329
run_multiple_agg(
332330
"SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
333-
"count(*) FROM trips GROUP BY passenger_count, pickup_year",
331+
"count(*) FROM trips GROUP BY passenger_count, pickup_year;",
334332
g_device_type);
335333
}
336334
}
337335

338336
static void taxi_q4(benchmark::State& state) {
339337
for (auto _ : state) {
340-
#ifndef USE_HOT_DATA
341-
createTaxiTable();
342-
populateTaxiTable();
343-
#endif
338+
if (!g_use_hot_data) {
339+
createTaxiTable();
340+
populateTaxiTable();
341+
}
344342

345343
run_multiple_agg(
346344
"SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
347345
"cast(trip_distance as int) AS distance, count(*) AS the_count FROM trips GROUP "
348346
"BY passenger_count, pickup_year, distance ORDER BY pickup_year, the_count "
349-
"desc",
347+
"desc;",
350348
g_device_type);
351349
}
352350
}
@@ -406,6 +404,11 @@ int main(int argc, char* argv[]) {
406404
->implicit_value(ExecutorDeviceType::GPU)
407405
->default_value(ExecutorDeviceType::CPU),
408406
"Device type to use.");
407+
desc.add_options()("use-hot-data",
408+
po::value<bool>(&g_use_hot_data)
409+
->implicit_value(true)
410+
->default_value(g_use_hot_data),
411+
"Use prepopulated taxi data in queries.");
409412

410413
desc.add_options()(
411414
"use-lazy-materialization",
@@ -434,22 +437,20 @@ int main(int argc, char* argv[]) {
434437
}
435438

436439
try {
437-
#ifdef USE_HOT_DATA
438440
createTaxiTable();
439-
populateTaxiTable();
440-
#else
441-
if (g_use_parquet) {
441+
if (!g_use_hot_data && g_use_parquet) {
442442
throw std::runtime_error("Cannot use parquet files in cold data mode yet.");
443443
}
444-
createTaxiTable();
445444
auto table_info = getStorage()->getTableInfo(getStorage()->dbId(), "trips");
446445
if (!table_info) {
447446
throw std::runtime_error("Cannot find table \"trips\", creation failed?");
448447
}
449448

450449
auto col_infos = getStorage()->listColumns(table_info->db_id, table_info->table_id);
451450
g_taxi_data_files = readTaxiFilesCsv(col_infos);
452-
#endif
451+
if (g_use_hot_data) {
452+
loadTaxiArrowData();
453+
}
453454
// warmup();
454455
::benchmark::RunSpecifiedBenchmarks();
455456
} catch (const std::exception& e) {

0 commit comments

Comments
 (0)