Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 2711ffb

Browse files
committed
Add use-hot-data option to taxi benchmark
1 parent 1176e24 commit 2711ffb

File tree

1 file changed

+35 -35 lines changed

1 file changed

+35 -35 lines changed

omniscidb/Benchmarks/taxi/taxi_full_bench.cpp

Lines changed: 35 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -13,18 +13,16 @@ boost::filesystem::path g_data_path;
1313
size_t num_threads = 15;
1414
size_t g_fragment_size = 160000000 / num_threads;
1515
bool g_use_parquet{false};
16+
bool g_use_hot_data{false};
1617
ExecutorDeviceType g_device_type{ExecutorDeviceType::GPU};
1718

1819
using namespace TestHelpers::ArrowSQLRunner;
1920

20-
// #define USE_HOT_DATA
2121
#define PARALLEL_IMPORT_ENABLED
2222

2323
// when we want to measure storage latencies, read the csv files before starting the
2424
// benchmark
25-
#ifndef USE_HOT_DATA
2625
std::vector<std::shared_ptr<arrow::Table>> g_taxi_data_files;
27-
#endif
2826

2927
std::istream& operator>>(std::istream& in, ExecutorDeviceType& device_type) {
3028
std::string token;
@@ -286,67 +284,66 @@ T v(const TargetValue& r) {
286284

287285
static void table_count(benchmark::State& state) {
288286
for (auto _ : state) {
289-
#ifndef USE_HOT_DATA
290-
createTaxiTable();
291-
populateTaxiTable();
292-
#endif
287+
if (!g_use_hot_data) {
288+
createTaxiTable();
289+
populateTaxiTable();
290+
}
293291

294-
auto res = v<int64_t>(run_simple_agg("select count(*) from trips", g_device_type));
292+
auto res = v<int64_t>(run_simple_agg("select count(*) from trips;", g_device_type));
295293
std::cout << "Number of loaded tuples: " << res << std::endl;
296294
}
297295
}
298296

299297
static void taxi_q1(benchmark::State& state) {
300298
for (auto _ : state) {
301-
#ifndef USE_HOT_DATA
302-
createTaxiTable();
303-
populateTaxiTable();
304-
#endif
299+
if (!g_use_hot_data) {
300+
createTaxiTable();
301+
populateTaxiTable();
302+
}
305303

306-
run_multiple_agg("select cab_type, count(*) from trips group by cab_type",
304+
run_multiple_agg("select cab_type, count(*) from trips group by cab_type;",
307305
g_device_type);
308306
}
309307
}
310308

311309
static void taxi_q2(benchmark::State& state) {
312310
for (auto _ : state) {
313-
#ifndef USE_HOT_DATA
314-
createTaxiTable();
315-
populateTaxiTable();
316-
#endif
311+
if (!g_use_hot_data) {
312+
createTaxiTable();
313+
populateTaxiTable();
314+
}
317315

318316
run_multiple_agg(
319-
"SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count",
317+
"SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count;",
320318
g_device_type);
321319
}
322320
}
323321

324322
static void taxi_q3(benchmark::State& state) {
325323
for (auto _ : state) {
326-
#ifndef USE_HOT_DATA
327-
createTaxiTable();
328-
populateTaxiTable();
329-
#endif
330-
324+
if (!g_use_hot_data) {
325+
createTaxiTable();
326+
populateTaxiTable();
327+
}
331328
run_multiple_agg(
332329
"SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
333-
"count(*) FROM trips GROUP BY passenger_count, pickup_year",
330+
"count(*) FROM trips GROUP BY passenger_count, pickup_year;",
334331
g_device_type);
335332
}
336333
}
337334

338335
static void taxi_q4(benchmark::State& state) {
339336
for (auto _ : state) {
340-
#ifndef USE_HOT_DATA
341-
createTaxiTable();
342-
populateTaxiTable();
343-
#endif
337+
if (!g_use_hot_data) {
338+
createTaxiTable();
339+
populateTaxiTable();
340+
}
344341

345342
run_multiple_agg(
346343
"SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
347344
"cast(trip_distance as int) AS distance, count(*) AS the_count FROM trips GROUP "
348345
"BY passenger_count, pickup_year, distance ORDER BY pickup_year, the_count "
349-
"desc",
346+
"desc;",
350347
g_device_type);
351348
}
352349
}
@@ -406,6 +403,11 @@ int main(int argc, char* argv[]) {
406403
->implicit_value(ExecutorDeviceType::GPU)
407404
->default_value(ExecutorDeviceType::CPU),
408405
"Device type to use.");
406+
desc.add_options()("use-hot-data",
407+
po::value<bool>(&g_use_hot_data)
408+
->implicit_value(true)
409+
->default_value(g_use_hot_data),
410+
"Use prepopulated taxi data in queries.");
409411

410412
desc.add_options()(
411413
"use-lazy-materialization",
@@ -434,22 +436,20 @@ int main(int argc, char* argv[]) {
434436
}
435437

436438
try {
437-
#ifdef USE_HOT_DATA
438439
createTaxiTable();
439-
populateTaxiTable();
440-
#else
441-
if (g_use_parquet) {
440+
if (!g_use_hot_data && g_use_parquet) {
442441
throw std::runtime_error("Cannot use parquet files in cold data mode yet.");
443442
}
444-
createTaxiTable();
445443
auto table_info = getStorage()->getTableInfo(getStorage()->dbId(), "trips");
446444
if (!table_info) {
447445
throw std::runtime_error("Cannot find table \"trips\", creation failed?");
448446
}
449447

450448
auto col_infos = getStorage()->listColumns(table_info->db_id, table_info->table_id);
451449
g_taxi_data_files = readTaxiFilesCsv(col_infos);
452-
#endif
450+
if (g_use_hot_data) {
451+
loadTaxiArrowData();
452+
}
453453
// warmup();
454454
::benchmark::RunSpecifiedBenchmarks();
455455
} catch (const std::exception& e) {

0 commit comments

Comments (0)