@@ -13,18 +13,16 @@ boost::filesystem::path g_data_path;
13
13
// Thread-count setting; the only use visible in this hunk is deriving
// g_fragment_size below.
size_t num_threads = 15 ;
14
14
// Fragment size: 160000000 split across num_threads (integer division).
size_t g_fragment_size = 160000000 / num_threads;
15
15
// main() throws "Cannot use parquet files in cold data mode yet." when this is
// set while g_use_hot_data is false.
bool g_use_parquet{false };
16
// Added by this change: runtime replacement for the former USE_HOT_DATA
// compile-time macro. main() binds it to the "use-hot-data" command-line
// option. When false, each benchmark iteration in the visible benchmarks calls
// createTaxiTable() and populateTaxiTable() before running its query.
+ bool g_use_hot_data{false };
16
17
// Device type handed to every run_simple_agg / run_multiple_agg call in the
// benchmarks below.
ExecutorDeviceType g_device_type{ExecutorDeviceType::GPU};
17
18
18
19
using namespace TestHelpers ::ArrowSQLRunner;
19
20
20
// Removed by this change: the commented-out compile-time switch is dropped now
// that hot-data mode is selected at runtime through g_use_hot_data.
- // #define USE_HOT_DATA
21
21
#define PARALLEL_IMPORT_ENABLED
22
22
23
23
// when we want to measure storage latencies, read the csv files before starting the
24
24
// benchmark
25
- #ifndef USE_HOT_DATA
26
25
// Assigned in main() from readTaxiFilesCsv(col_infos) before
// RunSpecifiedBenchmarks() is called. The surrounding #ifndef/#endif guard is
// removed by this change, so the variable is now declared unconditionally.
std::vector<std::shared_ptr<arrow::Table>> g_taxi_data_files;
27
- #endif
28
26
29
27
std::istream& operator >>(std::istream& in, ExecutorDeviceType& device_type) {
30
28
std::string token;
@@ -286,67 +284,66 @@ T v(const TargetValue& r) {
286
284
287
285
// Benchmark: runs a count(*) over the "trips" table once per benchmark
// iteration and prints the resulting count to std::cout.
//
// Diff hunk: the removed (-) lines guarded the table setup with the
// USE_HOT_DATA preprocessor macro; the added (+) lines guard it with the
// runtime flag g_use_hot_data instead, and the query text gains a trailing ';'.
static void table_count (benchmark::State& state) {
288
286
for (auto _ : state) {
289
- # ifndef USE_HOT_DATA
290
- createTaxiTable ();
291
- populateTaxiTable ();
292
- # endif
287
// Setup runs inside the benchmark loop, so when g_use_hot_data is false each
// iteration calls createTaxiTable() and populateTaxiTable() before the query.
+ if (!g_use_hot_data) {
288
+ createTaxiTable ();
289
+ populateTaxiTable ();
290
+ }
293
291
294
- auto res = v<int64_t >(run_simple_agg (" select count(*) from trips" , g_device_type));
292
// v<int64_t>() converts the aggregate's result value to int64_t for printing.
+ auto res = v<int64_t >(run_simple_agg (" select count(*) from trips; " , g_device_type));
295
293
// Printed on every iteration, also inside the benchmark loop.
std::cout << " Number of loaded tuples: " << res << std::endl;
296
294
}
297
295
}
298
296
299
297
// Benchmark: per iteration, runs a query that counts trips grouped by cab_type
// on g_device_type. The query result is discarded.
//
// Diff hunk: the removed (-) lines guarded the table setup with the
// USE_HOT_DATA preprocessor macro; the added (+) lines guard it with the
// runtime flag g_use_hot_data instead, and the query text gains a trailing ';'.
static void taxi_q1 (benchmark::State& state) {
300
298
for (auto _ : state) {
301
- # ifndef USE_HOT_DATA
302
- createTaxiTable ();
303
- populateTaxiTable ();
304
- # endif
299
// Setup runs inside the benchmark loop, so when g_use_hot_data is false each
// iteration calls createTaxiTable() and populateTaxiTable() before the query.
+ if (!g_use_hot_data) {
300
+ createTaxiTable ();
301
+ populateTaxiTable ();
302
+ }
305
303
306
- run_multiple_agg (" select cab_type, count(*) from trips group by cab_type" ,
304
+ run_multiple_agg (" select cab_type, count(*) from trips group by cab_type; " ,
307
305
g_device_type);
308
306
}
309
307
}
310
308
311
309
// Benchmark: per iteration, runs a query computing avg(total_amount) grouped by
// passenger_count on g_device_type. The query result is discarded.
//
// Diff hunk: the removed (-) lines guarded the table setup with the
// USE_HOT_DATA preprocessor macro; the added (+) lines guard it with the
// runtime flag g_use_hot_data instead, and the query text gains a trailing ';'.
static void taxi_q2 (benchmark::State& state) {
312
310
for (auto _ : state) {
313
- # ifndef USE_HOT_DATA
314
- createTaxiTable ();
315
- populateTaxiTable ();
316
- # endif
311
// Setup runs inside the benchmark loop, so when g_use_hot_data is false each
// iteration calls createTaxiTable() and populateTaxiTable() before the query.
+ if (!g_use_hot_data) {
312
+ createTaxiTable ();
313
+ populateTaxiTable ();
314
+ }
317
315
318
316
run_multiple_agg (
319
- " SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count" ,
317
+ " SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count; " ,
320
318
g_device_type);
321
319
}
322
320
}
323
321
324
322
// Benchmark: per iteration, runs a query counting trips grouped by
// passenger_count and the year extracted from pickup_datetime, on
// g_device_type. The query result is discarded.
//
// Diff hunk: the removed (-) lines guarded the table setup with the
// USE_HOT_DATA preprocessor macro; the added (+) lines guard it with the
// runtime flag g_use_hot_data instead, and the query text gains a trailing ';'.
static void taxi_q3 (benchmark::State& state) {
325
323
for (auto _ : state) {
326
- #ifndef USE_HOT_DATA
327
- createTaxiTable ();
328
- populateTaxiTable ();
329
- #endif
330
-
324
// Setup runs inside the benchmark loop, so when g_use_hot_data is false each
// iteration calls createTaxiTable() and populateTaxiTable() before the query.
+ if (!g_use_hot_data) {
325
+ createTaxiTable ();
326
+ populateTaxiTable ();
327
+ }
331
328
run_multiple_agg (
332
329
// The query is assembled from adjacent string literals.
" SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
333
- " count(*) FROM trips GROUP BY passenger_count, pickup_year" ,
330
+ " count(*) FROM trips GROUP BY passenger_count, pickup_year; " ,
334
331
g_device_type);
335
332
}
336
333
}
337
334
338
335
// Benchmark: per iteration, runs a query counting trips grouped by
// passenger_count, pickup year and integer-cast trip_distance, ordered by
// pickup_year and descending count, on g_device_type. The query result is
// discarded.
//
// Diff hunk: the removed (-) lines guarded the table setup with the
// USE_HOT_DATA preprocessor macro; the added (+) lines guard it with the
// runtime flag g_use_hot_data instead, and the query text gains a trailing ';'.
static void taxi_q4 (benchmark::State& state) {
339
336
for (auto _ : state) {
340
- # ifndef USE_HOT_DATA
341
- createTaxiTable ();
342
- populateTaxiTable ();
343
- # endif
337
// Setup runs inside the benchmark loop, so when g_use_hot_data is false each
// iteration calls createTaxiTable() and populateTaxiTable() before the query.
+ if (!g_use_hot_data) {
338
+ createTaxiTable ();
339
+ populateTaxiTable ();
340
+ }
344
341
345
342
run_multiple_agg (
346
343
// The query is assembled from adjacent string literals.
" SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
347
344
" cast(trip_distance as int) AS distance, count(*) AS the_count FROM trips GROUP "
348
345
" BY passenger_count, pickup_year, distance ORDER BY pickup_year, the_count "
349
- " desc" ,
346
+ " desc; " ,
350
347
g_device_type);
351
348
}
352
349
}
@@ -406,6 +403,11 @@ int main(int argc, char* argv[]) {
406
403
->implicit_value (ExecutorDeviceType::GPU)
407
404
->default_value (ExecutorDeviceType::CPU),
408
405
" Device type to use." );
406
+ desc.add_options ()(" use-hot-data" ,
407
+ po::value<bool >(&g_use_hot_data)
408
+ ->implicit_value (true )
409
+ ->default_value (g_use_hot_data),
410
+ " Use prepopulated taxi data in queries." );
409
411
410
412
desc.add_options ()(
411
413
" use-lazy-materialization" ,
@@ -434,22 +436,20 @@ int main(int argc, char* argv[]) {
434
436
}
435
437
436
438
try {
437
- #ifdef USE_HOT_DATA
438
439
createTaxiTable ();
439
- populateTaxiTable ();
440
- #else
441
- if (g_use_parquet) {
440
+ if (!g_use_hot_data && g_use_parquet) {
442
441
throw std::runtime_error (" Cannot use parquet files in cold data mode yet." );
443
442
}
444
- createTaxiTable ();
445
443
auto table_info = getStorage ()->getTableInfo (getStorage ()->dbId (), " trips" );
446
444
if (!table_info) {
447
445
throw std::runtime_error (" Cannot find table \" trips\" , creation failed?" );
448
446
}
449
447
450
448
auto col_infos = getStorage ()->listColumns (table_info->db_id , table_info->table_id );
451
449
g_taxi_data_files = readTaxiFilesCsv (col_infos);
452
- #endif
450
+ if (g_use_hot_data) {
451
+ loadTaxiArrowData ();
452
+ }
453
453
// warmup();
454
454
::benchmark::RunSpecifiedBenchmarks ();
455
455
} catch (const std::exception & e) {
0 commit comments