@@ -13,18 +13,17 @@ boost::filesystem::path g_data_path;
13
13
size_t num_threads = 15 ;
14
14
// Fragment size is derived from num_threads at static-initialization time.
size_t g_fragment_size = 160000000 / num_threads;
15
15
bool g_use_parquet{false };
16
// Runtime switch added by this change (bound to the --use-hot-data option in main).
// When true, the benchmark bodies below skip their per-iteration
// createTaxiTable()/populateTaxiTable() calls.
+ bool g_use_hot_data{false };
16
17
ExecutorDeviceType g_device_type{ExecutorDeviceType::GPU};
17
18
18
19
using namespace TestHelpers ::ArrowSQLRunner;
19
20
20
- // #define USE_HOT_DATA
21
// NOTE(review): the benchmark bodies visible in this change now branch on
// g_use_hot_data at runtime instead of testing this macro; confirm whether
// USE_HOT_DATA is still referenced anywhere before keeping it defined.
+ #define USE_HOT_DATA
21
22
#define PARALLEL_IMPORT_ENABLED
22
23
23
24
// when we want to measure storage latencies, read the csv files before starting the
24
25
// benchmark
25
- #ifndef USE_HOT_DATA
26
26
std::vector<std::shared_ptr<arrow::Table>> g_taxi_data_files;
27
- #endif
28
27
29
28
std::istream& operator >>(std::istream& in, ExecutorDeviceType& device_type) {
30
29
std::string token;
@@ -286,67 +285,66 @@ T v(const TargetValue& r) {
286
285
287
286
// Benchmark body: on every iteration of the benchmark loop it optionally rebuilds the
// trips table (createTaxiTable + populateTaxiTable), runs a count(*) over trips on
// g_device_type, and prints the resulting count to stdout.
// This change swaps the compile-time USE_HOT_DATA guard (removed lines) for a runtime
// check of g_use_hot_data (added lines) and appends a semicolon to the query text.
// NOTE(review): the create/populate calls sit inside the `for (auto _ : state)` loop, so
// they presumably count toward the measured time -- confirm that is intended.
static void table_count (benchmark::State& state) {
288
287
for (auto _ : state) {
289
- # ifndef USE_HOT_DATA
290
- createTaxiTable ();
291
- populateTaxiTable ();
292
- # endif
288
+ if (!g_use_hot_data) {
289
+ createTaxiTable ();
290
+ populateTaxiTable ();
291
+ }
293
292
294
- auto res = v<int64_t >(run_simple_agg (" select count(*) from trips" , g_device_type));
293
+ auto res = v<int64_t >(run_simple_agg (" select count(*) from trips; " , g_device_type));
295
294
std::cout << " Number of loaded tuples: " << res << std::endl;
296
295
}
297
296
}
298
297
299
298
// Benchmark body for query 1: per iteration, optionally rebuilds the trips table and
// then runs a count(*) grouped by cab_type on g_device_type. The query result is
// discarded.
// This change swaps the compile-time USE_HOT_DATA guard (removed lines) for a runtime
// check of g_use_hot_data (added lines) and appends a semicolon to the query text.
// NOTE(review): table creation/population happens inside the benchmark loop when
// g_use_hot_data is false -- presumably it is part of the measured time; confirm.
static void taxi_q1 (benchmark::State& state) {
300
299
for (auto _ : state) {
301
- # ifndef USE_HOT_DATA
302
- createTaxiTable ();
303
- populateTaxiTable ();
304
- # endif
300
+ if (!g_use_hot_data) {
301
+ createTaxiTable ();
302
+ populateTaxiTable ();
303
+ }
305
304
306
- run_multiple_agg (" select cab_type, count(*) from trips group by cab_type" ,
305
+ run_multiple_agg (" select cab_type, count(*) from trips group by cab_type; " ,
307
306
g_device_type);
308
307
}
309
308
}
310
309
311
310
// Benchmark body for query 2: per iteration, optionally rebuilds the trips table and
// then runs avg(total_amount) grouped by passenger_count on g_device_type. The query
// result is discarded.
// This change swaps the compile-time USE_HOT_DATA guard (removed lines) for a runtime
// check of g_use_hot_data (added lines) and appends a semicolon to the query text.
// NOTE(review): table creation/population happens inside the benchmark loop when
// g_use_hot_data is false -- presumably it is part of the measured time; confirm.
static void taxi_q2 (benchmark::State& state) {
312
311
for (auto _ : state) {
313
- # ifndef USE_HOT_DATA
314
- createTaxiTable ();
315
- populateTaxiTable ();
316
- # endif
312
+ if (!g_use_hot_data) {
313
+ createTaxiTable ();
314
+ populateTaxiTable ();
315
+ }
317
316
318
317
run_multiple_agg (
319
- " SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count" ,
318
+ " SELECT passenger_count, avg(total_amount) FROM trips GROUP BY passenger_count; " ,
320
319
g_device_type);
321
320
}
322
321
}
323
322
324
323
// Benchmark body for query 3: per iteration, optionally rebuilds the trips table and
// then runs a count(*) grouped by passenger_count and the year extracted from
// pickup_datetime, on g_device_type. The query result is discarded.
// This change swaps the compile-time USE_HOT_DATA guard (removed lines) for a runtime
// check of g_use_hot_data (added lines) and appends a semicolon to the query text.
// NOTE(review): table creation/population happens inside the benchmark loop when
// g_use_hot_data is false -- presumably it is part of the measured time; confirm.
static void taxi_q3 (benchmark::State& state) {
325
324
for (auto _ : state) {
326
- #ifndef USE_HOT_DATA
327
- createTaxiTable ();
328
- populateTaxiTable ();
329
- #endif
330
-
325
+ if (!g_use_hot_data) {
326
+ createTaxiTable ();
327
+ populateTaxiTable ();
328
+ }
331
329
run_multiple_agg (
332
330
" SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
333
- " count(*) FROM trips GROUP BY passenger_count, pickup_year" ,
331
+ " count(*) FROM trips GROUP BY passenger_count, pickup_year; " ,
334
332
g_device_type);
335
333
}
336
334
}
337
335
338
336
// Benchmark body for query 4: per iteration, optionally rebuilds the trips table and
// then runs a count(*) grouped by passenger_count, pickup year and integer-cast
// trip_distance, ordered by pickup_year and descending count, on g_device_type. The
// query result is discarded.
// This change swaps the compile-time USE_HOT_DATA guard (removed lines) for a runtime
// check of g_use_hot_data (added lines) and appends a semicolon to the query text.
// NOTE(review): table creation/population happens inside the benchmark loop when
// g_use_hot_data is false -- presumably it is part of the measured time; confirm.
static void taxi_q4 (benchmark::State& state) {
339
337
for (auto _ : state) {
340
- # ifndef USE_HOT_DATA
341
- createTaxiTable ();
342
- populateTaxiTable ();
343
- # endif
338
+ if (!g_use_hot_data) {
339
+ createTaxiTable ();
340
+ populateTaxiTable ();
341
+ }
344
342
345
343
run_multiple_agg (
346
344
" SELECT passenger_count, extract(year from pickup_datetime) AS pickup_year, "
347
345
" cast(trip_distance as int) AS distance, count(*) AS the_count FROM trips GROUP "
348
346
" BY passenger_count, pickup_year, distance ORDER BY pickup_year, the_count "
349
- " desc" ,
347
+ " desc; " ,
350
348
g_device_type);
351
349
}
352
350
}
@@ -406,6 +404,11 @@ int main(int argc, char* argv[]) {
406
404
->implicit_value (ExecutorDeviceType::GPU)
407
405
->default_value (ExecutorDeviceType::CPU),
408
406
" Device type to use." );
407
+ desc.add_options ()(" use-hot-data" ,
408
+ po::value<bool >(&g_use_hot_data)
409
+ ->implicit_value (true )
410
+ ->default_value (g_use_hot_data),
411
+ " Use prepopulated taxi data in queries." );
409
412
410
413
desc.add_options ()(
411
414
" use-lazy-materialization" ,
@@ -434,22 +437,20 @@ int main(int argc, char* argv[]) {
434
437
}
435
438
436
439
try {
437
- #ifdef USE_HOT_DATA
438
440
createTaxiTable ();
439
- populateTaxiTable ();
440
- #else
441
- if (g_use_parquet) {
441
+ if (!g_use_hot_data && g_use_parquet) {
442
442
throw std::runtime_error (" Cannot use parquet files in cold data mode yet." );
443
443
}
444
- createTaxiTable ();
445
444
auto table_info = getStorage ()->getTableInfo (getStorage ()->dbId (), " trips" );
446
445
if (!table_info) {
447
446
throw std::runtime_error (" Cannot find table \" trips\" , creation failed?" );
448
447
}
449
448
450
449
auto col_infos = getStorage ()->listColumns (table_info->db_id , table_info->table_id );
451
450
g_taxi_data_files = readTaxiFilesCsv (col_infos);
452
- #endif
451
+ if (g_use_hot_data) {
452
+ loadTaxiArrowData ();
453
+ }
453
454
// warmup();
454
455
::benchmark::RunSpecifiedBenchmarks ();
455
456
} catch (const std::exception & e) {
0 commit comments