[
{
"name": "LAYER_CUDNN_ACTIVATION_FWD_FLOAT32__BatchSize_1__18396597787597793727<CUDNN_ACTIVATION_RELU>/input[0]:1/input[1]:576/input[2]:14/input[3]:14/input[4]:-1/input[5]:-1/input[6]:-1/input[7]:-1/batch_size:1/manual_time",
"iterations": 82445,
"real_time": 8.4861481288399464e+03,
"cpu_time": 1.8410742640548084e+04,
"time_unit": "ns",
"items_per_second": 1.3303562262403360e+10,
"activation_mode": 1.0000000000000000e+00,
"batch_size": 1.0000000000000000e+00,
"benchmark_file:/home/ubuntu/scope/scopes/cudnn_scope/src/cudnn_activation_fwd.cpp": 1.0002183616584573e+19,
"benchmark_func:void iLAYER_CUDNN_ACTIVATION_FWD_Impl(benchmark::State&) [with T = float; cudnnActivationMode_t activation_mode = (cudnnActivationMode_t)1]": 1.0148130340695654e+19,
"compute_capability:7.0": 3.8315573330875090e+18,
"cublas_version:10.1": 9.6389230149667533e+17,
"cuda_driver_version:10.10": 3.1909591731558548e+18,
"cuda_runtime_version:10.10": 3.1909591731558548e+18,
"cudnn_version:7.6.3": 1.5847091547736381e+19,
"cupti_enabled": 0.0000000000000000e+00,
"demangled_benchmark_func:iLAYER_CUDNN_ACTIVATION_FWD_Impl": 1.7018446903751907e+19,
"gpu_name:Tesla V100-SXM2-16GB": 6.4152751867444204e+18,
"host_name:ip-172-31-27-158": 2.9285567833032914e+18,
"input[0]": 1.0000000000000000e+00,
"input[1]": 5.7600000000000000e+02,
"input[2]": 1.4000000000000000e+01,
"input[3]": 1.4000000000000000e+01,
"input[4]": -1.0000000000000000e+00,
"input[5]": -1.0000000000000000e+00,
"input[6]": -1.0000000000000000e+00,
"input[7]": -1.0000000000000000e+00,
"input_batch_size": 1.0000000000000000e+00,
"input_channels": 5.7600000000000000e+02,
"input_height": 1.4000000000000000e+01,
"input_size": 1.1289600000000000e+05,
"input_width": 1.4000000000000000e+01,
"num_iterations": 8.2445000000000000e+04,
"output_batch_size": 1.0000000000000000e+00,
"output_channels": 5.7600000000000000e+02,
"output_height": 1.4000000000000000e+01,
"output_size": 1.1289600000000000e+05,
"output_width": 1.4000000000000000e+01,
"predicted_flops": 1.3303562262403360e+10,
"predicted_flops_count": 1.1289600000000000e+05
},
{
"name": "LAYER_CUDNN_CONV_FWD_1_FLOAT32__BatchSize_1__13856116588321986858<CUDNN_CONVOLUTION_FWD_ALGO_GEMM>/input[0]:1/input[1]:576/input[2]:7/input[3]:7/filter_count:160/filter_height:1/filter_width:1/pad_height:0/pad_width:0/stride_height:1/stride_width:1/dilation_height:1/dilation_width:1/group:1/batch_size:1/conv_fwd_type:1/conv_bwd_type:0/manual_time",
"iterations": 5784,
"real_time": 1.2106441175483409e+05,
"cpu_time": 1.3202411497202059e+05,
"time_unit": "ns",
"items_per_second": 3.7301135276194687e+10,
"advised_convolution_algorithm": 0.0000000000000000e+00,
"advised_determinism": 1.0000000000000000e+00,
"advised_memory": 1.1289600000000000e+05,
"advised_time": 1.2492799758911133e-01,
"batch_size": 1.0000000000000000e+00,
"benchmark_file:/home/ubuntu/scope/scopes/cudnn_scope/src/cudnn_conv_fwd.inc": 1.3448635149818148e+18,
"benchmark_func:void iLAYER_CUDNN_CONV_FWD_Impl(benchmark::State&) [with T = float; cudnnConvolutionFwdAlgo_t convolution_algorithm = (cudnnConvolutionFwdAlgo_t)2; cudnnMathType_t math_type = (cudnnMathType_t)0]": 1.4280899851402570e+19,
"compute_capability:7.0": 3.8315573330875090e+18,
"conv_bwd_type": 0.0000000000000000e+00,
"conv_fwd_type": 1.0000000000000000e+00,
"convolution_algorithm": 2.0000000000000000e+00,
"cublas_version:10.1": 9.6389230149667533e+17,
"cuda_driver_version:10.10": 3.1909591731558548e+18,
"cuda_runtime_version:10.10": 3.1909591731558548e+18,
"cudnn_version:7.6.3": 1.5847091547736381e+19,
"cupti_enabled": 0.0000000000000000e+00,
"demangled_benchmark_func:iLAYER_CUDNN_CONV_FWD_Impl": 7.6772958670751939e+18,
"dilation_height": 1.0000000000000000e+00,
"dilation_width": 1.0000000000000000e+00,
"filter_count": 1.6000000000000000e+02,
"filter_height": 1.0000000000000000e+00,
"filter_width": 1.0000000000000000e+00,
"gpu_name:Tesla V100-SXM2-16GB": 6.4152751867444204e+18,
"group": 1.0000000000000000e+00,
"host_name:ip-172-31-27-158": 2.9285567833032914e+18,
"input[0]": 1.0000000000000000e+00,
"input[1]": 5.7600000000000000e+02,
"input[2]": 7.0000000000000000e+00,
"input[3]": 7.0000000000000000e+00,
"input_batch_size": 1.0000000000000000e+00,
"input_channels": 5.7600000000000000e+02,
"input_height": 7.0000000000000000e+00,
"input_size": 2.8224000000000000e+04,
"input_width": 7.0000000000000000e+00,
"math_type": 0.0000000000000000e+00,
"num_filters": 1.6000000000000000e+02,
"num_iterations": 5.7840000000000000e+03,
"output_batch_size": 1.0000000000000000e+00,
"output_channels": 1.6000000000000000e+02,
"output_height": 7.0000000000000000e+00,
"output_size": 7.8400000000000000e+03,
"output_width": 7.0000000000000000e+00,
"pad_height": 0.0000000000000000e+00,
"pad_width": 0.0000000000000000e+00,
"predicted_advised_flops": 3.7301135276194687e+10,
"predicted_advised_flops_count": 4.5158400000000000e+06,
"predicted_flops": 3.7301135276194687e+10,
"predicted_flops_count": 4.5158400000000000e+06,
"stride_height": 1.0000000000000000e+00,
"stride_width": 1.0000000000000000e+00,
"workspace_bytes": 1.1289600000000000e+05,
"workspace_megabytes": 1.0766601562500000e-01
}
]