Apollo  6.0
Open source self driving car software
kernels.h
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright 2020 The Apollo Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *****************************************************************************/
16 
17 #pragma once
18 
19 #include <iostream>
20 
22 
23 namespace apollo {
24 namespace perception {
25 namespace inference {
26 
// Compile-time constants exposed to users of this header.
// NOTE(review): presumably the number of corners of a 2D box - confirm against
// the code that uses it.
27 #define NUM_2D_BOX_CORNERS_MACRO 4
// NOTE(review): presumably a threads-per-block value for kernel launches -
// confirm against the code that uses it.
28 #define NUM_THREADS_MACRO 64
29 
// Loop header for device code: declares `int i`, starts it at the calling
// thread's global x index (blockIdx.x * blockDim.x + threadIdx.x), runs while
// i < (n), and advances by the total thread count along x
// (blockDim.x * gridDim.x), i.e. a grid-stride loop. Only the x dimension of
// the launch configuration is consulted. The index is a plain `int`, so the
// index arithmetic is performed in 32 bits.
30 #define CUDA_KERNEL_LOOP(i, n) \
31  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
32  i += blockDim.x * gridDim.x)
33 
// Integer quotient of m by n, plus 1 when the remainder (m % n) is positive;
// for positive operands this is division rounded up. Each argument is
// evaluated twice, so avoid passing expressions with side effects.
34 #define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
35 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `boxes` and `deltas` are read-only (const float *); `out_boxes` is a mutable
// float buffer, presumably the destination of the transformed boxes.
36 void bbox_transform_inv_cuda(int block_size, int thread_size, int shared_mem,
37  cudaStream_t stream, const int nthreads,
38  const float *boxes, const float *deltas,
39  const int num_box, const int num_channel,
40  float *out_boxes);
41 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `boxes` is the only pointer argument and is non-const, so it is presumably
// modified in place using the `height` / `width` limits - TODO confirm.
42 void clip_boxes_cuda(int block_size, int thread_size, int shared_mem,
43  cudaStream_t stream, const int nthreads, float *boxes,
44  const float height, const float width);
45 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `boxes`, `scores` and `all_probs` are read-only inputs (const float *);
// `filtered_boxes`, `filtered_scores`, `filtered_all_probs` and
// `filtered_count` are mutable buffers, presumably the outputs of the filter.
46 void filter_boxes_cuda(
47  int block_size, int thread_size, int shared_mem, cudaStream_t stream,
48  const int nthreads, const float *boxes, const float *scores,
49  const float *all_probs, const int num_box, const int num_channel,
50  const int num_class, const int num_prob, const int filter_channel,
51  const int filter_class, const int min_size_mode, const float min_size_h,
52  const float min_size_w, const float threshold_score, float *filtered_boxes,
53  float *filtered_scores, float *filtered_all_probs, int *filtered_count);
54 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `boxes`, `scores`, `all_probs`, `indexes` and `count` are read-only
// (pointer-to-const); `out_boxes`, `out_scores` and `out_all_probs` are mutable
// float buffers, presumably the destinations of the kept entries.
55 void keep_topN_boxes_cuda(int block_size, int thread_size, int shared_mem,
56  cudaStream_t stream, const int nthreads,
57  const float *boxes, const float *scores,
58  const float *all_probs, const int *indexes,
59  const int *count, const bool keep_score,
60  const int num_box, const int num_prob, const int topN,
61  float *out_boxes, float *out_scores,
62  float *out_all_probs);
63 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `in_data` and `add_vec` are read-only (const float *); `out_data` is a
// mutable float buffer. `add_vec_size` is passed alongside `add_vec`,
// presumably as its element count - TODO confirm.
64 void repeatedly_add_cuda(int block_size, int thread_size, int shared_mem,
65  cudaStream_t stream, const int nthreads,
66  const float *in_data, float *out_data,
67  const float *add_vec, int add_vec_size);
68 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `in_data` and `mul_vec` are read-only (const float *); `out_data` is a
// mutable float buffer. `mul_vec_size` is passed alongside `mul_vec`,
// presumably as its element count - TODO confirm.
69 void repeatedly_mul_cuda(int block_size, int thread_size, int shared_mem,
70  cudaStream_t stream, const int nthreads,
71  const float *in_data, float *out_data,
72  const float *mul_vec, int mul_vec_size);
73 
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream look like the
// kernel launch configuration, and nthreads like the number of work items -
// confirm in the implementation.
// `in_data` and `slice_axises` are read-only (pointer-to-const); `out_data` is
// a mutable float buffer. `slice_axis_num` is presumably the element count of
// `slice_axises` - TODO confirm.
74 void slice2d_cuda(int block_size, int thread_size, int shared_mem,
75  cudaStream_t stream, const int nthreads, const float *in_data,
76  float *out_data, const int *slice_axises, int slice_axis_num,
77  int input_axis_size);
78 
// GPU Non-Maximum Suppression for network output.
// Declaration only; the definition is not visible in this header.
// All pointer arguments are non-const, so which of them are inputs and which
// are outputs cannot be told from this declaration.
// NOTE(review): the `dev_` prefix on dev_sorted_box_for_nms suggests a
// device-memory pointer, and `host_filter_count` a host-side value - confirm
// in the implementation.
96 void NmsForward(bool rpn_proposal_output_score, int host_filter_count,
97  int num_box_corners, float nms_overlap_threshold,
98  int num_candidate, int top_n, int batch_id, int num_prob,
99  float *dev_sorted_box_for_nms, float *scores, float *all_probs,
100  float *out_boxes, int *acc_box_num, cudaStream_t stream);
101 
102 } // namespace inference
103 } // namespace perception
104 } // namespace apollo
PlanningContext is the runtime context in planning. It is persistent across multiple frames...
Definition: atomic_hash_map.h:25
void clip_boxes_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, float *boxes, const float height, const float width)
void NmsForward(bool rpn_proposal_output_score, int host_filter_count, int num_box_corners, float nms_overlap_threshold, int num_candidate, int top_n, int batch_id, int num_prob, float *dev_sorted_box_for_nms, float *scores, float *all_probs, float *out_boxes, int *acc_box_num, cudaStream_t stream)
GPU Non-Maximum Suppression for network output.
void repeatedly_add_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *in_data, float *out_data, const float *add_vec, int add_vec_size)
void slice2d_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *in_data, float *out_data, const int *slice_axises, int slice_axis_num, int input_axis_size)
void repeatedly_mul_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *in_data, float *out_data, const float *mul_vec, int mul_vec_size)
void bbox_transform_inv_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *boxes, const float *deltas, const int num_box, const int num_channel, float *out_boxes)
void keep_topN_boxes_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *boxes, const float *scores, const float *all_probs, const int *indexes, const int *count, const bool keep_score, const int num_box, const int num_prob, const int topN, float *out_boxes, float *out_scores, float *out_all_probs)
void filter_boxes_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *boxes, const float *scores, const float *all_probs, const int num_box, const int num_channel, const int num_class, const int num_prob, const int filter_channel, const int filter_class, const int min_size_mode, const float min_size_h, const float min_size_w, const float threshold_score, float *filtered_boxes, float *filtered_scores, float *filtered_all_probs, int *filtered_count)