24 namespace perception {
// Compile-time constants shared by the kernels declared in this header.
// NOTE(review): judging by the names, the first is the corner count of a 2D
// box and the second a threads-per-block default -- confirm at the use sites.
#define NUM_2D_BOX_CORNERS_MACRO 4
#define NUM_THREADS_MACRO 64

// Loop header for device code: `i` starts at this thread's flat index
// (blockIdx.x * blockDim.x + threadIdx.x) and advances by the total number of
// threads in the x dimension (blockDim.x * gridDim.x) while i < n.
// `i` is declared as int, so `n` must be representable as int.
#define CUDA_KERNEL_LOOP(i, n)                                 \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
       i += blockDim.x * gridDim.x)

// Integer division of m by n, plus one when there is a positive remainder
// (i.e. rounds up for non-negative m and positive n).
// Both arguments are evaluated twice; do not pass expressions with side
// effects.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration (grid size, block size, dynamic shared-memory bytes,
// stream) -- confirm against the definition.
// Reads `boxes` and `deltas` (const) and writes `out_boxes`; the layout implied
// by num_box / num_channel is not specified here -- TODO confirm.
void bbox_transform_inv_cuda(int block_size, int thread_size, int shared_mem,
                             cudaStream_t stream, const int nthreads,
                             const float *boxes, const float *deltas,
                             const int num_box, const int num_channel,
                             float *out_boxes);
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration -- confirm against the definition.
// `boxes` is the only non-const pointer, so it is the only buffer this call can
// modify; how `height` and `width` bound the boxes is not shown here -- TODO
// confirm.
void clip_boxes_cuda(int block_size, int thread_size, int shared_mem,
                     cudaStream_t stream, const int nthreads, float *boxes,
                     const float height, const float width);
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration -- confirm against the definition.
// Inputs (const): boxes, scores, all_probs. Outputs (non-const):
// filtered_boxes, filtered_scores, filtered_all_probs, filtered_count.
// The exact filtering rule (min_size_mode, min_size_h/w, threshold_score,
// filter_channel, filter_class) is defined by the implementation -- TODO
// confirm before relying on it.
void filter_boxes_cuda(
    int block_size, int thread_size, int shared_mem, cudaStream_t stream,
    const int nthreads, const float *boxes, const float *scores,
    const float *all_probs, const int num_box, const int num_channel,
    const int num_class, const int num_prob, const int filter_channel,
    const int filter_class, const int min_size_mode, const float min_size_h,
    const float min_size_w, const float threshold_score, float *filtered_boxes,
    float *filtered_scores, float *filtered_all_probs, int *filtered_count);
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration -- confirm against the definition.
// Inputs (const): boxes, scores, all_probs, indexes, count. Outputs
// (non-const): out_boxes, out_scores, out_all_probs.
// NOTE(review): `indexes` / `count` look like a precomputed ordering and its
// length, and `keep_score` like a switch for writing out_scores -- verify
// against the implementation.
void keep_topN_boxes_cuda(int block_size, int thread_size, int shared_mem,
                          cudaStream_t stream, const int nthreads,
                          const float *boxes, const float *scores,
                          const float *all_probs, const int *indexes,
                          const int *count, const bool keep_score,
                          const int num_box, const int num_prob, const int topN,
                          float *out_boxes, float *out_scores,
                          float *out_all_probs);
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration -- confirm against the definition.
// Reads `in_data` and `add_vec` (const) and writes `out_data`.
// NOTE(review): the name suggests add_vec (add_vec_size elements) is applied
// cyclically across in_data -- confirm the indexing in the implementation.
void repeatedly_add_cuda(int block_size, int thread_size, int shared_mem,
                         cudaStream_t stream, const int nthreads,
                         const float *in_data, float *out_data,
                         const float *add_vec, int add_vec_size);
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration -- confirm against the definition.
// Reads `in_data` and `mul_vec` (const) and writes `out_data`.
// NOTE(review): the name suggests mul_vec (mul_vec_size elements) is applied
// cyclically across in_data -- confirm the indexing in the implementation.
void repeatedly_mul_cuda(int block_size, int thread_size, int shared_mem,
                         cudaStream_t stream, const int nthreads,
                         const float *in_data, float *out_data,
                         const float *mul_vec, int mul_vec_size);
// Declaration only; the definition is not visible in this header.
// NOTE(review): block_size, thread_size, shared_mem and stream are presumably
// the launch configuration -- confirm against the definition.
// Reads `in_data` and `slice_axises` (const) and writes `out_data`.
// NOTE(review): slice_axises presumably lists slice_axis_num indices selected
// out of an axis of length input_axis_size -- verify against the
// implementation.
void slice2d_cuda(int block_size, int thread_size, int shared_mem,
                  cudaStream_t stream, const int nthreads, const float *in_data,
                  float *out_data, const int *slice_axises, int slice_axis_num,
                  int input_axis_size);
/**
 * @brief GPU Non-Maximum Suppression for network output.
 *
 * Declaration only; the definition is not visible in this header. Unlike the
 * other prototypes here, no launch-configuration parameters are taken; only a
 * stream is passed (as the last argument).
 *
 * NOTE(review): the host_/dev_ prefixes suggest host_filter_count is a
 * host-side value and dev_sorted_box_for_nms a device buffer; whether the
 * remaining pointers (scores, all_probs, out_boxes, acc_box_num) are device
 * pointers, and what exactly is written to each, must be confirmed against
 * the implementation.
 */
void NmsForward(bool rpn_proposal_output_score, int host_filter_count,
                int num_box_corners, float nms_overlap_threshold,
                int num_candidate, int top_n, int batch_id, int num_prob,
                float *dev_sorted_box_for_nms, float *scores, float *all_probs,
                float *out_boxes, int *acc_box_num, cudaStream_t stream);
PlanningContext is the runtime context in planning. It is persistent across multiple frames...
Definition: atomic_hash_map.h:25
void clip_boxes_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, float *boxes, const float height, const float width)
void NmsForward(bool rpn_proposal_output_score, int host_filter_count, int num_box_corners, float nms_overlap_threshold, int num_candidate, int top_n, int batch_id, int num_prob, float *dev_sorted_box_for_nms, float *scores, float *all_probs, float *out_boxes, int *acc_box_num, cudaStream_t stream)
GPU Non-Maximum Suppression for network output.
void repeatedly_add_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *in_data, float *out_data, const float *add_vec, int add_vec_size)
void slice2d_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *in_data, float *out_data, const int *slice_axises, int slice_axis_num, int input_axis_size)
void repeatedly_mul_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *in_data, float *out_data, const float *mul_vec, int mul_vec_size)
void bbox_transform_inv_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *boxes, const float *deltas, const int num_box, const int num_channel, float *out_boxes)
void keep_topN_boxes_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *boxes, const float *scores, const float *all_probs, const int *indexes, const int *count, const bool keep_score, const int num_box, const int num_prob, const int topN, float *out_boxes, float *out_scores, float *out_all_probs)
void filter_boxes_cuda(int block_size, int thread_size, int shared_mem, cudaStream_t stream, const int nthreads, const float *boxes, const float *scores, const float *all_probs, const int num_box, const int num_channel, const int num_class, const int num_prob, const int filter_channel, const int filter_class, const int min_size_mode, const float min_size_h, const float min_size_w, const float threshold_score, float *filtered_boxes, float *filtered_scores, float *filtered_all_probs, int *filtered_count)