Apollo  6.0
Open source self driving car software
util.h
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright 2018 The Apollo Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *****************************************************************************/
16 
17 #pragma once
18 
19 #include <fcntl.h> /* low-level i/o */
20 #include <malloc.h>
21 #include <sys/ioctl.h>
22 #include <sys/mman.h>
23 #include <sys/stat.h>
24 #include <sys/time.h>
25 #include <sys/types.h>
26 #include <unistd.h>
27 #include <cassert>
28 #include <cerrno>
29 #include <cstdint>
30 #include <cstdio>
31 #include <cstdlib>
32 #include <cstring>
33 
34 #include <immintrin.h>
35 #include <x86intrin.h>
36 
37 namespace apollo {
38 namespace drivers {
39 namespace camera {
40 
// AVX conversion entry point. Only the prototype lives in this header; the
// definition is in another translation unit.
// NOTE(review): presumably reads NumPixels pixels of packed YUYV from `YUV`
// and writes RGB triples to `RGB` -- buffer sizes and alignment requirements
// are not visible here, confirm against the definition.
void yuyv2rgb_avx(unsigned char* YUV, unsigned char* RGB, int NumPixels);
42 
// Marks the small SIMD helpers in this header as force-inlined (GCC/Clang
// attribute). The attribute is spelled with the reserved __always_inline__
// form so a user macro named `always_inline` cannot interfere with it.
#define SIMD_INLINE inline __attribute__((__always_inline__))
44 
// Debug helpers taking a 256-bit integer vector by value. Only the
// prototypes are declared here.
// NOTE(review): judging by the names, the _i32 / _i16 variants presumably
// print the vector as 32-bit / 16-bit lanes -- confirm against the
// definitions.
void print_m256(const __m256i a);
void print_m256_i32(const __m256i a);
void print_m256_i16(const __m256i a);
48 
49 template <class T>
50 SIMD_INLINE char GetChar(T value, size_t index) {
51  return (reinterpret_cast<char *>(&value))[index];
52 }
53 
// Scalar building blocks for the constant __m256i initialisers below.
//
// Each SIMD_*_AS_LONGLONG macro widens its argument to int64_t and keeps only
// the low 8 / 16 / 32 bits. The argument is wrapped by the cast and the whole
// expansion is parenthesised, so expressions such as `x | y` can be passed
// safely and the result can be combined with any operator.
#define SIMD_CHAR_AS_LONGLONG(a) (static_cast<int64_t>(a) & 0xFF)

#define SIMD_SHORT_AS_LONGLONG(a) (static_cast<int64_t>(a) & 0xFFFF)

#define SIMD_INT_AS_LONGLONG(a) (static_cast<int64_t>(a) & 0xFFFFFFFF)

// SIMD_LL_SET*_EPI8 pack eight bytes into one 64-bit value, first argument
// in the least significant byte.
//   SET1: the same byte eight times.
//   SET2: the pair (a, b) repeated four times.
//   SETR: eight individual bytes.
#define SIMD_LL_SET1_EPI8(a) SIMD_LL_SET2_EPI8(a, a)

#define SIMD_LL_SET2_EPI8(a, b) SIMD_LL_SETR_EPI8(a, b, a, b, a, b, a, b)

#define SIMD_LL_SETR_EPI8(a, b, c, d, e, f, g, h)                         \
  (SIMD_CHAR_AS_LONGLONG(a) | (SIMD_CHAR_AS_LONGLONG(b) << 8) |           \
   (SIMD_CHAR_AS_LONGLONG(c) << 16) | (SIMD_CHAR_AS_LONGLONG(d) << 24) |  \
   (SIMD_CHAR_AS_LONGLONG(e) << 32) | (SIMD_CHAR_AS_LONGLONG(f) << 40) |  \
   (SIMD_CHAR_AS_LONGLONG(g) << 48) | (SIMD_CHAR_AS_LONGLONG(h) << 56))

// SIMD_LL_SET*_EPI16 pack four 16-bit values into one 64-bit value, first
// argument in the least significant 16 bits.
#define SIMD_LL_SET1_EPI16(a) SIMD_LL_SET2_EPI16(a, a)

#define SIMD_LL_SET2_EPI16(a, b) SIMD_LL_SETR_EPI16(a, b, a, b)

#define SIMD_LL_SETR_EPI16(a, b, c, d)                                    \
  (SIMD_SHORT_AS_LONGLONG(a) | (SIMD_SHORT_AS_LONGLONG(b) << 16) |        \
   (SIMD_SHORT_AS_LONGLONG(c) << 32) | (SIMD_SHORT_AS_LONGLONG(d) << 48))

// SIMD_LL_SET*_EPI32 pack two 32-bit values into one 64-bit value, first
// argument in the low half.
#define SIMD_LL_SET1_EPI32(a) SIMD_LL_SET2_EPI32(a, a)

#define SIMD_LL_SET2_EPI32(a, b) \
  (SIMD_INT_AS_LONGLONG(a) | (SIMD_INT_AS_LONGLONG(b) << 32))
95 
// Brace initialisers for constant __m256i values. Each macro expands to a
// list of four 64-bit words built with the SIMD_LL_* helpers; the first word
// initialises the lowest 64 bits of the vector.

// 32 copies of the byte `a`.
#define SIMD_MM256_SET1_EPI8(a) \
  { \
    SIMD_LL_SET1_EPI8(a), SIMD_LL_SET1_EPI8(a), \
    SIMD_LL_SET1_EPI8(a), SIMD_LL_SET1_EPI8(a) \
  }

// The byte pair (a0, a1) repeated 16 times.
#define SIMD_MM256_SET2_EPI8(a0, a1) \
  { \
    SIMD_LL_SET2_EPI8(a0, a1), SIMD_LL_SET2_EPI8(a0, a1), \
    SIMD_LL_SET2_EPI8(a0, a1), SIMD_LL_SET2_EPI8(a0, a1) \
  }

// 32 individual bytes, a0 being byte 0 of the vector.
#define SIMD_MM256_SETR_EPI8(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, \
                             ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, \
                             b8, b9, ba, bb, bc, bd, be, bf) \
  { \
    SIMD_LL_SETR_EPI8(a0, a1, a2, a3, a4, a5, a6, a7), \
    SIMD_LL_SETR_EPI8(a8, a9, aa, ab, ac, ad, ae, af), \
    SIMD_LL_SETR_EPI8(b0, b1, b2, b3, b4, b5, b6, b7), \
    SIMD_LL_SETR_EPI8(b8, b9, ba, bb, bc, bd, be, bf) \
  }

// 16 copies of the 16-bit value `a`.
#define SIMD_MM256_SET1_EPI16(a) \
  { \
    SIMD_LL_SET1_EPI16(a), SIMD_LL_SET1_EPI16(a), \
    SIMD_LL_SET1_EPI16(a), SIMD_LL_SET1_EPI16(a) \
  }

// The 16-bit pair (a0, a1) repeated 8 times.
#define SIMD_MM256_SET2_EPI16(a0, a1) \
  { \
    SIMD_LL_SET2_EPI16(a0, a1), SIMD_LL_SET2_EPI16(a0, a1), \
    SIMD_LL_SET2_EPI16(a0, a1), SIMD_LL_SET2_EPI16(a0, a1) \
  }

// 16 individual 16-bit values, a0 being lane 0 of the vector.
#define SIMD_MM256_SETR_EPI16(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, \
                              ac, ad, ae, af) \
  { \
    SIMD_LL_SETR_EPI16(a0, a1, a2, a3), \
    SIMD_LL_SETR_EPI16(a4, a5, a6, a7), \
    SIMD_LL_SETR_EPI16(a8, a9, aa, ab), \
    SIMD_LL_SETR_EPI16(ac, ad, ae, af) \
  }

// 8 copies of the 32-bit value `a`.
#define SIMD_MM256_SET1_EPI32(a) \
  { \
    SIMD_LL_SET1_EPI32(a), SIMD_LL_SET1_EPI32(a), \
    SIMD_LL_SET1_EPI32(a), SIMD_LL_SET1_EPI32(a) \
  }

// The 32-bit pair (a0, a1) repeated 4 times.
#define SIMD_MM256_SET2_EPI32(a0, a1) \
  { \
    SIMD_LL_SET2_EPI32(a0, a1), SIMD_LL_SET2_EPI32(a0, a1), \
    SIMD_LL_SET2_EPI32(a0, a1), SIMD_LL_SET2_EPI32(a0, a1) \
  }

// 8 individual 32-bit values, a0 being lane 0 of the vector.
#define SIMD_MM256_SETR_EPI32(a0, a1, a2, a3, a4, a5, a6, a7) \
  { \
    SIMD_LL_SET2_EPI32(a0, a1), SIMD_LL_SET2_EPI32(a2, a3), \
    SIMD_LL_SET2_EPI32(a4, a5), SIMD_LL_SET2_EPI32(a6, a7) \
  }
159 
// Byte sizes derived from the width of one __m256i value:
// A is sizeof(__m256i); DA, QA and OA are 2x, 4x and 8x that size and HA is
// half of it.
constexpr size_t A = sizeof(__m256i);
constexpr size_t DA = A * 2;
constexpr size_t QA = A * 4;
constexpr size_t OA = A * 8;
constexpr size_t HA = A / 2;
165 
166 const __m256i K_ZERO = SIMD_MM256_SET1_EPI8(0);
167 const __m256i K_INV_ZERO = SIMD_MM256_SET1_EPI8(0xFF);
168 
169 const __m256i K8_01 = SIMD_MM256_SET1_EPI8(0x01);
170 const __m256i K8_02 = SIMD_MM256_SET1_EPI8(0x02);
171 const __m256i K8_04 = SIMD_MM256_SET1_EPI8(0x04);
172 const __m256i K8_08 = SIMD_MM256_SET1_EPI8(0x08);
173 const __m256i K8_10 = SIMD_MM256_SET1_EPI8(0x10);
174 const __m256i K8_20 = SIMD_MM256_SET1_EPI8(0x20);
175 const __m256i K8_40 = SIMD_MM256_SET1_EPI8(0x40);
176 const __m256i K8_80 = SIMD_MM256_SET1_EPI8(0x80);
177 
178 const __m256i K8_01_FF = SIMD_MM256_SET2_EPI8(0x01, 0xFF);
179 
180 const __m256i K16_0001 = SIMD_MM256_SET1_EPI16(0x0001);
181 const __m256i K16_0002 = SIMD_MM256_SET1_EPI16(0x0002);
182 const __m256i K16_0003 = SIMD_MM256_SET1_EPI16(0x0003);
183 const __m256i K16_0004 = SIMD_MM256_SET1_EPI16(0x0004);
184 const __m256i K16_0005 = SIMD_MM256_SET1_EPI16(0x0005);
185 const __m256i K16_0006 = SIMD_MM256_SET1_EPI16(0x0006);
186 const __m256i K16_0008 = SIMD_MM256_SET1_EPI16(0x0008);
187 const __m256i K16_0010 = SIMD_MM256_SET1_EPI16(0x0010);
188 const __m256i K16_0018 = SIMD_MM256_SET1_EPI16(0x0018);
189 const __m256i K16_0020 = SIMD_MM256_SET1_EPI16(0x0020);
190 const __m256i K16_0080 = SIMD_MM256_SET1_EPI16(0x0080);
191 const __m256i K16_00FF = SIMD_MM256_SET1_EPI16(0x00FF);
192 const __m256i K16_FF00 = SIMD_MM256_SET1_EPI16(0xFF00);
193 
194 const __m256i K32_00000001 = SIMD_MM256_SET1_EPI32(0x00000001);
195 const __m256i K32_00000002 = SIMD_MM256_SET1_EPI32(0x00000002);
196 const __m256i K32_00000004 = SIMD_MM256_SET1_EPI32(0x00000004);
197 const __m256i K32_00000008 = SIMD_MM256_SET1_EPI32(0x00000008);
198 const __m256i K32_000000FF = SIMD_MM256_SET1_EPI32(0x000000FF);
199 const __m256i K32_0000FFFF = SIMD_MM256_SET1_EPI32(0x0000FFFF);
200 const __m256i K32_00010000 = SIMD_MM256_SET1_EPI32(0x00010000);
201 const __m256i K32_01000000 = SIMD_MM256_SET1_EPI32(0x01000000);
202 const __m256i K32_FFFFFF00 = SIMD_MM256_SET1_EPI32(0xFFFFFF00);
203 
205  0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
206  -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1);
208  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, 0x0,
209  0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
211  -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1,
212  -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD);
213 
215  0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, -1, -1, -1, -1, -1, -1, -1, -1, 0x0,
216  0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, -1, -1, -1, -1, -1, -1, -1, -1);
218  -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe, -1,
219  -1, -1, -1, -1, -1, -1, -1, 0x0, 0x2, 0x4, 0x6, 0x8, 0xa, 0xc, 0xe);
220 
222  0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1,
223  0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
225  -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
226  -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1, -1, -1);
228  -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1, -1, -1,
229  -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1, -1, -1);
230 
232  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd, -1, -1,
233  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x1, 0x5, 0x9, 0xd);
234 const __m256i U_SHUFFLE4 =
235  SIMD_MM256_SETR_EPI8(0x0, 0x0, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x1, 0x0, 0x0,
236  0x0, 0x5, 0x0, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x6, 0x0,
237  0x0, 0x0, 0x3, 0x0, 0x0, 0x0, 0x7, 0x0, 0x0, 0x0);
238 
240  0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x3,
241  0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
243  -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
244  -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1, -1, -1);
246  -1, -1, -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1, -1, -1,
247  -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1, -1, -1);
248 
250  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf, -1, -1,
251  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x3, 0x7, 0xb, 0xf);
252 
254  0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1,
255  -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1);
257  -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8,
258  -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD);
260  -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1,
261  0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1, -1);
262 
264  -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5,
265  -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA);
267  -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1,
268  0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1, -1);
270  0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1,
271  -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF, -1);
272 
274  -1, -1, 0x0, -1, -1, 0x1, -1, -1, 0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1,
275  0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1);
277  0x2, -1, -1, 0x3, -1, -1, 0x4, -1, -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1,
278  -1, 0x8, -1, -1, 0x9, -1, -1, 0xA, -1, -1, 0xB, -1, -1, 0xC, -1);
280  -1, 0x5, -1, -1, 0x6, -1, -1, 0x7, -1, -1, 0x8, -1, -1, 0x9, -1, -1, 0xA,
281  -1, -1, 0xB, -1, -1, 0xC, -1, -1, 0xD, -1, -1, 0xE, -1, -1, 0xF);
282 
284  0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
285  -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1);
287  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE, 0x1,
288  0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
290  -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, -1, -1, -1, -1, -1, -1,
291  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x2, 0x5, 0x8, 0xB, 0xE);
292 
294  0x2, 0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
295  -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1);
297  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF, 0x2,
298  0x5, 0x8, 0xB, 0xE, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
300  -1, -1, -1, -1, -1, 0x1, 0x4, 0x7, 0xA, 0xD, -1, -1, -1, -1, -1, -1, -1, -1,
301  -1, -1, -1, -1, -1, -1, -1, -1, 0x0, 0x3, 0x6, 0x9, 0xC, 0xF);
302 
305 const int Y_ADJUST = 0;
306 const int UV_ADJUST = 128;
308 const int YUV_TO_BGR_ROUND_TERM = 0; // 1 << (YUV_TO_BGR_AVERAGING_SHIFT);
309 const int Y_TO_RGB_WEIGHT =
310  static_cast<int>((((1 << YUV_TO_BGR_AVERAGING_SHIFT))));
311 const int U_TO_BLUE_WEIGHT =
312  static_cast<int>((2.041 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
313 const int U_TO_GREEN_WEIGHT =
314  -static_cast<int>((0.3455 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
315 const int V_TO_GREEN_WEIGHT =
316  -static_cast<int>((0.7169 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
317 const int V_TO_RED_WEIGHT =
318  static_cast<int>((1.4065 * (1 << YUV_TO_BGR_AVERAGING_SHIFT)));
319 
320 const __m256i K16_YRGB_RT =
321  SIMD_MM256_SET2_EPI16(Y_TO_RGB_WEIGHT, YUV_TO_BGR_ROUND_TERM);
322 const __m256i K16_VR_0 = SIMD_MM256_SET2_EPI16(V_TO_RED_WEIGHT, 0);
323 const __m256i K16_UG_VG =
324  SIMD_MM256_SET2_EPI16(U_TO_GREEN_WEIGHT, V_TO_GREEN_WEIGHT);
325 const __m256i K16_UB_0 = SIMD_MM256_SET2_EPI16(U_TO_BLUE_WEIGHT, 0);
326 
327 template <bool align>
328 SIMD_INLINE __m256i Load(const __m256i *p);
329 
330 template <>
331 SIMD_INLINE __m256i Load<false>(const __m256i *p) {
332  return _mm256_loadu_si256(p);
333 }
334 
335 template <>
336 SIMD_INLINE __m256i Load<true>(const __m256i *p) {
337  return _mm256_load_si256(p);
338 }
339 
340 SIMD_INLINE void *AlignLo(const void *ptr, size_t align) {
341  return reinterpret_cast<void *>(((size_t)ptr) & ~(align - 1));
342 }
343 
344 SIMD_INLINE bool Aligned(const void *ptr, size_t align = sizeof(__m256)) {
345  return ptr == AlignLo(ptr, align);
346 }
347 
348 template <bool align>
349 SIMD_INLINE void Store(__m256i *p, __m256i a);
350 
351 template <>
352 SIMD_INLINE void Store<false>(__m256i *p, __m256i a) {
353  _mm256_storeu_si256(p, a);
354 }
355 
356 template <>
357 SIMD_INLINE void Store<true>(__m256i *p, __m256i a) {
358  _mm256_store_si256(p, a);
359 }
360 
362  return _mm256_min_epi16(K16_00FF, _mm256_max_epi16(value, K_ZERO));
363 }
364 
365 SIMD_INLINE __m256i AdjustY16(__m256i y16) {
366  return _mm256_subs_epi16(y16, K16_Y_ADJUST);
367 }
368 
369 SIMD_INLINE __m256i AdjustUV16(__m256i uv16) {
370  return _mm256_subs_epi16(uv16, K16_UV_ADJUST);
371 }
372 
373 SIMD_INLINE __m256i AdjustedYuvToRed32(__m256i y16_1, __m256i v16_0) {
374  // print_m256_i16(y16_1);
375  // print_m256_i16(v16_0);
376  // print_m256_i32(_mm256_madd_epi16(y16_1, K16_YRGB_RT));
377  // print_m256_i32(_mm256_madd_epi16(v16_0, K16_VR_0));
378  return _mm256_srai_epi32(
379  _mm256_add_epi32(_mm256_madd_epi16(y16_1, K16_YRGB_RT),
380  _mm256_madd_epi16(v16_0, K16_VR_0)),
381  YUV_TO_BGR_AVERAGING_SHIFT);
382 }
383 
384 SIMD_INLINE __m256i AdjustedYuvToRed16(__m256i y16, __m256i v16) {
385  // print_m256_i32(AdjustedYuvToRed32(_mm256_unpacklo_epi16(y16, K16_0001),
386  // _mm256_unpacklo_epi16(v16, K_ZERO)));
387  // print_m256_i16(_mm256_unpacklo_epi16(y16, K16_0001));
388  return SaturateI16ToU8(_mm256_packs_epi32(
389  AdjustedYuvToRed32(_mm256_unpacklo_epi16(y16, K16_0001),
390  _mm256_unpacklo_epi16(v16, K_ZERO)),
391  AdjustedYuvToRed32(_mm256_unpackhi_epi16(y16, K16_0001),
392  _mm256_unpackhi_epi16(v16, K_ZERO))));
393 }
394 
395 SIMD_INLINE __m256i AdjustedYuvToGreen32(__m256i y16_1, __m256i u16_v16) {
396  return _mm256_srai_epi32(
397  _mm256_add_epi32(_mm256_madd_epi16(y16_1, K16_YRGB_RT),
398  _mm256_madd_epi16(u16_v16, K16_UG_VG)),
399  YUV_TO_BGR_AVERAGING_SHIFT);
400 }
401 
402 SIMD_INLINE __m256i AdjustedYuvToGreen16(__m256i y16, __m256i u16,
403  __m256i v16) {
404  return SaturateI16ToU8(_mm256_packs_epi32(
405  AdjustedYuvToGreen32(_mm256_unpacklo_epi16(y16, K16_0001),
406  _mm256_unpacklo_epi16(u16, v16)),
407  AdjustedYuvToGreen32(_mm256_unpackhi_epi16(y16, K16_0001),
408  _mm256_unpackhi_epi16(u16, v16))));
409 }
410 
411 SIMD_INLINE __m256i AdjustedYuvToBlue32(__m256i y16_1, __m256i u16_0) {
412  return _mm256_srai_epi32(
413  _mm256_add_epi32(_mm256_madd_epi16(y16_1, K16_YRGB_RT),
414  _mm256_madd_epi16(u16_0, K16_UB_0)),
415  YUV_TO_BGR_AVERAGING_SHIFT);
416 }
417 
418 SIMD_INLINE __m256i AdjustedYuvToBlue16(__m256i y16, __m256i u16) {
419  return SaturateI16ToU8(_mm256_packs_epi32(
420  AdjustedYuvToBlue32(_mm256_unpacklo_epi16(y16, K16_0001),
421  _mm256_unpacklo_epi16(u16, K_ZERO)),
422  AdjustedYuvToBlue32(_mm256_unpackhi_epi16(y16, K16_0001),
423  _mm256_unpackhi_epi16(u16, K_ZERO))));
424 }
425 
426 SIMD_INLINE __m256i YuvToRed(__m256i y, __m256i v) {
427  __m256i lo = AdjustedYuvToRed16(_mm256_unpacklo_epi8(y, K_ZERO),
428  AdjustUV16(_mm256_unpacklo_epi8(v, K_ZERO)));
429  __m256i hi = AdjustedYuvToRed16((_mm256_unpackhi_epi8(y, K_ZERO)),
430  AdjustUV16(_mm256_unpackhi_epi8(v, K_ZERO)));
431 
432  // print_m256_i16(lo);
433  // print_m256_i16(hi);
434  return _mm256_packus_epi16(lo, hi);
435 }
436 
437 SIMD_INLINE __m256i YuvToGreen(__m256i y, __m256i u, __m256i v) {
438  __m256i lo =
439  AdjustedYuvToGreen16((_mm256_unpacklo_epi8(y, K_ZERO)),
440  AdjustUV16(_mm256_unpacklo_epi8(u, K_ZERO)),
441  AdjustUV16(_mm256_unpacklo_epi8(v, K_ZERO)));
442  __m256i hi =
443  AdjustedYuvToGreen16((_mm256_unpackhi_epi8(y, K_ZERO)),
444  AdjustUV16(_mm256_unpackhi_epi8(u, K_ZERO)),
445  AdjustUV16(_mm256_unpackhi_epi8(v, K_ZERO)));
446  return _mm256_packus_epi16(lo, hi);
447 }
448 
449 SIMD_INLINE __m256i YuvToBlue(__m256i y, __m256i u) {
450  __m256i lo = AdjustedYuvToBlue16((_mm256_unpacklo_epi8(y, K_ZERO)),
451  AdjustUV16(_mm256_unpacklo_epi8(u, K_ZERO)));
452  __m256i hi = AdjustedYuvToBlue16((_mm256_unpackhi_epi8(y, K_ZERO)),
453  AdjustUV16(_mm256_unpackhi_epi8(u, K_ZERO)));
454  return _mm256_packus_epi16(lo, hi);
455 }
456 
457 template <int index>
458 __m256i InterleaveBgr(__m256i blue, __m256i green, __m256i red);
459 
460 template <>
461 SIMD_INLINE __m256i InterleaveBgr<0>(__m256i blue, __m256i green, __m256i red) {
462  return _mm256_or_si256(
463  _mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x44),
464  K8_SHUFFLE_PERMUTED_BLUE_TO_BGR0),
465  _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x44),
466  K8_SHUFFLE_PERMUTED_GREEN_TO_BGR0),
467  _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x44),
468  K8_SHUFFLE_PERMUTED_RED_TO_BGR0)));
469 }
470 
471 template <>
472 SIMD_INLINE __m256i InterleaveBgr<1>(__m256i blue, __m256i green, __m256i red) {
473  return _mm256_or_si256(
474  _mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0x99),
475  K8_SHUFFLE_PERMUTED_BLUE_TO_BGR1),
476  _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0x99),
477  K8_SHUFFLE_PERMUTED_GREEN_TO_BGR1),
478  _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0x99),
479  K8_SHUFFLE_PERMUTED_RED_TO_BGR1)));
480 }
481 
482 template <>
483 SIMD_INLINE __m256i InterleaveBgr<2>(__m256i blue, __m256i green, __m256i red) {
484  return _mm256_or_si256(
485  _mm256_shuffle_epi8(_mm256_permute4x64_epi64(blue, 0xEE),
486  K8_SHUFFLE_PERMUTED_BLUE_TO_BGR2),
487  _mm256_or_si256(_mm256_shuffle_epi8(_mm256_permute4x64_epi64(green, 0xEE),
488  K8_SHUFFLE_PERMUTED_GREEN_TO_BGR2),
489  _mm256_shuffle_epi8(_mm256_permute4x64_epi64(red, 0xEE),
490  K8_SHUFFLE_PERMUTED_RED_TO_BGR2)));
491 }
492 
493 SIMD_INLINE __m256i BgrToBlue(__m256i bgr[3]) {
494  __m256i b0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_BLUE);
495  __m256i b2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_BLUE);
496  return _mm256_or_si256(
497  _mm256_permute2x128_si256(b0, b2, 0x20),
498  _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_BLUE),
499  _mm256_permute2x128_si256(b0, b2, 0x31)));
500 }
501 
502 SIMD_INLINE __m256i BgrToGreen(__m256i bgr[3]) {
503  __m256i g0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_GREEN);
504  __m256i g2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_GREEN);
505  return _mm256_or_si256(
506  _mm256_permute2x128_si256(g0, g2, 0x20),
507  _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_GREEN),
508  _mm256_permute2x128_si256(g0, g2, 0x31)));
509 }
510 
511 SIMD_INLINE __m256i BgrToRed(__m256i bgr[3]) {
512  __m256i r0 = _mm256_shuffle_epi8(bgr[0], K8_SHUFFLE_BGR0_TO_RED);
513  __m256i r2 = _mm256_shuffle_epi8(bgr[2], K8_SHUFFLE_BGR2_TO_RED);
514  return _mm256_or_si256(
515  _mm256_permute2x128_si256(r0, r2, 0x20),
516  _mm256_or_si256(_mm256_shuffle_epi8(bgr[1], K8_SHUFFLE_BGR1_TO_RED),
517  _mm256_permute2x128_si256(r0, r2, 0x31)));
518 }
519 
520 template <bool align>
521 SIMD_INLINE __m256i LoadPermuted(const __m256i *p) {
522  return _mm256_permute4x64_epi64(Load<align>(p), 0xD8);
523 }
524 
525 } // namespace camera
526 } // namespace drivers
527 } // namespace apollo
SIMD_INLINE __m256i YuvToRed(__m256i y, __m256i v)
Definition: util.h:426
SIMD_INLINE __m256i InterleaveBgr< 2 >(__m256i blue, __m256i green, __m256i red)
Definition: util.h:483
const __m256i K32_01000000
Definition: util.h:201
const __m256i K8_01
Definition: util.h:169
const __m256i K16_00FF
Definition: util.h:191
const __m256i K16_UG_VG
Definition: util.h:323
const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR2
Definition: util.h:269
const __m256i K8_SHUFFLE_BGR0_TO_BLUE
Definition: util.h:204
#define SIMD_MM256_SET2_EPI16(a0, a1)
Definition: util.h:125
const __m256i K32_00000008
Definition: util.h:197
const __m256i K16_YRGB_RT
Definition: util.h:320
const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR0
Definition: util.h:263
SIMD_INLINE __m256i BgrToBlue(__m256i bgr[3])
Definition: util.h:493
const size_t OA
Definition: util.h:163
#define SIMD_MM256_SET1_EPI32(a)
Definition: util.h:140
const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR2
Definition: util.h:279
const __m256i K16_0080
Definition: util.h:190
SIMD_INLINE __m256i YuvToBlue(__m256i y, __m256i u)
Definition: util.h:449
const __m256i K32_00000001
Definition: util.h:194
PlanningContext is the runtime context in planning. It is persistent across multiple frames...
Definition: atomic_hash_map.h:25
const __m256i K8_80
Definition: util.h:176
const __m256i K16_Y_ADJUST
Definition: util.h:303
const __m256i K8_20
Definition: util.h:174
SIMD_INLINE bool Aligned(const void *ptr, size_t align=sizeof(__m256))
Definition: util.h:344
void print_m256_i16(const __m256i a)
const __m256i V_SHUFFLE1
Definition: util.h:242
const size_t HA
Definition: util.h:164
const __m256i K16_0002
Definition: util.h:181
const __m256i K8_SHUFFLE_BGR1_TO_BLUE
Definition: util.h:207
const __m256i K16_0005
Definition: util.h:184
const size_t A
Definition: util.h:160
const __m256i U_SHUFFLE0
Definition: util.h:221
SIMD_INLINE __m256i Load< false >(const __m256i *p)
Definition: util.h:331
const int Y_TO_RGB_WEIGHT
Definition: util.h:309
SIMD_INLINE __m256i AdjustedYuvToBlue16(__m256i y16, __m256i u16)
Definition: util.h:418
const __m256i K16_VR_0
Definition: util.h:322
const __m256i K_INV_ZERO
Definition: util.h:167
const __m256i K8_04
Definition: util.h:171
const __m256i K16_0020
Definition: util.h:189
const int V_TO_GREEN_WEIGHT
Definition: util.h:315
SIMD_INLINE __m256i AdjustedYuvToBlue32(__m256i y16_1, __m256i u16_0)
Definition: util.h:411
SIMD_INLINE __m256i AdjustedYuvToGreen32(__m256i y16_1, __m256i u16_v16)
Definition: util.h:395
const __m256i K8_08
Definition: util.h:172
void print_m256(const __m256i a)
SIMD_INLINE __m256i AdjustUV16(__m256i uv16)
Definition: util.h:369
const __m256i K16_0010
Definition: util.h:187
const __m256i K16_0018
Definition: util.h:188
const __m256i K_ZERO
Definition: util.h:166
SIMD_INLINE __m256i InterleaveBgr< 1 >(__m256i blue, __m256i green, __m256i red)
Definition: util.h:472
const __m256i K16_UV_ADJUST
Definition: util.h:304
const __m256i V_SHUFFLE3
Definition: util.h:249
const __m256i U_SHUFFLE4
Definition: util.h:234
const size_t QA
Definition: util.h:162
SIMD_INLINE __m256i SaturateI16ToU8(__m256i value)
Definition: util.h:361
const int UV_ADJUST
Definition: util.h:306
void yuyv2rgb_avx(unsigned char *YUV, unsigned char *RGB, int NumPixels)
const __m256i V_SHUFFLE2
Definition: util.h:245
SIMD_INLINE __m256i AdjustY16(__m256i y16)
Definition: util.h:365
const __m256i K8_SHUFFLE_BGR0_TO_RED
Definition: util.h:293
const __m256i K8_SHUFFLE_BGR2_TO_BLUE
Definition: util.h:210
const int U_TO_BLUE_WEIGHT
Definition: util.h:311
const __m256i K16_0006
Definition: util.h:185
const __m256i V_SHUFFLE0
Definition: util.h:239
const __m256i K8_SHUFFLE_BGR1_TO_GREEN
Definition: util.h:286
#define SIMD_MM256_SET1_EPI16(a)
Definition: util.h:119
const size_t DA
Definition: util.h:161
SIMD_INLINE __m256i Load(const __m256i *p)
#define SIMD_MM256_SET2_EPI8(a0, a1)
Definition: util.h:102
const __m256i K8_SHUFFLE_BGR2_TO_RED
Definition: util.h:299
const int Y_ADJUST
Definition: util.h:305
SIMD_INLINE void Store(__m256i *p, __m256i a)
const int YUV_TO_BGR_ROUND_TERM
Definition: util.h:308
const __m256i U_SHUFFLE3
Definition: util.h:231
const __m256i K8_02
Definition: util.h:170
SIMD_INLINE __m256i AdjustedYuvToGreen16(__m256i y16, __m256i u16, __m256i v16)
Definition: util.h:402
const __m256i K16_UB_0
Definition: util.h:325
void print_m256_i32(const __m256i a)
SIMD_INLINE __m256i AdjustedYuvToRed16(__m256i y16, __m256i v16)
Definition: util.h:384
SIMD_INLINE void Store< true >(__m256i *p, __m256i a)
Definition: util.h:357
const __m256i K8_10
Definition: util.h:173
const __m256i K8_01_FF
Definition: util.h:178
const __m256i K32_00000002
Definition: util.h:195
SIMD_INLINE __m256i YuvToGreen(__m256i y, __m256i u, __m256i v)
Definition: util.h:437
const int V_TO_RED_WEIGHT
Definition: util.h:317
const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR0
Definition: util.h:273
SIMD_INLINE __m256i Load< true >(const __m256i *p)
Definition: util.h:336
SIMD_INLINE void Store< false >(__m256i *p, __m256i a)
Definition: util.h:352
const __m256i U_SHUFFLE2
Definition: util.h:227
const __m256i K16_FF00
Definition: util.h:192
const __m256i K8_SHUFFLE_BGR1_TO_RED
Definition: util.h:296
const __m256i K8_SHUFFLE_PERMUTED_RED_TO_BGR1
Definition: util.h:276
SIMD_INLINE __m256i InterleaveBgr< 0 >(__m256i blue, __m256i green, __m256i red)
Definition: util.h:461
const __m256i Y_SHUFFLE1
Definition: util.h:217
const __m256i K32_00000004
Definition: util.h:196
SIMD_INLINE __m256i AdjustedYuvToRed32(__m256i y16_1, __m256i v16_0)
Definition: util.h:373
const int YUV_TO_BGR_AVERAGING_SHIFT
Definition: util.h:307
const __m256i K32_0000FFFF
Definition: util.h:199
const __m256i U_SHUFFLE1
Definition: util.h:224
const __m256i K8_40
Definition: util.h:175
#define SIMD_INLINE
Definition: util.h:43
const __m256i K16_0001
Definition: util.h:180
const __m256i Y_SHUFFLE0
Definition: util.h:214
const __m256i K8_SHUFFLE_BGR2_TO_GREEN
Definition: util.h:289
apollo::cyber::base::std value
const __m256i K32_000000FF
Definition: util.h:198
const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR1
Definition: util.h:256
const __m256i K16_0003
Definition: util.h:182
const __m256i K8_SHUFFLE_PERMUTED_GREEN_TO_BGR1
Definition: util.h:266
const __m256i K16_0008
Definition: util.h:186
SIMD_INLINE char GetChar(T value, size_t index)
Definition: util.h:50
const __m256i K32_FFFFFF00
Definition: util.h:202
const __m256i K16_0004
Definition: util.h:183
#define SIMD_MM256_SET1_EPI8(a)
Definition: util.h:96
#define SIMD_MM256_SETR_EPI8(a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, aa, ab, ac, ad, ae, af, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)
Definition: util.h:109
const int U_TO_GREEN_WEIGHT
Definition: util.h:313
const __m256i K32_00010000
Definition: util.h:200
__m256i InterleaveBgr(__m256i blue, __m256i green, __m256i red)
const __m256i K8_SHUFFLE_BGR0_TO_GREEN
Definition: util.h:283
const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR2
Definition: util.h:259
SIMD_INLINE void * AlignLo(const void *ptr, size_t align)
Definition: util.h:340
SIMD_INLINE __m256i BgrToGreen(__m256i bgr[3])
Definition: util.h:502
SIMD_INLINE __m256i LoadPermuted(const __m256i *p)
Definition: util.h:521
SIMD_INLINE __m256i BgrToRed(__m256i bgr[3])
Definition: util.h:511
const __m256i K8_SHUFFLE_PERMUTED_BLUE_TO_BGR0
Definition: util.h:253