Apollo  6.0
Open source self driving car software
i_blas.h
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright 2018 The Apollo Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *****************************************************************************/
16 
17 #pragma once
18 
20 
21 namespace apollo {
22 namespace perception {
23 namespace common {
24 
25 // Copy of 1D arrays
26 template <typename T>
27 inline void ICopy(const T *src, T *dst, int n) {
28  memcpy(dst, src, n * sizeof(T));
29 }
30 template <typename T>
31 inline void ICopy1(const T src[1], T dst[1]) {
32  dst[0] = src[0];
33 }
34 template <typename T>
35 inline void ICopy2(const T src[2], T dst[2]) {
36  dst[0] = src[0];
37  dst[1] = src[1];
38 }
39 template <typename T>
40 inline void ICopy3(const T src[3], T dst[3]) {
41  dst[0] = src[0];
42  dst[1] = src[1];
43  dst[2] = src[2];
44 }
45 template <typename T>
46 inline void ICopy4(const T src[4], T dst[4]) {
47  dst[0] = src[0];
48  dst[1] = src[1];
49  dst[2] = src[2];
50  dst[3] = src[3];
51 }
52 template <typename T>
53 inline void ICopy5(const T src[5], T dst[5]) {
54  dst[0] = src[0];
55  dst[1] = src[1];
56  dst[2] = src[2];
57  dst[3] = src[3];
58  dst[4] = src[4];
59 }
60 template <typename T>
61 inline void ICopy6(const T src[6], T dst[6]) {
62  dst[0] = src[0];
63  dst[1] = src[1];
64  dst[2] = src[2];
65  dst[3] = src[3];
66  dst[4] = src[4];
67  dst[5] = src[5];
68 }
69 template <typename T>
70 inline void ICopy7(const T src[7], T dst[7]) {
71  dst[0] = src[0];
72  dst[1] = src[1];
73  dst[2] = src[2];
74  dst[3] = src[3];
75  dst[4] = src[4];
76  dst[5] = src[5];
77  dst[6] = src[6];
78 }
79 template <typename T>
80 inline void ICopy8(const T src[8], T dst[8]) {
81  dst[0] = src[0];
82  dst[1] = src[1];
83  dst[2] = src[2];
84  dst[3] = src[3];
85  dst[4] = src[4];
86  dst[5] = src[5];
87  dst[6] = src[6];
88  dst[7] = src[7];
89 }
90 template <typename T>
91 inline void ICopy9(const T src[9], T dst[9]) {
92  dst[0] = src[0];
93  dst[1] = src[1];
94  dst[2] = src[2];
95  dst[3] = src[3];
96  dst[4] = src[4];
97  dst[5] = src[5];
98  dst[6] = src[6];
99  dst[7] = src[7];
100  dst[8] = src[8];
101 }
102 template <typename T>
103 inline void ICopy10(const T src[10], T dst[10]) {
104  dst[0] = src[0];
105  dst[1] = src[1];
106  dst[2] = src[2];
107  dst[3] = src[3];
108  dst[4] = src[4];
109  dst[5] = src[5];
110  dst[6] = src[6];
111  dst[7] = src[7];
112  dst[8] = src[8];
113  dst[9] = src[9];
114 }
115 template <typename T>
116 inline void ICopy11(const T src[11], T dst[11]) {
117  dst[0] = src[0];
118  dst[1] = src[1];
119  dst[2] = src[2];
120  dst[3] = src[3];
121  dst[4] = src[4];
122  dst[5] = src[5];
123  dst[6] = src[6];
124  dst[7] = src[7];
125  dst[8] = src[8];
126  dst[9] = src[9];
127  dst[10] = src[10];
128 }
129 template <typename T>
130 inline void ICopy12(const T src[12], T dst[12]) {
131  dst[0] = src[0];
132  dst[1] = src[1];
133  dst[2] = src[2];
134  dst[3] = src[3];
135  dst[4] = src[4];
136  dst[5] = src[5];
137  dst[6] = src[6];
138  dst[7] = src[7];
139  dst[8] = src[8];
140  dst[9] = src[9];
141  dst[10] = src[10];
142  dst[11] = src[11];
143 }
144 template <typename T>
145 inline void ICopy13(const T src[13], T dst[13]) {
146  dst[0] = src[0];
147  dst[1] = src[1];
148  dst[2] = src[2];
149  dst[3] = src[3];
150  dst[4] = src[4];
151  dst[5] = src[5];
152  dst[6] = src[6];
153  dst[7] = src[7];
154  dst[8] = src[8];
155  dst[9] = src[9];
156  dst[10] = src[10];
157  dst[11] = src[11];
158  dst[12] = src[12];
159 }
160 template <typename T>
161 inline void ICopy14(const T src[14], T dst[14]) {
162  dst[0] = src[0];
163  dst[1] = src[1];
164  dst[2] = src[2];
165  dst[3] = src[3];
166  dst[4] = src[4];
167  dst[5] = src[5];
168  dst[6] = src[6];
169  dst[7] = src[7];
170  dst[8] = src[8];
171  dst[9] = src[9];
172  dst[10] = src[10];
173  dst[11] = src[11];
174  dst[12] = src[12];
175  dst[13] = src[13];
176 }
177 template <typename T>
178 inline void ICopy15(const T src[15], T dst[15]) {
179  dst[0] = src[0];
180  dst[1] = src[1];
181  dst[2] = src[2];
182  dst[3] = src[3];
183  dst[4] = src[4];
184  dst[5] = src[5];
185  dst[6] = src[6];
186  dst[7] = src[7];
187  dst[8] = src[8];
188  dst[9] = src[9];
189  dst[10] = src[10];
190  dst[11] = src[11];
191  dst[12] = src[12];
192  dst[13] = src[13];
193  dst[14] = src[14];
194 }
195 template <typename T>
196 inline void ICopy16(const T src[16], T dst[16]) {
197  dst[0] = src[0];
198  dst[1] = src[1];
199  dst[2] = src[2];
200  dst[3] = src[3];
201  dst[4] = src[4];
202  dst[5] = src[5];
203  dst[6] = src[6];
204  dst[7] = src[7];
205  dst[8] = src[8];
206  dst[9] = src[9];
207  dst[10] = src[10];
208  dst[11] = src[11];
209  dst[12] = src[12];
210  dst[13] = src[13];
211  dst[14] = src[14];
212  dst[15] = src[15];
213 }
214 // Copy of 2D arrays
215 template <typename T>
216 inline void ICopy(const T *const *src, T **dst, int m, int n) {
217  int i;
218  for (i = 0; i < m; i++) {
219  ICopy<T>(src[i], dst[i], n);
220  }
221 }
222 
223 // Copy of 1D arrays with different types
224 template <typename T, typename S>
225 inline void ICopy(const T *src, S *dst, int n) {
226  int i;
227  for (i = 0; i < n; ++i) {
228  dst[i] = (S)(src[i]);
229  }
230 }
231 // Copy of 2D arrays with different types
232 template <typename T, typename S>
233 inline void ICopy(const T *const *src, S **dst, int m, int n) {
234  int i;
235  for (i = 0; i < m; i++) {
236  ICopy<T, S>(src[i], dst[i], n);
237  }
238 }
239 
240 // Fill array elements with constant value
241 template <typename T>
242 inline void IFill(T *a, int n, T val) {
243  for (int i = 0; i < n; i++) {
244  a[i] = val;
245  }
246 }
247 template <typename T>
248 inline void IFill1(T a[1], T val) {
249  a[0] = val;
250 }
251 template <typename T>
252 inline void IFill2(T a[2], T val) {
253  a[0] = a[1] = val;
254 }
255 template <typename T>
256 inline void IFill3(T a[3], T val) {
257  a[0] = a[1] = a[2] = val;
258 }
259 template <typename T>
260 inline void IFill4(T a[4], T val) {
261  a[0] = a[1] = a[2] = a[3] = val;
262 }
263 template <typename T>
264 inline void IFill5(T a[5], T val) {
265  a[0] = a[1] = a[2] = a[3] = a[4] = val;
266 }
267 template <typename T>
268 inline void IFill6(T a[6], T val) {
269  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = val;
270 }
271 template <typename T>
272 inline void IFill7(T a[7], T val) {
273  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = val;
274 }
275 template <typename T>
276 inline void IFill8(T a[8], T val) {
277  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = val;
278 }
279 template <typename T>
280 inline void IFill9(T a[9], T val) {
281  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = val;
282 }
283 template <typename T>
284 inline void IFill10(T a[10], T val) {
285  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = val;
286 }
287 template <typename T>
288 inline void IFill11(T a[11], T val) {
289  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
290  val;
291 }
292 template <typename T>
293 inline void IFill12(T a[12], T val) {
294  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
295  a[11] = val;
296 }
297 template <typename T>
298 inline void IFill13(T a[13], T val) {
299  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
300  a[11] = a[12] = val;
301 }
302 template <typename T>
303 inline void IFill14(T a[14], T val) {
304  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
305  a[11] = a[12] = a[13] = val;
306 }
307 template <typename T>
308 inline void IFill15(T a[15], T val) {
309  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
310  a[11] = a[12] = a[13] = a[14] = val;
311 }
312 template <typename T>
313 inline void IFill16(T a[16], T val) {
314  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
315  a[11] = a[12] = a[13] = a[14] = a[15] = val;
316 }
317 
318 // Fill array elements with zeroes
319 template <typename T>
320 inline void IZero(T *a, int n) {
321  for (int i = 0; i < n; i++) {
322  a[i] = static_cast<T>(0.0);
323  }
324 }
325 template <typename T>
326 inline void IZero1(T a[1]) {
327  a[0] = static_cast<T>(0.0);
328 }
329 template <typename T>
330 inline void IZero2(T a[2]) {
331  a[0] = a[1] = static_cast<T>(0.0);
332 }
333 template <typename T>
334 inline void IZero3(T a[3]) {
335  a[0] = a[1] = a[2] = static_cast<T>(0.0);
336 }
337 template <typename T>
338 inline void IZero4(T a[4]) {
339  a[0] = a[1] = a[2] = a[3] = static_cast<T>(0.0);
340 }
341 template <typename T>
342 inline void IZero5(T a[5]) {
343  a[0] = a[1] = a[2] = a[3] = a[4] = static_cast<T>(0.0);
344 }
345 template <typename T>
346 inline void IZero6(T a[6]) {
347  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = static_cast<T>(0.0);
348 }
349 template <typename T>
350 inline void IZero7(T a[7]) {
351  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = static_cast<T>(0.0);
352 }
353 template <typename T>
354 inline void IZero8(T a[8]) {
355  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = static_cast<T>(0.0);
356 }
357 template <typename T>
358 inline void IZero9(T a[9]) {
359  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] =
360  static_cast<T>(0.0);
361 }
362 template <typename T>
363 inline void IZero10(T a[10]) {
364  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] =
365  static_cast<T>(0.0);
366 }
367 template <typename T>
368 inline void IZero11(T a[11]) {
369  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
370  static_cast<T>(0.0);
371 }
372 template <typename T>
373 inline void IZero12(T a[12]) {
374  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
375  a[11] = static_cast<T>(0.0);
376 }
377 template <typename T>
378 inline void IZero13(T a[13]) {
379  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
380  a[11] = a[12] = static_cast<T>(0.0);
381 }
382 template <typename T>
383 inline void IZero14(T a[14]) {
384  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
385  a[11] = a[12] = a[13] = static_cast<T>(0.0);
386 }
387 template <typename T>
388 inline void IZero15(T a[15]) {
389  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
390  a[11] = a[12] = a[13] = a[14] = static_cast<T>(0.0);
391 }
392 template <typename T>
393 inline void IZero16(T a[16]) {
394  a[0] = a[1] = a[2] = a[3] = a[4] = a[5] = a[6] = a[7] = a[8] = a[9] = a[10] =
395  a[11] = a[12] = a[13] = a[14] = a[15] = static_cast<T>(0.0);
396 }
397 
398 // Negate a vector x of length n inplace
399 template <typename T>
400 inline void INeg(T *x, int n) {
401  for (int i = 0; i < n; i++) {
402  x[i] = -x[i];
403  }
404 }
405 template <typename T>
406 inline void INeg1(T x[1]) {
407  x[0] = -x[0];
408 }
409 template <typename T>
410 inline void INeg2(T x[2]) {
411  x[0] = -x[0];
412  x[1] = -x[1];
413 }
414 template <typename T>
415 inline void INeg3(T x[3]) {
416  x[0] = -x[0];
417  x[1] = -x[1];
418  x[2] = -x[2];
419 }
420 template <typename T>
421 inline void INeg4(T x[4]) {
422  x[0] = -x[0];
423  x[1] = -x[1];
424  x[2] = -x[2];
425  x[3] = -x[3];
426 }
427 template <typename T>
428 inline void INeg5(T x[5]) {
429  x[0] = -x[0];
430  x[1] = -x[1];
431  x[2] = -x[2];
432  x[3] = -x[3];
433  x[4] = -x[4];
434 }
435 template <typename T>
436 inline void INeg6(T x[6]) {
437  x[0] = -x[0];
438  x[1] = -x[1];
439  x[2] = -x[2];
440  x[3] = -x[3];
441  x[4] = -x[4];
442  x[5] = -x[5];
443 }
444 template <typename T>
445 inline void INeg7(T x[7]) {
446  x[0] = -x[0];
447  x[1] = -x[1];
448  x[2] = -x[2];
449  x[3] = -x[3];
450  x[4] = -x[4];
451  x[5] = -x[5];
452  x[6] = -x[6];
453 }
454 template <typename T>
455 inline void INeg8(T x[8]) {
456  x[0] = -x[0];
457  x[1] = -x[1];
458  x[2] = -x[2];
459  x[3] = -x[3];
460  x[4] = -x[4];
461  x[5] = -x[5];
462  x[6] = -x[6];
463  x[7] = -x[7];
464 }
465 template <typename T>
466 inline void INeg9(T x[9]) {
467  x[0] = -x[0];
468  x[1] = -x[1];
469  x[2] = -x[2];
470  x[3] = -x[3];
471  x[4] = -x[4];
472  x[5] = -x[5];
473  x[6] = -x[6];
474  x[7] = -x[7];
475  x[8] = -x[8];
476 }
477 template <typename T>
478 inline void INeg10(T x[10]) {
479  x[0] = -x[0];
480  x[1] = -x[1];
481  x[2] = -x[2];
482  x[3] = -x[3];
483  x[4] = -x[4];
484  x[5] = -x[5];
485  x[6] = -x[6];
486  x[7] = -x[7];
487  x[8] = -x[8];
488  x[9] = -x[9];
489 }
490 template <typename T>
491 inline void INeg11(T x[11]) {
492  x[0] = -x[0];
493  x[1] = -x[1];
494  x[2] = -x[2];
495  x[3] = -x[3];
496  x[4] = -x[4];
497  x[5] = -x[5];
498  x[6] = -x[6];
499  x[7] = -x[7];
500  x[8] = -x[8];
501  x[9] = -x[9];
502  x[10] = -x[10];
503 }
504 template <typename T>
505 inline void INeg12(T x[12]) {
506  x[0] = -x[0];
507  x[1] = -x[1];
508  x[2] = -x[2];
509  x[3] = -x[3];
510  x[4] = -x[4];
511  x[5] = -x[5];
512  x[6] = -x[6];
513  x[7] = -x[7];
514  x[8] = -x[8];
515  x[9] = -x[9];
516  x[10] = -x[10];
517  x[11] = -x[11];
518 }
519 template <typename T>
520 inline void INeg13(T x[13]) {
521  x[0] = -x[0];
522  x[1] = -x[1];
523  x[2] = -x[2];
524  x[3] = -x[3];
525  x[4] = -x[4];
526  x[5] = -x[5];
527  x[6] = -x[6];
528  x[7] = -x[7];
529  x[8] = -x[8];
530  x[9] = -x[9];
531  x[10] = -x[10];
532  x[11] = -x[11];
533  x[12] = -x[12];
534 }
535 template <typename T>
536 inline void INeg14(T x[14]) {
537  x[0] = -x[0];
538  x[1] = -x[1];
539  x[2] = -x[2];
540  x[3] = -x[3];
541  x[4] = -x[4];
542  x[5] = -x[5];
543  x[6] = -x[6];
544  x[7] = -x[7];
545  x[8] = -x[8];
546  x[9] = -x[9];
547  x[10] = -x[10];
548  x[11] = -x[11];
549  x[12] = -x[12];
550  x[13] = -x[13];
551 }
552 template <typename T>
553 inline void INeg15(T x[15]) {
554  x[0] = -x[0];
555  x[1] = -x[1];
556  x[2] = -x[2];
557  x[3] = -x[3];
558  x[4] = -x[4];
559  x[5] = -x[5];
560  x[6] = -x[6];
561  x[7] = -x[7];
562  x[8] = -x[8];
563  x[9] = -x[9];
564  x[10] = -x[10];
565  x[11] = -x[11];
566  x[12] = -x[12];
567  x[13] = -x[13];
568  x[14] = -x[14];
569 }
570 template <typename T>
571 inline void INeg16(T x[16]) {
572  x[0] = -x[0];
573  x[1] = -x[1];
574  x[2] = -x[2];
575  x[3] = -x[3];
576  x[4] = -x[4];
577  x[5] = -x[5];
578  x[6] = -x[6];
579  x[7] = -x[7];
580  x[8] = -x[8];
581  x[9] = -x[9];
582  x[10] = -x[10];
583  x[11] = -x[11];
584  x[12] = -x[12];
585  x[13] = -x[13];
586  x[14] = -x[14];
587  x[15] = -x[15];
588 }
589 // Negate a vector x of length n, save results in a vector y
590 template <typename T>
591 inline void INeg1(const T x[1], T y[1]) {
592  y[0] = -x[0];
593 }
594 template <typename T>
595 inline void INeg2(const T x[2], T y[2]) {
596  y[0] = -x[0];
597  y[1] = -x[1];
598 }
599 template <typename T>
600 inline void INeg3(const T x[3], T y[3]) {
601  y[0] = -x[0];
602  y[1] = -x[1];
603  y[2] = -x[2];
604 }
605 template <typename T>
606 inline void INeg4(const T x[4], T y[4]) {
607  y[0] = -x[0];
608  y[1] = -x[1];
609  y[2] = -x[2];
610  y[3] = -x[3];
611 }
612 template <typename T>
613 inline void INeg5(const T x[5], T y[5]) {
614  y[0] = -x[0];
615  y[1] = -x[1];
616  y[2] = -x[2];
617  y[3] = -x[3];
618  y[4] = -x[4];
619 }
620 template <typename T>
621 inline void INeg6(const T x[6], T y[6]) {
622  y[0] = -x[0];
623  y[1] = -x[1];
624  y[2] = -x[2];
625  y[3] = -x[3];
626  y[4] = -x[4];
627  y[5] = -x[5];
628 }
629 template <typename T>
630 inline void INeg7(const T x[7], T y[7]) {
631  y[0] = -x[0];
632  y[1] = -x[1];
633  y[2] = -x[2];
634  y[3] = -x[3];
635  y[4] = -x[4];
636  y[5] = -x[5];
637  y[6] = -x[6];
638 }
639 template <typename T>
640 inline void INeg8(const T x[8], T y[8]) {
641  y[0] = -x[0];
642  y[1] = -x[1];
643  y[2] = -x[2];
644  y[3] = -x[3];
645  y[4] = -x[4];
646  y[5] = -x[5];
647  y[6] = -x[6];
648  y[7] = -x[7];
649 }
650 template <typename T>
651 inline void INeg9(const T x[9], T y[9]) {
652  y[0] = -x[0];
653  y[1] = -x[1];
654  y[2] = -x[2];
655  y[3] = -x[3];
656  y[4] = -x[4];
657  y[5] = -x[5];
658  y[6] = -x[6];
659  y[7] = -x[7];
660  y[8] = -x[8];
661 }
662 template <typename T>
663 inline void INeg10(const T x[10], T y[10]) {
664  y[0] = -x[0];
665  y[1] = -x[1];
666  y[2] = -x[2];
667  y[3] = -x[3];
668  y[4] = -x[4];
669  y[5] = -x[5];
670  y[6] = -x[6];
671  y[7] = -x[7];
672  y[8] = -x[8];
673  y[9] = -x[9];
674 }
675 template <typename T>
676 inline void INeg11(const T x[11], T y[11]) {
677  y[0] = -x[0];
678  y[1] = -x[1];
679  y[2] = -x[2];
680  y[3] = -x[3];
681  y[4] = -x[4];
682  y[5] = -x[5];
683  y[6] = -x[6];
684  y[7] = -x[7];
685  y[8] = -x[8];
686  y[9] = -x[9];
687  y[10] = -x[10];
688 }
689 template <typename T>
690 inline void INeg12(const T x[12], T y[12]) {
691  y[0] = -x[0];
692  y[1] = -x[1];
693  y[2] = -x[2];
694  y[3] = -x[3];
695  y[4] = -x[4];
696  y[5] = -x[5];
697  y[6] = -x[6];
698  y[7] = -x[7];
699  y[8] = -x[8];
700  y[9] = -x[9];
701  y[10] = -x[10];
702  y[11] = -x[11];
703 }
704 template <typename T>
705 inline void INeg13(const T x[13], T y[13]) {
706  y[0] = -x[0];
707  y[1] = -x[1];
708  y[2] = -x[2];
709  y[3] = -x[3];
710  y[4] = -x[4];
711  y[5] = -x[5];
712  y[6] = -x[6];
713  y[7] = -x[7];
714  y[8] = -x[8];
715  y[9] = -x[9];
716  y[10] = -x[10];
717  y[11] = -x[11];
718  y[12] = -x[12];
719 }
720 template <typename T>
721 inline void INeg14(const T x[14], T y[14]) {
722  y[0] = -x[0];
723  y[1] = -x[1];
724  y[2] = -x[2];
725  y[3] = -x[3];
726  y[4] = -x[4];
727  y[5] = -x[5];
728  y[6] = -x[6];
729  y[7] = -x[7];
730  y[8] = -x[8];
731  y[9] = -x[9];
732  y[10] = -x[10];
733  y[11] = -x[11];
734  y[12] = -x[12];
735  y[13] = -x[13];
736 }
737 template <typename T>
738 inline void INeg15(const T x[15], T y[15]) {
739  y[0] = -x[0];
740  y[1] = -x[1];
741  y[2] = -x[2];
742  y[3] = -x[3];
743  y[4] = -x[4];
744  y[5] = -x[5];
745  y[6] = -x[6];
746  y[7] = -x[7];
747  y[8] = -x[8];
748  y[9] = -x[9];
749  y[10] = -x[10];
750  y[11] = -x[11];
751  y[12] = -x[12];
752  y[13] = -x[13];
753  y[14] = -x[14];
754 }
755 template <typename T>
756 inline void INeg16(const T x[16], T y[16]) {
757  y[0] = -x[0];
758  y[1] = -x[1];
759  y[2] = -x[2];
760  y[3] = -x[3];
761  y[4] = -x[4];
762  y[5] = -x[5];
763  y[6] = -x[6];
764  y[7] = -x[7];
765  y[8] = -x[8];
766  y[9] = -x[9];
767  y[10] = -x[10];
768  y[11] = -x[11];
769  y[12] = -x[12];
770  y[13] = -x[13];
771  y[14] = -x[14];
772  y[15] = -x[15];
773 }
774 
775 // Negate the cth column of mxn matrix A
776 template <typename T>
777 inline void INegCol(T *A, int c, int m, int n) {
778  T *ref = A;
779  for (int r = 0; r < m; ++r, ref += n) {
780  ref[c] = -ref[c];
781  }
782 }
783 
784 // Compute x=x+c where x is n-dimensional vectors and c is a constant
785 template <typename T>
786 inline void IAdd(T *x, int n, T k) {
787  for (int i = 0; i < n; i++) {
788  x[i] += k;
789  }
790 }
791 // Compute z=x+y where x, y, and z are n-dimensional vectors
792 template <typename T>
793 inline void IAdd(const T *x, const T *y, int n, T *z) {
794  for (int i = 0; i < n; i++) {
795  z[i] = x[i] + y[i];
796  }
797 }
798 template <typename T>
799 inline void IAdd1(const T x[1], const T y[1], T z[1]) {
800  z[0] = x[0] + y[0];
801 }
802 template <typename T>
803 inline void IAdd2(const T x[2], const T y[2], T z[2]) {
804  z[0] = x[0] + y[0];
805  z[1] = x[1] + y[1];
806 }
807 template <typename T>
808 inline void IAdd3(const T x[3], const T y[3], T z[3]) {
809  z[0] = x[0] + y[0];
810  z[1] = x[1] + y[1];
811  z[2] = x[2] + y[2];
812 }
813 template <typename T>
814 inline void IAdd4(const T x[4], const T y[4], T z[4]) {
815  z[0] = x[0] + y[0];
816  z[1] = x[1] + y[1];
817  z[2] = x[2] + y[2];
818  z[3] = x[3] + y[3];
819 }
820 template <typename T>
821 inline void IAdd5(const T x[5], const T y[5], T z[5]) {
822  z[0] = x[0] + y[0];
823  z[1] = x[1] + y[1];
824  z[2] = x[2] + y[2];
825  z[3] = x[3] + y[3];
826  z[4] = x[4] + y[4];
827 }
828 template <typename T>
829 inline void IAdd6(const T x[6], const T y[6], T z[6]) {
830  z[0] = x[0] + y[0];
831  z[1] = x[1] + y[1];
832  z[2] = x[2] + y[2];
833  z[3] = x[3] + y[3];
834  z[4] = x[4] + y[4];
835  z[5] = x[5] + y[5];
836 }
837 template <typename T>
838 inline void IAdd7(const T x[7], const T y[7], T z[7]) {
839  z[0] = x[0] + y[0];
840  z[1] = x[1] + y[1];
841  z[2] = x[2] + y[2];
842  z[3] = x[3] + y[3];
843  z[4] = x[4] + y[4];
844  z[5] = x[5] + y[5];
845  z[6] = x[6] + y[6];
846 }
847 template <typename T>
848 inline void IAdd8(const T x[8], const T y[8], T z[8]) {
849  z[0] = x[0] + y[0];
850  z[1] = x[1] + y[1];
851  z[2] = x[2] + y[2];
852  z[3] = x[3] + y[3];
853  z[4] = x[4] + y[4];
854  z[5] = x[5] + y[5];
855  z[6] = x[6] + y[6];
856  z[7] = x[7] + y[7];
857 }
858 template <typename T>
859 inline void IAdd9(const T x[9], const T y[9], T z[9]) {
860  z[0] = x[0] + y[0];
861  z[1] = x[1] + y[1];
862  z[2] = x[2] + y[2];
863  z[3] = x[3] + y[3];
864  z[4] = x[4] + y[4];
865  z[5] = x[5] + y[5];
866  z[6] = x[6] + y[6];
867  z[7] = x[7] + y[7];
868  z[8] = x[8] + y[8];
869 }
870 template <typename T>
871 inline void IAdd10(const T x[10], const T y[10], T z[10]) {
872  z[0] = x[0] + y[0];
873  z[1] = x[1] + y[1];
874  z[2] = x[2] + y[2];
875  z[3] = x[3] + y[3];
876  z[4] = x[4] + y[4];
877  z[5] = x[5] + y[5];
878  z[6] = x[6] + y[6];
879  z[7] = x[7] + y[7];
880  z[8] = x[8] + y[8];
881  z[9] = x[9] + y[9];
882 }
883 template <typename T>
884 inline void IAdd11(const T x[11], const T y[11], T z[11]) {
885  z[0] = x[0] + y[0];
886  z[1] = x[1] + y[1];
887  z[2] = x[2] + y[2];
888  z[3] = x[3] + y[3];
889  z[4] = x[4] + y[4];
890  z[5] = x[5] + y[5];
891  z[6] = x[6] + y[6];
892  z[7] = x[7] + y[7];
893  z[8] = x[8] + y[8];
894  z[9] = x[9] + y[9];
895  z[10] = x[10] + y[10];
896 }
897 template <typename T>
898 inline void IAdd12(const T x[12], const T y[12], T z[12]) {
899  z[0] = x[0] + y[0];
900  z[1] = x[1] + y[1];
901  z[2] = x[2] + y[2];
902  z[3] = x[3] + y[3];
903  z[4] = x[4] + y[4];
904  z[5] = x[5] + y[5];
905  z[6] = x[6] + y[6];
906  z[7] = x[7] + y[7];
907  z[8] = x[8] + y[8];
908  z[9] = x[9] + y[9];
909  z[10] = x[10] + y[10];
910  z[11] = x[11] + y[11];
911 }
912 template <typename T>
913 inline void IAdd13(const T x[13], const T y[13], T z[13]) {
914  z[0] = x[0] + y[0];
915  z[1] = x[1] + y[1];
916  z[2] = x[2] + y[2];
917  z[3] = x[3] + y[3];
918  z[4] = x[4] + y[4];
919  z[5] = x[5] + y[5];
920  z[6] = x[6] + y[6];
921  z[7] = x[7] + y[7];
922  z[8] = x[8] + y[8];
923  z[9] = x[9] + y[9];
924  z[10] = x[10] + y[10];
925  z[11] = x[11] + y[11];
926  z[12] = x[12] + y[12];
927 }
928 template <typename T>
929 inline void IAdd14(const T x[14], const T y[14], T z[14]) {
930  z[0] = x[0] + y[0];
931  z[1] = x[1] + y[1];
932  z[2] = x[2] + y[2];
933  z[3] = x[3] + y[3];
934  z[4] = x[4] + y[4];
935  z[5] = x[5] + y[5];
936  z[6] = x[6] + y[6];
937  z[7] = x[7] + y[7];
938  z[8] = x[8] + y[8];
939  z[9] = x[9] + y[9];
940  z[10] = x[10] + y[10];
941  z[11] = x[11] + y[11];
942  z[12] = x[12] + y[12];
943  z[13] = x[13] + y[13];
944 }
945 template <typename T>
946 inline void IAdd15(const T x[15], const T y[15], T z[15]) {
947  z[0] = x[0] + y[0];
948  z[1] = x[1] + y[1];
949  z[2] = x[2] + y[2];
950  z[3] = x[3] + y[3];
951  z[4] = x[4] + y[4];
952  z[5] = x[5] + y[5];
953  z[6] = x[6] + y[6];
954  z[7] = x[7] + y[7];
955  z[8] = x[8] + y[8];
956  z[9] = x[9] + y[9];
957  z[10] = x[10] + y[10];
958  z[11] = x[11] + y[11];
959  z[12] = x[12] + y[12];
960  z[13] = x[13] + y[13];
961  z[14] = x[14] + y[14];
962 }
963 template <typename T>
964 inline void IAdd16(const T x[16], const T y[16], T z[16]) {
965  z[0] = x[0] + y[0];
966  z[1] = x[1] + y[1];
967  z[2] = x[2] + y[2];
968  z[3] = x[3] + y[3];
969  z[4] = x[4] + y[4];
970  z[5] = x[5] + y[5];
971  z[6] = x[6] + y[6];
972  z[7] = x[7] + y[7];
973  z[8] = x[8] + y[8];
974  z[9] = x[9] + y[9];
975  z[10] = x[10] + y[10];
976  z[11] = x[11] + y[11];
977  z[12] = x[12] + y[12];
978  z[13] = x[13] + y[13];
979  z[14] = x[14] + y[14];
980  z[15] = x[15] + y[15];
981 }
982 template <typename T>
983 inline void IAdd20(const T x[20], const T y[20], T z[20]) {
984  z[0] = x[0] + y[0];
985  z[1] = x[1] + y[1];
986  z[2] = x[2] + y[2];
987  z[3] = x[3] + y[3];
988  z[4] = x[4] + y[4];
989  z[5] = x[5] + y[5];
990  z[6] = x[6] + y[6];
991  z[7] = x[7] + y[7];
992  z[8] = x[8] + y[8];
993  z[9] = x[9] + y[9];
994  z[10] = x[10] + y[10];
995  z[11] = x[11] + y[11];
996  z[12] = x[12] + y[12];
997  z[13] = x[13] + y[13];
998  z[14] = x[14] + y[14];
999  z[15] = x[15] + y[15];
1000  z[16] = x[16] + y[16];
1001  z[17] = x[17] + y[17];
1002  z[18] = x[18] + y[18];
1003  z[19] = x[19] + y[19];
1004 }
1005 
1006 // Compute y=y+x where x and y are n-dimensional vectors
1007 template <typename T>
1008 inline void IAdd(const T *x, T *y, int n) {
1009  for (int i = 0; i < n; i++) {
1010  y[i] += x[i];
1011  }
1012 }
1013 template <typename T>
1014 inline void IAdd1(const T x[1], T y[1]) {
1015  y[0] += x[0];
1016 }
1017 template <typename T>
1018 inline void IAdd2(const T x[2], T y[2]) {
1019  y[0] += x[0];
1020  y[1] += x[1];
1021 }
1022 template <typename T>
1023 inline void IAdd3(const T x[3], T y[3]) {
1024  y[0] += x[0];
1025  y[1] += x[1];
1026  y[2] += x[2];
1027 }
1028 template <typename T>
1029 inline void IAdd4(const T x[4], T y[4]) {
1030  y[0] += x[0];
1031  y[1] += x[1];
1032  y[2] += x[2];
1033  y[3] += x[3];
1034 }
1035 template <typename T>
1036 inline void IAdd5(const T x[5], T y[5]) {
1037  y[0] += x[0];
1038  y[1] += x[1];
1039  y[2] += x[2];
1040  y[3] += x[3];
1041  y[4] += x[4];
1042 }
1043 template <typename T>
1044 inline void IAdd6(const T x[6], T y[6]) {
1045  y[0] += x[0];
1046  y[1] += x[1];
1047  y[2] += x[2];
1048  y[3] += x[3];
1049  y[4] += x[4];
1050  y[5] += x[5];
1051 }
1052 template <typename T>
1053 inline void IAdd7(const T x[7], T y[7]) {
1054  y[0] += x[0];
1055  y[1] += x[1];
1056  y[2] += x[2];
1057  y[3] += x[3];
1058  y[4] += x[4];
1059  y[5] += x[5];
1060  y[6] += x[6];
1061 }
1062 template <typename T>
1063 inline void IAdd8(const T x[8], T y[8]) {
1064  y[0] += x[0];
1065  y[1] += x[1];
1066  y[2] += x[2];
1067  y[3] += x[3];
1068  y[4] += x[4];
1069  y[5] += x[5];
1070  y[6] += x[6];
1071  y[7] += x[7];
1072 }
1073 template <typename T>
1074 inline void IAdd9(const T x[9], T y[9]) {
1075  y[0] += x[0];
1076  y[1] += x[1];
1077  y[2] += x[2];
1078  y[3] += x[3];
1079  y[4] += x[4];
1080  y[5] += x[5];
1081  y[6] += x[6];
1082  y[7] += x[7];
1083  y[8] += x[8];
1084 }
1085 template <typename T>
1086 inline void IAdd10(const T x[10], T y[10]) {
1087  y[0] += x[0];
1088  y[1] += x[1];
1089  y[2] += x[2];
1090  y[3] += x[3];
1091  y[4] += x[4];
1092  y[5] += x[5];
1093  y[6] += x[6];
1094  y[7] += x[7];
1095  y[8] += x[8];
1096  y[9] += x[9];
1097 }
1098 template <typename T>
1099 inline void IAdd11(const T x[11], T y[11]) {
1100  y[0] += x[0];
1101  y[1] += x[1];
1102  y[2] += x[2];
1103  y[3] += x[3];
1104  y[4] += x[4];
1105  y[5] += x[5];
1106  y[6] += x[6];
1107  y[7] += x[7];
1108  y[8] += x[8];
1109  y[9] += x[9];
1110  y[10] += x[10];
1111 }
1112 template <typename T>
1113 inline void IAdd12(const T x[12], T y[12]) {
1114  y[0] += x[0];
1115  y[1] += x[1];
1116  y[2] += x[2];
1117  y[3] += x[3];
1118  y[4] += x[4];
1119  y[5] += x[5];
1120  y[6] += x[6];
1121  y[7] += x[7];
1122  y[8] += x[8];
1123  y[9] += x[9];
1124  y[10] += x[10];
1125  y[11] += x[11];
1126 }
1127 template <typename T>
1128 inline void IAdd13(const T x[13], T y[13]) {
1129  y[0] += x[0];
1130  y[1] += x[1];
1131  y[2] += x[2];
1132  y[3] += x[3];
1133  y[4] += x[4];
1134  y[5] += x[5];
1135  y[6] += x[6];
1136  y[7] += x[7];
1137  y[8] += x[8];
1138  y[9] += x[9];
1139  y[10] += x[10];
1140  y[11] += x[11];
1141  y[12] += x[12];
1142 }
1143 template <typename T>
1144 inline void IAdd14(const T x[14], T y[14]) {
1145  y[0] += x[0];
1146  y[1] += x[1];
1147  y[2] += x[2];
1148  y[3] += x[3];
1149  y[4] += x[4];
1150  y[5] += x[5];
1151  y[6] += x[6];
1152  y[7] += x[7];
1153  y[8] += x[8];
1154  y[9] += x[9];
1155  y[10] += x[10];
1156  y[11] += x[11];
1157  y[12] += x[12];
1158  y[13] += x[13];
1159 }
1160 template <typename T>
1161 inline void IAdd15(const T x[15], T y[15]) {
1162  y[0] += x[0];
1163  y[1] += x[1];
1164  y[2] += x[2];
1165  y[3] += x[3];
1166  y[4] += x[4];
1167  y[5] += x[5];
1168  y[6] += x[6];
1169  y[7] += x[7];
1170  y[8] += x[8];
1171  y[9] += x[9];
1172  y[10] += x[10];
1173  y[11] += x[11];
1174  y[12] += x[12];
1175  y[13] += x[13];
1176  y[14] += x[14];
1177 }
1178 template <typename T>
1179 inline void IAdd16(const T x[16], T y[16]) {
1180  y[0] += x[0];
1181  y[1] += x[1];
1182  y[2] += x[2];
1183  y[3] += x[3];
1184  y[4] += x[4];
1185  y[5] += x[5];
1186  y[6] += x[6];
1187  y[7] += x[7];
1188  y[8] += x[8];
1189  y[9] += x[9];
1190  y[10] += x[10];
1191  y[11] += x[11];
1192  y[12] += x[12];
1193  y[13] += x[13];
1194  y[14] += x[14];
1195  y[15] += x[15];
1196 }
1197 template <typename T>
1198 inline void IAdd20(const T x[20], T y[20]) {
1199  y[0] += x[0];
1200  y[1] += x[1];
1201  y[2] += x[2];
1202  y[3] += x[3];
1203  y[4] += x[4];
1204  y[5] += x[5];
1205  y[6] += x[6];
1206  y[7] += x[7];
1207  y[8] += x[8];
1208  y[9] += x[9];
1209  y[10] += x[10];
1210  y[11] += x[11];
1211  y[12] += x[12];
1212  y[13] += x[13];
1213  y[14] += x[14];
1214  y[15] += x[15];
1215  y[16] += x[16];
1216  y[17] += x[17];
1217  y[18] += x[18];
1218  y[19] += x[19];
1219 }
1220 
1221 // Compute y=y+x*k where x and y are n-dimensional vectors, k is constant
1222 template <typename T>
1223 inline void IAddScaled(const T *x, T *y, int n, T k) {
1224  for (int i = 0; i < n; i++) {
1225  y[i] += (x[i] * k);
1226  }
1227 }
1228 template <typename T>
1229 inline void IAddScaled1(const T x[1], T y[1], T k) {
1230  y[0] += x[0] * k;
1231 }
1232 template <typename T>
1233 inline void IAddScaled2(const T x[2], T y[2], T k) {
1234  y[0] += x[0] * k;
1235  y[1] += x[1] * k;
1236 }
1237 template <typename T>
1238 inline void IAddScaled3(const T x[3], T y[3], T k) {
1239  y[0] += x[0] * k;
1240  y[1] += x[1] * k;
1241  y[2] += x[2] * k;
1242 }
1243 template <typename T>
1244 inline void IAddScaled4(const T x[4], T y[4], T k) {
1245  y[0] += x[0] * k;
1246  y[1] += x[1] * k;
1247  y[2] += x[2] * k;
1248  y[3] += x[3] * k;
1249 }
1250 template <typename T>
1251 inline void IAddScaled5(const T x[5], T y[5], T k) {
1252  y[0] += x[0] * k;
1253  y[1] += x[1] * k;
1254  y[2] += x[2] * k;
1255  y[3] += x[3] * k;
1256  y[4] += x[4] * k;
1257 }
1258 template <typename T>
1259 inline void IAddScaled6(const T x[6], T y[6], T k) {
1260  y[0] += x[0] * k;
1261  y[1] += x[1] * k;
1262  y[2] += x[2] * k;
1263  y[3] += x[3] * k;
1264  y[4] += x[4] * k;
1265  y[5] += x[5] * k;
1266 }
1267 template <typename T>
1268 inline void IAddScaled7(const T x[7], T y[7], T k) {
1269  y[0] += x[0] * k;
1270  y[1] += x[1] * k;
1271  y[2] += x[2] * k;
1272  y[3] += x[3] * k;
1273  y[4] += x[4] * k;
1274  y[5] += x[5] * k;
1275  y[6] += x[6] * k;
1276 }
1277 template <typename T>
1278 inline void IAddScaled8(const T x[8], T y[8], T k) {
1279  y[0] += x[0] * k;
1280  y[1] += x[1] * k;
1281  y[2] += x[2] * k;
1282  y[3] += x[3] * k;
1283  y[4] += x[4] * k;
1284  y[5] += x[5] * k;
1285  y[6] += x[6] * k;
1286  y[7] += x[7] * k;
1287 }
1288 template <typename T>
1289 inline void IAddScaled9(const T x[9], T y[9], T k) {
1290  y[0] += x[0] * k;
1291  y[1] += x[1] * k;
1292  y[2] += x[2] * k;
1293  y[3] += x[3] * k;
1294  y[4] += x[4] * k;
1295  y[5] += x[5] * k;
1296  y[6] += x[6] * k;
1297  y[7] += x[7] * k;
1298  y[8] += x[8] * k;
1299 }
1300 
1301 // Compute z=x+y*k where x, y and z are n-dimensional vectors, k is constant
1302 template <typename T>
1303 inline void IAddScaled(const T *x, const T *y, T *z, int n, T k) {
1304  for (int i = 0; i < n; i++) {
1305  z[i] = x[i] + y[i] * k;
1306  }
1307 }
1308 template <typename T>
1309 inline void IAddScaled1(const T x[1], const T y[1], T z[1], T k) {
1310  z[0] = x[0] + y[0] * k;
1311 }
1312 template <typename T>
1313 inline void IAddScaled2(const T x[2], const T y[2], T z[2], T k) {
1314  z[0] = x[0] + y[0] * k;
1315  z[1] = x[1] + y[1] * k;
1316 }
1317 template <typename T>
1318 inline void IAddScaled3(const T x[3], const T y[3], T z[3], T k) {
1319  z[0] = x[0] + y[0] * k;
1320  z[1] = x[1] + y[1] * k;
1321  z[2] = x[2] + y[2] * k;
1322 }
1323 template <typename T>
1324 inline void IAddScaled4(const T x[4], const T y[4], T z[4], T k) {
1325  z[0] = x[0] + y[0] * k;
1326  z[1] = x[1] + y[1] * k;
1327  z[2] = x[2] + y[2] * k;
1328  z[3] = x[3] + y[3] * k;
1329 }
1330 template <typename T>
1331 inline void IAddScaled5(const T x[5], const T y[5], T z[5], T k) {
1332  z[0] = x[0] + y[0] * k;
1333  z[1] = x[1] + y[1] * k;
1334  z[2] = x[2] + y[2] * k;
1335  z[3] = x[3] + y[3] * k;
1336  z[4] = x[4] + y[4] * k;
1337 }
1338 template <typename T>
1339 inline void IAddScaled6(const T x[6], const T y[6], T z[6], T k) {
1340  z[0] = x[0] + y[0] * k;
1341  z[1] = x[1] + y[1] * k;
1342  z[2] = x[2] + y[2] * k;
1343  z[3] = x[3] + y[3] * k;
1344  z[4] = x[4] + y[4] * k;
1345  z[5] = x[5] + y[5] * k;
1346 }
1347 template <typename T>
1348 inline void IAddScaled7(const T x[7], const T y[7], T z[7], T k) {
1349  z[0] = x[0] + y[0] * k;
1350  z[1] = x[1] + y[1] * k;
1351  z[2] = x[2] + y[2] * k;
1352  z[3] = x[3] + y[3] * k;
1353  z[4] = x[4] + y[4] * k;
1354  z[5] = x[5] + y[5] * k;
1355  z[6] = x[6] + y[6] * k;
1356 }
1357 template <typename T>
1358 inline void IAddScaled8(const T x[8], const T y[8], T z[8], T k) {
1359  z[0] = x[0] + y[0] * k;
1360  z[1] = x[1] + y[1] * k;
1361  z[2] = x[2] + y[2] * k;
1362  z[3] = x[3] + y[3] * k;
1363  z[4] = x[4] + y[4] * k;
1364  z[5] = x[5] + y[5] * k;
1365  z[6] = x[6] + y[6] * k;
1366  z[7] = x[7] + y[7] * k;
1367 }
1368 template <typename T>
1369 inline void IAddScaled9(const T x[9], const T y[9], T z[9], T k) {
1370  z[0] = x[0] + y[0] * k;
1371  z[1] = x[1] + y[1] * k;
1372  z[2] = x[2] + y[2] * k;
1373  z[3] = x[3] + y[3] * k;
1374  z[4] = x[4] + y[4] * k;
1375  z[5] = x[5] + y[5] * k;
1376  z[6] = x[6] + y[6] * k;
1377  z[7] = x[7] + y[7] * k;
1378  z[8] = x[8] + y[8] * k;
1379 }
1380 
1381 // Compute x=x-c where x is n-dimensional vectors and c is a constant
1382 template <typename T>
1383 inline void ISub(T *x, int n, T k) {
1384  for (int i = 0; i < n; i++) {
1385  x[i] -= k;
1386  }
1387 }
1388 // Compute z=x-y where x, y, and z are n-dimensional vectors
1389 template <typename T>
1390 inline void ISub(const T *x, const T *y, int n, T *z) {
1391  for (int i = 0; i < n; i++) {
1392  z[i] = x[i] - y[i];
1393  }
1394 }
1395 template <typename T>
1396 inline void ISub1(const T x[1], const T y[1], T z[1]) {
1397  z[0] = x[0] - y[0];
1398 }
1399 template <typename T>
1400 inline void ISub2(const T x[2], const T y[2], T z[2]) {
1401  z[0] = x[0] - y[0];
1402  z[1] = x[1] - y[1];
1403 }
1404 template <typename T>
1405 inline void ISub3(const T x[3], const T y[3], T z[3]) {
1406  z[0] = x[0] - y[0];
1407  z[1] = x[1] - y[1];
1408  z[2] = x[2] - y[2];
1409 }
1410 template <typename T>
1411 inline void ISub4(const T x[4], const T y[4], T z[4]) {
1412  z[0] = x[0] - y[0];
1413  z[1] = x[1] - y[1];
1414  z[2] = x[2] - y[2];
1415  z[3] = x[3] - y[3];
1416 }
1417 template <typename T>
1418 inline void ISub5(const T x[5], const T y[5], T z[5]) {
1419  z[0] = x[0] - y[0];
1420  z[1] = x[1] - y[1];
1421  z[2] = x[2] - y[2];
1422  z[3] = x[3] - y[3];
1423  z[4] = x[4] - y[4];
1424 }
1425 template <typename T>
1426 inline void ISub6(const T x[6], const T y[6], T z[6]) {
1427  z[0] = x[0] - y[0];
1428  z[1] = x[1] - y[1];
1429  z[2] = x[2] - y[2];
1430  z[3] = x[3] - y[3];
1431  z[4] = x[4] - y[4];
1432  z[5] = x[5] - y[5];
1433 }
1434 template <typename T>
1435 inline void ISub7(const T x[7], const T y[7], T z[7]) {
1436  z[0] = x[0] - y[0];
1437  z[1] = x[1] - y[1];
1438  z[2] = x[2] - y[2];
1439  z[3] = x[3] - y[3];
1440  z[4] = x[4] - y[4];
1441  z[5] = x[5] - y[5];
1442  z[6] = x[6] - y[6];
1443 }
1444 template <typename T>
1445 inline void ISub8(const T x[8], const T y[8], T z[8]) {
1446  z[0] = x[0] - y[0];
1447  z[1] = x[1] - y[1];
1448  z[2] = x[2] - y[2];
1449  z[3] = x[3] - y[3];
1450  z[4] = x[4] - y[4];
1451  z[5] = x[5] - y[5];
1452  z[6] = x[6] - y[6];
1453  z[7] = x[7] - y[7];
1454 }
1455 template <typename T>
1456 inline void ISub9(const T x[9], const T y[9], T z[9]) {
1457  z[0] = x[0] - y[0];
1458  z[1] = x[1] - y[1];
1459  z[2] = x[2] - y[2];
1460  z[3] = x[3] - y[3];
1461  z[4] = x[4] - y[4];
1462  z[5] = x[5] - y[5];
1463  z[6] = x[6] - y[6];
1464  z[7] = x[7] - y[7];
1465  z[8] = x[8] - y[8];
1466 }
1467 template <typename T>
1468 inline void ISub10(const T x[10], const T y[10], T z[10]) {
1469  z[0] = x[0] - y[0];
1470  z[1] = x[1] - y[1];
1471  z[2] = x[2] - y[2];
1472  z[3] = x[3] - y[3];
1473  z[4] = x[4] - y[4];
1474  z[5] = x[5] - y[5];
1475  z[6] = x[6] - y[6];
1476  z[7] = x[7] - y[7];
1477  z[8] = x[8] - y[8];
1478  z[9] = x[9] - y[9];
1479 }
1480 template <typename T>
1481 inline void ISub11(const T x[11], const T y[11], T z[11]) {
1482  z[0] = x[0] - y[0];
1483  z[1] = x[1] - y[1];
1484  z[2] = x[2] - y[2];
1485  z[3] = x[3] - y[3];
1486  z[4] = x[4] - y[4];
1487  z[5] = x[5] - y[5];
1488  z[6] = x[6] - y[6];
1489  z[7] = x[7] - y[7];
1490  z[8] = x[8] - y[8];
1491  z[9] = x[9] - y[9];
1492  z[10] = x[10] - y[10];
1493 }
1494 template <typename T>
1495 inline void ISub12(const T x[12], const T y[12], T z[12]) {
1496  z[0] = x[0] - y[0];
1497  z[1] = x[1] - y[1];
1498  z[2] = x[2] - y[2];
1499  z[3] = x[3] - y[3];
1500  z[4] = x[4] - y[4];
1501  z[5] = x[5] - y[5];
1502  z[6] = x[6] - y[6];
1503  z[7] = x[7] - y[7];
1504  z[8] = x[8] - y[8];
1505  z[9] = x[9] - y[9];
1506  z[10] = x[10] - y[10];
1507  z[11] = x[11] - y[11];
1508 }
1509 template <typename T>
1510 inline void ISub13(const T x[13], const T y[13], T z[13]) {
1511  z[0] = x[0] - y[0];
1512  z[1] = x[1] - y[1];
1513  z[2] = x[2] - y[2];
1514  z[3] = x[3] - y[3];
1515  z[4] = x[4] - y[4];
1516  z[5] = x[5] - y[5];
1517  z[6] = x[6] - y[6];
1518  z[7] = x[7] - y[7];
1519  z[8] = x[8] - y[8];
1520  z[9] = x[9] - y[9];
1521  z[10] = x[10] - y[10];
1522  z[11] = x[11] - y[11];
1523  z[12] = x[12] - y[12];
1524 }
1525 template <typename T>
1526 inline void ISub14(const T x[14], const T y[14], T z[14]) {
1527  z[0] = x[0] - y[0];
1528  z[1] = x[1] - y[1];
1529  z[2] = x[2] - y[2];
1530  z[3] = x[3] - y[3];
1531  z[4] = x[4] - y[4];
1532  z[5] = x[5] - y[5];
1533  z[6] = x[6] - y[6];
1534  z[7] = x[7] - y[7];
1535  z[8] = x[8] - y[8];
1536  z[9] = x[9] - y[9];
1537  z[10] = x[10] - y[10];
1538  z[11] = x[11] - y[11];
1539  z[12] = x[12] - y[12];
1540  z[13] = x[13] - y[13];
1541 }
1542 template <typename T>
1543 inline void ISub15(const T x[15], const T y[15], T z[15]) {
1544  z[0] = x[0] - y[0];
1545  z[1] = x[1] - y[1];
1546  z[2] = x[2] - y[2];
1547  z[3] = x[3] - y[3];
1548  z[4] = x[4] - y[4];
1549  z[5] = x[5] - y[5];
1550  z[6] = x[6] - y[6];
1551  z[7] = x[7] - y[7];
1552  z[8] = x[8] - y[8];
1553  z[9] = x[9] - y[9];
1554  z[10] = x[10] - y[10];
1555  z[11] = x[11] - y[11];
1556  z[12] = x[12] - y[12];
1557  z[13] = x[13] - y[13];
1558  z[14] = x[14] - y[14];
1559 }
1560 template <typename T>
1561 inline void ISub16(const T x[16], const T y[16], T z[16]) {
1562  z[0] = x[0] - y[0];
1563  z[1] = x[1] - y[1];
1564  z[2] = x[2] - y[2];
1565  z[3] = x[3] - y[3];
1566  z[4] = x[4] - y[4];
1567  z[5] = x[5] - y[5];
1568  z[6] = x[6] - y[6];
1569  z[7] = x[7] - y[7];
1570  z[8] = x[8] - y[8];
1571  z[9] = x[9] - y[9];
1572  z[10] = x[10] - y[10];
1573  z[11] = x[11] - y[11];
1574  z[12] = x[12] - y[12];
1575  z[13] = x[13] - y[13];
1576  z[14] = x[14] - y[14];
1577  z[15] = x[15] - y[15];
1578 }
1579 
1580 // Compute y=y-x where x and y are n-dimensional vectors
1581 template <typename T>
1582 inline void ISub(const T *x, T *y, int n) {
1583  for (int i = 0; i < n; i++) {
1584  y[i] -= x[i];
1585  }
1586 }
1587 template <typename T>
1588 inline void ISub1(const T x[1], T y[1]) {
1589  y[0] -= x[0];
1590 }
1591 template <typename T>
1592 inline void ISub2(const T x[2], T y[2]) {
1593  y[0] -= x[0];
1594  y[1] -= x[1];
1595 }
1596 template <typename T>
1597 inline void ISub3(const T x[3], T y[3]) {
1598  y[0] -= x[0];
1599  y[1] -= x[1];
1600  y[2] -= x[2];
1601 }
1602 template <typename T>
1603 inline void ISub4(const T x[4], T y[4]) {
1604  y[0] -= x[0];
1605  y[1] -= x[1];
1606  y[2] -= x[2];
1607  y[3] -= x[3];
1608 }
1609 template <typename T>
1610 inline void ISub5(const T x[5], T y[5]) {
1611  y[0] -= x[0];
1612  y[1] -= x[1];
1613  y[2] -= x[2];
1614  y[3] -= x[3];
1615  y[4] -= x[4];
1616 }
1617 template <typename T>
1618 inline void ISub6(const T x[6], T y[6]) {
1619  y[0] -= x[0];
1620  y[1] -= x[1];
1621  y[2] -= x[2];
1622  y[3] -= x[3];
1623  y[4] -= x[4];
1624  y[5] -= x[5];
1625 }
1626 template <typename T>
1627 inline void ISub7(const T x[7], T y[7]) {
1628  y[0] -= x[0];
1629  y[1] -= x[1];
1630  y[2] -= x[2];
1631  y[3] -= x[3];
1632  y[4] -= x[4];
1633  y[5] -= x[5];
1634  y[6] -= x[6];
1635 }
1636 template <typename T>
1637 inline void ISub8(const T x[8], T y[8]) {
1638  y[0] -= x[0];
1639  y[1] -= x[1];
1640  y[2] -= x[2];
1641  y[3] -= x[3];
1642  y[4] -= x[4];
1643  y[5] -= x[5];
1644  y[6] -= x[6];
1645  y[7] -= x[7];
1646 }
1647 template <typename T>
1648 inline void ISub9(const T x[9], T y[9]) {
1649  y[0] -= x[0];
1650  y[1] -= x[1];
1651  y[2] -= x[2];
1652  y[3] -= x[3];
1653  y[4] -= x[4];
1654  y[5] -= x[5];
1655  y[6] -= x[6];
1656  y[7] -= x[7];
1657  y[8] -= x[8];
1658 }
1659 template <typename T>
1660 inline void ISub10(const T x[10], T y[10]) {
1661  y[0] -= x[0];
1662  y[1] -= x[1];
1663  y[2] -= x[2];
1664  y[3] -= x[3];
1665  y[4] -= x[4];
1666  y[5] -= x[5];
1667  y[6] -= x[6];
1668  y[7] -= x[7];
1669  y[8] -= x[8];
1670  y[9] -= x[9];
1671 }
1672 template <typename T>
1673 inline void ISub11(const T x[11], T y[11]) {
1674  y[0] -= x[0];
1675  y[1] -= x[1];
1676  y[2] -= x[2];
1677  y[3] -= x[3];
1678  y[4] -= x[4];
1679  y[5] -= x[5];
1680  y[6] -= x[6];
1681  y[7] -= x[7];
1682  y[8] -= x[8];
1683  y[9] -= x[9];
1684  y[10] -= x[10];
1685 }
1686 template <typename T>
1687 inline void ISub12(const T x[12], T y[12]) {
1688  y[0] -= x[0];
1689  y[1] -= x[1];
1690  y[2] -= x[2];
1691  y[3] -= x[3];
1692  y[4] -= x[4];
1693  y[5] -= x[5];
1694  y[6] -= x[6];
1695  y[7] -= x[7];
1696  y[8] -= x[8];
1697  y[9] -= x[9];
1698  y[10] -= x[10];
1699  y[11] -= x[11];
1700 }
1701 template <typename T>
1702 inline void ISub13(const T x[13], T y[13]) {
1703  y[0] -= x[0];
1704  y[1] -= x[1];
1705  y[2] -= x[2];
1706  y[3] -= x[3];
1707  y[4] -= x[4];
1708  y[5] -= x[5];
1709  y[6] -= x[6];
1710  y[7] -= x[7];
1711  y[8] -= x[8];
1712  y[9] -= x[9];
1713  y[10] -= x[10];
1714  y[11] -= x[11];
1715  y[12] -= x[12];
1716 }
1717 template <typename T>
1718 inline void ISub14(const T x[14], T y[14]) {
1719  y[0] -= x[0];
1720  y[1] -= x[1];
1721  y[2] -= x[2];
1722  y[3] -= x[3];
1723  y[4] -= x[4];
1724  y[5] -= x[5];
1725  y[6] -= x[6];
1726  y[7] -= x[7];
1727  y[8] -= x[8];
1728  y[9] -= x[9];
1729  y[10] -= x[10];
1730  y[11] -= x[11];
1731  y[12] -= x[12];
1732  y[13] -= x[13];
1733 }
1734 template <typename T>
1735 inline void ISub15(const T x[15], T y[15]) {
1736  y[0] -= x[0];
1737  y[1] -= x[1];
1738  y[2] -= x[2];
1739  y[3] -= x[3];
1740  y[4] -= x[4];
1741  y[5] -= x[5];
1742  y[6] -= x[6];
1743  y[7] -= x[7];
1744  y[8] -= x[8];
1745  y[9] -= x[9];
1746  y[10] -= x[10];
1747  y[11] -= x[11];
1748  y[12] -= x[12];
1749  y[13] -= x[13];
1750  y[14] -= x[14];
1751 }
1752 template <typename T>
1753 inline void ISub16(const T x[16], T y[16]) {
1754  y[0] -= x[0];
1755  y[1] -= x[1];
1756  y[2] -= x[2];
1757  y[3] -= x[3];
1758  y[4] -= x[4];
1759  y[5] -= x[5];
1760  y[6] -= x[6];
1761  y[7] -= x[7];
1762  y[8] -= x[8];
1763  y[9] -= x[9];
1764  y[10] -= x[10];
1765  y[11] -= x[11];
1766  y[12] -= x[12];
1767  y[13] -= x[13];
1768  y[14] -= x[14];
1769  y[15] -= x[15];
1770 }
1771 // Compute y=y-x*k where x and y are n-dimensional vectors, k is constant
1772 template <typename T>
1773 inline void ISubScaled(const T *x, T *y, int n, T k) {
1774  for (int i = 0; i < n; i++) {
1775  y[i] -= (x[i] * k);
1776  }
1777 }
1778 template <typename T>
1779 inline void ISubScaled1(const T x[1], T y[1], T k) {
1780  y[0] -= x[0] * k;
1781 }
1782 template <typename T>
1783 inline void ISubScaled2(const T x[2], T y[2], T k) {
1784  y[0] -= x[0] * k;
1785  y[1] -= x[1] * k;
1786 }
1787 template <typename T>
1788 inline void ISubScaled3(const T x[3], T y[3], T k) {
1789  y[0] -= x[0] * k;
1790  y[1] -= x[1] * k;
1791  y[2] -= x[2] * k;
1792 }
1793 template <typename T>
1794 inline void ISubScaled4(const T x[4], T y[4], T k) {
1795  y[0] -= x[0] * k;
1796  y[1] -= x[1] * k;
1797  y[2] -= x[2] * k;
1798  y[3] -= x[3] * k;
1799 }
1800 template <typename T>
1801 inline void ISubScaled5(const T x[5], T y[5], T k) {
1802  y[0] -= x[0] * k;
1803  y[1] -= x[1] * k;
1804  y[2] -= x[2] * k;
1805  y[3] -= x[3] * k;
1806  y[4] -= x[4] * k;
1807 }
1808 template <typename T>
1809 inline void ISubScaled6(const T x[6], T y[6], T k) {
1810  y[0] -= x[0] * k;
1811  y[1] -= x[1] * k;
1812  y[2] -= x[2] * k;
1813  y[3] -= x[3] * k;
1814  y[4] -= x[4] * k;
1815  y[5] -= x[5] * k;
1816 }
1817 template <typename T>
1818 inline void ISubScaled7(const T x[7], T y[7], T k) {
1819  y[0] -= x[0] * k;
1820  y[1] -= x[1] * k;
1821  y[2] -= x[2] * k;
1822  y[3] -= x[3] * k;
1823  y[4] -= x[4] * k;
1824  y[5] -= x[5] * k;
1825  y[6] -= x[6] * k;
1826 }
1827 template <typename T>
1828 inline void ISubScaled8(const T x[8], T y[8], T k) {
1829  y[0] -= x[0] * k;
1830  y[1] -= x[1] * k;
1831  y[2] -= x[2] * k;
1832  y[3] -= x[3] * k;
1833  y[4] -= x[4] * k;
1834  y[5] -= x[5] * k;
1835  y[6] -= x[6] * k;
1836  y[7] -= x[7] * k;
1837 }
1838 template <typename T>
1839 inline void ISubScaled9(const T x[9], T y[9], T k) {
1840  y[0] -= x[0] * k;
1841  y[1] -= x[1] * k;
1842  y[2] -= x[2] * k;
1843  y[3] -= x[3] * k;
1844  y[4] -= x[4] * k;
1845  y[5] -= x[5] * k;
1846  y[6] -= x[6] * k;
1847  y[7] -= x[7] * k;
1848  y[8] -= x[8] * k;
1849 }
1850 
1851 // Rescale n-dimensional vector x with a scale factor sf (inplace)
1852 template <typename T>
1853 inline void IScale(T *x, int n, T sf) {
1854  for (int i = 0; i < n; i++) {
1855  x[i] *= sf;
1856  }
1857 }
1858 template <typename T>
1859 inline void IScale1(T x[1], T sf) {
1860  x[0] *= sf;
1861 }
1862 template <typename T>
1863 inline void IScale2(T x[2], T sf) {
1864  x[0] *= sf;
1865  x[1] *= sf;
1866 }
1867 template <typename T>
1868 inline void IScale3(T x[3], T sf) {
1869  x[0] *= sf;
1870  x[1] *= sf;
1871  x[2] *= sf;
1872 }
1873 template <typename T>
1874 inline void IScale4(T x[4], T sf) {
1875  x[0] *= sf;
1876  x[1] *= sf;
1877  x[2] *= sf;
1878  x[3] *= sf;
1879 }
1880 template <typename T>
1881 inline void IScale5(T x[5], T sf) {
1882  x[0] *= sf;
1883  x[1] *= sf;
1884  x[2] *= sf;
1885  x[3] *= sf;
1886  x[4] *= sf;
1887 }
1888 template <typename T>
1889 inline void IScale6(T x[6], T sf) {
1890  x[0] *= sf;
1891  x[1] *= sf;
1892  x[2] *= sf;
1893  x[3] *= sf;
1894  x[4] *= sf;
1895  x[5] *= sf;
1896 }
1897 template <typename T>
1898 inline void IScale7(T x[7], T sf) {
1899  x[0] *= sf;
1900  x[1] *= sf;
1901  x[2] *= sf;
1902  x[3] *= sf;
1903  x[4] *= sf;
1904  x[5] *= sf;
1905  x[6] *= sf;
1906 }
1907 template <typename T>
1908 inline void IScale8(T x[8], T sf) {
1909  x[0] *= sf;
1910  x[1] *= sf;
1911  x[2] *= sf;
1912  x[3] *= sf;
1913  x[4] *= sf;
1914  x[5] *= sf;
1915  x[6] *= sf;
1916  x[7] *= sf;
1917 }
1918 template <typename T>
1919 inline void IScale9(T x[9], T sf) {
1920  x[0] *= sf;
1921  x[1] *= sf;
1922  x[2] *= sf;
1923  x[3] *= sf;
1924  x[4] *= sf;
1925  x[5] *= sf;
1926  x[6] *= sf;
1927  x[7] *= sf;
1928  x[8] *= sf;
1929 }
1930 template <typename T>
1931 inline void IScale10(T x[10], T sf) {
1932  x[0] *= sf;
1933  x[1] *= sf;
1934  x[2] *= sf;
1935  x[3] *= sf;
1936  x[4] *= sf;
1937  x[5] *= sf;
1938  x[6] *= sf;
1939  x[7] *= sf;
1940  x[8] *= sf;
1941  x[9] *= sf;
1942 }
1943 template <typename T>
1944 inline void IScale11(T x[11], T sf) {
1945  x[0] *= sf;
1946  x[1] *= sf;
1947  x[2] *= sf;
1948  x[3] *= sf;
1949  x[4] *= sf;
1950  x[5] *= sf;
1951  x[6] *= sf;
1952  x[7] *= sf;
1953  x[8] *= sf;
1954  x[9] *= sf;
1955  x[10] *= sf;
1956 }
1957 template <typename T>
1958 inline void IScale12(T x[12], T sf) {
1959  x[0] *= sf;
1960  x[1] *= sf;
1961  x[2] *= sf;
1962  x[3] *= sf;
1963  x[4] *= sf;
1964  x[5] *= sf;
1965  x[6] *= sf;
1966  x[7] *= sf;
1967  x[8] *= sf;
1968  x[9] *= sf;
1969  x[10] *= sf;
1970  x[11] *= sf;
1971 }
1972 template <typename T>
1973 inline void IScale13(T x[13], T sf) {
1974  x[0] *= sf;
1975  x[1] *= sf;
1976  x[2] *= sf;
1977  x[3] *= sf;
1978  x[4] *= sf;
1979  x[5] *= sf;
1980  x[6] *= sf;
1981  x[7] *= sf;
1982  x[8] *= sf;
1983  x[9] *= sf;
1984  x[10] *= sf;
1985  x[11] *= sf;
1986  x[12] *= sf;
1987 }
1988 template <typename T>
1989 inline void IScale14(T x[14], T sf) {
1990  x[0] *= sf;
1991  x[1] *= sf;
1992  x[2] *= sf;
1993  x[3] *= sf;
1994  x[4] *= sf;
1995  x[5] *= sf;
1996  x[6] *= sf;
1997  x[7] *= sf;
1998  x[8] *= sf;
1999  x[9] *= sf;
2000  x[10] *= sf;
2001  x[11] *= sf;
2002  x[12] *= sf;
2003  x[13] *= sf;
2004 }
2005 template <typename T>
2006 inline void IScale15(T x[15], T sf) {
2007  x[0] *= sf;
2008  x[1] *= sf;
2009  x[2] *= sf;
2010  x[3] *= sf;
2011  x[4] *= sf;
2012  x[5] *= sf;
2013  x[6] *= sf;
2014  x[7] *= sf;
2015  x[8] *= sf;
2016  x[9] *= sf;
2017  x[10] *= sf;
2018  x[11] *= sf;
2019  x[12] *= sf;
2020  x[13] *= sf;
2021  x[14] *= sf;
2022 }
2023 template <typename T>
2024 inline void IScale16(T x[16], T sf) {
2025  x[0] *= sf;
2026  x[1] *= sf;
2027  x[2] *= sf;
2028  x[3] *= sf;
2029  x[4] *= sf;
2030  x[5] *= sf;
2031  x[6] *= sf;
2032  x[7] *= sf;
2033  x[8] *= sf;
2034  x[9] *= sf;
2035  x[10] *= sf;
2036  x[11] *= sf;
2037  x[12] *= sf;
2038  x[13] *= sf;
2039  x[14] *= sf;
2040  x[15] *= sf;
2041 }
2042 // Rescale n-dimensional vector x with a scale factor sf, save the result to
2043 // * n-dimensional vector y
2044 template <typename T>
2045 inline void IScale(const T *x, T *y, int n, T sf) {
2046  for (int i = 0; i < n; i++) {
2047  y[i] = x[i] * sf;
2048  }
2049 }
2050 template <typename T>
2051 inline void IScale1(const T x[1], T y[1], T sf) {
2052  y[0] = x[0] * sf;
2053 }
2054 template <typename T>
2055 inline void IScale2(const T x[2], T y[2], T sf) {
2056  y[0] = x[0] * sf;
2057  y[1] = x[1] * sf;
2058 }
2059 template <typename T>
2060 inline void IScale3(const T x[3], T y[3], T sf) {
2061  y[0] = x[0] * sf;
2062  y[1] = x[1] * sf;
2063  y[2] = x[2] * sf;
2064 }
2065 template <typename T>
2066 inline void IScale4(const T x[4], T y[4], T sf) {
2067  y[0] = x[0] * sf;
2068  y[1] = x[1] * sf;
2069  y[2] = x[2] * sf;
2070  y[3] = x[3] * sf;
2071 }
2072 template <typename T>
2073 inline void IScale5(const T x[5], T y[5], T sf) {
2074  y[0] = x[0] * sf;
2075  y[1] = x[1] * sf;
2076  y[2] = x[2] * sf;
2077  y[3] = x[3] * sf;
2078  y[4] = x[4] * sf;
2079 }
2080 template <typename T>
2081 inline void IScale6(const T x[6], T y[6], T sf) {
2082  y[0] = x[0] * sf;
2083  y[1] = x[1] * sf;
2084  y[2] = x[2] * sf;
2085  y[3] = x[3] * sf;
2086  y[4] = x[4] * sf;
2087  y[5] = x[5] * sf;
2088 }
2089 template <typename T>
2090 inline void IScale7(const T x[7], T y[7], T sf) {
2091  y[0] = x[0] * sf;
2092  y[1] = x[1] * sf;
2093  y[2] = x[2] * sf;
2094  y[3] = x[3] * sf;
2095  y[4] = x[4] * sf;
2096  y[5] = x[5] * sf;
2097  y[6] = x[6] * sf;
2098 }
2099 template <typename T>
2100 inline void IScale8(const T x[8], T y[8], T sf) {
2101  y[0] = x[0] * sf;
2102  y[1] = x[1] * sf;
2103  y[2] = x[2] * sf;
2104  y[3] = x[3] * sf;
2105  y[4] = x[4] * sf;
2106  y[5] = x[5] * sf;
2107  y[6] = x[6] * sf;
2108  y[7] = x[7] * sf;
2109 }
2110 template <typename T>
2111 inline void IScale9(const T x[9], T y[9], T sf) {
2112  y[0] = x[0] * sf;
2113  y[1] = x[1] * sf;
2114  y[2] = x[2] * sf;
2115  y[3] = x[3] * sf;
2116  y[4] = x[4] * sf;
2117  y[5] = x[5] * sf;
2118  y[6] = x[6] * sf;
2119  y[7] = x[7] * sf;
2120  y[8] = x[8] * sf;
2121 }
2122 template <typename T>
2123 inline void IScale10(const T x[10], T y[10], T sf) {
2124  y[0] = x[0] * sf;
2125  y[1] = x[1] * sf;
2126  y[2] = x[2] * sf;
2127  y[3] = x[3] * sf;
2128  y[4] = x[4] * sf;
2129  y[5] = x[5] * sf;
2130  y[6] = x[6] * sf;
2131  y[7] = x[7] * sf;
2132  y[8] = x[8] * sf;
2133  y[9] = x[9] * sf;
2134 }
2135 template <typename T>
2136 inline void IScale11(const T x[11], T y[11], T sf) {
2137  y[0] = x[0] * sf;
2138  y[1] = x[1] * sf;
2139  y[2] = x[2] * sf;
2140  y[3] = x[3] * sf;
2141  y[4] = x[4] * sf;
2142  y[5] = x[5] * sf;
2143  y[6] = x[6] * sf;
2144  y[7] = x[7] * sf;
2145  y[8] = x[8] * sf;
2146  y[9] = x[9] * sf;
2147  y[10] = x[10] * sf;
2148 }
2149 template <typename T>
2150 inline void IScale12(const T x[12], T y[12], T sf) {
2151  y[0] = x[0] * sf;
2152  y[1] = x[1] * sf;
2153  y[2] = x[2] * sf;
2154  y[3] = x[3] * sf;
2155  y[4] = x[4] * sf;
2156  y[5] = x[5] * sf;
2157  y[6] = x[6] * sf;
2158  y[7] = x[7] * sf;
2159  y[8] = x[8] * sf;
2160  y[9] = x[9] * sf;
2161  y[10] = x[10] * sf;
2162  y[11] = x[11] * sf;
2163 }
2164 template <typename T>
2165 inline void IScale13(const T x[13], T y[13], T sf) {
2166  y[0] = x[0] * sf;
2167  y[1] = x[1] * sf;
2168  y[2] = x[2] * sf;
2169  y[3] = x[3] * sf;
2170  y[4] = x[4] * sf;
2171  y[5] = x[5] * sf;
2172  y[6] = x[6] * sf;
2173  y[7] = x[7] * sf;
2174  y[8] = x[8] * sf;
2175  y[9] = x[9] * sf;
2176  y[10] = x[10] * sf;
2177  y[11] = x[11] * sf;
2178  y[12] = x[12] * sf;
2179 }
2180 template <typename T>
2181 inline void IScale14(const T x[14], T y[14], T sf) {
2182  y[0] = x[0] * sf;
2183  y[1] = x[1] * sf;
2184  y[2] = x[2] * sf;
2185  y[3] = x[3] * sf;
2186  y[4] = x[4] * sf;
2187  y[5] = x[5] * sf;
2188  y[6] = x[6] * sf;
2189  y[7] = x[7] * sf;
2190  y[8] = x[8] * sf;
2191  y[9] = x[9] * sf;
2192  y[10] = x[10] * sf;
2193  y[11] = x[11] * sf;
2194  y[12] = x[12] * sf;
2195  y[13] = x[13] * sf;
2196 }
2197 template <typename T>
2198 inline void IScale15(const T x[15], T y[15], T sf) {
2199  y[0] = x[0] * sf;
2200  y[1] = x[1] * sf;
2201  y[2] = x[2] * sf;
2202  y[3] = x[3] * sf;
2203  y[4] = x[4] * sf;
2204  y[5] = x[5] * sf;
2205  y[6] = x[6] * sf;
2206  y[7] = x[7] * sf;
2207  y[8] = x[8] * sf;
2208  y[9] = x[9] * sf;
2209  y[10] = x[10] * sf;
2210  y[11] = x[11] * sf;
2211  y[12] = x[12] * sf;
2212  y[13] = x[13] * sf;
2213  y[14] = x[14] * sf;
2214 }
2215 template <typename T>
2216 inline void IScale16(const T x[16], T y[16], T sf) {
2217  y[0] = x[0] * sf;
2218  y[1] = x[1] * sf;
2219  y[2] = x[2] * sf;
2220  y[3] = x[3] * sf;
2221  y[4] = x[4] * sf;
2222  y[5] = x[5] * sf;
2223  y[6] = x[6] * sf;
2224  y[7] = x[7] * sf;
2225  y[8] = x[8] * sf;
2226  y[9] = x[9] * sf;
2227  y[10] = x[10] * sf;
2228  y[11] = x[11] * sf;
2229  y[12] = x[12] * sf;
2230  y[13] = x[13] * sf;
2231  y[14] = x[14] * sf;
2232  y[15] = x[15] * sf;
2233 }
2234 
2235 // Compute dot product of x and y
2236 // inline int IDotU(const unsigned char *x, const unsigned char *y, int n) {
2237 // int acc = 0;
2238 // for (int i = 0; i < n; ++i) {
2239 // acc += (int) x[i] * (int) y[i];
2240 // }
2241 // return acc;
2242 // }
2243 template <typename T>
2244 inline T IDot(const T *x, const T *y, int n) {
2245  T acc = static_cast<T>(0.0);
2246  for (int i = 0; i < n; ++i) {
2247  acc += x[i] * y[i];
2248  }
2249  return acc;
2250 }
2251 template <typename T>
2252 inline T IDot1(const T x[1], const T y[1]) {
2253  return (x[0] * y[0]);
2254 }
2255 template <typename T>
2256 inline T IDot2(const T x[2], const T y[2]) {
2257  return (x[0] * y[0] + x[1] * y[1]);
2258 }
2259 template <typename T>
2260 inline T IDot3(const T x[3], const T y[3]) {
2261  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2]);
2262 }
2263 template <typename T>
2264 inline T IDot4(const T x[4], const T y[4]) {
2265  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3]);
2266 }
2267 template <typename T>
2268 inline T IDot5(const T x[5], const T y[5]) {
2269  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4]);
2270 }
2271 template <typename T>
2272 inline T IDot6(const T x[6], const T y[6]) {
2273  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2274  x[5] * y[5]);
2275 }
2276 template <typename T>
2277 inline T IDot7(const T x[7], const T y[7]) {
2278  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2279  x[5] * y[5] + x[6] * y[6]);
2280 }
2281 template <typename T>
2282 inline T IDot8(const T x[8], const T y[8]) {
2283  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2284  x[5] * y[5] + x[6] * y[6] + x[7] * y[7]);
2285 }
2286 template <typename T>
2287 inline T IDot9(const T x[9], const T y[9]) {
2288  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2289  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8]);
2290 }
2291 template <typename T>
2292 inline T IDot10(const T x[10], const T y[10]) {
2293  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2294  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9]);
2295 }
2296 template <typename T>
2297 inline T IDot11(const T x[11], const T y[11]) {
2298  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2299  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2300  x[10] * y[10]);
2301 }
2302 template <typename T>
2303 inline T IDot12(const T x[12], const T y[12]) {
2304  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2305  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2306  x[10] * y[10] + x[11] * y[11]);
2307 }
2308 template <typename T>
2309 inline T IDot13(const T x[13], const T y[13]) {
2310  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2311  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2312  x[10] * y[10] + x[11] * y[11] + x[12] * y[12]);
2313 }
2314 template <typename T>
2315 inline T IDot14(const T x[14], const T y[14]) {
2316  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2317  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2318  x[10] * y[10] + x[11] * y[11] + x[12] * y[12] + x[13] * y[13]);
2319 }
2320 template <typename T>
2321 inline T IDot15(const T x[15], const T y[15]) {
2322  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2323  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2324  x[10] * y[10] + x[11] * y[11] + x[12] * y[12] + x[13] * y[13] +
2325  x[14] * y[14]);
2326 }
2327 template <typename T>
2328 inline T IDot16(const T x[16], const T y[16]) {
2329  return (x[0] * y[0] + x[1] * y[1] + x[2] * y[2] + x[3] * y[3] + x[4] * y[4] +
2330  x[5] * y[5] + x[6] * y[6] + x[7] * y[7] + x[8] * y[8] + x[9] * y[9] +
2331  x[10] * y[10] + x[11] * y[11] + x[12] * y[12] + x[13] * y[13] +
2332  x[14] * y[14] + x[15] * y[15]);
2333 }
2334 
2335 // Compute sum of n-dimensional vector x
2336 inline int ISumU(const unsigned char *x, int n) {
2337  int acc = 0;
2338  for (int i = 0; i < n; ++i) {
2339  acc += static_cast<int>(x[i]);
2340  }
2341  return acc;
2342 }
2343 template <typename T>
2344 inline T ISum(const T *x, int n) {
2345  T acc = static_cast<T>(0.0);
2346  for (int i = 0; i < n; ++i) {
2347  acc += x[i];
2348  }
2349  return acc;
2350 }
2351 template <typename T>
2352 inline T ISum1(const T x[1]) {
2353  return (x[0]);
2354 }
2355 template <typename T>
2356 inline T ISum2(const T x[2]) {
2357  return (x[0] + x[1]);
2358 }
2359 template <typename T>
2360 inline T ISum3(const T x[3]) {
2361  return (x[0] + x[1] + x[2]);
2362 }
2363 template <typename T>
2364 inline T ISum4(const T x[4]) {
2365  return (x[0] + x[1] + x[2] + x[3]);
2366 }
2367 template <typename T>
2368 inline T ISum5(const T x[5]) {
2369  return (x[0] + x[1] + x[2] + x[3] + x[4]);
2370 }
2371 template <typename T>
2372 inline T ISum6(const T x[6]) {
2373  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5]);
2374 }
2375 template <typename T>
2376 inline T ISum7(const T x[7]) {
2377  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6]);
2378 }
2379 template <typename T>
2380 inline T ISum8(const T x[8]) {
2381  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7]);
2382 }
2383 template <typename T>
2384 inline T ISum9(const T x[9]) {
2385  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8]);
2386 }
2387 template <typename T>
2388 inline T ISum10(const T x[10]) {
2389  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9]);
2390 }
2391 template <typename T>
2392 inline T ISum11(const T x[11]) {
2393  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2394  x[10]);
2395 }
2396 template <typename T>
2397 inline T ISum12(const T x[12]) {
2398  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2399  x[10] + x[11]);
2400 }
2401 template <typename T>
2402 inline T ISum13(const T x[13]) {
2403  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2404  x[10] + x[11] + x[12]);
2405 }
2406 template <typename T>
2407 inline T ISum14(const T x[14]) {
2408  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2409  x[10] + x[11] + x[12] + x[13]);
2410 }
2411 template <typename T>
2412 inline T ISum15(const T x[15]) {
2413  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2414  x[10] + x[11] + x[12] + x[13] + x[14]);
2415 }
2416 template <typename T>
2417 inline T ISum16(const T x[16]) {
2418  return (x[0] + x[1] + x[2] + x[3] + x[4] + x[5] + x[6] + x[7] + x[8] + x[9] +
2419  x[10] + x[11] + x[12] + x[13] + x[14] + x[15]);
2420 }
2421 
2422 template <typename T>
2423 inline T IAbsSum(const T *x, int n) {
2424  T acc = static_cast<T>(0.0);
2425  for (int i = 0; i < n; ++i) {
2426  acc += IAbs(x[i]);
2427  }
2428  return acc;
2429 }
2430 
2431 // Compute mean of n-dimensional vector x
2432 inline int IMeanU(const unsigned char *x, int n) { return ISumU(x, n) / n; }
2433 template <typename T>
2434 inline T IMean(const T *x, int n) {
2435  return ISum(x, n) / n;
2436 }
2437 template <typename T>
2438 inline T IMean2(const T x[2]) {
2439  return ISum2(x) / 2;
2440 }
2441 template <typename T>
2442 inline T IMean3(const T x[3]) {
2443  return ISum3(x) / 3;
2444 }
2445 template <typename T>
2446 inline T IMean4(const T x[4]) {
2447  return ISum4(x) / 4;
2448 }
2449 template <typename T>
2450 inline T IMean5(const T x[5]) {
2451  return ISum5(x) / 5;
2452 }
2453 template <typename T>
2454 inline T IMean6(const T x[6]) {
2455  return ISum6(x) / 6;
2456 }
2457 template <typename T>
2458 inline T IMean7(const T x[7]) {
2459  return ISum7(x) / 7;
2460 }
2461 template <typename T>
2462 inline T IMean8(const T x[8]) {
2463  return ISum8(x) / 8;
2464 }
2465 template <typename T>
2466 inline T IMean9(const T x[9]) {
2467  return ISum9(x) / 9;
2468 }
2469 template <typename T>
2470 inline T IMean10(const T x[10]) {
2471  return ISum10(x) / 10;
2472 }
2473 template <typename T>
2474 inline T IMean11(const T x[11]) {
2475  return ISum11(x) / 11;
2476 }
2477 template <typename T>
2478 inline T IMean12(const T x[12]) {
2479  return ISum12(x) / 12;
2480 }
2481 template <typename T>
2482 inline T IMean13(const T x[13]) {
2483  return ISum13(x) / 13;
2484 }
2485 template <typename T>
2486 inline T IMean14(const T x[14]) {
2487  return ISum14(x) / 14;
2488 }
2489 template <typename T>
2490 inline T IMean15(const T x[15]) {
2491  return ISum15(x) / 15;
2492 }
2493 template <typename T>
2494 inline T IMean16(const T x[16]) {
2495  return ISum16(x) / 16;
2496 }
2497 
2498 // Compute the sample standard deviation of sample data x
2499 template <typename T>
2500 inline T ISdv(const T *x, T mean, int n) {
2501  if (n < 2) {
2502  return static_cast<T>(0.0);
2503  }
2504  T sdv = static_cast<T>(0.0);
2505  for (int i = 0; i < n; ++i) {
2506  sdv += ISqr(x[i] - mean);
2507  }
2508  return ISqrt(IDiv(sdv, n - 1));
2509 }
2510 
2511 // Compute square sum of n-dimensional vector x
2512 inline int ISquaresumU(const unsigned char *x, int n) {
2513  int acc = 0;
2514  for (int i = 0; i < n; i++) {
2515  acc += ISqr(x[i]);
2516  }
2517  return (acc);
2518 }
2519 template <typename T>
2520 inline T ISquaresum(const T *x, int n) {
2521  T acc = static_cast<T>(0.0);
2522  for (int i = 0; i < n; ++i) {
2523  acc += ISqr(x[i]);
2524  }
2525  return acc;
2526 }
2527 template <typename T>
2528 inline T ISquaresum1(const T x[1]) {
2529  return (ISqr(x[0]));
2530 }
2531 template <typename T>
2532 inline T ISquaresum2(const T x[2]) {
2533  return (ISqr(x[0]) + ISqr(x[1]));
2534 }
2535 template <typename T>
2536 inline T ISquaresum3(const T x[3]) {
2537  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]));
2538 }
2539 template <typename T>
2540 inline T ISquaresum4(const T x[4]) {
2541  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]));
2542 }
2543 template <typename T>
2544 inline T ISquaresum5(const T x[5]) {
2545  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]));
2546 }
2547 template <typename T>
2548 inline T ISquaresum6(const T x[6]) {
2549  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2550  ISqr(x[5]));
2551 }
2552 template <typename T>
2553 inline T ISquaresum7(const T x[7]) {
2554  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2555  ISqr(x[5]) + ISqr(x[6]));
2556 }
2557 template <typename T>
2558 inline T ISquaresum8(const T x[8]) {
2559  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2560  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]));
2561 }
2562 template <typename T>
2563 inline T ISquaresum9(const T x[9]) {
2564  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2565  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]));
2566 }
2567 template <typename T>
2568 inline T ISquaresum10(const T x[10]) {
2569  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2570  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]));
2571 }
2572 template <typename T>
2573 inline T ISquaresum11(const T x[11]) {
2574  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2575  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]) +
2576  ISqr(x[10]));
2577 }
2578 template <typename T>
2579 inline T ISquaresum12(const T x[12]) {
2580  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2581  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]) +
2582  ISqr(x[10]) + ISqr(x[11]));
2583 }
2584 template <typename T>
2585 inline T ISquaresum13(const T x[13]) {
2586  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2587  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]) +
2588  ISqr(x[10]) + ISqr(x[11]) + ISqr(x[12]));
2589 }
2590 template <typename T>
2591 inline T ISquaresum14(const T x[14]) {
2592  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2593  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]) +
2594  ISqr(x[10]) + ISqr(x[11]) + ISqr(x[12]) + ISqr(x[13]));
2595 }
2596 template <typename T>
2597 inline T ISquaresum15(const T x[15]) {
2598  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2599  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]) +
2600  ISqr(x[10]) + ISqr(x[11]) + ISqr(x[12]) + ISqr(x[13]) + ISqr(x[14]));
2601 }
2602 template <typename T>
2603 inline T ISquaresum16(const T x[16]) {
2604  return (ISqr(x[0]) + ISqr(x[1]) + ISqr(x[2]) + ISqr(x[3]) + ISqr(x[4]) +
2605  ISqr(x[5]) + ISqr(x[6]) + ISqr(x[7]) + ISqr(x[8]) + ISqr(x[9]) +
2606  ISqr(x[10]) + ISqr(x[11]) + ISqr(x[12]) + ISqr(x[13]) + ISqr(x[14]) +
2607  ISqr(x[15]));
2608 }
2609 
2610 // Compute square sum of the diff (x-y) between two n-dimensional vector x and
2611 // * y
2612 inline int ISquaresumDiffU(const unsigned char *x, const unsigned char *y,
2613  int n) {
2614  int acc = 0;
2615  for (int i = 0; i < n; i++) {
2616  acc += ISqr(static_cast<int>(x[i]) - static_cast<int>(y[i]));
2617  }
2618  return acc;
2619 }
2620 template <typename T>
2621 inline T ISquaresumDiffU(const T *x, const T *y, int n) {
2622  T acc = static_cast<T>(0.0);
2623  for (int i = 0; i < n; i++) {
2624  acc += ISqr(x[i] - y[i]);
2625  }
2626  return acc;
2627 }
2628 template <typename T>
2629 inline T ISquaresumDiffU1(const T x[1], const T y[1]) {
2630  return (ISqr(x[0] - y[0]));
2631 }
2632 template <typename T>
2633 inline T ISquaresumDiffU2(const T x[2], const T y[2]) {
2634  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]));
2635 }
2636 template <typename T>
2637 inline T ISquaresumDiffU3(const T x[3], const T y[3]) {
2638  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]));
2639 }
2640 template <typename T>
2641 inline T ISquaresumDiffU4(const T x[4], const T y[4]) {
2642  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2643  ISqr(x[3] - y[3]));
2644 }
2645 template <typename T>
2646 inline T ISquaresumDiffU5(const T x[5], const T y[5]) {
2647  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2648  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]));
2649 }
2650 template <typename T>
2651 inline T ISquaresumDiffU6(const T x[6], const T y[6]) {
2652  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2653  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]));
2654 }
2655 template <typename T>
2656 inline T ISquaresumDiffU7(const T x[7], const T y[7]) {
2657  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2658  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2659  ISqr(x[6] - y[6]));
2660 }
2661 template <typename T>
2662 inline T ISquaresumDiffU8(const T x[8], const T y[8]) {
2663  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2664  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2665  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]));
2666 }
2667 template <typename T>
2668 inline T ISquaresumDiffU9(const T x[9], const T y[9]) {
2669  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2670  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2671  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]));
2672 }
2673 template <typename T>
2674 inline T ISquaresumDiffU10(const T x[10], const T y[10]) {
2675  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2676  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2677  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2678  ISqr(x[9] - y[9]));
2679 }
2680 template <typename T>
2681 inline T ISquaresumDiffU11(const T x[11], const T y[11]) {
2682  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2683  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2684  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2685  ISqr(x[9] - y[9]) + ISqr(x[10] - y[10]));
2686 }
2687 template <typename T>
2688 inline T ISquaresumDiffU12(const T x[12], const T y[12]) {
2689  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2690  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2691  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2692  ISqr(x[9] - y[9]) + ISqr(x[10] - y[10]) + ISqr(x[11] - y[11]));
2693 }
2694 template <typename T>
2695 inline T ISquaresumDiffU13(const T x[13], const T y[13]) {
2696  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2697  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2698  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2699  ISqr(x[9] - y[9]) + ISqr(x[10] - y[10]) + ISqr(x[11] - y[11]) +
2700  ISqr(x[12] - y[12]));
2701 }
2702 template <typename T>
2703 inline T ISquaresumDiffU14(const T x[14], const T y[14]) {
2704  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2705  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2706  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2707  ISqr(x[9] - y[9]) + ISqr(x[10] - y[10]) + ISqr(x[11] - y[11]) +
2708  ISqr(x[12] - y[12]) + ISqr(x[13] - y[13]));
2709 }
2710 template <typename T>
2711 inline T ISquaresumDiffU15(const T x[15], const T y[15]) {
2712  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2713  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2714  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2715  ISqr(x[9] - y[9]) + ISqr(x[10] - y[10]) + ISqr(x[11] - y[11]) +
2716  ISqr(x[12] - y[12]) + ISqr(x[13] - y[13]) + ISqr(x[14] - y[14]));
2717 }
2718 template <typename T>
2719 inline T ISquaresumDiffU16(const T x[16], const T y[16]) {
2720  return (ISqr(x[0] - y[0]) + ISqr(x[1] - y[1]) + ISqr(x[2] - y[2]) +
2721  ISqr(x[3] - y[3]) + ISqr(x[4] - y[4]) + ISqr(x[5] - y[5]) +
2722  ISqr(x[6] - y[6]) + ISqr(x[7] - y[7]) + ISqr(x[8] - y[8]) +
2723  ISqr(x[9] - y[9]) + ISqr(x[10] - y[10]) + ISqr(x[11] - y[11]) +
2724  ISqr(x[12] - y[12]) + ISqr(x[13] - y[13]) + ISqr(x[14] - y[14]) +
2725  ISqr(x[15] - y[15]));
2726 }
2727 
2728 // Compute the Hamming distance between two (unsigned) integer arrays
2729 // (considered
2730 // * as binary values, that is, as sequences of bits)
2731 inline unsigned int IHammingDiff(const unsigned int *x, const unsigned int *y,
2732  int n) {
2733  unsigned int distance = 0;
2734  for (int i = 0; i < n; ++i) {
2735  distance += IHammingLut(x[i], y[i]);
2736  }
2737  return distance; // Return the number of differing bits
2738 }
2739 inline unsigned int IHammingDiff2(const unsigned int x[2],
2740  const unsigned int y[2]) {
2741  unsigned int distance = 0;
2742  distance += IHammingLut(x[0], y[0]);
2743  distance += IHammingLut(x[1], y[1]);
2744  return distance;
2745 }
2746 inline unsigned int IHammingDiff4(const unsigned int x[4],
2747  const unsigned int y[4]) {
2748  unsigned int distance = 0;
2749  distance += IHammingLut(x[0], y[0]);
2750  distance += IHammingLut(x[1], y[1]);
2751  distance += IHammingLut(x[2], y[2]);
2752  distance += IHammingLut(x[3], y[3]);
2753  return distance;
2754 }
2755 inline unsigned int IHammingDiff8(const unsigned int x[8],
2756  const unsigned int y[8]) {
2757  unsigned int distance = 0;
2758  distance += IHammingLut(x[0], y[0]);
2759  distance += IHammingLut(x[1], y[1]);
2760  distance += IHammingLut(x[2], y[2]);
2761  distance += IHammingLut(x[3], y[3]);
2762  distance += IHammingLut(x[4], y[4]);
2763  distance += IHammingLut(x[5], y[5]);
2764  distance += IHammingLut(x[6], y[6]);
2765  distance += IHammingLut(x[7], y[7]);
2766  return distance;
2767 }
2768 inline unsigned int IHammingDiff16(const unsigned int x[16],
2769  const unsigned int y[16]) {
2770  unsigned int distance = 0;
2771  distance += IHammingLut(x[0], y[0]);
2772  distance += IHammingLut(x[1], y[1]);
2773  distance += IHammingLut(x[2], y[2]);
2774  distance += IHammingLut(x[3], y[3]);
2775  distance += IHammingLut(x[4], y[4]);
2776  distance += IHammingLut(x[5], y[5]);
2777  distance += IHammingLut(x[6], y[6]);
2778  distance += IHammingLut(x[7], y[7]);
2779  distance += IHammingLut(x[8], y[8]);
2780  distance += IHammingLut(x[9], y[9]);
2781  distance += IHammingLut(x[10], y[10]);
2782  distance += IHammingLut(x[11], y[11]);
2783  distance += IHammingLut(x[12], y[12]);
2784  distance += IHammingLut(x[13], y[13]);
2785  distance += IHammingLut(x[14], y[14]);
2786  distance += IHammingLut(x[15], y[15]);
2787  return distance;
2788 }
2789 
2790 // Compute the L2 norm of n-dimensional vector x
2791 inline double IL2Norm(const unsigned char *x, int n) {
2792  return (ISqrt(ISquaresumU(x, n)));
2793 }
2794 inline double IL2Norm(const int *x, int n) { return (ISqrt(ISquaresum(x, n))); }
2795 inline float IL2Norm(const float *x, int n) {
2796  return (ISqrt(ISquaresum(x, n)));
2797 }
2798 inline double IL2Norm(const double *x, int n) {
2799  return (ISqrt(ISquaresum(x, n)));
2800 }
2801 
2802 // For type float and double only
2803 template <typename T>
2804 inline T IL2Norm1(const T x[1]) {
2805  return (x[0]);
2806 }
2807 template <typename T>
2808 inline T IL2Norm2(const T x[2]) {
2809  return (ISqrt(ISquaresum2(x)));
2810 }
2811 template <typename T>
2812 inline T IL2Norm3(const T x[3]) {
2813  return (ISqrt(ISquaresum3(x)));
2814 }
2815 template <typename T>
2816 inline T IL2Norm4(const T x[4]) {
2817  return (ISqrt(ISquaresum4(x)));
2818 }
2819 template <typename T>
2820 inline T IL2Norm5(const T x[5]) {
2821  return (ISqrt(ISquaresum5(x)));
2822 }
2823 template <typename T>
2824 inline T IL2Norm6(const T x[6]) {
2825  return (ISqrt(ISquaresum6(x)));
2826 }
2827 template <typename T>
2828 inline T IL2Norm7(const T x[7]) {
2829  return (ISqrt(ISquaresum7(x)));
2830 }
2831 template <typename T>
2832 inline T IL2Norm8(const T x[8]) {
2833  return (ISqrt(ISquaresum8(x)));
2834 }
2835 template <typename T>
2836 inline T IL2Norm9(const T x[9]) {
2837  return (ISqrt(ISquaresum9(x)));
2838 }
2839 template <typename T>
2840 inline T IL2Norm10(const T x[10]) {
2841  return (ISqrt(ISquaresum10(x)));
2842 }
2843 template <typename T>
2844 inline T IL2Norm11(const T x[11]) {
2845  return (ISqrt(ISquaresum11(x)));
2846 }
2847 template <typename T>
2848 inline T IL2Norm12(const T x[12]) {
2849  return (ISqrt(ISquaresum12(x)));
2850 }
2851 template <typename T>
2852 inline T IL2Norm13(const T x[13]) {
2853  return (ISqrt(ISquaresum13(x)));
2854 }
2855 template <typename T>
2856 inline T IL2Norm14(const T x[14]) {
2857  return (ISqrt(ISquaresum14(x)));
2858 }
2859 template <typename T>
2860 inline T IL2Norm15(const T x[15]) {
2861  return (ISqrt(ISquaresum15(x)));
2862 }
2863 template <typename T>
2864 inline T IL2Norm16(const T x[16]) {
2865  return (ISqrt(ISquaresum16(x)));
2866 }
2867 
2868 // Compute sqrt( a^2 + b^2 ) with decent precision, for type float and double
2869 // only
2870 template <typename T>
2871 inline T IL2NormAdv(T a, T b) {
2872  T absa = IAbs(a);
2873  T absb = IAbs(b);
2874  if (absa > absb) {
2875  return (absa * ISqrt(static_cast<T>(1.0) + ISqr(IDiv(absb, absa))));
2876  } else {
2877  return (absb == static_cast<T>(0.0)
2878  ? static_cast<T>(0.0)
2879  : absb * ISqrt(static_cast<T>(1.0) + ISqr(IDiv(absa, absb))));
2880  }
2881 }
2882 
2883 // Compute sqrt( x[0]^2 + x[1]^2 ) with decent precision, for type float and
2884 // double only
2885 template <typename T>
2886 inline T IL2NormAdv(const T x[2]) {
2887  return IL2NormAdv<T>(x[0], x[1]);
2888 }
2889 
2890 // Compute the infinity-norm of a m x n matrix A
2891 template <typename T>
2892 inline T IInfinityNorm(const T *A, int m, int n) {
2893  T infinity_norm = IAbsSum(A, n);
2894  T tmp;
2895  int i, ni = n;
2896  for (i = 1; i < m; ++i, ni += n) {
2897  tmp = IAbsSum(A + ni, n);
2898  if (infinity_norm < tmp) {
2899  infinity_norm = tmp;
2900  }
2901  }
2902  return infinity_norm;
2903 }
2904 
2905 // Unitize n-dimensional vector x by dividing its L2-norm and save the result
2906 // * inplace, for type float and double only
2907 inline void IUnitize(double *x, int n) { IScale(x, n, IRec(IL2Norm(x, n))); }
2908 inline void IUnitize(float *x, int n) { IScale(x, n, IRec(IL2Norm(x, n))); }
2909 inline void ISafeUnitize(double *x, int n) {
2910  double norm = IL2Norm(x, n);
2912  IFill(x, n, IRec(ISqrt(n)));
2913  } else {
2914  IScale(x, n, 1.0 / norm);
2915  }
2916 }
2917 inline void ISafeUnitize(float *x, int n) {
2918  float norm = IL2Norm(x, n);
2920  IFill(x, n, static_cast<float>(IRec(ISqrt(n))));
2921  } else {
2922  IScale(x, n, static_cast<float>(1.0) / norm);
2923  }
2924 }
2925 // Unitize n-dimensional vector x by dividing its L2-norm and save the result in
2926 // * vector y, for type float and double only
2927 inline void IUnitize(const double *x, double *y, int n) {
2928  IScale(x, y, n, IRec(IL2Norm(x, n)));
2929 }
2930 inline void IUnitize(const float *x, float *y, int n) {
2931  IScale(x, y, n, IRec(IL2Norm(x, n)));
2932 }
2933 inline void ISafeUnitize(const double *x, double *y, int n) {
2934  double norm = IL2Norm(x, n);
2936  IFill(y, n, IRec(ISqrt(n)));
2937  } else {
2938  IScale(x, y, n, 1.0 / norm);
2939  }
2940 }
2941 inline void ISafeUnitize(float *x, float *y, int n) {
2942  float norm = IL2Norm(x, n);
2944  IFill(y, n, static_cast<float>(IRec(ISqrt(n))));
2945  } else {
2946  IScale(x, y, n, static_cast<float>(1.0) / norm);
2947  }
2948 }
2949 // For type float and double only!
2950 template <typename T>
2951 inline void IUnitize2(T x[2]) {
2952  IScale2(x, IRec(IL2Norm2(x)));
2953 }
2954 template <typename T>
2955 inline void IUnitize3(T x[3]) {
2956  IScale3(x, IRec(IL2Norm3(x)));
2957 }
2958 template <typename T>
2959 inline void IUnitize4(T x[4]) {
2960  IScale4(x, IRec(IL2Norm4(x)));
2961 }
2962 template <typename T>
2963 inline void IUnitize5(T x[5]) {
2964  IScale5(x, IRec(IL2Norm5(x)));
2965 }
2966 template <typename T>
2967 inline void IUnitize6(T x[6]) {
2968  IScale6(x, IRec(IL2Norm6(x)));
2969 }
2970 template <typename T>
2971 inline void IUnitize7(T x[7]) {
2972  IScale7(x, IRec(IL2Norm7(x)));
2973 }
2974 template <typename T>
2975 inline void IUnitize8(T x[8]) {
2976  IScale8(x, IRec(IL2Norm8(x)));
2977 }
2978 template <typename T>
2979 inline void IUnitize9(T x[9]) {
2980  IScale9(x, IRec(IL2Norm9(x)));
2981 }
2982 template <typename T>
2983 inline void IUnitize10(T x[10]) {
2984  IScale10(x, IRec(IL2Norm10(x)));
2985 }
2986 template <typename T>
2987 inline void IUnitize11(T x[11]) {
2988  IScale11(x, IRec(IL2Norm11(x)));
2989 }
2990 template <typename T>
2991 inline void IUnitize12(T x[12]) {
2992  IScale12(x, IRec(IL2Norm12(x)));
2993 }
2994 template <typename T>
2995 inline void IUnitize13(T x[13]) {
2996  IScale13(x, IRec(IL2Norm13(x)));
2997 }
2998 template <typename T>
2999 inline void IUnitize14(T x[14]) {
3000  IScale14(x, IRec(IL2Norm14(x)));
3001 }
3002 template <typename T>
3003 inline void IUnitize15(T x[15]) {
3004  IScale15(x, IRec(IL2Norm15(x)));
3005 }
3006 template <typename T>
3007 inline void IUnitize16(T x[16]) {
3008  IScale16(x, IRec(IL2Norm16(x)));
3009 }
3010 template <typename T>
3011 inline void IUnitize2(const T x[2], T y[2]) {
3012  IScale2(x, y, IRec(IL2Norm2(x)));
3013 }
3014 template <typename T>
3015 inline void IUnitize3(const T x[3], T y[3]) {
3016  IScale3(x, y, IRec(IL2Norm3(x)));
3017 }
3018 template <typename T>
3019 inline void IUnitize4(const T x[4], T y[4]) {
3020  IScale4(x, y, IRec(IL2Norm4(x)));
3021 }
3022 template <typename T>
3023 inline void IUnitize5(const T x[5], T y[5]) {
3024  IScale5(x, y, IRec(IL2Norm5(x)));
3025 }
3026 template <typename T>
3027 inline void IUnitize6(const T x[6], T y[6]) {
3028  IScale6(x, y, IRec(IL2Norm6(x)));
3029 }
3030 template <typename T>
3031 inline void IUnitize7(const T x[7], T y[7]) {
3032  IScale7(x, y, IRec(IL2Norm7(x)));
3033 }
3034 template <typename T>
3035 inline void IUnitize8(const T x[8], T y[8]) {
3036  IScale8(x, y, IRec(IL2Norm8(x)));
3037 }
3038 template <typename T>
3039 inline void IUnitize9(const T x[9], T y[9]) {
3040  IScale9(x, y, IRec(IL2Norm9(x)));
3041 }
3042 template <typename T>
3043 inline void IUnitize10(const T x[10], T y[10]) {
3044  IScale10(x, y, IRec(IL2Norm10(x)));
3045 }
3046 template <typename T>
3047 inline void IUnitize11(const T x[11], T y[11]) {
3048  IScale11(x, y, IRec(IL2Norm11(x)));
3049 }
3050 template <typename T>
3051 inline void IUnitize12(const T x[12], T y[12]) {
3052  IScale12(x, y, IRec(IL2Norm12(x)));
3053 }
3054 template <typename T>
3055 inline void IUnitize13(const T x[13], T y[13]) {
3056  IScale13(x, y, IRec(IL2Norm13(x)));
3057 }
3058 template <typename T>
3059 inline void IUnitize14(const T x[14], T y[14]) {
3060  IScale14(x, y, IRec(IL2Norm14(x)));
3061 }
3062 template <typename T>
3063 inline void IUnitize15(const T x[15], T y[15]) {
3064  IScale15(x, y, IRec(IL2Norm15(x)));
3065 }
3066 template <typename T>
3067 inline void IUnitize16(const T x[16], T y[16]) {
3068  IScale16(x, y, IRec(IL2Norm16(x)));
3069 }
3070 
3071 template <typename T>
3072 inline void ISignedUnitize2(T x[2]) {
3073  IScale2(x, ISignNeverZero(x[1]) * IRec(IL2Norm2(x)));
3074 }
3075 template <typename T>
3076 inline void ISignedUnitize3(T x[3]) {
3077  IScale3(x, ISignNeverZero(x[2]) * IRec(IL2Norm3(x)));
3078 }
3079 template <typename T>
3080 inline void ISignedUnitize4(T x[4]) {
3081  IScale4(x, ISignNeverZero(x[3]) * IRec(IL2Norm4(x)));
3082 }
3083 
3084 template <typename T>
3085 inline void IHomogeneousUnitize(T *x, int n) {
3086  IScale(x, n, IRec(x[n - 1]));
3087 }
3088 template <typename T>
3089 inline void IHomogeneousUnitize2(T x[2]) {
3090  IScale2(x, IRec(x[1]));
3091 }
3092 template <typename T>
3093 inline void IHomogeneousUnitize3(T x[3]) {
3094  IScale3(x, IRec(x[2]));
3095 }
3096 template <typename T>
3097 inline void IHomogeneousUnitize4(T x[4]) {
3098  IScale4(x, IRec(x[3]));
3099 }
3100 template <typename T>
3101 inline void IHomogeneousUnitize9(T x[9]) {
3102  IScale9(x, IRec(x[8]));
3103 }
3104 
3105 template <typename T>
3106 inline void IHomogeneousUnitize(const T *x, T *y, int n) {
3107  IScale(x, y, n, IRec(x[n - 1]));
3108 }
3109 template <typename T>
3110 inline void IHomogeneousUnitize2(const T x[2], T y[2]) {
3111  IScale2(x, y, IRec(x[1]));
3112 }
3113 template <typename T>
3114 inline void IHomogeneousUnitize3(const T x[3], T y[3]) {
3115  IScale3(x, y, IRec(x[2]));
3116 }
3117 template <typename T>
3118 inline void IHomogeneousUnitize4(const T x[4], T y[4]) {
3119  IScale4(x, y, IRec(x[3]));
3120 }
3121 template <typename T>
3122 inline void IHomogeneousUnitize9(const T x[9], T y[9]) {
3123  IScale9(x, y, IRec(x[8]));
3124 }
3125 
3126 // Compute the centroid of n 3-dimensional vectors
3127 inline void ICentroid3(const double *a, int n, double centroid[3]) {
3128  int length = 3 * n;
3129  IFill3(centroid, 0.0);
3130  for (int i = 0; i < length; i += 3) {
3131  IAdd3(a + i, centroid);
3132  }
3133  IScale3(centroid, IRec(n));
3134 }
3135 inline void ICentroid3(const float *a, int n, float centroid[3]) {
3136  int length = 3 * n;
3137  IFill3(centroid, 0.f);
3138  for (int i = 0; i < length; i += 3) {
3139  IAdd3(a + i, centroid);
3140  }
3141  IScale3(centroid, static_cast<float>(IRec(n)));
3142 }
3143 // Compute the centroid of n 2-dimensional vectors
3144 inline void ICentroid2(const double *a, int n, double centroid[2]) {
3145  int length = 2 * n;
3146  IFill2(centroid, 0.0);
3147  for (int i = 0; i < length; i += 2) {
3148  IAdd2(a + i, centroid);
3149  }
3150  IScale2(centroid, IRec(n));
3151 }
3152 inline void ICentroid2(const float *a, int n, float centroid[2]) {
3153  int length = 2 * n;
3154  IFill2(centroid, 0.f);
3155  for (int i = 0; i < length; i += 2) {
3156  IAdd2(a + i, centroid);
3157  }
3158  IScale2(centroid, static_cast<float>(IRec(n)));
3159 }
3160 
3161 // Compute the centroid of n 3-dimensional vectors and their Euclidean distances
3162 // * to the centroid
3163 inline void ICentroid3(const double *a, int n, double centroid[3],
3164  double *distances) {
3165  int length = 3 * n;
3166  int i, j;
3167  IFill3(centroid, 0.0);
3168  for (i = 0; i < length; i += 3) {
3169  IAdd3(a + i, centroid);
3170  }
3171  IScale3(centroid, IRec(n));
3172  for (i = 0, j = 0; i < n; ++i, j += 3) {
3173  distances[i] = ISqrt(ISquaresumDiffU3(a + j, centroid));
3174  }
3175 }
3176 inline void ICentroid3(const float *a, int n, float centroid[3],
3177  float *distances) {
3178  int length = 3 * n;
3179  int i, j;
3180  IFill3(centroid, 0.f);
3181  for (i = 0; i < length; i += 3) {
3182  IAdd3(a + i, centroid);
3183  }
3184  IScale3(centroid, static_cast<float>(IRec(n)));
3185  for (i = 0, j = 0; i < n; ++i, j += 3) {
3186  distances[i] = ISqrt(ISquaresumDiffU3(a + j, centroid));
3187  }
3188 }
3189 // Compute the centroid of n 2-dimensional vectors and their Euclidean distances
3190 // * to the centroid
3191 inline void ICentroid2(const double *a, int n, double centroid[2],
3192  double *distances) {
3193  int length = 2 * n;
3194  int i, j;
3195  IFill2(centroid, 0.0);
3196  for (i = 0; i < length; i += 2) {
3197  IAdd2(a + i, centroid);
3198  }
3199  IScale2(centroid, IRec(n));
3200  for (i = 0, j = 0; i < n; ++i, j += 2) {
3201  distances[i] = ISqrt(ISquaresumDiffU2(a + j, centroid));
3202  }
3203 }
3204 inline void ICentroid2(const float *a, int n, float centroid[2],
3205  float *distances) {
3206  int length = 2 * n;
3207  int i, j;
3208  IFill2(centroid, 0.f);
3209  for (i = 0; i < length; i += 2) {
3210  IAdd2(a + i, centroid);
3211  }
3212  IScale2(centroid, static_cast<float>(IRec(n)));
3213  for (i = 0, j = 0; i < n; ++i, j += 2) {
3214  distances[i] = ISqrt(ISquaresumDiffU2(a + j, centroid));
3215  }
3216 }
3217 
3218 // Compute the minimum element in array
3219 template <typename T>
3220 inline T IMinElement(const T *a, int n) {
3221  T val, temp;
3222  if (n <= 0) {
3223  return (static_cast<T>(0.0));
3224  }
3225  val = a[0];
3226  for (int i = 1; i < n; i++) {
3227  temp = a[i];
3228  if (temp < val) {
3229  val = temp;
3230  }
3231  }
3232  return (val);
3233 }
3234 // Compute the maximum element in array
3235 template <typename T>
3236 inline T IMaxElement(const T *a, int n) {
3237  T val, temp;
3238  if (n <= 0) {
3239  return (static_cast<T>(0.0));
3240  }
3241  val = a[0];
3242  for (int i = 1; i < n; i++) {
3243  temp = a[i];
3244  if (temp > val) {
3245  val = temp;
3246  }
3247  }
3248  return (val);
3249 }
3250 // Compute the minimum element and its index in array
3251 // template <typename T>
3252 // inline void IMinElement(const T *a, int n, T &min_val, int &index) {
3253 // T temp;
3254 // index = 0;
3255 // if (n <= 0) {
3256 // min_val = static_cast<T>(0.0);
3257 // return;
3258 // }
3259 // min_val = a[0];
3260 // for (int i = 1; i < n; i++) {
3261 // temp = a[i];
3262 // if (temp < min_val) {
3263 // min_val = temp;
3264 // index = i;
3265 // }
3266 // }
3267 // }
3268 // Compute the maximum element and its index in array
3269 // template <typename T>
3270 // inline void IMaxElement(const T *a, int n, T &max_val, int &index) {
3271 // T temp;
3272 // index = 0;
3273 // if (n <= 0) {
3274 // max_val = static_cast<T>(0.0);
3275 // return;
3276 // }
3277 // max_val = a[0];
3278 // for (int i = 1; i < n; i++) {
3279 // temp = a[i];
3280 // if (temp > max_val) {
3281 // max_val = temp;
3282 // index = i;
3283 // }
3284 // }
3285 // }
3286 
3287 // Compute the maximum diagonal element of a n x n square matrix
3288 template <typename T>
3289 inline T IMaxDiagonalElement(const T *a, int n) {
3290  T val, temp;
3291  if (n <= 0) {
3292  return (static_cast<T>(0.0));
3293  }
3294  val = a[0];
3295  int i, ni = n;
3296  for (i = 1; i < n; i++, ni += n) {
3297  temp = a[ni + i];
3298  if (temp > val) {
3299  val = temp;
3300  }
3301  }
3302  return (val);
3303 }
3304 
3305 // Compute the index of element with largest element in array
3306 inline int IMaxIndex(const double *a, int n) {
3307  int bi;
3308  double b, t;
3309  if (n <= 0) {
3310  return (0);
3311  }
3312  b = a[0];
3313  bi = 0;
3314  for (int i = 1; i < n; i++)
3315  if ((t = a[i]) > b) {
3316  b = t;
3317  bi = i;
3318  }
3319  return (bi);
3320 }
3321 inline int IMaxIndex(const float *a, int n) {
3322  int bi;
3323  float b, t;
3324  if (n <= 0) {
3325  return (0);
3326  }
3327  b = a[0];
3328  bi = 0;
3329  for (int i = 1; i < n; i++)
3330  if ((t = a[i]) > b) {
3331  b = t;
3332  bi = i;
3333  }
3334  return (bi);
3335 }
3336 inline int IMaxIndex(const int *a, int n) {
3337  int bi;
3338  int b, t;
3339  if (n <= 0) {
3340  return (0);
3341  }
3342  b = a[0];
3343  bi = 0;
3344  for (int i = 1; i < n; i++)
3345  if ((t = a[i]) > b) {
3346  b = t;
3347  bi = i;
3348  }
3349  return (bi);
3350 }
3351 
3352 // Compute the index of element with largest magnitude element in array
3353 inline int IMaxAbsIndex(const double *a, int n) {
3354  int bi;
3355  double b, t;
3356  if (n <= 0) {
3357  return (0);
3358  }
3359  b = IAbs(a[0]);
3360  bi = 0;
3361  for (int i = 1; i < n; i++)
3362  if ((t = IAbs(a[i])) > b) {
3363  b = t;
3364  bi = i;
3365  }
3366  return (bi);
3367 }
3368 inline int IMaxAbsIndex(const float *a, int n) {
3369  int bi;
3370  float b, t;
3371  if (n <= 0) {
3372  return (0);
3373  }
3374  b = IAbs(a[0]);
3375  bi = 0;
3376  for (int i = 1; i < n; i++)
3377  if ((t = IAbs(a[i])) > b) {
3378  b = t;
3379  bi = i;
3380  }
3381  return (bi);
3382 }
3383 inline int IMaxAbsIndex(const int *a, int n) {
3384  int bi;
3385  int b, t;
3386  if (n <= 0) {
3387  return (0);
3388  }
3389  b = IAbs(a[0]);
3390  bi = 0;
3391  for (int i = 1; i < n; i++)
3392  if ((t = IAbs(a[i])) > b) {
3393  b = t;
3394  bi = i;
3395  }
3396  return (bi);
3397 }
3398 // Compute the index of element with smallest magnitude element in array
3399 inline int IMinAbsIndex(const double *a, int n) {
3400  int bi;
3401  double b, t;
3402  if (n <= 0) {
3403  return (0);
3404  }
3405  b = IAbs(a[0]);
3406  bi = 0;
3407  for (int i = 1; i < n; i++)
3408  if ((t = IAbs(a[i])) < b) {
3409  b = t;
3410  bi = i;
3411  }
3412  return (bi);
3413 }
3414 inline int IMinAbsIndex(const float *a, int n) {
3415  int bi;
3416  float b, t;
3417  if (n <= 0) {
3418  return (0);
3419  }
3420  b = IAbs(a[0]);
3421  bi = 0;
3422  for (int i = 1; i < n; i++)
3423  if ((t = IAbs(a[i])) < b) {
3424  b = t;
3425  bi = i;
3426  }
3427  return (bi);
3428 }
3429 inline int IMinAbsIndex(const int *a, int n) {
3430  int bi;
3431  int b, t;
3432  if (n <= 0) {
3433  return (0);
3434  }
3435  b = IAbs(a[0]);
3436  bi = 0;
3437  for (int i = 1; i < n; i++)
3438  if ((t = IAbs(a[i])) < b) {
3439  b = t;
3440  bi = i;
3441  }
3442  return (bi);
3443 }
3444 
3445 // Compute the index of element in interval [i1,i2) with largest magnitude
3446 // * element in array
3447 inline int IMaxAbsIndexInterval(const double *a, int i1, int i2) {
3448  int bi;
3449  double b, t;
3450  b = IAbs(a[i1]);
3451  bi = i1;
3452  for (int i = i1 + 1; i < i2; i++) {
3453  if ((t = IAbs(a[i])) > b) {
3454  b = t;
3455  bi = i;
3456  }
3457  }
3458  return (bi);
3459 }
3460 inline int IMaxAbsIndexInterval(const float *a, int i1, int i2) {
3461  int bi;
3462  float b, t;
3463  b = IAbs(a[i1]);
3464  bi = i1;
3465  for (int i = i1 + 1; i < i2; i++) {
3466  if ((t = IAbs(a[i])) > b) {
3467  b = t;
3468  bi = i;
3469  }
3470  }
3471  return (bi);
3472 }
3473 inline int IMaxAbsIndexInterval(const int *a, int i1, int i2) {
3474  int bi;
3475  int b, t;
3476  b = IAbs(a[i1]);
3477  bi = i1;
3478  for (int i = i1 + 1; i < i2; i++) {
3479  if ((t = IAbs(a[i])) > b) {
3480  b = t;
3481  bi = i;
3482  }
3483  }
3484  return (bi);
3485 }
3486 // Compute the index of element in interval [i1,i2) with smallest magnitude
3487 // * element in array
3488 inline int IMinAbsIndexInterval(const double *a, int i1, int i2) {
3489  int bi;
3490  double b, t;
3491  b = IAbs(a[i1]);
3492  bi = i1;
3493  for (int i = i1 + 1; i < i2; i++) {
3494  if ((t = IAbs(a[i])) < b) {
3495  b = t;
3496  bi = i;
3497  }
3498  }
3499  return (bi);
3500 }
3501 inline int IMinAbsIndexInterval(const float *a, int i1, int i2) {
3502  int bi;
3503  float b, t;
3504  b = IAbs(a[i1]);
3505  bi = i1;
3506  for (int i = i1 + 1; i < i2; i++) {
3507  if ((t = IAbs(a[i])) < b) {
3508  b = t;
3509  bi = i;
3510  }
3511  }
3512  return (bi);
3513 }
3514 inline int IMinAbsIndexInterval(const int *a, int i1, int i2) {
3515  int bi;
3516  int b, t;
3517  b = IAbs(a[i1]);
3518  bi = i1;
3519  for (int i = i1 + 1; i < i2; i++) {
3520  if ((t = IAbs(a[i])) < b) {
3521  b = t;
3522  bi = i;
3523  }
3524  }
3525  return (bi);
3526 }
3527 
3528 // Compute the index of element in interval [i1,i2) with largest magnitude
3529 // * element in a column of a mxn matrix
3530 inline int IMaxAbsIndexIntervalColumn(const double *a, int i1, int i2, int n) {
3531  int bi;
3532  double b, t;
3533  const double *ref = a + n * i1;
3534  b = IAbs(*ref);
3535  bi = i1;
3536  ref += n;
3537  for (int i = i1 + 1; i < i2; ++i, ref += n) {
3538  if ((t = IAbs(*ref)) > b) {
3539  b = t;
3540  bi = i;
3541  }
3542  }
3543  return (bi);
3544 }
3545 inline int IMaxAbsIndexIntervalColumn(const float *a, int i1, int i2, int n) {
3546  int bi;
3547  float b, t;
3548  const float *ref = a + i1 * n;
3549  b = IAbs(*ref);
3550  bi = i1;
3551  for (int i = i1 + 1; i < i2; ++i, ref += n) {
3552  if ((t = IAbs(*ref)) > b) {
3553  b = t;
3554  bi = i;
3555  }
3556  }
3557  return (bi);
3558 }
3559 inline int IMaxAbsIndexIntervalColumn(const int *a, int i1, int i2, int n) {
3560  int b, bi, t;
3561  const int *ref = a + i1 * n;
3562  b = IAbs(*ref);
3563  bi = i1;
3564  for (int i = i1; i < i2; ++i, ref += n) {
3565  if ((t = IAbs(*ref)) > b) {
3566  b = t;
3567  bi = i;
3568  }
3569  }
3570  return (bi);
3571 }
3572 
3573 // Compute the index of element in interval [i1,i2) with smallest magnitude
3574 // * element in a column of a mxn matrix
3575 inline int IMinAbsIndexIntervalColumn(const double *a, int i1, int i2, int n) {
3576  int bi;
3577  double b, t;
3578  const double *ref = a + n * i1;
3579  b = IAbs(*ref);
3580  bi = i1;
3581  for (int i = i1 + 1; i < i2; ++i, ref += n) {
3582  if ((t = IAbs(*ref)) < b) {
3583  b = t;
3584  bi = i;
3585  }
3586  }
3587  return (bi);
3588 }
3589 inline int IMinAbsIndexIntervalColumn(const float *a, int i1, int i2, int n) {
3590  int bi;
3591  float b, t;
3592  const float *ref = a + i1 * n;
3593  b = IAbs(*ref);
3594  bi = i1;
3595  for (int i = i1 + 1; i < i2; ++i, ref += n) {
3596  if ((t = IAbs(*ref)) < b) {
3597  b = t;
3598  bi = i;
3599  }
3600  }
3601  return (bi);
3602 }
3603 inline int IMinAbsIndexIntervalColumn(const int *a, int i1, int i2, int n) {
3604  int b, bi, t;
3605  const int *ref = a + i1 * n;
3606  b = IAbs(*ref);
3607  bi = i1;
3608  for (int i = i1; i < i2; ++i, ref += n) {
3609  if ((t = IAbs(*ref)) < b) {
3610  b = t;
3611  bi = i;
3612  }
3613  }
3614  return (bi);
3615 }
3616 
3617 // Find row-index of element on or below the diagonal of column i of (n x n)
3618 // matrix A
3619 // with the largest absolute value.
3620 template <typename T>
3621 inline int IMaxAbsIndexSubdiagonalColumn(const T *A, int i, int n) {
3622  int j, largest_j;
3623  T largest_val, temp;
3624  largest_val = IAbs(A[i * n + i]);
3625  largest_j = i;
3626  for (j = i + 1; j < n; j++) {
3627  temp = IAbs(A[j * n + i]);
3628  if (temp > largest_val) {
3629  largest_val = temp;
3630  largest_j = j;
3631  }
3632  }
3633  return largest_j;
3634 }
3635 
3636 // Compute the minimum and maximum elements in an array
3637 template <typename T>
3638 inline void IMinMaxElements(const T *a, int n, T *min_val, T *max_val) {
3639  T temp;
3640  if (n <= 0) {
3641  *min_val = *max_val = static_cast<T>(0.0);
3642  return;
3643  }
3644  *min_val = *max_val = a[0];
3645  for (int i = 1; i < n; i++) {
3646  temp = a[i];
3647  if (temp > *max_val) {
3648  *max_val = temp;
3649  }
3650  if (temp < *min_val) {
3651  *min_val = temp;
3652  }
3653  }
3654 }
3655 
3656 // Compute the minimum and maximum elements in an array, ingoring the
3657 // * "ignored_val" in a
3658 // template <typename T>
3659 // inline void IMinMaxElements(const T *a, int n, T &min_val, T &max_val,
3660 // const T ignored_val) {
3661 // T temp;
3662 // int i;
3663 // if (n <= 0) {
3664 // min_val = max_val = static_cast<T>(0.0);
3665 // return;
3666 // }
3667 // for (i = 0; i < n; ++i) {
3668 // if (a[i] != ignored_val) {
3669 // break;
3670 // }
3671 // }
3672 // min_val = max_val = a[i];
3673 // for (; i < n; i++) {
3674 // temp = a[i];
3675 // if (temp == ignored_val) {
3676 // continue;
3677 // }
3678 // if (temp > max_val) {
3679 // max_val = temp;
3680 // }
3681 // if (temp < min_val) {
3682 // min_val = temp;
3683 // }
3684 // }
3685 // return;
3686 // }
3687 
3688 // Given a n-dimensional vector x, construct its homogeneous representation
3689 // * (n+1-dimensional vector) y by adding 1 to the last entry
3690 template <typename T>
3691 inline void IHomogenize(const T *x, T *y, int n) {
3692  for (int i = 0; i < n; ++i) {
3693  y[i] = x[i];
3694  }
3695  y[n] = static_cast<T>(1.0);
3696 }
3697 template <typename T>
3698 inline void IHomogenize1(const T x[1], T y[2]) {
3699  y[0] = x[0];
3700  y[1] = static_cast<T>(1.0);
3701 }
3702 template <typename T>
3703 inline void IHomogenize2(const T x[2], T y[3]) {
3704  y[0] = x[0];
3705  y[1] = x[1];
3706  y[2] = static_cast<T>(1.0);
3707 }
3708 template <typename T>
3709 inline void IHomogenize3(const T x[3], T y[4]) {
3710  y[0] = x[0];
3711  y[1] = x[1];
3712  y[2] = x[2];
3713  y[3] = static_cast<T>(1.0);
3714 }
3715 
3716 // Compute the cross product between two 3-dimensional vectors x and y
3717 template <typename T>
3718 inline void ICross(const T x[3], const T y[3], T xxy[3]) {
3719  xxy[0] = x[1] * y[2] - x[2] * y[1];
3720  xxy[1] = x[2] * y[0] - x[0] * y[2];
3721  xxy[2] = x[0] * y[1] - x[1] * y[0];
3722 }
3723 
3724 // Compute the 3x3 skew-symmetric matrix [e]_x such that [e]_x * y is the cross
3725 // * product of 3-dimensional vectors x and y for all y
3726 template <typename T>
3727 inline void IAxiator(const T x[3], T e_x[9]) {
3728  e_x[0] = static_cast<T>(0.0);
3729  e_x[1] = -x[2];
3730  e_x[2] = x[1];
3731  e_x[3] = x[2];
3732  e_x[4] = static_cast<T>(0.0);
3733  e_x[5] = -x[0];
3734  e_x[6] = -x[1];
3735  e_x[7] = x[0];
3736  e_x[8] = static_cast<T>(0.0);
3737 }
3738 
3739 // Compute the square of a 3x3 skew-symmetric matrix [e]_x, e_x2 = [e_x]^2 =
3740 // * [e]_x*[e]_x
3741 template <typename T>
3742 inline void ISqrSkewSymmetric3x3(const T x[3], T e_x2[9]) {
3743  T x0_sqr = ISqr(x[0]);
3744  T x1_sqr = ISqr(x[1]);
3745  T x2_sqr = ISqr(x[2]);
3746  e_x2[0] = -(x1_sqr + x2_sqr);
3747  e_x2[1] = e_x2[3] = x[0] * x[1];
3748  e_x2[4] = -(x2_sqr + x0_sqr);
3749  e_x2[2] = e_x2[6] = x[2] * x[0];
3750  e_x2[8] = -(x0_sqr + x1_sqr);
3751  e_x2[5] = e_x2[7] = x[1] * x[2];
3752 }
3753 
3754 // Set the nxn matrix A to be an identity matrix
3755 template <typename T>
3756 inline void IEye(T *A, int n) {
3757  int in = 0;
3758  IZero(A, n * n);
3759  for (int i = 0; i < n; ++i, in += n) {
3760  A[in + i] = static_cast<T>(1.0);
3761  }
3762 }
3763 template <typename T>
3764 inline void IEye2x2(T A[4]) {
3765  A[0] = A[3] = static_cast<T>(1.0);
3766  A[1] = A[2] = static_cast<T>(0.0);
3767 }
3768 template <typename T>
3769 inline void IEye3x3(T A[9]) {
3770  A[0] = A[4] = A[8] = static_cast<T>(1.0);
3771  A[1] = A[2] = A[3] = A[5] = A[6] = A[7] = static_cast<T>(0.0);
3772 }
3773 template <typename T>
3774 inline void IEye4x4(T A[16]) {
3775  A[0] = A[5] = A[10] = A[15] = static_cast<T>(1.0);
3776  A[1] = A[2] = A[3] = A[4] = A[6] = A[7] = A[8] = A[9] = A[11] = A[12] =
3777  A[13] = A[14] = static_cast<T>(0.0);
3778 }
3779 
3780 // Construct the 2x2 upper triangular matrix A
3781 template <typename T>
3782 inline void IUpperTriangular2x2(T a0, T a1, T a3, T A[4]) {
3783  A[0] = a0;
3784  A[1] = a1;
3785  A[2] = static_cast<T>(0.0);
3786  A[3] = a3;
3787 }
3788 
3789 // Construct the 3x3 upper triangular matrix A
3790 template <typename T>
3791 inline void IUpperTriangular3x3(T a0, T a1, T a2, T a4, T a5, T a8, T A[9]) {
3792  A[0] = a0;
3793  A[1] = a1;
3794  A[2] = a2;
3795  A[3] = static_cast<T>(0.0);
3796  A[4] = a4;
3797  A[5] = a5;
3798  A[6] = static_cast<T>(0.0);
3799  A[7] = static_cast<T>(0.0);
3800  A[8] = a8;
3801 }
3802 
3803 // Compute the trace of a 2x2 matrix A
3804 inline double ITrace2x2(const double A[4]) { return (A[0] + A[3]); }
3805 inline float ITrace2x2(const float A[4]) { return (A[0] + A[3]); }
3806 // Compute the trace of a 3x3 matrix A
3807 inline double ITrace3x3(const double A[9]) { return (A[0] + A[4] + A[8]); }
3808 inline float ITrace3x3(const float A[9]) { return (A[0] + A[4] + A[8]); }
3809 
3810 // Compute the determinant of a 2x2 matrix A
3811 inline double IDeterminant2x2(const double A[4]) {
3812  return (A[0] * A[3] - A[1] * A[2]);
3813 }
3814 inline float IDeterminant2x2(const float A[4]) {
3815  return (A[0] * A[3] - A[1] * A[2]);
3816 }
3817 inline int IDeterminant2x2(const int A[4]) {
3818  return (A[0] * A[3] - A[1] * A[2]);
3819 }
3820 
3821 // Compute the determinant of a 3x3 matrix A
3822 inline double IDeterminant3x3(const double A[9]) {
3823  double r0_x_r1[3];
3824  ICross(A, A + 3, r0_x_r1);
3825  return (IDot3(r0_x_r1, A + 6));
3826 }
3827 inline float IDeterminant3x3(const float A[9]) {
3828  float r0_x_r1[3];
3829  ICross(A, A + 3, r0_x_r1);
3830  return (IDot3(r0_x_r1, A + 6));
3831 }
3832 inline int IDeterminant3x3(const int A[9]) {
3833  int r0_x_r1[3];
3834  ICross(A, A + 3, r0_x_r1);
3835  return (IDot3(r0_x_r1, A + 6));
3836 }
3837 
3838 // Compute the 6 subdeterminants {sd[0],...,sd[5]} of the 2x4 matrix formed by x
3839 // * as row 0 and y as row 1
3840 inline void ISubdeterminants2x4(const double x[4], const double y[4],
3841  double sd[6]) {
3842  sd[0] = x[0] * y[1] - x[1] * y[0];
3843  sd[1] = x[0] * y[2] - x[2] * y[0];
3844  sd[2] = x[0] * y[3] - x[3] * y[0];
3845  sd[3] = x[1] * y[2] - x[2] * y[1];
3846  sd[4] = x[1] * y[3] - x[3] * y[1];
3847  sd[5] = x[2] * y[3] - x[3] * y[2];
3848 }
3849 inline void ISubdeterminants2x4(const float x[4], const float y[4],
3850  float sd[6]) {
3851  sd[0] = x[0] * y[1] - x[1] * y[0];
3852  sd[1] = x[0] * y[2] - x[2] * y[0];
3853  sd[2] = x[0] * y[3] - x[3] * y[0];
3854  sd[3] = x[1] * y[2] - x[2] * y[1];
3855  sd[4] = x[1] * y[3] - x[3] * y[1];
3856  sd[5] = x[2] * y[3] - x[3] * y[2];
3857 }
3858 inline void ISubdeterminants2x4(const int x[4], const int y[4], int sd[6]) {
3859  sd[0] = x[0] * y[1] - x[1] * y[0];
3860  sd[1] = x[0] * y[2] - x[2] * y[0];
3861  sd[2] = x[0] * y[3] - x[3] * y[0];
3862  sd[3] = x[1] * y[2] - x[2] * y[1];
3863  sd[4] = x[1] * y[3] - x[3] * y[1];
3864  sd[5] = x[2] * y[3] - x[3] * y[2];
3865 }
3866 
3867 // Compute the 4 subdeterminants {sd[0],...,sd[3]} of the 3x4 matrix formed by x
3868 // * as row 0, y as row 1, and z as row 2
3869 inline void ISubdeterminants3x4(const double x[4], const double y[4],
3870  const double z[4], double sd[4]) {
3871  double ssd[6];
3872  ISubdeterminants2x4(x, y, ssd);
3873  sd[0] = z[1] * ssd[5] - z[2] * ssd[4] + z[3] * ssd[3];
3874  sd[1] = z[0] * ssd[5] - z[2] * ssd[2] + z[3] * ssd[1];
3875  sd[2] = z[0] * ssd[4] - z[1] * ssd[2] + z[3] * ssd[0];
3876  sd[3] = z[0] * ssd[3] - z[1] * ssd[1] + z[2] * ssd[0];
3877 }
3878 inline void ISubdeterminants3x4(const float x[4], const float y[4],
3879  const float z[4], float sd[4]) {
3880  float ssd[6];
3881  ISubdeterminants2x4(x, y, ssd);
3882  sd[0] = z[1] * ssd[5] - z[2] * ssd[4] + z[3] * ssd[3];
3883  sd[1] = z[0] * ssd[5] - z[2] * ssd[2] + z[3] * ssd[1];
3884  sd[2] = z[0] * ssd[4] - z[1] * ssd[2] + z[3] * ssd[0];
3885  sd[3] = z[0] * ssd[3] - z[1] * ssd[1] + z[2] * ssd[0];
3886 }
3887 inline void ISubdeterminants3x4(const int x[4], const int y[4], const int z[4],
3888  int sd[4]) {
3889  int ssd[6];
3890  ISubdeterminants2x4(x, y, ssd);
3891  sd[0] = z[1] * ssd[5] - z[2] * ssd[4] + z[3] * ssd[3];
3892  sd[1] = z[0] * ssd[5] - z[2] * ssd[2] + z[3] * ssd[1];
3893  sd[2] = z[0] * ssd[4] - z[1] * ssd[2] + z[3] * ssd[0];
3894  sd[3] = z[0] * ssd[3] - z[1] * ssd[1] + z[2] * ssd[0];
3895 }
3896 
3897 // Compute the determinant of a 4x4 matrix A
3898 inline double IDeterminant4x4(const double A[16]) {
3899  double sd[4];
3900  ISubdeterminants3x4(A, A + 4, A + 8, sd);
3901  return -(A[12] * sd[0]) + (A[13] * sd[1]) - (A[14] * sd[2]) + (A[15] * sd[3]);
3902 }
3903 inline float IDeterminant4x4(const float A[16]) {
3904  float sd[4];
3905  ISubdeterminants3x4(A, A + 4, A + 8, sd);
3906  return -(A[12] * sd[0]) + (A[13] * sd[1]) - (A[14] * sd[2]) + (A[15] * sd[3]);
3907 }
3908 inline int IDeterminant4x4(const int A[16]) {
3909  int sd[4];
3910  ISubdeterminants3x4(A, A + 4, A + 8, sd);
3911  return -(A[12] * sd[0]) + (A[13] * sd[1]) - (A[14] * sd[2]) + (A[15] * sd[3]);
3912 }
3913 
3914 // Compute the nullptrvector to x,y and z => intersection of three planes x, y,
3915 // z
3916 // is
3917 // * a point
3918 template <typename T>
3919 inline void ICross(const T x[4], const T y[4], const T z[4], T xxyxz[4]) {
3920  ISubdeterminants3x4(x, y, z, xxyxz);
3921  xxyxz[0] = -xxyxz[0];
3922  xxyxz[2] = -xxyxz[2];
3923 }
3924 
3925 // Compute the inverse of a 2x2 matrix A using the Cramer's rule
3926 inline void IInvert2x2(const double A[4], double Ai[4]) {
3927  double d = IDeterminant2x2(A);
3928  double sf = IRec(d);
3929  Ai[0] = sf * (A[3]);
3930  Ai[1] = sf * (-A[1]);
3931  Ai[2] = sf * (-A[2]);
3932  Ai[3] = sf * (A[0]);
3933 }
3934 
3935 inline void IInvert2x2(const float A[4], float Ai[4]) {
3936  float d = IDeterminant2x2(A);
3937  float sf = IRec(d);
3938  Ai[0] = sf * (A[3]);
3939  Ai[1] = sf * (-A[1]);
3940  Ai[2] = sf * (-A[2]);
3941  Ai[3] = sf * (A[0]);
3942 }
3943 
3944 inline void IInvert2x2(const int A[4], double Ai[4]) {
3945  int d = IDeterminant2x2(A);
3946  double sf = IRec(d);
3947  Ai[0] = sf * (A[3]);
3948  Ai[1] = sf * (-A[1]);
3949  Ai[2] = sf * (-A[2]);
3950  Ai[3] = sf * (A[0]);
3951 }
3952 
3953 // inline void ISafeInvert2x2(const double A[4], double Ai[4],
3954 // bool &is_invertible) {
3955 // double d = IDeterminant2x2(A);
3956 // double sf = IRec(d);
3957 // Ai[0] = sf * (A[3]);
3958 // Ai[1] = sf * (-A[1]);
3959 // Ai[2] = sf * (-A[2]);
3960 // Ai[3] = sf * (A[0]);
3961 // is_invertible =
3962 // (IAbs(d) > Constant<double>::MIN_ABS_SAFE_DIVIDEND());
3963 // }
3964 
3965 // inline void ISafeInvert2x2(const float A[4], float Ai[4], bool
3966 // &is_invertible) {
3967 // float d = IDeterminant2x2(A);
3968 // float sf = IRec(d);
3969 // Ai[0] = sf * (A[3]);
3970 // Ai[1] = sf * (-A[1]);
3971 // Ai[2] = sf * (-A[2]);
3972 // Ai[3] = sf * (A[0]);
3973 // is_invertible =
3974 // (IAbs(d) > Constant<float>::MIN_ABS_SAFE_DIVIDEND());
3975 // }
3976 
3977 // inline void ISafeInvert2x2(const int A[4], double Ai[4], bool &is_invertible)
3978 // {
3979 // int d = IDeterminant2x2(A);
3980 // double sf = IRec(d);
3981 // Ai[0] = sf * (A[3]);
3982 // Ai[1] = sf * (-A[1]);
3983 // Ai[2] = sf * (-A[2]);
3984 // Ai[3] = sf * (A[0]);
3985 // is_invertible = (d != 0);
3986 // }
3987 
3988 // Compute the inverse of a 3x3 matrix A using the Cramer's rule
3989 inline void IInvert3x3(const double A[9], double Ai[9]) {
3990  double sd0 = A[4] * A[8] - A[5] * A[7];
3991  double sd1 = A[5] * A[6] - A[3] * A[8];
3992  double sd2 = A[3] * A[7] - A[4] * A[6];
3993  double sd3 = A[2] * A[7] - A[1] * A[8];
3994  double sd4 = A[0] * A[8] - A[2] * A[6];
3995  double sd5 = A[1] * A[6] - A[0] * A[7];
3996  double sd6 = A[1] * A[5] - A[2] * A[4];
3997  double sd7 = A[2] * A[3] - A[0] * A[5];
3998  double sd8 = A[0] * A[4] - A[1] * A[3];
3999  double d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4000  double d_rec = IRec(d);
4001  Ai[0] = d_rec * sd0;
4002  Ai[1] = d_rec * sd3;
4003  Ai[2] = d_rec * sd6;
4004  Ai[3] = d_rec * sd1;
4005  Ai[4] = d_rec * sd4;
4006  Ai[5] = d_rec * sd7;
4007  Ai[6] = d_rec * sd2;
4008  Ai[7] = d_rec * sd5;
4009  Ai[8] = d_rec * sd8;
4010 }
4011 inline void IInvert3x3(const float A[9], float Ai[9]) {
4012  float sd0 = A[4] * A[8] - A[5] * A[7];
4013  float sd1 = A[5] * A[6] - A[3] * A[8];
4014  float sd2 = A[3] * A[7] - A[4] * A[6];
4015  float sd3 = A[2] * A[7] - A[1] * A[8];
4016  float sd4 = A[0] * A[8] - A[2] * A[6];
4017  float sd5 = A[1] * A[6] - A[0] * A[7];
4018  float sd6 = A[1] * A[5] - A[2] * A[4];
4019  float sd7 = A[2] * A[3] - A[0] * A[5];
4020  float sd8 = A[0] * A[4] - A[1] * A[3];
4021  float d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4022  float d_rec = IRec(d);
4023  Ai[0] = d_rec * sd0;
4024  Ai[1] = d_rec * sd3;
4025  Ai[2] = d_rec * sd6;
4026  Ai[3] = d_rec * sd1;
4027  Ai[4] = d_rec * sd4;
4028  Ai[5] = d_rec * sd7;
4029  Ai[6] = d_rec * sd2;
4030  Ai[7] = d_rec * sd5;
4031  Ai[8] = d_rec * sd8;
4032 }
4033 inline void IInvert3x3(const int A[9], double Ai[9]) {
4034  // subdeterminants:
4035  int sd0 = A[4] * A[8] - A[5] * A[7];
4036  int sd1 = A[5] * A[6] - A[3] * A[8];
4037  int sd2 = A[3] * A[7] - A[4] * A[6];
4038  int sd3 = A[2] * A[7] - A[1] * A[8];
4039  int sd4 = A[0] * A[8] - A[2] * A[6];
4040  int sd5 = A[1] * A[6] - A[0] * A[7];
4041  int sd6 = A[1] * A[5] - A[2] * A[4];
4042  int sd7 = A[2] * A[3] - A[0] * A[5];
4043  int sd8 = A[0] * A[4] - A[1] * A[3];
4044  int d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4045  double d_rec = IRec(d);
4046  Ai[0] = d_rec * sd0;
4047  Ai[1] = d_rec * sd3;
4048  Ai[2] = d_rec * sd6;
4049  Ai[3] = d_rec * sd1;
4050  Ai[4] = d_rec * sd4;
4051  Ai[5] = d_rec * sd7;
4052  Ai[6] = d_rec * sd2;
4053  Ai[7] = d_rec * sd5;
4054  Ai[8] = d_rec * sd8;
4055 }
4056 // inline void ISafeInvert3x3(const double A[9], double Ai[9],
4057 // bool &is_invertible) {
4058 // // subdeterminants:
4059 // double sd0 = A[4] * A[8] - A[5] * A[7];
4060 // double sd1 = A[5] * A[6] - A[3] * A[8];
4061 // double sd2 = A[3] * A[7] - A[4] * A[6];
4062 // double sd3 = A[2] * A[7] - A[1] * A[8];
4063 // double sd4 = A[0] * A[8] - A[2] * A[6];
4064 // double sd5 = A[1] * A[6] - A[0] * A[7];
4065 // double sd6 = A[1] * A[5] - A[2] * A[4];
4066 // double sd7 = A[2] * A[3] - A[0] * A[5];
4067 // double sd8 = A[0] * A[4] - A[1] * A[3];
4068 // double d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4069 // is_invertible =
4070 // (IAbs(d) > Constant<double>::MIN_ABS_SAFE_DIVIDEND());
4071 // double d_rec = is_invertible ? IRec(d) : 1.0;
4072 // Ai[0] = d_rec * sd0;
4073 // Ai[1] = d_rec * sd3;
4074 // Ai[2] = d_rec * sd6;
4075 // Ai[3] = d_rec * sd1;
4076 // Ai[4] = d_rec * sd4;
4077 // Ai[5] = d_rec * sd7;
4078 // Ai[6] = d_rec * sd2;
4079 // Ai[7] = d_rec * sd5;
4080 // Ai[8] = d_rec * sd8;
4081 // }
4082 // inline void ISafeInvert3x3(const float A[9], float Ai[9], bool
4083 // &is_invertible) {
4084 // // subdeterminants:
4085 // float sd0 = A[4] * A[8] - A[5] * A[7];
4086 // float sd1 = A[5] * A[6] - A[3] * A[8];
4087 // float sd2 = A[3] * A[7] - A[4] * A[6];
4088 // float sd3 = A[2] * A[7] - A[1] * A[8];
4089 // float sd4 = A[0] * A[8] - A[2] * A[6];
4090 // float sd5 = A[1] * A[6] - A[0] * A[7];
4091 // float sd6 = A[1] * A[5] - A[2] * A[4];
4092 // float sd7 = A[2] * A[3] - A[0] * A[5];
4093 // float sd8 = A[0] * A[4] - A[1] * A[3];
4094 // float d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4095 // is_invertible =
4096 // (IAbs(d) > Constant<float>::MIN_ABS_SAFE_DIVIDEND());
4097 // float d_rec = is_invertible ? IRec(d) : 1.f;
4098 // Ai[0] = d_rec * sd0;
4099 // Ai[1] = d_rec * sd3;
4100 // Ai[2] = d_rec * sd6;
4101 // Ai[3] = d_rec * sd1;
4102 // Ai[4] = d_rec * sd4;
4103 // Ai[5] = d_rec * sd7;
4104 // Ai[6] = d_rec * sd2;
4105 // Ai[7] = d_rec * sd5;
4106 // Ai[8] = d_rec * sd8;
4107 // }
4108 // inline void ISafeInvert3x3(const int A[9], double Ai[9], bool &is_invertible)
4109 // {
4110 // // subdeterminants:
4111 // int sd0 = A[4] * A[8] - A[5] * A[7];
4112 // int sd1 = A[5] * A[6] - A[3] * A[8];
4113 // int sd2 = A[3] * A[7] - A[4] * A[6];
4114 // int sd3 = A[2] * A[7] - A[1] * A[8];
4115 // int sd4 = A[0] * A[8] - A[2] * A[6];
4116 // int sd5 = A[1] * A[6] - A[0] * A[7];
4117 // int sd6 = A[1] * A[5] - A[2] * A[4];
4118 // int sd7 = A[2] * A[3] - A[0] * A[5];
4119 // int sd8 = A[0] * A[4] - A[1] * A[3];
4120 // int d = A[0] * sd0 + A[1] * sd1 + A[2] * sd2;
4121 // is_invertible = (d != 0);
4122 // double d_rec = is_invertible ? IRec(d) : 1.0;
4123 // Ai[0] = d_rec * sd0;
4124 // Ai[1] = d_rec * sd3;
4125 // Ai[2] = d_rec * sd6;
4126 // Ai[3] = d_rec * sd1;
4127 // Ai[4] = d_rec * sd4;
4128 // Ai[5] = d_rec * sd7;
4129 // Ai[6] = d_rec * sd2;
4130 // Ai[7] = d_rec * sd5;
4131 // Ai[8] = d_rec * sd8;
4132 // }
4133 
4134 inline void IInvert3x3UpperTriangular(const double A[9], double Ai[9]) {
4135  double A4A8 = A[4] * A[8];
4136  double A0rec = IRec(A[0]);
4137  Ai[0] = A0rec;
4138  Ai[1] = -IDiv(A[1], A[0] * A[4]);
4139  Ai[2] = (IDiv(A[1] * A[5], A4A8) - IDiv(A[2], A[8])) * A0rec;
4140  Ai[3] = 0;
4141  Ai[4] = IRec(A[4]);
4142  Ai[5] = -IDiv(A[5], A4A8);
4143  Ai[6] = 0;
4144  Ai[7] = 0;
4145  Ai[8] = IRec(A[8]);
4146 }
4147 inline void IInvert3x3UpperTriangular(const float A[9], float Ai[9]) {
4148  float A4A8 = A[4] * A[8];
4149  float A0rec = IRec(A[0]);
4150  Ai[0] = A0rec;
4151  Ai[1] = -IDiv(A[1], A[0] * A[4]);
4152  Ai[2] = (IDiv(A[1] * A[5], A4A8) - IDiv(A[2], A[8])) * A0rec;
4153  Ai[3] = 0;
4154  Ai[4] = IRec(A[4]);
4155  Ai[5] = -IDiv(A[5], A4A8);
4156  Ai[6] = 0;
4157  Ai[7] = 0;
4158  Ai[8] = IRec(A[8]);
4159 }
4160 inline void IInvert3x3UpperTriangular(const int A[9], double Ai[9]) {
4161  double A4A8 = static_cast<double>(A[4] * A[8]);
4162  double A0rec = IRec(A[0]);
4163  Ai[0] = A0rec;
4164  Ai[1] = -IDiv(static_cast<double>(A[1]), static_cast<double>(A[0] * A[4]));
4165  Ai[2] =
4166  (IDiv(static_cast<double>(A[1] * A[5]), A4A8) - IDiv(A[2], A[8])) * A0rec;
4167  Ai[3] = 0;
4168  Ai[4] = IRec(A[4]);
4169  Ai[5] = -IDiv(static_cast<double>(A[5]), A4A8);
4170  Ai[6] = 0;
4171  Ai[7] = 0;
4172  Ai[8] = IRec(A[8]);
4173 }
4174 
4175 // Solve 2x2 linear equation system Ax=b using the Cramer's rule
4176 inline void ISolve2x2(const double A[4], const double b[2], double x[2]) {
4177  double d, rec;
4178  d = IDeterminant2x2(A);
4179  rec = IRec(d);
4180  x[0] = rec * (A[3] * b[0] - A[1] * b[1]);
4181  x[1] = rec * (A[0] * b[1] - A[2] * b[0]);
4182 }
4183 inline void ISolve2x2(const float A[4], const float b[2], float x[2]) {
4184  float d, rec;
4185  d = IDeterminant2x2(A);
4186  rec = IRec(d);
4187  x[0] = rec * (A[3] * b[0] - A[1] * b[1]);
4188  x[1] = rec * (A[0] * b[1] - A[2] * b[0]);
4189 }
4190 // Solve 3x3 linear equation system Ax=b using the Cramer's rule
4191 inline void ISolve3x3(const double A[9], const double b[3], double x[3]) {
4192  double d, rec, da0, da1, da2;
4193  d = IDeterminant3x3(A);
4194  rec = IRec(d);
4195  da0 = b[0] * (A[4] * A[8] - A[5] * A[7]) +
4196  b[1] * (A[2] * A[7] - A[1] * A[8]) + b[2] * (A[1] * A[5] - A[2] * A[4]);
4197  da1 = b[0] * (A[5] * A[6] - A[3] * A[8]) +
4198  b[1] * (A[0] * A[8] - A[2] * A[6]) + b[2] * (A[2] * A[3] - A[0] * A[5]);
4199  da2 = b[0] * (A[3] * A[7] - A[4] * A[6]) +
4200  b[1] * (A[1] * A[6] - A[0] * A[7]) + b[2] * (A[0] * A[4] - A[1] * A[3]);
4201  x[0] = da0 * rec;
4202  x[1] = da1 * rec;
4203  x[2] = da2 * rec;
4204 }
4205 inline void ISolve3x3(const float A[9], const float b[3], float x[3]) {
4206  float d, rec, da0, da1, da2;
4207  d = IDeterminant3x3(A);
4208  rec = IRec(d);
4209  da0 = b[0] * (A[4] * A[8] - A[5] * A[7]) +
4210  b[1] * (A[2] * A[7] - A[1] * A[8]) + b[2] * (A[1] * A[5] - A[2] * A[4]);
4211  da1 = b[0] * (A[5] * A[6] - A[3] * A[8]) +
4212  b[1] * (A[0] * A[8] - A[2] * A[6]) + b[2] * (A[2] * A[3] - A[0] * A[5]);
4213  da2 = b[0] * (A[3] * A[7] - A[4] * A[6]) +
4214  b[1] * (A[1] * A[6] - A[0] * A[7]) + b[2] * (A[0] * A[4] - A[1] * A[3]);
4215  x[0] = da0 * rec;
4216  x[1] = da1 * rec;
4217  x[2] = da2 * rec;
4218 }
4219 
4220 // Compute the transpose of a suqare nxn matrix inplace
4221 template <typename T>
4222 inline void ITranspose(T *A, int n) {
4223  for (int r = 0; r < n; ++r) {
4224  for (int c = r; c < n; ++c) {
4225  ISwap(A[r * n + c], A[c * n + r]);
4226  }
4227  }
4228 }
4229 // Compute the transpose of a mxn matrix A, At is nxm
4230 template <typename T>
4231 inline void ITranspose(const T *A, T *At, int m, int n) {
4232  for (int r = 0; r < m; ++r) {
4233  for (int c = 0; c < n; ++c) {
4234  At[c * m + r] = A[r * n + c];
4235  }
4236  }
4237 }
4238 template <typename T>
4239 inline void ITranspose2x2(T A[4]) {
4240  ISwap(A[1], A[2]);
4241 }
4242 template <typename T>
4243 inline void ITranspose2x2(const T A[4], T At[4]) {
4244  At[0] = A[0];
4245  At[1] = A[2];
4246  At[2] = A[1];
4247  At[3] = A[3];
4248 }
4249 template <typename T>
4250 inline void ITranspose3x3(T A[9]) {
4251  ISwap(A[1], A[3]);
4252  ISwap(A[2], A[6]);
4253  ISwap(A[5], A[7]);
4254 }
4255 template <typename T>
4256 inline void ITranspose3x3(const T A[9], T At[9]) {
4257  At[0] = A[0];
4258  At[1] = A[3];
4259  At[2] = A[6];
4260  At[3] = A[1];
4261  At[4] = A[4];
4262  At[5] = A[7];
4263  At[6] = A[2];
4264  At[7] = A[5];
4265  At[8] = A[8];
4266 }
4267 template <typename T>
4268 inline void ITranspose4x4(T A[16]) {
4269  ISwap(A[1], A[4]);
4270  ISwap(A[2], A[8]);
4271  ISwap(A[6], A[9]);
4272  ISwap(A[3], A[12]);
4273  ISwap(A[7], A[13]);
4274  ISwap(A[11], A[14]);
4275 }
4276 template <typename T>
4277 inline void ITranspose4x4(const T A[16], T At[16]) {
4278  At[0] = A[0];
4279  At[1] = A[4];
4280  At[2] = A[8];
4281  At[3] = A[12];
4282  At[4] = A[1];
4283  At[5] = A[5];
4284  At[6] = A[9];
4285  At[7] = A[13];
4286  At[8] = A[2];
4287  At[9] = A[6];
4288  At[10] = A[10];
4289  At[11] = A[14];
4290  At[12] = A[3];
4291  At[13] = A[7];
4292  At[14] = A[11];
4293  At[15] = A[15];
4294 }
4295 
4296 // Add a constant lambda (inplace) to the diagonal elements of a nxn square
4297 // * matrix A
4298 template <typename T>
4299 inline void IAugmentDiagonal(T *A, int n, const T lambda) {
4300  T *Ai;
4301  int i, ni = 0;
4302  for (i = 0; i < n; ++i, ni += n) {
4303  Ai = A + ni;
4304  Ai[i] += lambda;
4305  }
4306 }
4307 
4308 // Multiply m x n matrix A with n-dimensional vector x
4309 template <typename T>
4310 inline void IMultAx(const T *A, const T *x, T *Ax, int m, int n) {
4311  for (int i = 0; i < m; i++) {
4312  Ax[i] = IDot(A + i * n, x, n);
4313  }
4314 }
4315 
4316 // Multiply m x n matrix A's transpose (n x m matrix At) with m-dimensional
4317 // * vector x
4318 template <typename T>
4319 inline void IMultAtx(const T *A, const T *x, T *Atx, int m, int n) {
4320  T acc;
4321  const T *Ai = nullptr;
4322  for (int i = 0; i < n; i++) {
4323  Ai = A + i;
4324  acc = T(0.0);
4325  for (int j = 0; j < m; j++, Ai += n) {
4326  acc += (*Ai) * x[j];
4327  }
4328  Atx[i] = acc;
4329  }
4330 }
4331 
4332 // Multiply 1 x 3 matrix A with 3-dimensional vector x
4333 template <typename T>
4334 inline void IMultAx1x3(const T A[3], const T x[3], T Ax[1]) {
4335  Ax[0] = A[0] * x[0] + A[1] * x[1] + A[2] * x[2];
4336 }
4337 // Multiply 2 x 2 matrix A with 2-dimensional vector x
4338 template <typename T>
4339 inline void IMultAx2x2(const T A[4], const T x[2], T Ax[2]) {
4340  T x0, x1;
4341  x0 = x[0];
4342  x1 = x[1];
4343  Ax[0] = A[0] * x0 + A[1] * x1;
4344  Ax[1] = A[2] * x0 + A[3] * x1;
4345 }
4346 // Multiply 2 x 3 matrix A with 3-dimensional vector x
4347 template <typename T>
4348 inline void IMultAx2x3(const T A[6], const T x[3], T Ax[2]) {
4349  T x0, x1, x2;
4350  x0 = x[0];
4351  x1 = x[1];
4352  x2 = x[2];
4353  Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2;
4354  Ax[1] = A[3] * x0 + A[4] * x1 + A[5] * x2;
4355 }
4356 // Multiply 3 x 3 matrix A with 3-dimensional vector x
4357 template <typename T>
4358 inline void IMultAx3x3(const T A[9], const T x[3], T Ax[3]) {
4359  T x0, x1, x2;
4360  x0 = x[0];
4361  x1 = x[1];
4362  x2 = x[2];
4363  Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2;
4364  Ax[1] = A[3] * x0 + A[4] * x1 + A[5] * x2;
4365  Ax[2] = A[6] * x0 + A[7] * x1 + A[8] * x2;
4366 }
4367 // Multiply 3 x 3 matrix A^t with 3-dimensional vector x
4368 template <typename T>
4369 inline void IMultAtx3x3(const T A[9], const T x[3], T Atx[3]) {
4370  T x0, x1, x2;
4371  x0 = x[0];
4372  x1 = x[1];
4373  x2 = x[2];
4374  Atx[0] = A[0] * x0 + A[3] * x1 + A[6] * x2;
4375  Atx[1] = A[1] * x0 + A[4] * x1 + A[7] * x2;
4376  Atx[2] = A[2] * x0 + A[5] * x1 + A[8] * x2;
4377 }
4378 // Multiply 3 x 4 matrix A with 4-dimensional vector x
4379 template <typename T>
4380 inline void IMultAx3x4(const T A[12], const T x[4], T Ax[3]) {
4381  T x0, x1, x2, x3;
4382  x0 = x[0];
4383  x1 = x[1];
4384  x2 = x[2];
4385  x3 = x[3];
4386  Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2 + A[3] * x3;
4387  Ax[1] = A[4] * x0 + A[5] * x1 + A[6] * x2 + A[7] * x3;
4388  Ax[2] = A[8] * x0 + A[9] * x1 + A[10] * x2 + A[11] * x3;
4389 }
4390 // Multiply 4 x 3 matrix A with 3-dimensional vector x
4391 template <typename T>
4392 inline void IMultAx4x3(const T A[12], const T x[3], T Ax[4]) {
4393  T x0, x1, x2;
4394  x0 = x[0];
4395  x1 = x[1];
4396  x2 = x[2];
4397  Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2;
4398  Ax[1] = A[3] * x0 + A[4] * x1 + A[5] * x2;
4399  Ax[2] = A[6] * x0 + A[7] * x1 + A[8] * x2;
4400  Ax[3] = A[9] * x0 + A[10] * x1 + A[11] * x2;
4401 }
4402 // Multiply 4 x 3 matrix A^t with 3-dimensional vector x
4403 template <typename T>
4404 inline void IMultAtx4x3(const T A[12], const T x[3], T Atx[4]) {
4405  Atx[0] = A[0] * x[0] + A[4] * x[1] + A[8] * x[2];
4406  Atx[1] = A[1] * x[0] + A[5] * x[1] + A[9] * x[2];
4407  Atx[2] = A[2] * x[0] + A[6] * x[1] + A[10] * x[2];
4408  Atx[3] = A[3] * x[0] + A[7] * x[1] + A[11] * x[2];
4409 }
4410 // Multiply 4 x 4 matrix A with 4-dimensional vector x
4411 template <typename T>
4412 inline void IMultAx4x4(const T A[16], const T x[4], T Ax[4]) {
4413  T x0, x1, x2, x3;
4414  x0 = x[0];
4415  x1 = x[1];
4416  x2 = x[2];
4417  x3 = x[3];
4418  Ax[0] = A[0] * x0 + A[1] * x1 + A[2] * x2 + A[3] * x3;
4419  Ax[1] = A[4] * x0 + A[5] * x1 + A[6] * x2 + A[7] * x3;
4420  Ax[2] = A[8] * x0 + A[9] * x1 + A[10] * x2 + A[11] * x3;
4421  Ax[3] = A[12] * x0 + A[13] * x1 + A[14] * x2 + A[15] * x3;
4422 }
4423 // Multiply m x n matrix A with n x o matrix B
4424 template <typename T>
4425 inline void IMultAB(const T *A, const T *B, T *AB, int m, int n, int o) {
4426  int in, io;
4427  T acc;
4428  for (int i = 0; i < m; i++) {
4429  in = i * n;
4430  io = i * o;
4431  for (int j = 0; j < o; j++) {
4432  acc = static_cast<T>(0.0);
4433  for (int k = 0; k < n; k++) {
4434  acc += A[in + k] * B[k * o + j];
4435  }
4436  AB[io + j] = acc;
4437  }
4438  }
4439 }
4440 
4441 // Multiply 3 x 1 matrix A with 1 x 3 matrix B
4442 template <typename T>
4443 inline void IMultAB3x1And1x3(const T A[3], const T B[3], T AB[9]) {
4444  AB[0] = A[0] * B[0];
4445  AB[1] = A[0] * B[1];
4446  AB[2] = A[0] * B[2];
4447  AB[3] = A[1] * B[0];
4448  AB[4] = A[1] * B[1];
4449  AB[5] = A[1] * B[2];
4450  AB[6] = A[2] * B[0];
4451  AB[7] = A[2] * B[1];
4452  AB[8] = A[2] * B[2];
4453 }
4454 
4455 // Multiply 2 x 2 matrix A with 2 x 2 matrix B
4456 template <typename T>
4457 inline void IMultAB2x2And2x2(const T A[4], const T B[4], T AB[4]) {
4458  T a, b;
4459  a = A[0];
4460  b = A[1];
4461  AB[0] = a * B[0] + b * B[2];
4462  AB[1] = a * B[1] + b * B[3];
4463  a = A[2];
4464  b = A[3];
4465  AB[2] = a * B[0] + b * B[2];
4466  AB[3] = a * B[1] + b * B[3];
4467 }
4468 
4469 // Multiply 2 x 3 matrix A with 3 x 2 matrix B
4470 template <typename T>
4471 inline void IMultAB2x3And3x2(const T A[6], const T B[6], T AB[4]) {
4472  T a, b, c;
4473  a = A[0];
4474  b = A[1];
4475  c = A[2];
4476  AB[0] = a * B[0] + b * B[2] + c * B[4];
4477  AB[1] = a * B[1] + b * B[3] + c * B[5];
4478  a = A[3];
4479  b = A[4];
4480  c = A[5];
4481  AB[2] = a * B[0] + b * B[2] + c * B[4];
4482  AB[3] = a * B[1] + b * B[3] + c * B[5];
4483 }
4484 
4485 // Multiply 2 x 3 matrix A with 3 x 3 matrix B
4486 template <typename T>
4487 inline void IMultAB2x3And3x3(const T A[6], const T B[9], T AB[6]) {
4488  T a, b, c;
4489  a = A[0];
4490  b = A[1];
4491  c = A[2];
4492  AB[0] = a * B[0] + b * B[3] + c * B[6];
4493  AB[1] = a * B[1] + b * B[4] + c * B[7];
4494  AB[2] = a * B[2] + b * B[5] + c * B[8];
4495 
4496  a = A[3];
4497  b = A[4];
4498  c = A[5];
4499  AB[3] = a * B[0] + b * B[3] + c * B[6];
4500  AB[4] = a * B[1] + b * B[4] + c * B[7];
4501  AB[5] = a * B[2] + b * B[5] + c * B[8];
4502 }
4503 
4504 // Multiply 2 x 3 matrix A with 3 x 4 matrix B
4505 template <typename T>
4506 inline void IMultAB2x3And3x4(const T A[6], const T B[12], T AB[8]) {
4507  T a, b, c;
4508  a = A[0];
4509  b = A[1];
4510  c = A[2];
4511  AB[0] = a * B[0] + b * B[4] + c * B[8];
4512  AB[1] = a * B[1] + b * B[5] + c * B[9];
4513  AB[2] = a * B[2] + b * B[6] + c * B[10];
4514  AB[3] = a * B[3] + b * B[7] + c * B[11];
4515  a = A[3];
4516  b = A[4];
4517  c = A[5];
4518  AB[4] = a * B[0] + b * B[4] + c * B[8];
4519  AB[5] = a * B[1] + b * B[5] + c * B[9];
4520  AB[6] = a * B[2] + b * B[6] + c * B[10];
4521  AB[7] = a * B[3] + b * B[7] + c * B[11];
4522 }
4523 
4524 // Multiply 3 x 3 matrix A with 3 x 3 matrix B
4525 template <typename T>
4526 inline void IMultAB3x3And3x3(const T A[9], const T B[9], T AB[9]) {
4527  T a, b, c;
4528  a = A[0];
4529  b = A[1];
4530  c = A[2];
4531  AB[0] = a * B[0] + b * B[3] + c * B[6];
4532  AB[1] = a * B[1] + b * B[4] + c * B[7];
4533  AB[2] = a * B[2] + b * B[5] + c * B[8];
4534  a = A[3];
4535  b = A[4];
4536  c = A[5];
4537  AB[3] = a * B[0] + b * B[3] + c * B[6];
4538  AB[4] = a * B[1] + b * B[4] + c * B[7];
4539  AB[5] = a * B[2] + b * B[5] + c * B[8];
4540  a = A[6];
4541  b = A[7];
4542  c = A[8];
4543  AB[6] = a * B[0] + b * B[3] + c * B[6];
4544  AB[7] = a * B[1] + b * B[4] + c * B[7];
4545  AB[8] = a * B[2] + b * B[5] + c * B[8];
4546 }
4547 
4548 // Multiply 4 x 4 matrix A with 4 x 4 matrix B
4549 template <typename T>
4550 inline void IMultAB4x4And4x4(const T A[16], const T B[16], T AB[16]) {
4551  T a, b, c, d;
4552  a = A[0];
4553  b = A[1];
4554  c = A[2];
4555  d = A[3];
4556  AB[0] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4557  AB[1] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4558  AB[2] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4559  AB[3] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4560 
4561  a = A[4];
4562  b = A[5];
4563  c = A[6];
4564  d = A[7];
4565  AB[4] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4566  AB[5] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4567  AB[6] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4568  AB[7] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4569 
4570  a = A[8];
4571  b = A[9];
4572  c = A[10];
4573  d = A[11];
4574  AB[8] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4575  AB[9] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4576  AB[10] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4577  AB[11] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4578 
4579  a = A[12];
4580  b = A[13];
4581  c = A[14];
4582  d = A[15];
4583  AB[12] = a * B[0] + b * B[4] + c * B[8] + d * B[12];
4584  AB[13] = a * B[1] + b * B[5] + c * B[9] + d * B[13];
4585  AB[14] = a * B[2] + b * B[6] + c * B[10] + d * B[14];
4586  AB[15] = a * B[3] + b * B[7] + c * B[11] + d * B[15];
4587 }
4588 
4589 // Multiply 4 x 1 matrix A with 1 x 4 matrix B
4590 template <typename T>
4591 inline void IMultAB4x1And1x4(const T A[4], const T B[4], T AB[16]) {
4592  T sf = A[0];
4593  AB[0] = sf * B[0];
4594  AB[1] = sf * B[1];
4595  AB[2] = sf * B[2];
4596  AB[3] = sf * B[3];
4597 
4598  sf = A[1];
4599  AB[4] = sf * B[0];
4600  AB[5] = sf * B[1];
4601  AB[6] = sf * B[2];
4602  AB[7] = sf * B[3];
4603 
4604  sf = A[2];
4605  AB[8] = sf * B[0];
4606  AB[9] = sf * B[1];
4607  AB[10] = sf * B[2];
4608  AB[11] = sf * B[3];
4609 
4610  sf = A[3];
4611  AB[12] = sf * B[0];
4612  AB[13] = sf * B[1];
4613  AB[14] = sf * B[2];
4614  AB[15] = sf * B[3];
4615 }
4616 
4617 // Multiply upper-triangular 3 x 3 matrix A with 3 x 3 matrix B
4618 template <typename T>
4619 inline void IMultAB3x3And3x3WAUpperTriangular(const T A[9], const T B[9],
4620  T AB[9]) {
4621  T a, b, c;
4622  a = A[0];
4623  b = A[1];
4624  c = A[2];
4625  AB[0] = a * B[0] + b * B[3] + c * B[6];
4626  AB[1] = a * B[1] + b * B[4] + c * B[7];
4627  AB[2] = a * B[2] + b * B[5] + c * B[8];
4628  b = A[4];
4629  c = A[5];
4630  AB[3] = b * B[3] + c * B[6];
4631  AB[4] = b * B[4] + c * B[7];
4632  AB[5] = b * B[5] + c * B[8];
4633  c = A[8];
4634  AB[6] = c * B[6];
4635  AB[7] = c * B[7];
4636  AB[8] = c * B[8];
4637 }
4638 
4639 // Multiply 3 x 3 matrix A with upper-triangular 3 x 3 matrix B
4640 template <typename T>
4641 inline void IMultAB3x3And3x3WBUpperTriangular(const T A[9], const T B[9],
4642  T AB[9]) {
4643  T a, b, c;
4644  a = A[0];
4645  b = A[1];
4646  c = A[2];
4647  AB[0] = a * B[0];
4648  AB[1] = a * B[1] + b * B[4];
4649  AB[2] = a * B[2] + b * B[5] + c * B[8];
4650  a = A[3];
4651  b = A[4];
4652  c = A[5];
4653  AB[3] = a * B[0];
4654  AB[4] = a * B[1] + b * B[4];
4655  AB[5] = a * B[2] + b * B[5] + c * B[8];
4656  a = A[6];
4657  b = A[7];
4658  c = A[8];
4659  AB[6] = a * B[0];
4660  AB[7] = a * B[1] + b * B[4];
4661  AB[8] = a * B[2] + b * B[5] + c * B[8];
4662 }
4663 
4664 template <typename T>
4665 inline void IMultAB3x3And3x4(const T A[9], const T B[12], T AB[12]) {
4666  AB[0] = A[0] * B[0] + A[1] * B[4] + A[2] * B[8];
4667  AB[1] = A[0] * B[1] + A[1] * B[5] + A[2] * B[9];
4668  AB[2] = A[0] * B[2] + A[1] * B[6] + A[2] * B[10];
4669  AB[3] = A[0] * B[3] + A[1] * B[7] + A[2] * B[11];
4670 
4671  AB[4] = A[3] * B[0] + A[4] * B[4] + A[5] * B[8];
4672  AB[5] = A[3] * B[1] + A[4] * B[5] + A[5] * B[9];
4673  AB[6] = A[3] * B[2] + A[4] * B[6] + A[5] * B[10];
4674  AB[7] = A[3] * B[3] + A[4] * B[7] + A[5] * B[11];
4675 
4676  AB[8] = A[6] * B[0] + A[7] * B[4] + A[8] * B[8];
4677  AB[9] = A[6] * B[1] + A[7] * B[5] + A[8] * B[9];
4678  AB[10] = A[6] * B[2] + A[7] * B[6] + A[8] * B[10];
4679  AB[11] = A[6] * B[3] + A[7] * B[7] + A[8] * B[11];
4680 }
4681 
4682 template <typename T>
4683 inline void IMultAB3x4And4x3(const T A[12], const T B[12], T AB[9]) {
4684  T a, b, c, d;
4685  a = A[0];
4686  b = A[1];
4687  c = A[2];
4688  d = A[3];
4689  AB[0] = a * B[0] + b * B[3] + c * B[6] + d * B[9];
4690  AB[1] = a * B[1] + b * B[4] + c * B[7] + d * B[10];
4691  AB[2] = a * B[2] + b * B[5] + c * B[8] + d * B[11];
4692 
4693  a = A[4];
4694  b = A[5];
4695  c = A[6];
4696  d = A[7];
4697  AB[3] = a * B[0] + b * B[3] + c * B[6] + d * B[9];
4698  AB[4] = a * B[1] + b * B[4] + c * B[7] + d * B[10];
4699  AB[5] = a * B[2] + b * B[5] + c * B[8] + d * B[11];
4700 
4701  a = A[8];
4702  b = A[9];
4703  c = A[10];
4704  d = A[11];
4705  AB[6] = a * B[0] + b * B[3] + c * B[6] + d * B[9];
4706  AB[7] = a * B[1] + b * B[4] + c * B[7] + d * B[10];
4707  AB[8] = a * B[2] + b * B[5] + c * B[8] + d * B[11];
4708 }
4709 
4710 // Multiply 3 x 3 matrix A with 3 x 3 matrix B^t
4711 template <typename T>
4712 inline void IMultABt3x3And3x3(const T A[9], const T B[9], T ABt[9]) {
4713  T a, b, c;
4714  a = A[0];
4715  b = A[1];
4716  c = A[2];
4717  ABt[0] = a * B[0] + b * B[1] + c * B[2];
4718  ABt[1] = a * B[3] + b * B[4] + c * B[5];
4719  ABt[2] = a * B[6] + b * B[7] + c * B[8];
4720  a = A[3];
4721  b = A[4];
4722  c = A[5];
4723  ABt[3] = a * B[0] + b * B[1] + c * B[2];
4724  ABt[4] = a * B[3] + b * B[4] + c * B[5];
4725  ABt[5] = a * B[6] + b * B[7] + c * B[8];
4726  a = A[6];
4727  b = A[7];
4728  c = A[8];
4729  ABt[6] = a * B[0] + b * B[1] + c * B[2];
4730  ABt[7] = a * B[3] + b * B[4] + c * B[5];
4731  ABt[8] = a * B[6] + b * B[7] + c * B[8];
4732 }
4733 
4734 // Multiply 2 x 3 matrix A with 3 x 2 matrix B^t
4735 template <typename T>
4736 inline void IMultABt2x3And2x3(const T A[6], const T B[6], T ABt[4]) {
4737  ABt[0] = A[0] * B[0] + A[1] * B[1] + A[2] * B[2];
4738  ABt[1] = A[0] * B[3] + A[1] * B[4] + A[2] * B[5];
4739  ABt[2] = A[3] * B[0] + A[4] * B[1] + A[5] * B[2];
4740  ABt[3] = A[3] * B[3] + A[4] * B[4] + A[5] * B[5];
4741 }
4742 
4743 // Multiply 4 x 4 matrix A with 4 x 3 matrix B^t
4744 template <typename T>
4745 inline void IMultABt4x4And3x4(const T A[16], const T B[12], T ABt[12]) {
4746  ABt[0] = IDot4(A, B);
4747  ABt[1] = IDot4(A, B + 4);
4748  ABt[2] = IDot4(A, B + 8);
4749 
4750  ABt[3] = IDot4(A + 4, B);
4751  ABt[4] = IDot4(A + 4, B + 4);
4752  ABt[5] = IDot4(A + 4, B + 8);
4753 
4754  ABt[6] = IDot4(A + 8, B);
4755  ABt[7] = IDot4(A + 8, B + 4);
4756  ABt[8] = IDot4(A + 8, B + 8);
4757 
4758  ABt[9] = IDot4(A + 12, B);
4759  ABt[10] = IDot4(A + 12, B + 4);
4760  ABt[11] = IDot4(A + 12, B + 8);
4761 }
4762 
4763 // Multiply m x n matrix A's transpose At (n x m) with A and get AtA (n x n)
4764 template <typename T>
4765 inline void IMultAtA(const T *A, T *AtA, int m, int n) {
4766  int i, j, k, ni;
4767  T acc;
4768  const T *Ai, *Aj;
4769  for (i = 0; i < n; ++i) {
4770  ni = n * i;
4771  for (j = 0; j < i; ++j) {
4772  AtA[ni + j] = AtA[j * n + i];
4773  }
4774  for (j = i; j < n; ++j) {
4775  acc = static_cast<T>(0.0);
4776  Ai = A + i;
4777  Aj = A + j;
4778  for (k = 0; k < m; ++k, Ai += n, Aj += n) {
4779  acc += (*Ai) * (*Aj);
4780  }
4781  AtA[ni + j] = acc;
4782  }
4783  }
4784 }
4785 
4786 template <typename T>
4787 inline void IMultAtA2x2(const T A[4], T AtA[4]) {
4788  AtA[0] = A[0] * A[0] + A[2] * A[2];
4789  AtA[1] = AtA[2] = A[0] * A[1] + A[2] * A[3];
4790  AtA[3] = A[1] * A[1] + A[3] * A[3];
4791 }
4792 
4793 template <typename T>
4794 inline void IMultAtA2x2(const T *A, T AtA[4], int n) {
4795  T xx = static_cast<T>(0.0);
4796  T xy = static_cast<T>(0.0);
4797  T yy = static_cast<T>(0.0);
4798  T x, y;
4799  for (int i = 0; i < 2 * n; i += 2) {
4800  x = A[i];
4801  y = A[i + 1];
4802  xx += x * x;
4803  xy += x * y;
4804  yy += y * y;
4805  }
4806  AtA[0] = xx;
4807  AtA[1] = AtA[2] = xy;
4808  AtA[3] = yy;
4809 }
4810 
4811 template <typename T>
4812 inline void IMultAtAnx2(const T *A, T *AtA, int n) {
4813  T xx = static_cast<T>(0.0);
4814  T xy = static_cast<T>(0.0);
4815  T yy = static_cast<T>(0.0);
4816  T x, y;
4817  for (int i = 0; i < 2 * n; i += 2) {
4818  x = A[i];
4819  y = A[i + 1];
4820  xx += x * x;
4821  xy += x * y;
4822  yy += y * y;
4823  }
4824  AtA[0] = xx;
4825  AtA[1] = AtA[2] = xy;
4826  AtA[3] = yy;
4827 }
4828 
4829 template <typename T>
4830 inline void IMultAtA3x3(const T A[9], T AtA[9]) {
4831  AtA[0] = A[0] * A[0] + A[3] * A[3] + A[6] * A[6];
4832  AtA[1] = AtA[3] = A[0] * A[1] + A[3] * A[4] + A[6] * A[7];
4833  AtA[2] = AtA[6] = A[0] * A[2] + A[3] * A[5] + A[6] * A[8];
4834  AtA[4] = A[1] * A[1] + A[4] * A[4] + A[7] * A[7];
4835  AtA[5] = AtA[7] = A[1] * A[2] + A[4] * A[5] + A[7] * A[8];
4836  AtA[8] = A[2] * A[2] + A[5] * A[5] + A[8] * A[8];
4837 }
4838 
4839 template <typename T>
4840 inline void IMultAtAnx3(const T *A, T AtA[9], int n) {
4841  T xx = static_cast<T>(0.0);
4842  T xy = static_cast<T>(0.0);
4843  T xz = static_cast<T>(0.0);
4844  T yy = static_cast<T>(0.0);
4845  T yz = static_cast<T>(0.0);
4846  T zz = static_cast<T>(0.0);
4847  T x, y, z;
4848  int i, j;
4849  for (i = 0, j = 0; i < n; i++) {
4850  x = A[j++];
4851  y = A[j++];
4852  z = A[j++];
4853  xx += x * x;
4854  xy += x * y;
4855  xz += x * z;
4856  yy += y * y;
4857  yz += y * z;
4858  zz += z * z;
4859  }
4860  AtA[0] = xx;
4861  AtA[1] = AtA[3] = xy;
4862  AtA[2] = AtA[6] = xz;
4863  AtA[4] = yy;
4864  AtA[5] = AtA[7] = yz;
4865  AtA[8] = zz;
4866 }
4867 
4868 template <typename T>
4869 inline void IMultAtA4x4(const T A[16], T AtA[16]) {
4870  AtA[0] = A[0] * A[0] + A[4] * A[4] + A[8] * A[8] + A[12] * A[12];
4871  AtA[1] = AtA[4] = A[0] * A[1] + A[4] * A[5] + A[8] * A[9] + A[12] * A[13];
4872  AtA[2] = AtA[8] = A[0] * A[2] + A[4] * A[6] + A[8] * A[10] + A[12] * A[14];
4873  AtA[3] = AtA[12] = A[0] * A[3] + A[4] * A[7] + A[8] * A[11] + A[12] * A[15];
4874  AtA[5] = A[1] * A[1] + A[5] * A[5] + A[9] * A[9] + A[13] * A[13];
4875  AtA[6] = AtA[9] = A[1] * A[2] + A[5] * A[6] + A[9] * A[10] + A[13] * A[14];
4876  AtA[7] = AtA[13] = A[1] * A[3] + A[5] * A[7] + A[9] * A[11] + A[13] * A[15];
4877  AtA[10] = A[2] * A[2] + A[6] * A[6] + A[10] * A[10] + A[14] * A[14];
4878  AtA[11] = AtA[14] = A[2] * A[3] + A[6] * A[7] + A[10] * A[11] + A[14] * A[15];
4879  AtA[15] = A[3] * A[3] + A[7] * A[7] + A[11] * A[11] + A[15] * A[15];
4880 }
4881 
4882 template <typename T>
4883 inline void IMultAtB2x2And2x2(const T A[4], const T B[4], T AtB[4]) {
4884  AtB[0] = A[0] * B[0] + A[2] * B[2];
4885  AtB[1] = A[0] * B[1] + A[2] * B[3];
4886  AtB[2] = A[1] * B[0] + A[3] * B[2];
4887  AtB[3] = A[1] * B[1] + A[3] * B[3];
4888 }
4889 
4890 template <typename T>
4891 inline void IMultAtB3x3And3x3(const T A[9], const T B[9], T AtB[9]) {
4892  AtB[0] = A[0] * B[0] + A[3] * B[3] + A[6] * B[6];
4893  AtB[1] = A[0] * B[1] + A[3] * B[4] + A[6] * B[7];
4894  AtB[2] = A[0] * B[2] + A[3] * B[5] + A[6] * B[8];
4895  AtB[3] = A[1] * B[0] + A[4] * B[3] + A[7] * B[6];
4896  AtB[4] = A[1] * B[1] + A[4] * B[4] + A[7] * B[7];
4897  AtB[5] = A[1] * B[2] + A[4] * B[5] + A[7] * B[8];
4898  AtB[6] = A[2] * B[0] + A[5] * B[3] + A[8] * B[6];
4899  AtB[7] = A[2] * B[1] + A[5] * B[4] + A[8] * B[7];
4900  AtB[8] = A[2] * B[2] + A[5] * B[5] + A[8] * B[8];
4901 }
4902 
4903 template <typename T>
4904 inline void IMultAAt2x3(const T A[6], T AAt[4]) {
4905  AAt[0] = A[0] * A[0] + A[1] * A[1] + A[2] * A[2];
4906  AAt[1] = AAt[2] = A[0] * A[3] + A[1] * A[4] + A[2] * A[5];
4907  AAt[3] = A[3] * A[3] + A[4] * A[4] + A[5] * A[5];
4908 }
4909 
4910 template <typename T>
4911 inline void IMultAAt3x3(const T A[9], T AAt[9]) {
4912  AAt[0] = (A[0] * A[0] + A[1] * A[1] + A[2] * A[2]);
4913  AAt[1] = AAt[3] = (A[0] * A[3] + A[1] * A[4] + A[2] * A[5]);
4914  AAt[2] = AAt[6] = (A[0] * A[6] + A[1] * A[7] + A[2] * A[8]);
4915  AAt[4] = (A[3] * A[3] + A[4] * A[4] + A[5] * A[5]);
4916  AAt[5] = AAt[7] = (A[3] * A[6] + A[4] * A[7] + A[5] * A[8]);
4917  AAt[8] = (A[6] * A[6] + A[7] * A[7] + A[8] * A[8]);
4918 }
4919 
4920 template <typename T>
4921 inline void IMultAAt4x1(const T A[4], T AAt[16]) {
4922  AAt[0] = A[0] * A[0];
4923  AAt[1] = AAt[4] = (A[0] * A[1]);
4924  AAt[2] = AAt[8] = (A[0] * A[2]);
4925  AAt[3] = AAt[12] = (A[0] * A[3]);
4926 
4927  AAt[5] = A[1] * A[1];
4928  AAt[6] = AAt[9] = (A[1] * A[2]);
4929  AAt[7] = AAt[13] = (A[1] * A[3]);
4930 
4931  AAt[10] = A[2] * A[2];
4932  AAt[11] = AAt[14] = (A[2] * A[3]);
4933 
4934  AAt[15] = A[3] * A[3];
4935 }
4936 
4937 template <typename T>
4938 inline void IMultABC3x3And3x3And3x3(const T A[9], const T B[9], const T C[9],
4939  T ABC[9]) {
4940  T BC[9];
4941  IMultAB3x3And3x3(B, C, BC);
4942  IMultAB3x3And3x3(A, BC, ABC);
4943 }
4944 
4945 template <typename T>
4946 inline void IMultAtBC3x3And3x3And3x3(const T A[9], const T B[9], const T C[9],
4947  T AtBC[9]) {
4948  T BC[9];
4949  IMultAB3x3And3x3(B, C, BC);
4950  IMultAtB3x3And3x3(A, BC, AtBC);
4951 }
4952 
4953 template <typename T>
4954 inline void IMultAB4x4WABlockDiagonal(const T A1[4], const T A2[4],
4955  const T B[16], T AB[16]) {
4956  // A = A1 0
4957  // 0 A2
4958  // B = B1 B2
4959  // B3 B4
4960 
4961  // A1B1
4962  AB[0] = A1[0] * B[0] + A1[1] * B[4];
4963  AB[1] = A1[0] * B[1] + A1[1] * B[5];
4964  AB[4] = A1[2] * B[0] + A1[3] * B[4];
4965  AB[5] = A1[2] * B[1] + A1[3] * B[5];
4966 
4967  // A1B2
4968  AB[2] = A1[0] * B[2] + A1[1] * B[6];
4969  AB[3] = A1[0] * B[3] + A1[1] * B[7];
4970  AB[6] = A1[2] * B[2] + A1[3] * B[6];
4971  AB[7] = A1[2] * B[3] + A1[3] * B[7];
4972 
4973  // A2B3
4974  AB[8] = A2[0] * B[8] + A2[1] * B[12];
4975  AB[9] = A2[0] * B[9] + A2[1] * B[13];
4976  AB[12] = A2[2] * B[8] + A2[3] * B[12];
4977  AB[13] = A2[2] * B[9] + A2[3] * B[13];
4978 
4979  // A2B4
4980  AB[10] = A2[0] * B[10] + A2[1] * B[14];
4981  AB[11] = A2[0] * B[11] + A2[1] * B[15];
4982  AB[14] = A2[2] * B[10] + A2[3] * B[14];
4983  AB[15] = A2[2] * B[11] + A2[3] * B[15];
4984 }
4985 
4986 // Swap rows r1 and r2 of a mxn matrix A
4987 template <typename T>
4988 inline void ISwapRows(T *A, int r1, int r2, int m, int n) {
4989  ISwap(A + r1 * n, A + r2 * n, n);
4990 }
4991 // Swap columns c1 and c2 of a mxn matrix A
4992 template <typename T>
4993 inline void ISwapCols(T *A, int c1, int c2, int m, int n) {
4994  T *ref = A;
4995  for (int i = 0; i < m; i++, ref += n) {
4996  ISwap(ref[c1], ref[c2]);
4997  }
4998 }
4999 
5000 // Swap interval [i1,i2) of rows r1 and r2 of a mxn matrix A
5001 template <typename T>
5002 inline void ISwapRowsInterval(T *A, int i1, int i2, int r1, int r2, int m,
5003  int n) {
5004  int i_begin = IMax(0, i1);
5005  int i_end = IMin(n, i2);
5006  ISwap(A + r1 * n + i_begin, A + r2 * n + i_begin, (i_end - i_begin));
5007 }
5008 // Swap interval [i1,i2) of columns c1 and c2 of a mxn matrix A
5009 template <typename T>
5010 inline void ISwapColsInterval(T *A, int i1, int i2, int c1, int c2, int m,
5011  int n) {
5012  int i_begin = IMax(0, i1);
5013  int i_end = IMin(m, i2);
5014  T *ref = A + i_begin * n;
5015  for (int i = i_begin; i < i_end; i++, ref += n) {
5016  ISwap(ref[c1], ref[c2]);
5017  }
5018 }
5019 
5020 // Shift array elements in place so that {x1, y1, ?, x2, y2, ?, x3, y3, ? ...
5021 // xn,
5022 // yn, ?} becomes {x1, y1, x2, y2, x3, y3, ..., xn, yn, ?, ?, ?, ...?} after
5023 // shifting
5024 // The size of input array A is n*3
5025 template <typename T>
5026 inline void IShiftHomogeneous3(T *A, int n) {
5027  if (n <= 1) {
5028  return;
5029  }
5030  int i, nm1 = n - 1;
5031  int dst = 2;
5032  int src = 3;
5033  for (i = 0; i < nm1; ++i) {
5034  ISwap(A[dst], A[src]);
5035  ISwap(A[dst + 1], A[src + 1]);
5036  dst += 2;
5037  src += 3;
5038  }
5039 }
5040 
5041 // Shift array elements in place so that {x1, y1, z1, ?, x2, y2, z2, ?, x3, y3,
5042 // z3, ? ... xn, yn, zn, ?} becomes {x1, y1, z1, x2, y2, z2, x3, y3, z3..., xn,
5043 // yn,
5044 // zn, ?, ?, ?, ...?} after shifting
5045 // The size of input array A is n*4
5046 template <typename T>
5047 inline void IShiftHomogeneous4(T *A, int n) {
5048  if (n <= 1) {
5049  return;
5050  }
5051  int i, nm1 = n - 1;
5052  int dst = 3;
5053  int src = 4;
5054  for (i = 0; i < nm1; ++i) {
5055  ISwap(A[dst], A[src]);
5056  ISwap(A[dst + 1], A[src + 1]);
5057  ISwap(A[dst + 2], A[src + 2]);
5058  dst += 3;
5059  src += 4;
5060  }
5061 }
5062 
5063 } // namespace common
5064 } // namespace perception
5065 } // namespace apollo
void ICopy(const T *src, T *dst, int n)
Definition: i_blas.h:27
void INeg9(T x[9])
Definition: i_blas.h:466
void IAdd(T *x, int n, T k)
Definition: i_blas.h:786
T IDot10(const T x[10], const T y[10])
Definition: i_blas.h:2292
T ISum6(const T x[6])
Definition: i_blas.h:2372
unsigned int IHammingDiff4(const unsigned int x[4], const unsigned int y[4])
Definition: i_blas.h:2746
void ISwapCols(T *A, int c1, int c2, int m, int n)
Definition: i_blas.h:4993
void IFill4(T a[4], T val)
Definition: i_blas.h:260
unsigned int IHammingDiff8(const unsigned int x[8], const unsigned int y[8])
Definition: i_blas.h:2755
void INeg12(T x[12])
Definition: i_blas.h:505
T ISquaresumDiffU4(const T x[4], const T y[4])
Definition: i_blas.h:2641
T IL2Norm10(const T x[10])
Definition: i_blas.h:2840
void ISolve2x2(const double A[4], const double b[2], double x[2])
Definition: i_blas.h:4176
void IHomogenize2(const T x[2], T y[3])
Definition: i_blas.h:3703
void ICopy1(const T src[1], T dst[1])
Definition: i_blas.h:31
void IUnitize7(T x[7])
Definition: i_blas.h:2971
void INeg1(T x[1])
Definition: i_blas.h:406
void IScale5(T x[5], T sf)
Definition: i_blas.h:1881
T ISum5(const T x[5])
Definition: i_blas.h:2368
T ISum12(const T x[12])
Definition: i_blas.h:2397
double IDeterminant2x2(const double A[4])
Definition: i_blas.h:3811
T IL2NormAdv(T a, T b)
Definition: i_blas.h:2871
T IL2Norm9(const T x[9])
Definition: i_blas.h:2836
void ISqrSkewSymmetric3x3(const T x[3], T e_x2[9])
Definition: i_blas.h:3742
void IZero3(T a[3])
Definition: i_blas.h:334
T ISquaresumDiffU9(const T x[9], const T y[9])
Definition: i_blas.h:2668
void INeg11(T x[11])
Definition: i_blas.h:491
void IZero13(T a[13])
Definition: i_blas.h:378
void ICopy6(const T src[6], T dst[6])
Definition: i_blas.h:61
void IMultAtAnx3(const T *A, T AtA[9], int n)
Definition: i_blas.h:4840
T IDot13(const T x[13], const T y[13])
Definition: i_blas.h:2309
void IEye4x4(T A[16])
Definition: i_blas.h:3774
void IMultAB2x2And2x2(const T A[4], const T B[4], T AB[4])
Definition: i_blas.h:4457
void ISub1(const T x[1], const T y[1], T z[1])
Definition: i_blas.h:1396
void IUpperTriangular2x2(T a0, T a1, T a3, T A[4])
Definition: i_blas.h:3782
void ICentroid3(const double *a, int n, double centroid[3])
Definition: i_blas.h:3127
void IUnitize11(T x[11])
Definition: i_blas.h:2987
T IDot1(const T x[1], const T y[1])
Definition: i_blas.h:2252
void IMultAAt4x1(const T A[4], T AAt[16])
Definition: i_blas.h:4921
void ISubScaled8(const T x[8], T y[8], T k)
Definition: i_blas.h:1828
float IAbs(float a)
Definition: i_basic.h:29
void IMultAtx(const T *A, const T *x, T *Atx, int m, int n)
Definition: i_blas.h:4319
void ISubScaled7(const T x[7], T y[7], T k)
Definition: i_blas.h:1818
T ISdv(const T *x, T mean, int n)
Definition: i_blas.h:2500
void ISub11(const T x[11], const T y[11], T z[11])
Definition: i_blas.h:1481
T IDot7(const T x[7], const T y[7])
Definition: i_blas.h:2277
void ITranspose4x4(T A[16])
Definition: i_blas.h:4268
void ISub12(const T x[12], const T y[12], T z[12])
Definition: i_blas.h:1495
unsigned int IHammingDiff(const unsigned int *x, const unsigned int *y, int n)
Definition: i_blas.h:2731
unsigned int IHammingDiff2(const unsigned int x[2], const unsigned int y[2])
Definition: i_blas.h:2739
void IZero6(T a[6])
Definition: i_blas.h:346
T ISquaresum3(const T x[3])
Definition: i_blas.h:2536
T ISum8(const T x[8])
Definition: i_blas.h:2380
void IFill15(T a[15], T val)
Definition: i_blas.h:308
void IScale4(T x[4], T sf)
Definition: i_blas.h:1874
void INeg16(T x[16])
Definition: i_blas.h:571
void IFill6(T a[6], T val)
Definition: i_blas.h:268
T IL2Norm15(const T x[15])
Definition: i_blas.h:2860
T ISum4(const T x[4])
Definition: i_blas.h:2364
void IUnitize4(T x[4])
Definition: i_blas.h:2959
T IL2Norm3(const T x[3])
Definition: i_blas.h:2812
T ISquaresum10(const T x[10])
Definition: i_blas.h:2568
void IAdd1(const T x[1], const T y[1], T z[1])
Definition: i_blas.h:799
void IAdd6(const T x[6], const T y[6], T z[6])
Definition: i_blas.h:829
T ISquaresum16(const T x[16])
Definition: i_blas.h:2603
void IHomogenize1(const T x[1], T y[2])
Definition: i_blas.h:3698
void ISubScaled9(const T x[9], T y[9], T k)
Definition: i_blas.h:1839
double IDeterminant3x3(const double A[9])
Definition: i_blas.h:3822
void IUnitize(double *x, int n)
Definition: i_blas.h:2907
T ISquaresum(const T *x, int n)
Definition: i_blas.h:2520
T IDot14(const T x[14], const T y[14])
Definition: i_blas.h:2315
void IZero12(T a[12])
Definition: i_blas.h:373
T ISquaresumDiffU14(const T x[14], const T y[14])
Definition: i_blas.h:2703
void IAdd16(const T x[16], const T y[16], T z[16])
Definition: i_blas.h:964
void IUnitize12(T x[12])
Definition: i_blas.h:2991
void IUnitize14(T x[14])
Definition: i_blas.h:2999
void ISignedUnitize3(T x[3])
Definition: i_blas.h:3076
void ISwapColsInterval(T *A, int i1, int i2, int c1, int c2, int m, int n)
Definition: i_blas.h:5010
void IScale9(T x[9], T sf)
Definition: i_blas.h:1919
T ISquaresum7(const T x[7])
Definition: i_blas.h:2553
PlanningContext is the runtime context in planning. It is persistent across multiple frames...
Definition: atomic_hash_map.h:25
void IZero5(T a[5])
Definition: i_blas.h:342
T ISquaresum15(const T x[15])
Definition: i_blas.h:2597
T IL2Norm14(const T x[14])
Definition: i_blas.h:2856
void IFill2(T a[2], T val)
Definition: i_blas.h:252
T ISquaresumDiffU5(const T x[5], const T y[5])
Definition: i_blas.h:2646
void IFill12(T a[12], T val)
Definition: i_blas.h:293
int IMaxAbsIndex(const double *a, int n)
Definition: i_blas.h:3353
void ISubScaled1(const T x[1], T y[1], T k)
Definition: i_blas.h:1779
void INegCol(T *A, int c, int m, int n)
Definition: i_blas.h:777
T IMean3(const T x[3])
Definition: i_blas.h:2442
void INeg15(T x[15])
Definition: i_blas.h:553
void ICopy3(const T src[3], T dst[3])
Definition: i_blas.h:40
T ISquaresum8(const T x[8])
Definition: i_blas.h:2558
int IMeanU(const unsigned char *x, int n)
Definition: i_blas.h:2432
void IAdd3(const T x[3], const T y[3], T z[3])
Definition: i_blas.h:808
void ISubScaled6(const T x[6], T y[6], T k)
Definition: i_blas.h:1809
T ISquaresumDiffU10(const T x[10], const T y[10])
Definition: i_blas.h:2674
void IUnitize5(T x[5])
Definition: i_blas.h:2963
unsigned int IHammingLut(unsigned int a, unsigned int b)
Definition: i_basic.h:287
void IUnitize10(T x[10])
Definition: i_blas.h:2983
float IRec(float a)
Definition: i_basic.h:69
void IAugmentDiagonal(T *A, int n, const T lambda)
Definition: i_blas.h:4299
void IAdd14(const T x[14], const T y[14], T z[14])
Definition: i_blas.h:929
void IMultAtAnx2(const T *A, T *AtA, int n)
Definition: i_blas.h:4812
T ISquaresum12(const T x[12])
Definition: i_blas.h:2579
T IDot15(const T x[15], const T y[15])
Definition: i_blas.h:2321
const size_t A
Definition: util.h:160
T IMax(T a, T b)
Definition: i_basic.h:161
void ICopy15(const T src[15], T dst[15])
Definition: i_blas.h:178
void IScale8(T x[8], T sf)
Definition: i_blas.h:1908
T IMean12(const T x[12])
Definition: i_blas.h:2478
void IMultAx(const T *A, const T *x, T *Ax, int m, int n)
Definition: i_blas.h:4310
void IMultAtA2x2(const T A[4], T AtA[4])
Definition: i_blas.h:4787
int IMaxAbsIndexIntervalColumn(const double *a, int i1, int i2, int n)
Definition: i_blas.h:3530
void ISubdeterminants3x4(const double x[4], const double y[4], const double z[4], double sd[4])
Definition: i_blas.h:3869
void ISub14(const T x[14], const T y[14], T z[14])
Definition: i_blas.h:1526
void IMultAx4x4(const T A[16], const T x[4], T Ax[4])
Definition: i_blas.h:4412
T IMaxDiagonalElement(const T *a, int n)
Definition: i_blas.h:3289
T IMean5(const T x[5])
Definition: i_blas.h:2450
void IMultAB2x3And3x2(const T A[6], const T B[6], T AB[4])
Definition: i_blas.h:4471
T IMean7(const T x[7])
Definition: i_blas.h:2458
T IL2Norm11(const T x[11])
Definition: i_blas.h:2844
void IZero11(T a[11])
Definition: i_blas.h:368
T ISquaresumDiffU7(const T x[7], const T y[7])
Definition: i_blas.h:2656
float ISqr(float a)
Definition: i_basic.h:103
void IAdd5(const T x[5], const T y[5], T z[5])
Definition: i_blas.h:821
int IMinAbsIndexInterval(const double *a, int i1, int i2)
Definition: i_blas.h:3488
void IMultAB3x3And3x3WBUpperTriangular(const T A[9], const T B[9], T AB[9])
Definition: i_blas.h:4641
void IHomogeneousUnitize3(T x[3])
Definition: i_blas.h:3093
void IFill(T *a, int n, T val)
Definition: i_blas.h:242
void IAddScaled1(const T x[1], T y[1], T k)
Definition: i_blas.h:1229
T ISignNeverZero(T a)
Definition: i_basic.h:189
T IDot4(const T x[4], const T y[4])
Definition: i_blas.h:2264
T IMean14(const T x[14])
Definition: i_blas.h:2486
void IAxiator(const T x[3], T e_x[9])
Definition: i_blas.h:3727
int ISquaresumDiffU(const unsigned char *x, const unsigned char *y, int n)
Definition: i_blas.h:2612
T IL2Norm16(const T x[16])
Definition: i_blas.h:2864
T ISquaresumDiffU1(const T x[1], const T y[1])
Definition: i_blas.h:2629
T IMaxElement(const T *a, int n)
Definition: i_blas.h:3236
int IMaxAbsIndexSubdiagonalColumn(const T *A, int i, int n)
Definition: i_blas.h:3621
void INeg6(T x[6])
Definition: i_blas.h:436
T ISquaresumDiffU15(const T x[15], const T y[15])
Definition: i_blas.h:2711
T IMean(const T *x, int n)
Definition: i_blas.h:2434
T ISquaresum14(const T x[14])
Definition: i_blas.h:2591
void ISubdeterminants2x4(const double x[4], const double y[4], double sd[6])
Definition: i_blas.h:3840
T ISquaresumDiffU12(const T x[12], const T y[12])
Definition: i_blas.h:2688
T IDot8(const T x[8], const T y[8])
Definition: i_blas.h:2282
void ISub10(const T x[10], const T y[10], T z[10])
Definition: i_blas.h:1468
void IZero16(T a[16])
Definition: i_blas.h:393
T ISum3(const T x[3])
Definition: i_blas.h:2360
T IL2Norm4(const T x[4])
Definition: i_blas.h:2816
void IMultAB3x3And3x3WAUpperTriangular(const T A[9], const T B[9], T AB[9])
Definition: i_blas.h:4619
void IMultABt2x3And2x3(const T A[6], const T B[6], T ABt[4])
Definition: i_blas.h:4736
T IMean11(const T x[11])
Definition: i_blas.h:2474
void IScale7(T x[7], T sf)
Definition: i_blas.h:1898
void IAddScaled(const T *x, T *y, int n, T k)
Definition: i_blas.h:1223
T IL2Norm1(const T x[1])
Definition: i_blas.h:2804
void ISub6(const T x[6], const T y[6], T z[6])
Definition: i_blas.h:1426
void IScale12(T x[12], T sf)
Definition: i_blas.h:1958
void ISignedUnitize2(T x[2])
Definition: i_blas.h:3072
T ISum11(const T x[11])
Definition: i_blas.h:2392
void IAddScaled9(const T x[9], T y[9], T k)
Definition: i_blas.h:1289
Definition: i_constant.h:22
void ICopy8(const T src[8], T dst[8])
Definition: i_blas.h:80
void IScale16(T x[16], T sf)
Definition: i_blas.h:2024
T ISum9(const T x[9])
Definition: i_blas.h:2384
unsigned int IHammingDiff16(const unsigned int x[16], const unsigned int y[16])
Definition: i_blas.h:2768
void IUnitize13(T x[13])
Definition: i_blas.h:2995
double IL2Norm(const unsigned char *x, int n)
Definition: i_blas.h:2791
void IAddScaled4(const T x[4], T y[4], T k)
Definition: i_blas.h:1244
T ISum10(const T x[10])
Definition: i_blas.h:2388
void IHomogenize3(const T x[3], T y[4])
Definition: i_blas.h:3709
void ISub5(const T x[5], const T y[5], T z[5])
Definition: i_blas.h:1418
void ITranspose(T *A, int n)
Definition: i_blas.h:4222
void IScale(T *x, int n, T sf)
Definition: i_blas.h:1853
void IFill8(T a[8], T val)
Definition: i_blas.h:276
T ISum13(const T x[13])
Definition: i_blas.h:2402
T IMean2(const T x[2])
Definition: i_blas.h:2438
void IAddScaled8(const T x[8], T y[8], T k)
Definition: i_blas.h:1278
int ISumU(const unsigned char *x, int n)
Definition: i_blas.h:2336
T ISquaresumDiffU2(const T x[2], const T y[2])
Definition: i_blas.h:2633
void ISub3(const T x[3], const T y[3], T z[3])
Definition: i_blas.h:1405
void IMultAtA(const T *A, T *AtA, int m, int n)
Definition: i_blas.h:4765
void IFill11(T a[11], T val)
Definition: i_blas.h:288
void IScale2(T x[2], T sf)
Definition: i_blas.h:1863
void IScale10(T x[10], T sf)
Definition: i_blas.h:1931
void IZero9(T a[9])
Definition: i_blas.h:358
void IMultABC3x3And3x3And3x3(const T A[9], const T B[9], const T C[9], T ABC[9])
Definition: i_blas.h:4938
T ISquaresum6(const T x[6])
Definition: i_blas.h:2548
void IZero14(T a[14])
Definition: i_blas.h:383
void ICopy2(const T src[2], T dst[2])
Definition: i_blas.h:35
void IHomogeneousUnitize9(T x[9])
Definition: i_blas.h:3101
void ITranspose3x3(T A[9])
Definition: i_blas.h:4250
double IDeterminant4x4(const double A[16])
Definition: i_blas.h:3898
float IDiv(float a, float b)
Definition: i_basic.h:35
void INeg7(T x[7])
Definition: i_blas.h:445
void IUpperTriangular3x3(T a0, T a1, T a2, T a4, T a5, T a8, T A[9])
Definition: i_blas.h:3791
void IScale6(T x[6], T sf)
Definition: i_blas.h:1889
void IAdd8(const T x[8], const T y[8], T z[8])
Definition: i_blas.h:848
void IHomogenize(const T *x, T *y, int n)
Definition: i_blas.h:3691
void IFill13(T a[13], T val)
Definition: i_blas.h:298
void ISub13(const T x[13], const T y[13], T z[13])
Definition: i_blas.h:1510
void IMultAtB3x3And3x3(const T A[9], const T B[9], T AtB[9])
Definition: i_blas.h:4891
void ISignedUnitize4(T x[4])
Definition: i_blas.h:3080
void IHomogeneousUnitize4(T x[4])
Definition: i_blas.h:3097
void IFill10(T a[10], T val)
Definition: i_blas.h:284
void ITranspose2x2(T A[4])
Definition: i_blas.h:4239
void IMultAx3x3(const T A[9], const T x[3], T Ax[3])
Definition: i_blas.h:4358
void ISwapRows(T *A, int r1, int r2, int m, int n)
Definition: i_blas.h:4988
T IMean16(const T x[16])
Definition: i_blas.h:2494
T IMean10(const T x[10])
Definition: i_blas.h:2470
T IDot2(const T x[2], const T y[2])
Definition: i_blas.h:2256
void ISubScaled(const T *x, T *y, int n, T k)
Definition: i_blas.h:1773
void ICentroid2(const double *a, int n, double centroid[2])
Definition: i_blas.h:3144
void IMultAAt2x3(const T A[6], T AAt[4])
Definition: i_blas.h:4904
T IL2Norm7(const T x[7])
Definition: i_blas.h:2828
void IMultAB2x3And3x4(const T A[6], const T B[12], T AB[8])
Definition: i_blas.h:4506
void ISolve3x3(const double A[9], const double b[3], double x[3])
Definition: i_blas.h:4191
void IMultAB3x3And3x3(const T A[9], const T B[9], T AB[9])
Definition: i_blas.h:4526
void IUnitize16(T x[16])
Definition: i_blas.h:3007
void IZero4(T a[4])
Definition: i_blas.h:338
void INeg4(T x[4])
Definition: i_blas.h:421
T IMinElement(const T *a, int n)
Definition: i_blas.h:3220
void IMultAtx4x3(const T A[12], const T x[3], T Atx[4])
Definition: i_blas.h:4404
T IDot6(const T x[6], const T y[6])
Definition: i_blas.h:2272
T ISquaresum4(const T x[4])
Definition: i_blas.h:2540
T IMean8(const T x[8])
Definition: i_blas.h:2462
T ISquaresum9(const T x[9])
Definition: i_blas.h:2563
void IMultAx2x3(const T A[6], const T x[3], T Ax[2])
Definition: i_blas.h:4348
T IL2Norm12(const T x[12])
Definition: i_blas.h:2848
void IAddScaled2(const T x[2], T y[2], T k)
Definition: i_blas.h:1233
T IMean13(const T x[13])
Definition: i_blas.h:2482
void IAdd9(const T x[9], const T y[9], T z[9])
Definition: i_blas.h:859
void ISubScaled2(const T x[2], T y[2], T k)
Definition: i_blas.h:1783
void IAddScaled6(const T x[6], T y[6], T k)
Definition: i_blas.h:1259
void IAdd4(const T x[4], const T y[4], T z[4])
Definition: i_blas.h:814
void INeg(T *x, int n)
Definition: i_blas.h:400
void IZero1(T a[1])
Definition: i_blas.h:326
T ISum(const T *x, int n)
Definition: i_blas.h:2344
void IEye3x3(T A[9])
Definition: i_blas.h:3769
void IMultAB4x4WABlockDiagonal(const T A1[4], const T A2[4], const T B[16], T AB[16])
Definition: i_blas.h:4954
void IZero10(T a[10])
Definition: i_blas.h:363
void IMultAB(const T *A, const T *B, T *AB, int m, int n, int o)
Definition: i_blas.h:4425
T IMean15(const T x[15])
Definition: i_blas.h:2490
T IDot12(const T x[12], const T y[12])
Definition: i_blas.h:2303
void ISub9(const T x[9], const T y[9], T z[9])
Definition: i_blas.h:1456
void IMultAB2x3And3x3(const T A[6], const T B[9], T AB[6])
Definition: i_blas.h:4487
void IAdd20(const T x[20], const T y[20], T z[20])
Definition: i_blas.h:983
void IFill14(T a[14], T val)
Definition: i_blas.h:303
void ICopy10(const T src[10], T dst[10])
Definition: i_blas.h:103
T IDot5(const T x[5], const T y[5])
Definition: i_blas.h:2268
int ISquaresumU(const unsigned char *x, int n)
Definition: i_blas.h:2512
T ISquaresum5(const T x[5])
Definition: i_blas.h:2544
T ISquaresum1(const T x[1])
Definition: i_blas.h:2528
void IMultAx1x3(const T A[3], const T x[3], T Ax[1])
Definition: i_blas.h:4334
T IDot(const T *x, const T *y, int n)
Definition: i_blas.h:2244
void INeg8(T x[8])
Definition: i_blas.h:455
T IL2Norm5(const T x[5])
Definition: i_blas.h:2820
void IFill7(T a[7], T val)
Definition: i_blas.h:272
void IMultAtA3x3(const T A[9], T AtA[9])
Definition: i_blas.h:4830
void IMinMaxElements(const T *a, int n, T *min_val, T *max_val)
Definition: i_blas.h:3638
void IUnitize8(T x[8])
Definition: i_blas.h:2975
T ISquaresumDiffU8(const T x[8], const T y[8])
Definition: i_blas.h:2662
void ISwap(T &a, T &b)
Definition: i_basic.h:299
void IUnitize6(T x[6])
Definition: i_blas.h:2967
T IMean6(const T x[6])
Definition: i_blas.h:2454
void IMultABt3x3And3x3(const T A[9], const T B[9], T ABt[9])
Definition: i_blas.h:4712
void IScale15(T x[15], T sf)
Definition: i_blas.h:2006
void IZero(T *a, int n)
Definition: i_blas.h:320
void IZero7(T a[7])
Definition: i_blas.h:350
void IAddScaled5(const T x[5], T y[5], T k)
Definition: i_blas.h:1251
void IMultAB4x1And1x4(const T A[4], const T B[4], T AB[16])
Definition: i_blas.h:4591
void IAddScaled3(const T x[3], T y[3], T k)
Definition: i_blas.h:1238
T ISquaresumDiffU13(const T x[13], const T y[13])
Definition: i_blas.h:2695
double ITrace3x3(const double A[9])
Definition: i_blas.h:3807
void IMultAtA4x4(const T A[16], T AtA[16])
Definition: i_blas.h:4869
T ISquaresumDiffU3(const T x[3], const T y[3])
Definition: i_blas.h:2637
void IZero8(T a[8])
Definition: i_blas.h:354
void ISub7(const T x[7], const T y[7], T z[7])
Definition: i_blas.h:1435
void ISubScaled5(const T x[5], T y[5], T k)
Definition: i_blas.h:1801
void IAdd10(const T x[10], const T y[10], T z[10])
Definition: i_blas.h:871
void INeg3(T x[3])
Definition: i_blas.h:415
void IAdd7(const T x[7], const T y[7], T z[7])
Definition: i_blas.h:838
void IMultAAt3x3(const T A[9], T AAt[9])
Definition: i_blas.h:4911
T IDot11(const T x[11], const T y[11])
Definition: i_blas.h:2297
void IAdd11(const T x[11], const T y[11], T z[11])
Definition: i_blas.h:884
void IEye(T *A, int n)
Definition: i_blas.h:3756
void ICopy11(const T src[11], T dst[11])
Definition: i_blas.h:116
void IFill9(T a[9], T val)
Definition: i_blas.h:280
T ISquaresumDiffU11(const T x[11], const T y[11])
Definition: i_blas.h:2681
int IMaxIndex(const double *a, int n)
Definition: i_blas.h:3306
void IAdd13(const T x[13], const T y[13], T z[13])
Definition: i_blas.h:913
T IDot3(const T x[3], const T y[3])
Definition: i_blas.h:2260
float ISqrt(float a)
Definition: i_basic.h:76
void ICopy13(const T src[13], T dst[13])
Definition: i_blas.h:145
int IMinAbsIndex(const double *a, int n)
Definition: i_blas.h:3399
void IAdd2(const T x[2], const T y[2], T z[2])
Definition: i_blas.h:803
void ISub15(const T x[15], const T y[15], T z[15])
Definition: i_blas.h:1543
void ISub(T *x, int n, T k)
Definition: i_blas.h:1383
T ISquaresumDiffU6(const T x[6], const T y[6])
Definition: i_blas.h:2651
void IInvert3x3(const double A[9], double Ai[9])
Definition: i_blas.h:3989
void ISubScaled3(const T x[3], T y[3], T k)
Definition: i_blas.h:1788
T IMin(T a, T b)
Definition: i_basic.h:155
void IMultAB4x4And4x4(const T A[16], const T B[16], T AB[16])
Definition: i_blas.h:4550
void IScale14(T x[14], T sf)
Definition: i_blas.h:1989
void ICopy5(const T src[5], T dst[5])
Definition: i_blas.h:53
T ISum2(const T x[2])
Definition: i_blas.h:2356
T IInfinityNorm(const T *A, int m, int n)
Definition: i_blas.h:2892
void IMultAx2x2(const T A[4], const T x[2], T Ax[2])
Definition: i_blas.h:4339
void IMultAB3x3And3x4(const T A[9], const T B[12], T AB[12])
Definition: i_blas.h:4665
void IMultABt4x4And3x4(const T A[16], const T B[12], T ABt[12])
Definition: i_blas.h:4745
void ISub2(const T x[2], const T y[2], T z[2])
Definition: i_blas.h:1400
T IAbsSum(const T *x, int n)
Definition: i_blas.h:2423
void IMultAx4x3(const T A[12], const T x[3], T Ax[4])
Definition: i_blas.h:4392
double ITrace2x2(const double A[4])
Definition: i_blas.h:3804
T IDot9(const T x[9], const T y[9])
Definition: i_blas.h:2287
void INeg14(T x[14])
Definition: i_blas.h:536
void IUnitize3(T x[3])
Definition: i_blas.h:2955
void ICopy7(const T src[7], T dst[7])
Definition: i_blas.h:70
void IHomogeneousUnitize2(T x[2])
Definition: i_blas.h:3089
void IFill3(T a[3], T val)
Definition: i_blas.h:256
int IMaxAbsIndexInterval(const double *a, int i1, int i2)
Definition: i_blas.h:3447
int IMinAbsIndexIntervalColumn(const double *a, int i1, int i2, int n)
Definition: i_blas.h:3575
void IInvert2x2(const double A[4], double Ai[4])
Definition: i_blas.h:3926
void IFill1(T a[1], T val)
Definition: i_blas.h:248
T ISum16(const T x[16])
Definition: i_blas.h:2417
void IZero15(T a[15])
Definition: i_blas.h:388
T ISquaresum13(const T x[13])
Definition: i_blas.h:2585
void ISubScaled4(const T x[4], T y[4], T k)
Definition: i_blas.h:1794
T ISum1(const T x[1])
Definition: i_blas.h:2352
T ISquaresum2(const T x[2])
Definition: i_blas.h:2532
void IShiftHomogeneous4(T *A, int n)
Definition: i_blas.h:5047
void INeg2(T x[2])
Definition: i_blas.h:410
void IHomogeneousUnitize(T *x, int n)
Definition: i_blas.h:3085
void IZero2(T a[2])
Definition: i_blas.h:330
void IScale3(T x[3], T sf)
Definition: i_blas.h:1868
T IMean4(const T x[4])
Definition: i_blas.h:2446
T IDot16(const T x[16], const T y[16])
Definition: i_blas.h:2328
T ISum7(const T x[7])
Definition: i_blas.h:2376
void IAddScaled7(const T x[7], T y[7], T k)
Definition: i_blas.h:1268
void IUnitize15(T x[15])
Definition: i_blas.h:3003
void ISub8(const T x[8], const T y[8], T z[8])
Definition: i_blas.h:1445
T IMean9(const T x[9])
Definition: i_blas.h:2466
T IL2Norm13(const T x[13])
Definition: i_blas.h:2852
void IMultAtx3x3(const T A[9], const T x[3], T Atx[3])
Definition: i_blas.h:4369
T ISum15(const T x[15])
Definition: i_blas.h:2412
T ISquaresumDiffU16(const T x[16], const T y[16])
Definition: i_blas.h:2719
void IUnitize9(T x[9])
Definition: i_blas.h:2979
void IMultAtBC3x3And3x3And3x3(const T A[9], const T B[9], const T C[9], T AtBC[9])
Definition: i_blas.h:4946
void IScale1(T x[1], T sf)
Definition: i_blas.h:1859
void ISub4(const T x[4], const T y[4], T z[4])
Definition: i_blas.h:1411
void ICopy12(const T src[12], T dst[12])
Definition: i_blas.h:130
void IMultAB3x4And4x3(const T A[12], const T B[12], T AB[9])
Definition: i_blas.h:4683
T IL2Norm6(const T x[6])
Definition: i_blas.h:2824
void ISub16(const T x[16], const T y[16], T z[16])
Definition: i_blas.h:1561
void IFill5(T a[5], T val)
Definition: i_blas.h:264
void ICopy14(const T src[14], T dst[14])
Definition: i_blas.h:161
void INeg13(T x[13])
Definition: i_blas.h:520
void ICopy9(const T src[9], T dst[9])
Definition: i_blas.h:91
void IEye2x2(T A[4])
Definition: i_blas.h:3764
void INeg5(T x[5])
Definition: i_blas.h:428
T IL2Norm2(const T x[2])
Definition: i_blas.h:2808
void IFill16(T a[16], T val)
Definition: i_blas.h:313
T ISquaresum11(const T x[11])
Definition: i_blas.h:2573
void IMultAx3x4(const T A[12], const T x[4], T Ax[3])
Definition: i_blas.h:4380
T IL2Norm8(const T x[8])
Definition: i_blas.h:2832
void ICopy4(const T src[4], T dst[4])
Definition: i_blas.h:46
void ICross(const T x[3], const T y[3], T xxy[3])
Definition: i_blas.h:3718
void IMultAB3x1And1x3(const T A[3], const T B[3], T AB[9])
Definition: i_blas.h:4443
void INeg10(T x[10])
Definition: i_blas.h:478
void IAdd12(const T x[12], const T y[12], T z[12])
Definition: i_blas.h:898
void IShiftHomogeneous3(T *A, int n)
Definition: i_blas.h:5026
void IScale13(T x[13], T sf)
Definition: i_blas.h:1973
void IMultAtB2x2And2x2(const T A[4], const T B[4], T AtB[4])
Definition: i_blas.h:4883
void ICopy16(const T src[16], T dst[16])
Definition: i_blas.h:196
void ISwapRowsInterval(T *A, int i1, int i2, int r1, int r2, int m, int n)
Definition: i_blas.h:5002
void IScale11(T x[11], T sf)
Definition: i_blas.h:1944
void ISafeUnitize(double *x, int n)
Definition: i_blas.h:2909
T ISum14(const T x[14])
Definition: i_blas.h:2407
void IAdd15(const T x[15], const T y[15], T z[15])
Definition: i_blas.h:946
void IInvert3x3UpperTriangular(const double A[9], double Ai[9])
Definition: i_blas.h:4134
void IUnitize2(T x[2])
Definition: i_blas.h:2951