Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_physics.c
1/*
2 * Copyright 2014-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : test/test_suite_physics.c
30 */
31
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#include "NE10_physics.h"
#include "seatest.h"
#include "unit_test_common.h"
39
40/* ----------------------------------------------------------------------
41** Global defines
42** ------------------------------------------------------------------- */
43#define TEST_LENGTH_SAMPLES 1024
44#define TEST_COUNT 5000
45
46static ne10_int64_t time_c = 0;
47static ne10_int64_t time_neon = 0;
48static ne10_float32_t time_speedup = 0.0f;
49static ne10_float32_t time_savings = 0.0f;
50
51static void float_array_assignment (ne10_float32_t *array, ne10_int32_t len)
52{
53 int i;
54 for (i = 0; i < len; i++)
55 {
56 array[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
57 }
58}
59
60
/**
 * Conformance test for the AABB computation.
 *
 * Runs ne10_physics_compute_aabb_vec2f_c and ne10_physics_compute_aabb_vec2f_neon
 * on identical random input and asserts that both results agree within
 * ERROR_MARGIN_LARGE. The vertex count runs from 1 up to, but not including,
 * TEST_LENGTH_SAMPLES, in steps of 1 when REGRESSION_TEST is defined and in
 * steps of 3 otherwise (smoke test).
 *
 * The whole body is compiled out unless
 * ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON is defined. If the input buffers
 * cannot be allocated, a message is written to stderr and the test is skipped.
 */
void test_compute_aabb_vec2f_conformance()
{
#if defined ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
#if defined (REGRESSION_TEST)
    const ne10_int32_t vertex_step = 1;
#else // defined (SMOKE_TEST)
    const ne10_int32_t vertex_step = 3;
#endif
    ne10_vec2f_t radius = {0.2f, 0.2f};
    ne10_vec2f_t *vertices_c, *vertices_neon;
    ne10_mat2x2f_t aabb_c, aabb_neon;
    ne10_mat2x2f_t xf;
    ne10_float32_t tmp;
    ne10_int32_t vertex_count;
    /* number of floats compared per AABB */
    ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t);

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* init input memory */
    vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    if (vertices_c == NULL || vertices_neon == NULL)
    {
        /* never hand a NULL buffer to the fill routine or to memcpy */
        fprintf (stderr, "%s: failed to allocate test buffers, test skipped\n", __FUNCTION__);
        goto cleanup;
    }
    float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    /* both implementations must see exactly the same vertices */
    memcpy (vertices_neon, vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));

    /* xf: c1 gets two random values in [-8, 8), c2 the sine and cosine of a
     * random angle in [-32, 32) */
    tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
    xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
    xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
    xf.c2.r1 = sin (tmp);
    xf.c2.r2 = cos (tmp);

    for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += vertex_step)
    {
        //C version
        ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count);
        //neon version
        ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
        printf ("----vertex_count %d\n", vertex_count);
        assert_float_vec_equal ( (ne10_float32_t*) &aabb_c, (ne10_float32_t*) &aabb_neon, ERROR_MARGIN_LARGE, vec_size);
    }

cleanup:
    free (vertices_c);
    free (vertices_neon);
#endif
}
110
111void test_compute_aabb_vec2f_performance()
112{
113 ne10_vec2f_t radius = {0.2f, 0.2f};
114 ne10_vec2f_t *vertices_c, *vertices_neon;
115 ne10_mat2x2f_t aabb_c;
116 ne10_mat2x2f_t xf;
117 ne10_int32_t i;
118 ne10_int32_t vertex_count;
119 // ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t);
120
121 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
122 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "vertex count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
123
124 /* init input memory */
125 vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
126 vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
127 float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
128 memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
129
130 ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
131 xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
132 xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
133 xf.c2.r1 = sin (tmp);
134 xf.c2.r2 = cos (tmp);
135
136 for (vertex_count = 4; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 4)
137 {
138 //C version
139 GET_TIME
140 (time_c,
141 {
142 for (i = 0; i < TEST_COUNT; i++)
143 ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count);
144 }
145 );
146
147#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
148 //neon version
149 ne10_mat2x2f_t aabb_neon;
150 GET_TIME
151 (time_neon,
152 {
153 for (i = 0; i < TEST_COUNT; i++)
154 ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
155 }
156 );
157#endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
158 time_speedup = (ne10_float32_t) time_c / time_neon;
159 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
160 printf ("vertax count: %10d time C: %10lld time NEON: %10lld\n", vertex_count, time_c, time_neon);
161 //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", vertex_count, time_c, time_neon, time_savings, time_speedup);
162 }
163 free (vertices_c);
164 free (vertices_neon);
165}
166
/**
 * Conformance test for the relative-velocity computation.
 *
 * Runs ne10_physics_relative_v_vec2f_c and ne10_physics_relative_v_vec2f_neon
 * on the same random inputs and asserts that every output vector agrees
 * within ERROR_MARGIN_LARGE. `count` runs from 1 up to, but not including,
 * TEST_LENGTH_SAMPLES, in steps of 1 when REGRESSION_TEST is defined and in
 * steps of 5 otherwise (smoke test).
 *
 * Each output buffer is allocated with ARRAY_GUARD_LEN * 2 extra floats and
 * the working pointer is offset by ARRAY_GUARD_LEN floats; GUARD_ARRAY and
 * CHECK_ARRAY_GUARD are applied around each pair of calls (presumably to
 * detect out-of-bounds writes - see their definitions in the test helpers).
 *
 * The whole body is compiled out unless
 * ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON is defined. If any buffer cannot
 * be allocated, a message is written to stderr and the test is skipped.
 */
void test_relative_v_vec2f_conformance()
{
#if defined ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
#if defined (REGRESSION_TEST)
    const ne10_int32_t count_step = 1;
#else // defined (SMOKE_TEST)
    const ne10_int32_t count_step = 5;
#endif
    /* payload plus one guard region in front of and one behind it */
    const size_t guarded_bytes = TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t);
    ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon;
    ne10_vec2f_t *dv_c, *dv_neon;
    ne10_vec3f_t *v_wa, *v_wb;
    ne10_vec2f_t *ra, *rb;
    ne10_int32_t i;
    ne10_int32_t count;
    /* number of floats per output vector */
    ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t);

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* allocate input and dst memory */
    v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
    v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (guarded_bytes);
    guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (guarded_bytes);
    if (v_wa == NULL || v_wb == NULL || ra == NULL || rb == NULL
            || guarded_dv_c == NULL || guarded_dv_neon == NULL)
    {
        /* never write through a NULL buffer */
        fprintf (stderr, "%s: failed to allocate test buffers, test skipped\n", __FUNCTION__);
        goto cleanup;
    }

    /* init input memory */
    float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));

    /* init dst memory: skip the leading guard region */
    dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
    dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);

    for (count = 1; count < TEST_LENGTH_SAMPLES; count += count_step)
    {
        GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size);

        //C version
        ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
        //neon version
        ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);

        CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size);
        CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size);
        printf ("----count %d\n", count);
        for (i = 0; i < count; i++)
            assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size);
    }

cleanup:
    free (v_wa);
    free (v_wb);
    free (ra);
    free (rb);
    free (guarded_dv_c);
    free (guarded_dv_neon);
#endif
}
239
240void test_relative_v_vec2f_performance()
241{
242 ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon;
243 ne10_vec2f_t *dv_c, *dv_neon;
244 ne10_vec3f_t *v_wa, *v_wb;
245 ne10_vec2f_t *ra, *rb;
246 ne10_int32_t i;
247 ne10_int32_t count;
248 // ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t);
249
250 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
251 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
252
253 /* init input memory */
254 v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
255 v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
256 ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
257 rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
258 float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
259 float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
260 float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
261 float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
262
263 /* init dst memory */
264 guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
265 guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
266 dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
267 dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);
268
269 for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
270 {
271 //C version
272 GET_TIME
273 (time_c,
274 {
275 for (i = 0; i < TEST_COUNT; i++)
276 ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
277 }
278 );
279#ifdef ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
280 //neon version
281 GET_TIME
282 (time_neon,
283 {
284 for (i = 0; i < TEST_COUNT; i++)
285 ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
286 }
287 );
288 time_speedup = (ne10_float32_t) time_c / time_neon;
289 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
290 printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
291 //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup);
292#endif // ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
293 }
294
295 free (v_wa);
296 free (v_wb);
297 free (ra);
298 free (rb);
299 free (guarded_dv_c);
300 free (guarded_dv_neon);
301}
302
/**
 * Conformance test for the contact-impulse application.
 *
 * Runs ne10_physics_apply_impulse_vec2f_c and
 * ne10_physics_apply_impulse_vec2f_neon on identical inputs and asserts that
 * the updated v_wa / v_wb vectors agree within ERROR_MARGIN_LARGE. `count`
 * runs from 1 up to, but not including, TEST_LENGTH_SAMPLES, in steps of 1
 * when REGRESSION_TEST is defined and in steps of 5 otherwise (smoke test).
 * The v_wa / v_wb buffers are updated in place and are not re-initialised
 * between iterations.
 *
 * Each in/out buffer is allocated with ARRAY_GUARD_LEN * 2 extra floats and
 * the working pointer is offset by ARRAY_GUARD_LEN floats; GUARD_ARRAY and
 * CHECK_ARRAY_GUARD are applied around each pair of calls (presumably to
 * detect out-of-bounds writes - see their definitions in the test helpers).
 *
 * The whole body is compiled out unless
 * ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON is defined. If any buffer
 * cannot be allocated, a message is written to stderr and the test is skipped.
 */
void test_apply_impulse_vec2f_conformance()
{
#if defined ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
#if defined (REGRESSION_TEST)
    const ne10_int32_t count_step = 1;
#else // defined (SMOKE_TEST)
    const ne10_int32_t count_step = 5;
#endif
    /* payload plus one guard region in front of and one behind it */
    const size_t guarded_bytes = TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t);
    ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
    ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon;
    ne10_vec2f_t *ra, *rb, *ima, *imb, *p;
    ne10_int32_t i;
    ne10_int32_t count;
    /* number of floats per in/out vector */
    ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t);

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* allocate input and in/out memory */
    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (guarded_bytes);
    guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (guarded_bytes);
    guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (guarded_bytes);
    guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (guarded_bytes);
    if (ra == NULL || rb == NULL || ima == NULL || imb == NULL || p == NULL
            || guarded_v_wa_c == NULL || guarded_v_wa_neon == NULL
            || guarded_v_wb_c == NULL || guarded_v_wb_neon == NULL)
    {
        /* never write through a NULL buffer */
        fprintf (stderr, "%s: failed to allocate test buffers, test skipped\n", __FUNCTION__);
        goto cleanup;
    }

    /* init input memory */
    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));

    /* init dst memory: skip the leading guard region */
    v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
    v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
    v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
    v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
    float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    /* both implementations must start from exactly the same state */
    memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
    memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));

    for (count = 1; count < TEST_LENGTH_SAMPLES; count += count_step)
    {
        GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size);

        //C version
        ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
        //neon version
        ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);

        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size);
        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size);
        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size);
        CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size);

        printf ("----count %d\n", count);
        for (i = 0; i < count; i++)
        {
            assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size);
            assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size);
        }
    }

cleanup:
    free (ra);
    free (rb);
    free (ima);
    free (imb);
    free (p);
    free (guarded_v_wa_c);
    free (guarded_v_wa_neon);
    free (guarded_v_wb_c);
    free (guarded_v_wb_neon);
#endif
}
402
403void test_apply_impulse_vec2f_performance()
404{
405 ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
406 ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon;
407 ne10_vec2f_t *ra, *rb, *ima, *imb, *p;
408 ne10_int32_t i;
409 ne10_int32_t count;
410 // ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t);
411
412 fprintf (stdout, "----------%30s start\n", __FUNCTION__);
413 fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time in ms", "NEON Time in ms", "Time Savings", "Performance Ratio");
414
415 /* init input memory */
416 ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
417 rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
418 ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
419 imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
420 p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
421 float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
422 float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
423 float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
424 float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
425 float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
426
427 /* init dst memory */
428 guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
429 guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
430 guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
431 guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
432 v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
433 v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
434 v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
435 v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
436 float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
437 float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
438 memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
439 memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
440
441 for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
442 {
443 //C version
444 GET_TIME
445 (time_c,
446 {
447 for (i = 0; i < TEST_COUNT; i++)
448 ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
449 }
450 );
451
452#ifdef ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
453 //neon version
454 GET_TIME
455 (time_neon,
456 {
457 for (i = 0; i < TEST_COUNT; i++)
458 ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
459 }
460 );
461#endif // ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
462 time_speedup = (ne10_float32_t) time_c / time_neon;
463 time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
464 printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
465 //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup);
466
467 }
468 free (ra);
469 free (rb);
470 free (ima);
471 free (imb);
472 free (p);
473 free (guarded_v_wa_c);
474 free (guarded_v_wa_neon);
475 free (guarded_v_wb_c);
476 free (guarded_v_wb_neon);
477}
478
/*
 * Entry point for the AABB tests: runs the conformance test in SMOKE_TEST or
 * REGRESSION_TEST builds and the performance test in PERFORMANCE_TEST builds.
 *
 * Both macros are tested with `defined`, so the selection depends only on
 * whether the macro exists, not on the value it expands to (an empty
 * definition would otherwise be a preprocessor error).
 */
void test_compute_aabb_vec2f()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_compute_aabb_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_compute_aabb_vec2f_performance();
#endif
}
489
/*
 * Entry point for the relative-velocity tests: runs the conformance test in
 * SMOKE_TEST or REGRESSION_TEST builds and the performance test in
 * PERFORMANCE_TEST builds.
 *
 * Both macros are tested with `defined`, so the selection depends only on
 * whether the macro exists, not on the value it expands to (an empty
 * definition would otherwise be a preprocessor error).
 */
void test_relative_v_vec2f()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_relative_v_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_relative_v_vec2f_performance();
#endif
}
500
/*
 * Entry point for the apply-impulse tests: runs the conformance test in
 * SMOKE_TEST or REGRESSION_TEST builds and the performance test in
 * PERFORMANCE_TEST builds.
 *
 * Both macros are tested with `defined`, so the selection depends only on
 * whether the macro exists, not on the value it expands to (an empty
 * definition would otherwise be a preprocessor error).
 */
void test_apply_impulse_vec2f()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_apply_impulse_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_apply_impulse_vec2f_performance();
#endif
}
511
/*
 * Setup hook passed to fixture_setup() by test_fixture_physics().
 * Intentionally a no-op: the physics tests allocate and initialise their own
 * buffers.
 */
void my_test_setup (void)
{
    /* nothing to prepare */
}
516
/*
 * Teardown hook passed to fixture_teardown() by test_fixture_physics().
 * Intentionally a no-op: the physics tests release their own buffers.
 */
void my_test_teardown (void)
{
    /* nothing to clean up */
}
521
522void test_fixture_physics (void)
523{
524 test_fixture_start(); // starts a fixture
525
526 fixture_setup (my_test_setup);
527 fixture_teardown (my_test_teardown);
528
529 run_test (test_compute_aabb_vec2f); // run tests
530 run_test (test_relative_v_vec2f);
531 run_test (test_apply_impulse_vec2f);
532
533 test_fixture_end(); // ends a fixture
534}
void ne10_physics_compute_aabb_vec2f_c(ne10_mat2x2f_t *aabb, ne10_vec2f_t *vertices, ne10_mat2x2f_t *xf, ne10_vec2f_t *radius, ne10_uint32_t vertex_count)
compute AABB for polygon.
void ne10_physics_apply_impulse_vec2f_c(ne10_vec3f_t *v_wa, ne10_vec3f_t *v_wb, ne10_vec2f_t *ra, ne10_vec2f_t *rb, ne10_vec2f_t *ima, ne10_vec2f_t *imb, ne10_vec2f_t *p, ne10_uint32_t count)
apply contact impulse.
void ne10_physics_relative_v_vec2f_c(ne10_vec2f_t *dv, ne10_vec3f_t *v_wa, ne10_vec2f_t *ra, ne10_vec3f_t *v_wb, ne10_vec2f_t *rb, ne10_uint32_t count)
calculate relative velocity at contact.
a 2-tuple of ne10_float32_t values.
Definition NE10_types.h:88
a 3-tuple of ne10_float32_t values.
Definition NE10_types.h:97