Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
test_suite_boxfilter.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/*
29 * NE10 Library : test_suite_boxfilter.c
30 */
31
32#include <stdio.h>
33#include <stdlib.h>
34#include <math.h>
35#include <string.h>
36
37#include "NE10_imgproc.h"
38#include "seatest.h"
39#include "unit_test_common.h"
40
/* Side length of the square grid of small kernels (1x1 .. 5x5) that is
 * always tested exhaustively. */
#define BASIC_KERNEL_SIZE 5
/* Number of kernels in that grid. The expansion is parenthesized so the macro
 * stays a single operand inside larger expressions (e.g. `n / KERNEL_COUNT`). */
#define KERNEL_COUNT (BASIC_KERNEL_SIZE * BASIC_KERNEL_SIZE)
43
44ne10_float32_t cal_psnr_uint8_rgba (const ne10_uint8_t *pRef,
45 const ne10_uint8_t *pTest,
46 const ne10_uint32_t buffSize)
47{
48 ne10_float64_t mse = 0.0, max = 255.0;
49 ne10_uint32_t i;
50 ne10_float32_t psnr_value;
51
52 for (i = 0; i < buffSize; i++)
53 {
54 mse += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]);
55 }
56 mse = mse / buffSize / 4;
57 psnr_value = 10 * log10 (max * max / mse);
58 return psnr_value;
59}
60
/*
 * Return a pseudo-random integer in the closed range [min, max].
 *
 * rand() yields values in [0, RAND_MAX]. It is scaled by 1 / (RAND_MAX + 1.0)
 * so the unit fraction is strictly below 1; scaling by 1 / RAND_MAX would
 * produce max + 1 whenever rand() returned RAND_MAX. The offset is truncated
 * before min is added so that ranges with a negative min are not skewed by
 * truncation toward zero.
 *
 * The caller is expected to pass min <= max.
 */
int rand_range (int min, int max)
{
    const double span = (double) (max - min) + 1.0;
    const double unit = rand() / ( (double) RAND_MAX + 1.0);  /* in [0, 1) */

    return min + (int) (unit * span);
}
66
67int valid_kernels (ne10_size_t *kernels, int size)
68{
69 int i;
70 for (i = 0; i < size; i++)
71 {
72 if (kernels[i].x < 1 || kernels[i].y < 1)
73 return NE10_ERR;
74 }
75 return NE10_OK;
76}
77
78/*
79 * this function check whether there is big difference between image1
80 * and image2. Here we employ 2 kinds of check: diff() and
81 * cal_psnr_uint8_rgba_color()
82 */
83int valid_result (const ne10_uint8_t *image1,
84 const ne10_uint8_t *image2,
85 ne10_size_t src_sz,
86 ne10_int32_t src_stride,
87 ne10_int32_t channel)
88{
89 assert ((image1 != 0) && (image2 != 0));
90 assert ((src_sz.x != 0) && (src_sz.y != 0)
91 && (src_stride != 0) && (channel != 0));
92
93 ne10_int32_t *diff_mat = (ne10_int32_t *) malloc (sizeof (ne10_int32_t)
94 * channel
95 * src_sz.x
96 * src_sz.y);
97 ne10_int32_t diff_mat_stride = sizeof (ne10_int32_t) * channel * src_sz.x;
98
99 if (diff_mat == 0)
100 {
101 printf ("**ERROR**: allocating %d bytes memory for kernels fails!",
102 sizeof (ne10_int32_t)
103 * src_sz.x
104 * src_sz.y
105 * channel);
106 return NE10_ERR;
107 }
108
109 diff (image1,
110 image2,
111 diff_mat,
112 diff_mat_stride,
113 src_sz.x,
114 src_sz.y,
115 src_stride,
116 4);
117
118 ne10_int32_t diff_nu = diff_count ( (const ne10_int32_t *) diff_mat,
119 src_sz.x,
120 src_sz.y,
121 diff_mat_stride,
122 4);
123 free (diff_mat);
124
125 ne10_float32_t psnr_value = cal_psnr_uint8_rgba (image1,
126 image2,
127 src_sz.x
128 * src_sz.y
129 * channel);
130 if (diff_nu != 0 && psnr_value < PSNR_THRESHOLD)
131 {
132 printf ("\ndifferent point is:%d\t PSNR value is:%f\n",
133 diff_nu, psnr_value);
134 return NE10_ERR;
135 }
136 else
137 {
138 return NE10_OK;
139 }
140}
141
142void boxfilter_get_kernels (size_t max_kernel_length,
143 ne10_size_t **kernels_ptr,
144 int *size)
145{
146
147 if (max_kernel_length > BASIC_KERNEL_SIZE)
148 {
149 *size = KERNEL_COUNT + 3;
150 }
151 else if (max_kernel_length < BASIC_KERNEL_SIZE)
152 {
153 *size = max_kernel_length * max_kernel_length;
154 }
155 else
156 {
157 *size = KERNEL_COUNT;
158 }
159
160 *kernels_ptr = (ne10_size_t *) malloc (sizeof (ne10_size_t) * (*size));
161 if (*kernels_ptr == 0)
162 {
163 printf ("**ERROR**: allocating %d bytes memory for kernels fails!\n",
164 sizeof (ne10_size_t) * (*size));
165 }
166
167 int x, y, first_part_size;
168
169 if (max_kernel_length < BASIC_KERNEL_SIZE)
170 {
171 first_part_size = max_kernel_length;
172 }
173 else
174 {
175 first_part_size = BASIC_KERNEL_SIZE;
176 }
177
178 for (x = 0; x < first_part_size; x++)
179 {
180 for (y = 0; y < first_part_size; y++)
181 {
182 (*kernels_ptr) [x * first_part_size + y].x = x + 1;
183 (*kernels_ptr) [x * first_part_size + y].y = y + 1;
184 }
185 }
186 /* add:
187 * max_kernel_length x 1
188 * 1 x max_kernel_length
189 * max_kernel_length x max_kernel_length
190 * to kernels.
191 */
192 if (max_kernel_length > BASIC_KERNEL_SIZE)
193 {
194 (*kernels_ptr) [*size - 3].x = max_kernel_length;
195 (*kernels_ptr) [*size - 3].y = 1;
196 (*kernels_ptr) [*size - 2].x = 1;
197 (*kernels_ptr) [*size - 2].y = max_kernel_length;
198 (*kernels_ptr) [*size - 1].x = max_kernel_length;
199 (*kernels_ptr) [*size - 1].y = max_kernel_length;
200 }
201
202 assert (valid_kernels (*kernels_ptr, *size) == NE10_OK);
203}
204
205void create_rgba8888_image (ne10_uint8_t **img, ne10_size_t src_sz)
206{
207 assert ( (src_sz.x != 0) || (src_sz.y != 0));
208
209 int size = sizeof (ne10_uint8_t) * src_sz.x * src_sz.y * 4;
210
211 *img = (ne10_uint8_t *) NE10_MALLOC (sizeof (ne10_uint8_t) *
212 src_sz.x *
213 src_sz.y * 4);
214 int i;
215 for (i = 0; i < size; i++)
216 {
217 * (*img + i) = rand_range (0, 255);
218 }
219
220 assert (*img != NULL);
221}
222
223int boxfilter_conformance_test (ne10_size_t src_sz)
224{
225 assert ( (src_sz.x != 0) || (src_sz.y != 0));
226
227 printf ("\ntest boxfilter on image with size:%d x %d:\n",
228 src_sz.x, src_sz.y);
229
230 int max_kernel_length = src_sz.x < src_sz.y ?
231 src_sz.x : src_sz.y;
232 max_kernel_length = max_kernel_length < ( (1 << 7) - 1) ?
233 max_kernel_length : ( (1 << 7) - 1);
234
235 ne10_size_t *kernels;
236 int kernels_size;
237 boxfilter_get_kernels (max_kernel_length, &kernels, &kernels_size);
238
239 ne10_uint8_t *src, *neon_dst, *c_dst;
240 create_rgba8888_image (&src, src_sz);
241 create_rgba8888_image (&neon_dst, src_sz);
242 create_rgba8888_image (&c_dst, src_sz);
243 ne10_int32_t stride = src_sz.x * 4 * sizeof (ne10_uint8_t);
244
245 int i;
246 for (i = 0; i < kernels_size; i++)
247 {
248 printf ("test kernel size(%d x %d):",
249 kernels[i].x, kernels[i].y);
250 //use ne10 neon version
252 neon_dst,
253 src_sz,
254 stride,
255 stride,
256 kernels[i]);
257 //use ne10 c version
259 c_dst,
260 src_sz,
261 stride,
262 stride,
263 kernels[i]);
264 assert_true (valid_result (c_dst,
265 neon_dst,
266 src_sz,
267 stride,
268 4) == NE10_OK);
269 printf (" OK.\n");
270 }
271
272 free (kernels);
273 free (src);
274 free (c_dst);
275 free (neon_dst);
276 return NE10_OK;
277}
278
279void boxfilter_performance_test (ne10_size_t img_size,
280 ne10_size_t kernel_size,
281 long int *neon_ticks,
282 long int *c_ticks)
283{
284 int run_loop = 10;
285 int i;
286 ne10_uint8_t *src, *neon_dst, *c_dst;
287 create_rgba8888_image (&src, img_size);
288 create_rgba8888_image (&neon_dst, img_size);
289 create_rgba8888_image (&c_dst, img_size);
290 ne10_int32_t stride = img_size.x * 4 * sizeof (ne10_uint8_t);
291
292 long int ticks;
293 /* boxfilter c version, run multiple times to get average time */
294 for (i = 0; i < run_loop; i++)
295 {
296 GET_TIME (ticks,
298 c_dst,
299 img_size,
300 stride,
301 stride,
302 kernel_size););
303 ticks += ticks;
304 }
305 *c_ticks = ticks / run_loop;
306
307 /* boxfilter c version, run multiple times to get average time */
308 for (i = 0; i < run_loop; i++)
309 {
310 GET_TIME (ticks,
312 c_dst,
313 img_size,
314 stride,
315 stride,
316 kernel_size););
317 ticks += ticks;
318 }
319 *neon_ticks = ticks / run_loop;
320}
321
322void test_boxfilter_performance_case()
323{
324 ne10_size_t img_sizes[] = {{240, 320}, {480, 320}, {960, 1280},
325 {1200, 1600}, {2000, 2000}
326 };
327 ne10_size_t kernel_sizes[] = {{3, 3}, {5, 5}, {7, 7}, {9, 9}};
328
329 int i, j, n_img, n_kernel;
330 n_img = sizeof (img_sizes) / sizeof (img_sizes[0]);
331 n_kernel = sizeof (kernel_sizes) / sizeof (kernel_sizes[0]);
332 long int neon_ticks, c_ticks;
333
334 char info[100];
335 for (i = 0; i < n_img; i++)
336 {
337 for (j = 0; j < n_kernel; j++)
338 {
339 boxfilter_performance_test (img_sizes[i],
340 kernel_sizes[j],
341 &neon_ticks,
342 &c_ticks);
343 sprintf (info,
344 "name:box filter\n"
345 "image size:%dx%d\n"
346 "kernel size:%dx%d",
347 img_sizes[i].x, img_sizes[i].y,
348 kernel_sizes[j].x, kernel_sizes[j].y);
349
350 ne10_performance_print (UBUNTU_COMMAND_LINE,
351 neon_ticks,
352 c_ticks,
353 info);
354 }
355 }
356}
357
358void test_boxfilter_smoke_case()
359{
360 ne10_size_t img_sizes[] = {{1, 1}, {2, 2}, {8, 3}, {10, 19},
361 {240, 320}
362 };
363 int n = sizeof (img_sizes) / sizeof (img_sizes[0]);
364 int i;
365 for (i = 0; i < n; i++)
366 {
367 boxfilter_conformance_test (img_sizes[i]);
368 //progress_bar((float)(i + 1) / n);
369 }
370}
371
372void test_boxfilter_regression_case()
373{
374 ne10_size_t img_sizes[] = {{1, 1}, {2, 2}, {8, 3}, {10, 19},
375 {239, 319}, {240, 320}, {480, 640},
376 {969, 1280}, {1200, 1600}
377 };
378 int n = sizeof (img_sizes) / sizeof (img_sizes[0]);
379 int i;
380 for (i = 0; i < n; i++)
381 {
382 boxfilter_conformance_test (img_sizes[i]);
383 }
384}
385
/*
 * Entry point of the box filter tests: runs whichever of the smoke,
 * regression and performance cases were enabled at compile time through
 * SMOKE_TEST, REGRESSION_TEST and PERFORMANCE_TEST.
 */
void test_boxfilter()
{
#ifdef SMOKE_TEST
    test_boxfilter_smoke_case();
#endif

#ifdef REGRESSION_TEST
    test_boxfilter_regression_case();
#endif

#ifdef PERFORMANCE_TEST
    test_boxfilter_performance_case();
#endif
}
400
401static void my_test_setup (void)
402{
403 ne10_log_buffer_ptr = ne10_log_buffer;
404}
405
406void test_fixture_boxfilter (void)
407{
408 test_fixture_start();
409
410 fixture_setup (my_test_setup);
411
412 run_test (test_boxfilter);
413
414 test_fixture_end();
415}
void ne10_img_boxfilter_rgba8888_neon(const ne10_uint8_t *src, ne10_uint8_t *dst, ne10_size_t src_size, ne10_int32_t src_stride, ne10_int32_t dst_stride, ne10_size_t kernel_size)
neon optimized box filter
void ne10_img_boxfilter_rgba8888_c(const ne10_uint8_t *src, ne10_uint8_t *dst, ne10_size_t src_size, ne10_int32_t src_stride, ne10_int32_t dst_stride, ne10_size_t kernel_size)
box filter