Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_generic_float32.c
1/*
2 * Copyright 2014-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* license of Kiss FFT */
29/*
30Copyright (c) 2003-2010, Mark Borgerding
31
32All rights reserved.
33
34Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35
36 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*/
42
43/*
44 * NE10 Library : dsp/NE10_fft_generic_float32.c
45 */
46
47#include "NE10_types.h"
48#include "NE10_macros.h"
49#include "NE10_fft.h"
50#include "NE10_fft_generic_float32.h"
51
53// Following are butterfly functions
55static inline void ne10_radix_2_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
56 const ne10_fft_cpx_float32_t *Fin,
57 const ne10_fft_cpx_float32_t *twiddles,
58 const ne10_int32_t fstride,
59 const ne10_int32_t out_step,
60 const ne10_int32_t nfft,
61 const ne10_int32_t is_first_stage,
62 const ne10_int32_t is_inverse,
63 const ne10_int32_t is_scaled)
64{
65 ne10_fft_cpx_float32_t scratch_in[2];
66 ne10_fft_cpx_float32_t scratch_out[2];
67
68 const ne10_int32_t in_step = nfft / 2;
69 ne10_int32_t f_count;
70 ne10_int32_t m_count;
71
72 for (f_count = fstride; f_count > 0; f_count--)
73 {
74 for (m_count = out_step; m_count > 0; m_count--)
75 {
76 scratch_in[0] = Fin[0 * in_step];
77 scratch_in[1] = Fin[1 * in_step];
78
79 if (is_inverse)
80 {
81 scratch_in[0].i = -scratch_in[0].i;
82 scratch_in[1].i = -scratch_in[1].i;
83 }
84
85#ifdef NE10_DSP_CFFT_SCALING
86 if (is_scaled && is_first_stage)
87 {
88 const ne10_float32_t one_by_nfft = 1.0 / nfft;
89
90 scratch_in[0].r *= one_by_nfft;
91 scratch_in[0].i *= one_by_nfft;
92 scratch_in[1].r *= one_by_nfft;
93 scratch_in[1].i *= one_by_nfft;
94 }
95#endif
96
97 if (!is_first_stage)
98 {
99 ne10_fft_cpx_float32_t scratch_tw[1];
100 ne10_fft_cpx_float32_t scratch[2];
101
102 scratch_tw[0] = twiddles[0 * out_step];
103
104 FFT2_MUL_TW (scratch, scratch_in, scratch_tw);
105
106 scratch_in[0] = scratch[0];
107 scratch_in[1] = scratch[1];
108 }
109
110 FFT2_FCU (scratch_out, scratch_in);
111
112 if (is_inverse)
113 {
114 scratch_out[0].i = -scratch_out[0].i;
115 scratch_out[1].i = -scratch_out[1].i;
116 }
117
118 Fout[0 * out_step] = scratch_out[0];
119 Fout[1 * out_step] = scratch_out[1];
120
121 Fin++;
122
123 if (!is_first_stage)
124 {
125 Fout++;
126 twiddles++;
127 }
128 else
129 {
130 Fout += 2;
131 }
132 }
133 if (!is_first_stage)
134 {
135 twiddles -= out_step;
136 Fout += (2 - 1) * out_step;
137 }
138 }
139}
140
141static inline void ne10_radix_4_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
142 const ne10_fft_cpx_float32_t *Fin,
143 const ne10_fft_cpx_float32_t *twiddles,
144 const ne10_int32_t fstride,
145 const ne10_int32_t out_step,
146 const ne10_int32_t nfft,
147 const ne10_int32_t is_first_stage,
148 const ne10_int32_t is_inverse,
149 const ne10_int32_t is_scaled)
150{
151 ne10_fft_cpx_float32_t scratch_in[4];
152 ne10_fft_cpx_float32_t scratch_out[4];
153
154 const ne10_int32_t in_step = nfft / 4;
155 ne10_int32_t f_count;
156 ne10_int32_t m_count;
157
158 for (f_count = fstride; f_count > 0; f_count--)
159 {
160 for (m_count = out_step; m_count > 0; m_count--)
161 {
162 scratch_in[0] = Fin[0 * in_step];
163 scratch_in[1] = Fin[1 * in_step];
164 scratch_in[2] = Fin[2 * in_step];
165 scratch_in[3] = Fin[3 * in_step];
166
167 if (is_inverse)
168 {
169 scratch_in[0].i = -scratch_in[0].i;
170 scratch_in[1].i = -scratch_in[1].i;
171 scratch_in[2].i = -scratch_in[2].i;
172 scratch_in[3].i = -scratch_in[3].i;
173 }
174
175#ifdef NE10_DSP_CFFT_SCALING
176 if (is_scaled && is_first_stage)
177 {
178 const ne10_float32_t one_by_nfft = 1.0 / nfft;
179
180 scratch_in[0].r *= one_by_nfft;
181 scratch_in[0].i *= one_by_nfft;
182 scratch_in[1].r *= one_by_nfft;
183 scratch_in[1].i *= one_by_nfft;
184 scratch_in[2].r *= one_by_nfft;
185 scratch_in[2].i *= one_by_nfft;
186 scratch_in[3].r *= one_by_nfft;
187 scratch_in[3].i *= one_by_nfft;
188 }
189#endif
190
191 if (!is_first_stage)
192 {
193 ne10_fft_cpx_float32_t scratch_tw[3];
194 ne10_fft_cpx_float32_t scratch[4];
195
196 scratch_tw[0] = twiddles[0 * out_step];
197 scratch_tw[1] = twiddles[1 * out_step];
198 scratch_tw[2] = twiddles[2 * out_step];
199
200 FFT4_MUL_TW (scratch, scratch_in, scratch_tw);
201
202 scratch_in[0] = scratch[0];
203 scratch_in[1] = scratch[1];
204 scratch_in[2] = scratch[2];
205 scratch_in[3] = scratch[3];
206 }
207
208 FFT4_FCU (scratch_out, scratch_in);
209
210 if (is_inverse)
211 {
212 scratch_out[0].i = -scratch_out[0].i;
213 scratch_out[1].i = -scratch_out[1].i;
214 scratch_out[2].i = -scratch_out[2].i;
215 scratch_out[3].i = -scratch_out[3].i;
216 }
217
218 Fout[0 * out_step] = scratch_out[0];
219 Fout[1 * out_step] = scratch_out[1];
220 Fout[2 * out_step] = scratch_out[2];
221 Fout[3 * out_step] = scratch_out[3];
222
223 Fin++;
224
225 if (!is_first_stage)
226 {
227 Fout++;
228 twiddles++;
229 }
230 else
231 {
232 Fout += 4;
233 }
234 }
235 if (!is_first_stage)
236 {
237 twiddles -= out_step;
238 Fout += (4 - 1) * out_step;
239 }
240 }
241}
242
243static inline void ne10_radix_8_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
244 const ne10_fft_cpx_float32_t *Fin,
245 const ne10_fft_cpx_float32_t *twiddles,
246 const ne10_int32_t fstride,
247 const ne10_int32_t out_step,
248 const ne10_int32_t nfft,
249 const ne10_int32_t is_first_stage,
250 const ne10_int32_t is_inverse,
251 const ne10_int32_t is_scaled)
252{
253 assert (is_first_stage == 1);
254
255 ne10_fft_cpx_float32_t scratch_in[8];
256 ne10_fft_cpx_float32_t scratch_out[8];
257
258 const ne10_int32_t in_step = nfft / 8;
259 ne10_int32_t f_count;
260 ne10_int32_t m_count;
261
262 for (f_count = fstride; f_count > 0; f_count--)
263 {
264 for (m_count = out_step; m_count > 0; m_count--)
265 {
266 scratch_in[0] = Fin[0 * in_step];
267 scratch_in[1] = Fin[1 * in_step];
268 scratch_in[2] = Fin[2 * in_step];
269 scratch_in[3] = Fin[3 * in_step];
270 scratch_in[4] = Fin[4 * in_step];
271 scratch_in[5] = Fin[5 * in_step];
272 scratch_in[6] = Fin[6 * in_step];
273 scratch_in[7] = Fin[7 * in_step];
274
275 if (is_inverse)
276 {
277 scratch_in[0].i = -scratch_in[0].i;
278 scratch_in[1].i = -scratch_in[1].i;
279 scratch_in[2].i = -scratch_in[2].i;
280 scratch_in[3].i = -scratch_in[3].i;
281 scratch_in[4].i = -scratch_in[4].i;
282 scratch_in[5].i = -scratch_in[5].i;
283 scratch_in[6].i = -scratch_in[6].i;
284 scratch_in[7].i = -scratch_in[7].i;
285 }
286
287#ifdef NE10_DSP_CFFT_SCALING
288 if (is_scaled)
289 {
290 const ne10_float32_t one_by_nfft = 1.0 / nfft;
291
292 scratch_in[0].r *= one_by_nfft;
293 scratch_in[0].i *= one_by_nfft;
294 scratch_in[1].r *= one_by_nfft;
295 scratch_in[1].i *= one_by_nfft;
296 scratch_in[2].r *= one_by_nfft;
297 scratch_in[2].i *= one_by_nfft;
298 scratch_in[3].r *= one_by_nfft;
299 scratch_in[3].i *= one_by_nfft;
300 scratch_in[4].r *= one_by_nfft;
301 scratch_in[4].i *= one_by_nfft;
302 scratch_in[5].r *= one_by_nfft;
303 scratch_in[5].i *= one_by_nfft;
304 scratch_in[6].r *= one_by_nfft;
305 scratch_in[6].i *= one_by_nfft;
306 scratch_in[7].r *= one_by_nfft;
307 scratch_in[7].i *= one_by_nfft;
308 }
309#endif
310
311 FFT8_FCU (scratch_out, scratch_in);
312
313 if (is_inverse)
314 {
315 scratch_out[0].i = -scratch_out[0].i;
316 scratch_out[1].i = -scratch_out[1].i;
317 scratch_out[2].i = -scratch_out[2].i;
318 scratch_out[3].i = -scratch_out[3].i;
319 scratch_out[4].i = -scratch_out[4].i;
320 scratch_out[5].i = -scratch_out[5].i;
321 scratch_out[6].i = -scratch_out[6].i;
322 scratch_out[7].i = -scratch_out[7].i;
323 }
324
325 Fout[0*out_step] = scratch_out[0];
326 Fout[1*out_step] = scratch_out[1];
327 Fout[2*out_step] = scratch_out[2];
328 Fout[3*out_step] = scratch_out[3];
329 Fout[4*out_step] = scratch_out[4];
330 Fout[5*out_step] = scratch_out[5];
331 Fout[6*out_step] = scratch_out[6];
332 Fout[7*out_step] = scratch_out[7];
333
334 Fin++;
335 Fout += 8;
336 }
337 }
338}
339
340static inline void ne10_radix_3_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
341 const ne10_fft_cpx_float32_t *Fin,
342 const ne10_fft_cpx_float32_t *twiddles,
343 const ne10_int32_t fstride,
344 const ne10_int32_t out_step,
345 const ne10_int32_t nfft,
346 const ne10_int32_t is_first_stage,
347 const ne10_int32_t is_inverse,
348 const ne10_int32_t is_scaled)
349{
350 ne10_fft_cpx_float32_t scratch_in[3];
351 ne10_fft_cpx_float32_t scratch_out[3];
352
353 const ne10_int32_t in_step = nfft / 3;
354 ne10_int32_t f_count;
355 ne10_int32_t m_count;
356
357 for (f_count = fstride; f_count > 0; f_count--)
358 {
359 for (m_count = out_step; m_count > 0; m_count--)
360 {
361 scratch_in[0] = Fin[0 * in_step];
362 scratch_in[1] = Fin[1 * in_step];
363 scratch_in[2] = Fin[2 * in_step];
364
365 if (is_inverse)
366 {
367 scratch_in[0].i = -scratch_in[0].i;
368 scratch_in[1].i = -scratch_in[1].i;
369 scratch_in[2].i = -scratch_in[2].i;
370 }
371
372#ifdef NE10_DSP_CFFT_SCALING
373 if (is_scaled && is_first_stage)
374 {
375 const ne10_float32_t one_by_nfft = 1.0 / nfft;
376
377 scratch_in[0].r *= one_by_nfft;
378 scratch_in[0].i *= one_by_nfft;
379 scratch_in[1].r *= one_by_nfft;
380 scratch_in[1].i *= one_by_nfft;
381 scratch_in[2].r *= one_by_nfft;
382 scratch_in[2].i *= one_by_nfft;
383 }
384#endif
385
386 if (!is_first_stage)
387 {
388 ne10_fft_cpx_float32_t scratch_tw[2];
389 ne10_fft_cpx_float32_t scratch[3];
390
391 scratch_tw[0] = twiddles[0 * out_step];
392 scratch_tw[1] = twiddles[1 * out_step];
393
394 FFT3_MUL_TW (scratch, scratch_in, scratch_tw);
395
396 scratch_in[0] = scratch[0];
397 scratch_in[1] = scratch[1];
398 scratch_in[2] = scratch[2];
399 }
400
401 FFT3_FCU (scratch_out, scratch_in);
402
403 if (is_inverse)
404 {
405 scratch_out[0].i = -scratch_out[0].i;
406 scratch_out[1].i = -scratch_out[1].i;
407 scratch_out[2].i = -scratch_out[2].i;
408 }
409
410 Fout[0 * out_step] = scratch_out[0];
411 Fout[1 * out_step] = scratch_out[1];
412 Fout[2 * out_step] = scratch_out[2];
413
414 Fin++;
415
416 if (!is_first_stage)
417 {
418 Fout++;
419 twiddles++;
420 }
421 else
422 {
423 Fout += 3;
424 }
425 }
426 if (!is_first_stage)
427 {
428 twiddles -= out_step;
429 Fout += (3 - 1) * out_step;
430 }
431 }
432}
433
434static inline void ne10_radix_5_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
435 const ne10_fft_cpx_float32_t *Fin,
436 const ne10_fft_cpx_float32_t *twiddles,
437 const ne10_int32_t fstride,
438 const ne10_int32_t out_step,
439 const ne10_int32_t nfft,
440 const ne10_int32_t is_first_stage,
441 const ne10_int32_t is_inverse,
442 const ne10_int32_t is_scaled)
443{
444 ne10_fft_cpx_float32_t scratch_in[5];
445 ne10_fft_cpx_float32_t scratch_out[5];
446
447 const ne10_int32_t in_step = nfft / 5;
448 ne10_int32_t f_count;
449 ne10_int32_t m_count;
450
451 for (f_count = fstride; f_count > 0; f_count--)
452 {
453 for (m_count = out_step; m_count > 0; m_count--)
454 {
455 scratch_in[0] = Fin[0 * in_step];
456 scratch_in[1] = Fin[1 * in_step];
457 scratch_in[2] = Fin[2 * in_step];
458 scratch_in[3] = Fin[3 * in_step];
459 scratch_in[4] = Fin[4 * in_step];
460
461 if (is_inverse)
462 {
463 scratch_in[0].i = -scratch_in[0].i;
464 scratch_in[1].i = -scratch_in[1].i;
465 scratch_in[2].i = -scratch_in[2].i;
466 scratch_in[3].i = -scratch_in[3].i;
467 scratch_in[4].i = -scratch_in[4].i;
468 }
469
470#ifdef NE10_DSP_CFFT_SCALING
471 if (is_scaled && is_first_stage)
472 {
473 const ne10_float32_t one_by_nfft = 1.0 / nfft;
474
475 scratch_in[0].r *= one_by_nfft;
476 scratch_in[0].i *= one_by_nfft;
477 scratch_in[1].r *= one_by_nfft;
478 scratch_in[1].i *= one_by_nfft;
479 scratch_in[2].r *= one_by_nfft;
480 scratch_in[2].i *= one_by_nfft;
481 scratch_in[3].r *= one_by_nfft;
482 scratch_in[3].i *= one_by_nfft;
483 scratch_in[4].r *= one_by_nfft;
484 scratch_in[4].i *= one_by_nfft;
485 }
486#endif
487
488 if (!is_first_stage)
489 {
490 ne10_fft_cpx_float32_t scratch_tw[4];
491 ne10_fft_cpx_float32_t scratch[5];
492
493 scratch_tw[0] = twiddles[0 * out_step];
494 scratch_tw[1] = twiddles[1 * out_step];
495 scratch_tw[2] = twiddles[2 * out_step];
496 scratch_tw[3] = twiddles[3 * out_step];
497
498 FFT5_MUL_TW (scratch, scratch_in, scratch_tw);
499
500 scratch_in[0] = scratch[0];
501 scratch_in[1] = scratch[1];
502 scratch_in[2] = scratch[2];
503 scratch_in[3] = scratch[3];
504 scratch_in[4] = scratch[4];
505 }
506
507 FFT5_FCU (scratch_out, scratch_in);
508
509 if (is_inverse)
510 {
511 scratch_out[0].i = -scratch_out[0].i;
512 scratch_out[1].i = -scratch_out[1].i;
513 scratch_out[2].i = -scratch_out[2].i;
514 scratch_out[3].i = -scratch_out[3].i;
515 scratch_out[4].i = -scratch_out[4].i;
516 }
517
518 Fout[0 * out_step] = scratch_out[0];
519 Fout[1 * out_step] = scratch_out[1];
520 Fout[2 * out_step] = scratch_out[2];
521 Fout[3 * out_step] = scratch_out[3];
522 Fout[4 * out_step] = scratch_out[4];
523
524 Fin++;
525
526 if (!is_first_stage)
527 {
528 Fout++;
529 twiddles++;
530 }
531 else
532 {
533 Fout += 5;
534 }
535 }
536 if (!is_first_stage)
537 {
538 twiddles -= out_step;
539 Fout += (5 - 1) * out_step;
540 }
541 }
542}
543
544static inline void ne10_radix_generic_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
545 const ne10_fft_cpx_float32_t *Fin,
546 const ne10_fft_cpx_float32_t *twiddles,
547 const ne10_int32_t radix,
548 const ne10_int32_t in_step,
549 const ne10_int32_t out_step,
550 const ne10_int32_t is_inverse,
551 const ne10_int32_t is_scaled)
552{
553 ne10_int32_t q, q1;
554 ne10_int32_t f_count = in_step;
555
557 ne10_fft_cpx_float32_t *scratch;
558 scratch = (ne10_fft_cpx_float32_t *) NE10_MALLOC (radix *
559 sizeof (ne10_fft_cpx_float32_t));
560
561 for (; f_count > 0; f_count--)
562 {
563 // load
564 for (q1 = 0; q1 < radix; q1++)
565 {
566 scratch[q1] = Fin[in_step * q1];
567 if (is_inverse)
568 {
569 scratch[q1].i = -scratch[q1].i;
570#ifdef NE10_DSP_CFFT_SCALING
571 if (is_scaled)
572 {
573 const ne10_float32_t one_by_nfft = 1.0 / (radix * in_step);
574 scratch[q1].r *= one_by_nfft;
575 scratch[q1].i *= one_by_nfft;
576 }
577#endif
578 }
579 } // q1
580
581 // compute Fout[q1 * out_step] from definition
582 for (q1 = 0; q1 < radix; q1++)
583 {
584 ne10_int32_t twidx = 0;
585 Fout[q1 * out_step] = scratch[0];
586 for (q = 1; q < radix; q++)
587 {
588 twidx += 1 * q1;
589 if (twidx >= radix)
590 {
591 twidx -= radix;
592 }
593 NE10_CPX_MUL_F32 (tmp, scratch[q], twiddles[twidx]);
594 NE10_CPX_ADDTO (Fout[q1 * out_step], tmp);
595 } // q
596 if (is_inverse)
597 {
598 Fout[q1 * out_step].i = -Fout[q1 * out_step].i;
599 }
600 } // q1
601
602 Fout += radix;
603 Fin++;
604 }
605
606 NE10_FREE (scratch);
607}
608
609static inline void ne10_mixed_radix_generic_butterfly_float32_impl_c (ne10_fft_cpx_float32_t *Fout,
610 const ne10_fft_cpx_float32_t *Fin,
611 const ne10_int32_t *factors,
612 const ne10_fft_cpx_float32_t *twiddles,
614 const ne10_int32_t is_inverse,
615 const ne10_int32_t is_scaled)
616{
617 ne10_int32_t fstride, mstride, radix;
618 ne10_int32_t stage_count;
619 ne10_int32_t nfft;
620
621 // init fstride, mstride, radix, nfft
622 stage_count = factors[0];
623 fstride = factors[1];
624 mstride = 1;
625 radix = factors[stage_count << 1]; // radix of first stage
626 nfft = fstride * radix;
627
628 if (stage_count % 2 == 0)
629 {
630 ne10_swap_ptr (buffer, Fout);
631 }
632
633 // first stage
634 switch (radix)
635 {
636 case 2:
637 ne10_radix_2_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
638 is_inverse, is_scaled);
639 break;
640 case 4:
641 ne10_radix_4_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
642 is_inverse, is_scaled);
643 break;
644 case 3:
645 ne10_radix_3_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
646 is_inverse, is_scaled);
647 break;
648 case 5:
649 ne10_radix_5_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
650 is_inverse, is_scaled);
651 break;
652 case 8:
653 ne10_radix_8_butterfly_float32_c (Fout, Fin, NULL, fstride, 1, nfft, 1,
654 is_inverse, is_scaled);
655 default:
656 ne10_radix_generic_butterfly_float32_c (Fout, Fin, twiddles, radix,
657 fstride, 1, is_inverse, is_scaled);
658 break;
659 }
660
661 stage_count--;
662 if (!stage_count) // finish
663 {
664 return;
665 }
666
667 if (radix % 2)
668 {
669 twiddles += radix;
670 }
671
672 // other stges
673 while (stage_count > 0)
674 {
675 ne10_swap_ptr (buffer, Fout);
676 mstride *= radix;
677
678 // update radix
679 radix = factors[stage_count << 1];
680 assert ((radix > 1) && (radix < 6));
681
682 fstride /= radix;
683 switch (radix)
684 {
685 case 2:
686 ne10_radix_2_butterfly_float32_c (Fout, buffer, twiddles, fstride,
687 mstride, nfft, 0, is_inverse,
688 0); // Only scaling in the first stage.
689 break;
690 case 3:
691 ne10_radix_3_butterfly_float32_c (Fout, buffer, twiddles, fstride,
692 mstride, nfft, 0, is_inverse,
693 0); // Only scaling in the first stage.
694 break;
695 case 4:
696 ne10_radix_4_butterfly_float32_c (Fout, buffer, twiddles, fstride,
697 mstride, nfft, 0, is_inverse,
698 0); // Only scaling in the first stage.
699 break;
700 case 5:
701 ne10_radix_5_butterfly_float32_c (Fout, buffer, twiddles, fstride,
702 mstride, nfft, 0, is_inverse,
703 0); // Only scaling in the first stage.
704 break;
705 } // switch (radix)
706
707 twiddles += mstride * (radix - 1);
708
709 stage_count--;
710 } // while (stage_count)
711}
712
713void ne10_mixed_radix_generic_butterfly_float32_c (ne10_fft_cpx_float32_t *Fout,
714 const ne10_fft_cpx_float32_t *Fin,
715 const ne10_int32_t *factors,
716 const ne10_fft_cpx_float32_t *twiddles,
718 const ne10_int32_t is_scaled)
719{
720 if (is_scaled)
721 {
722 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
723 twiddles, buffer, 0,
724 1); // Scaling.
725 }
726 else
727 {
728 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
729 twiddles, buffer, 0,
730 0); // Unscaling.
731 }
732}
733
734void ne10_mixed_radix_generic_butterfly_inverse_float32_c (ne10_fft_cpx_float32_t *Fout,
735 const ne10_fft_cpx_float32_t *Fin,
736 const ne10_int32_t *factors,
737 const ne10_fft_cpx_float32_t *twiddles,
739 const ne10_int32_t is_scaled)
740{
741 if (is_scaled)
742 {
743 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
744 twiddles, buffer, 1,
745 1); // Scaling
746 }
747 else
748 {
749 ne10_mixed_radix_generic_butterfly_float32_impl_c (Fout, Fin, factors,
750 twiddles, buffer, 1,
751 0); // unscaling
752 }
753}