|
|
|
#!/usr/bin/perl -w
|
|
|
|
# GSL-GENFFT - Power2 FFT C Code Generator
|
|
|
|
# Copyright (C) 2001 Tim Janik
|
|
|
|
#
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
|
|
|
|
# TODO:
|
|
|
|
# - optimize L case more
|
|
|
|
# - join 2 or 3 stages
|
|
|
|
|
|
|
|
my $indent = " ";
|
|
|
|
my $PI = 3.1415926535897932384626433832795029;
|
|
|
|
my $ieee_type = "float";
|
|
|
|
my $tmp_ieee_type = "double"; # don't change, decreases precision _and_ performance
|
|
|
|
|
|
|
|
# arguments
|
|
|
|
my $max_fft_size = 0;
|
|
|
|
my $gen_macros = 1;
|
|
|
|
my $max_unroll = 256;
|
|
|
|
my $min_split = 2048;
|
|
|
|
my $min_compress = 512;
|
|
|
|
my $fft2loop = 0;
|
|
|
|
my $single_stage = 0;
|
|
|
|
my $negate_sign = 0;
|
|
|
|
my $Wtest = 0;
|
|
|
|
|
|
|
|
#
|
|
|
|
# parse options
|
|
|
|
#
|
|
|
|
while ($_ = $ARGV[0], defined $_ && /^-/) {
|
|
|
|
shift;
|
|
|
|
last if (/^--$/);
|
|
|
|
if (/^--fft2loop$/) { $fft2loop = 1 }
|
|
|
|
elsif (/^--analysis$/) { $negate_sign = 0 }
|
|
|
|
elsif (/^--synthesis$/) { $negate_sign = 1 }
|
|
|
|
elsif (/^--Wtest$/) { $Wtest = 1 }
|
|
|
|
elsif (/^--double$/) { $ieee_type = "double" }
|
|
|
|
elsif (/^--skip-macros$/) { $gen_macros = 0 }
|
|
|
|
elsif (/^--max-unroll$/) { $max_unroll = shift }
|
|
|
|
elsif (/^--split-at$/) { $min_split = shift }
|
|
|
|
elsif (/^--min-compress$/) { $min_compress = shift }
|
|
|
|
elsif (/^--single-stage$/) { $single_stage = shift }
|
|
|
|
}
|
|
|
|
# parse arguments
|
|
|
|
my @arguments = 0;
|
|
|
|
if (defined $ARGV[0]) {
|
|
|
|
$max_fft_size = $ARGV[0];
|
|
|
|
shift;
|
|
|
|
}
|
|
|
|
while (defined $ARGV[0]) {
|
|
|
|
push @arguments, $ARGV[0];
|
|
|
|
shift;
|
|
|
|
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# check arguments and provide help
|
|
|
|
#
|
|
|
|
if ($max_fft_size && ($max_fft_size < 2 || $max_fft_size & ($max_fft_size - 1))) {
|
|
|
|
print(STDERR "usage: gsl-genfft [options] <fftsize>\n".
|
|
|
|
"Produces C code to calculate an FFT of specified <size> which has\n".
|
|
|
|
"to be a power of 2.\n".
|
|
|
|
" --max-unroll <size> max fft size to unroll loops and bodies\n".
|
|
|
|
" --min-compress <size> min fft size to unroll only loop bodies\n".
|
|
|
|
" --split-at <size> max fft size for which recursive ffts are joined\n".
|
|
|
|
" --fft2loop force bitreversing fft2 part into a loop\n".
|
|
|
|
"");
|
|
|
|
# " |\n"
|
|
|
|
exit 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# utility functions
|
|
|
|
#
|
|
|
|
sub bitreverse {
|
|
|
|
my $h = shift; # fft_size;
|
|
|
|
my $n = shift;
|
|
|
|
my $r = 0;
|
|
|
|
|
|
|
|
while ($n) {
|
|
|
|
$h >>= 1;
|
|
|
|
$r |= $h if ($n & 1);
|
|
|
|
$n >>= 1;
|
|
|
|
}
|
|
|
|
return $r;
|
|
|
|
}
|
|
|
|
sub Wexponent {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $n_points = shift;
|
|
|
|
my $nth = shift; # nth coefficient of this <n_points> stage
|
|
|
|
|
|
|
|
my $r = $nth * $fft_size / $n_points;
|
|
|
|
|
|
|
|
return $r;
|
|
|
|
}
|
|
|
|
sub Wreal {
|
|
|
|
my $n = shift; # fft_size
|
|
|
|
my $k = shift; # power
|
|
|
|
|
|
|
|
my $x = cos ($PI * (2. * $k / $n));
|
|
|
|
|
|
|
|
return $x == 0 ? 0 : $x; # -0.0 == 0
|
|
|
|
}
|
|
|
|
sub Wimag {
|
|
|
|
my $n = shift; # fft_size
|
|
|
|
my $k = shift;
|
|
|
|
|
|
|
|
my $x = sin ($PI * (2. * $k / $n));
|
|
|
|
$x = -$x if ($negate_sign);
|
|
|
|
|
|
|
|
return $x == 0 ? 0 : $x; # -0.0 == 0
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# unrolled fft generation
|
|
|
|
#
|
|
|
|
sub butterfly {
|
|
|
|
my $type = shift;
|
|
|
|
my $offset1 = shift;
|
|
|
|
my $offset2 = shift;
|
|
|
|
my $Wre = shift;
|
|
|
|
my $Wim = shift;
|
|
|
|
my $var = shift;
|
|
|
|
|
|
|
|
# define BUTTERFLY_XY(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim,T1re,T1im,T2re,T2im)
|
|
|
|
printf($indent."BUTTERFLY_%s (%s[%s], %s[%s + 1],\n".
|
|
|
|
$indent." %s[%s], %s[%s + 1],\n".
|
|
|
|
$indent." %s[%s], %s[%s + 1],\n".
|
|
|
|
$indent." %s[%s], %s[%s + 1],\n".
|
|
|
|
$indent." %s, %s);\n",
|
|
|
|
$type,
|
|
|
|
$var, $offset1, $var, $offset1,
|
|
|
|
$var, $offset2, $var, $offset2,
|
|
|
|
$var, $offset1, $var, $offset1,
|
|
|
|
$var, $offset2, $var, $offset2,
|
|
|
|
$Wre, $Wim);
|
|
|
|
}
|
|
|
|
sub butterfly_auto {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $index1 = shift;
|
|
|
|
my $index2 = $index1 + shift;
|
|
|
|
my $wk = shift;
|
|
|
|
my $ofs = shift;
|
|
|
|
my $inplace = shift;
|
|
|
|
my $ib = $inplace ? "Y" : "X";
|
|
|
|
my $rfact = Wreal ($fft_size, $wk);
|
|
|
|
my $ifact = Wimag ($fft_size, $wk);
|
|
|
|
my $index1r = !$inplace ? bitreverse ($fft_size, $index1 >> 1) << 1 : $index1;
|
|
|
|
my $index2r = !$inplace ? bitreverse ($fft_size, $index2 >> 1) << 1 : $index2;
|
|
|
|
my $btype = "XY";
|
|
|
|
my $optimize_10 = $wk == 0;
|
|
|
|
my $optimize_0x = $wk == $fft_size / 4;
|
|
|
|
|
|
|
|
if (abs ($rfact - 1.0) < 0.0000000000000005 &&
|
|
|
|
abs ($ifact - 0.0) < 0.0000000000000005) {
|
|
|
|
$btype = "10";
|
|
|
|
# print STDERR "10: ". $wk ."\n";
|
|
|
|
$optimize_10 = 0 if ($optimize_10);
|
|
|
|
} elsif (abs ($rfact - 0.0) < 0.0000000000000005 &&
|
|
|
|
abs ($ifact - 1.0) < 0.0000000000000005) {
|
|
|
|
$btype = "01";
|
|
|
|
# print STDERR "01: ". $wk ."\n";
|
|
|
|
$optimize_0x = 0 if ($optimize_0x);
|
|
|
|
} elsif (abs ($rfact - 0.0) < 0.0000000000000005 &&
|
|
|
|
abs ($ifact + 1.0) < 0.0000000000000005) {
|
|
|
|
$btype = "0m";
|
|
|
|
# print STDERR "0m: ". $wk ."\n";
|
|
|
|
$optimize_0x = 0 if ($optimize_0x);
|
|
|
|
} elsif (abs ($rfact - $ifact) < 0.0000000000000005) {
|
|
|
|
$btype = "XX"; # grr, this actually slows down the athlon (keep "XY")
|
|
|
|
} elsif (abs ($rfact + $ifact) < 0.0000000000000005) {
|
|
|
|
$btype = "yY"; # grr, this actually slows down the athlon (keep "XY")
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($optimize_10 || $optimize_0x) {
|
|
|
|
die("optimization error for fft$fft_size: 10=". $optimize_10 .", 0x=". $optimize_0x .
|
|
|
|
", Wre=". $rfact ." Wim=". $ifact .", wk=". $wk);
|
|
|
|
}
|
|
|
|
|
|
|
|
# BUTTERFLY_XY (X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim,T1re,T1im,T2re,T2im);
|
|
|
|
printf($indent."BUTTERFLY_%s (%s[%s%u], /* W%u */\n".
|
|
|
|
$indent." %s[%s%u + 1],\n".
|
|
|
|
$indent." %s[%s%u],\n".
|
|
|
|
$indent." %s[%s%u + 1],\n".
|
|
|
|
$indent." Y[%s%u],\n".
|
|
|
|
$indent." Y[%s%u + 1],\n".
|
|
|
|
$indent." Y[%s%u],\n".
|
|
|
|
$indent." Y[%s%u + 1],\n".
|
|
|
|
$indent." (%s) %+.15f, (%s) %+.15f);\n",
|
|
|
|
$btype,
|
|
|
|
$ib, $ofs, $index1r, $wk, $ib, $ofs, $index1r, $ib, $ofs, $index2r, $ib, $ofs, $index2r,
|
|
|
|
$ofs, $index1, $ofs, $index1, $ofs, $index2, $ofs, $index2,
|
|
|
|
$tmp_ieee_type, $rfact, $tmp_ieee_type, $ifact);
|
|
|
|
}
|
|
|
|
sub unroll_stage {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $n_points = shift;
|
|
|
|
my $n_rep = shift; # repetitions
|
|
|
|
my $unroll_outer = shift;
|
|
|
|
|
|
|
|
if ($unroll_outer && $n_points == 2) {
|
|
|
|
for (my $i = 0; $i < $n_points >> 1; $i++) {
|
|
|
|
for (my $j = 0; $j < $n_rep; $j++) {
|
|
|
|
my $offset1 = ($j * $n_points + $i) * 2;
|
|
|
|
my $offset2 = $offset1 + $n_points;
|
|
|
|
my $offset1r = bitreverse ($fft_size, $offset1 >> 1) << 1;
|
|
|
|
my $offset2r = bitreverse ($fft_size, $offset2 >> 1) << 1;
|
|
|
|
my $scale = !$negate_sign ? "__1, __0" : sprintf "1.0 / (%s) %u", $tmp_ieee_type, $fft_size;
|
|
|
|
printf($indent."BUTTERFLY_10%s (X[%s], X[%s + 1],\n".
|
|
|
|
$indent." X[%s], X[%s + 1],\n".
|
|
|
|
$indent." Y[%s], Y[%s + 1],\n".
|
|
|
|
$indent." Y[%s], Y[%s + 1],\n".
|
|
|
|
$indent." %s);\n",
|
|
|
|
$negate_sign ? "scale" : "",
|
|
|
|
$offset1r, $offset1r,
|
|
|
|
$offset2r, $offset2r,
|
|
|
|
$offset1, $offset1,
|
|
|
|
$offset2, $offset2,
|
|
|
|
$scale);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} elsif ($unroll_outer) {
|
|
|
|
for (my $i = 0; $i < $n_points >> 1; $i++) {
|
|
|
|
for (my $j = 0; $j < $n_rep; $j++) {
|
|
|
|
butterfly_auto ($fft_size,
|
|
|
|
($j * $n_points + $i) * 2,
|
|
|
|
$n_points,
|
|
|
|
Wexponent ($fft_size, $n_points, $i),
|
|
|
|
"", $n_points != 2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
die "cannot skip outer loop unrolling for fft2" if $n_points == 2;
|
|
|
|
|
|
|
|
printf "%sfor (block = 0; block < %u; block += %u) {\n", $indent, $n_points * 2 * $n_rep, $n_points * 2;
|
|
|
|
$indent .= " ";
|
|
|
|
for (my $i = 0; $i < $n_points >> 1; $i++) {
|
|
|
|
butterfly_auto ($fft_size,
|
|
|
|
$i * 2,
|
|
|
|
$n_points,
|
|
|
|
Wexponent ($fft_size, $n_points, $i),
|
|
|
|
"block + ", $n_points != 2);
|
|
|
|
}
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sub table_stage {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $n_points = shift;
|
|
|
|
my $n_rep = shift; # repetitions
|
|
|
|
|
|
|
|
die "cannot loop over coefficient table for fft2" if $n_points == 2;
|
|
|
|
|
|
|
|
printf "%s{\n", $indent;
|
|
|
|
$indent .= " ";
|
|
|
|
printf "%sstatic const %s Wconst%u[] = {\n", $indent, $tmp_ieee_type, $n_points;
|
|
|
|
for (my $i = 1; $i < $n_points >> 2; $i++) {
|
|
|
|
my $wk = Wexponent ($fft_size, $n_points, $i);
|
|
|
|
printf "%s %+.15f, %+.15f,\n", $indent, Wreal ($fft_size, $wk), Wimag ($fft_size, $wk);
|
|
|
|
}
|
|
|
|
printf "%s};\n", $indent;
|
|
|
|
printf "%sconst %s *W = Wconst%u - 2;\n", $indent, $tmp_ieee_type, $n_points;
|
|
|
|
printf "%s%s *Z = Y + %u;\n", $indent, $ieee_type, $n_points >> 1;
|
|
|
|
|
|
|
|
# first half loops
|
|
|
|
printf("%sfor (offset = 0; offset < %u; offset += %u) {\n",
|
|
|
|
$indent, $n_points * 2 * $n_rep, $n_points * 2);
|
|
|
|
$indent .= " ";
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ("10", "offset", $ofs2, "__1", "__0", "Y");
|
|
|
|
butterfly ($negate_sign ? "0m" : "01", "offset", $ofs2, "__0", "__1", "Z");
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
printf "%sfor (butterfly = 2; butterfly < %u; butterfly += 2) {\n", $indent, $n_points >> 1;
|
|
|
|
printf "%s Wre = W[butterfly]; Wim = W[butterfly + 1];\n", $indent;
|
|
|
|
printf "%s for (block = 0; block < %u; block += %u) {\n", $indent, $n_points * 2 * $n_rep, $n_points * 2;
|
|
|
|
$indent .= " ";
|
|
|
|
printf "%soffset = butterfly + block;\n", $indent;
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ("XY", "offset", $ofs2, "Wre", "Wim", "Y");
|
|
|
|
butterfly ($negate_sign ? "yX" : "Yx", "offset", $ofs2, "Wre", "Wim", "Z");
|
|
|
|
$indent =~ s/\ \ \ \ $//;
|
|
|
|
printf "%s }\n", $indent;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
|
|
|
|
# second half loops
|
|
|
|
if (0) {
|
|
|
|
printf("%sfor (offset = %u; offset < %u + %u; offset += %u) {\n",
|
|
|
|
$indent, $n_points >> 1, $n_points >> 1, $n_points * 2 * $n_rep, $n_points * 2);
|
|
|
|
$indent .= " ";
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ($negate_sign ? "0m" : "01", "offset", $ofs2, "__0", "__1", "Y");
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
printf "%sW -= %u;\n", $indent, $n_points >> 1;
|
|
|
|
printf "%sfor (butterfly = %u + 2; butterfly < %u; butterfly += 2) {\n", $indent, $n_points >> 1, $n_points;
|
|
|
|
printf "%s Wre = W[butterfly]; Wim = W[butterfly + 1];\n", $indent;
|
|
|
|
printf "%s for (block = 0; block < %u; block += %u) {\n", $indent, $n_points * 2 * $n_rep, $n_points * 2;
|
|
|
|
$indent .= " ";
|
|
|
|
printf "%soffset = butterfly + block;\n", $indent;
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ($negate_sign ? "yX" : "Yx", "offset", $ofs2, "Wre", "Wim", "Y");
|
|
|
|
$indent =~ s/\ \ \ \ $//;
|
|
|
|
printf "%s }\n", $indent;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
}
|
|
|
|
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
}
|
|
|
|
sub bitreverse_fft2 {
|
|
|
|
# mul_result = gsl_complex (c1.re * c2.re - c1.im * c2.im, c1.re * c2.im + c1.im * c2.re);
|
|
|
|
printf "
|
|
|
|
static inline void
|
|
|
|
bitreverse_fft2analysis (const unsigned int n,
|
|
|
|
const %-6s *X,
|
|
|
|
%-6s *Y)
|
|
|
|
{
|
|
|
|
const unsigned int n2 = n >> 1, n1 = n + n2, max = n >> 2;
|
|
|
|
unsigned int i, r;
|
|
|
|
|
|
|
|
BUTTERFLY_10 (X[0], X[1],
|
|
|
|
X[n], X[n + 1],
|
|
|
|
Y[0], Y[1],
|
|
|
|
Y[2], Y[3],
|
|
|
|
__1, __0);
|
|
|
|
BUTTERFLY_10 (X[n2], X[n2 + 1],
|
|
|
|
X[n1], X[n1 + 1],
|
|
|
|
Y[4], Y[5],
|
|
|
|
Y[6], Y[7],
|
|
|
|
__1, __0);
|
|
|
|
for (i = 1, r = 0; i < max; i++)
|
|
|
|
{
|
|
|
|
unsigned int k, j = n >> 1;
|
|
|
|
|
|
|
|
while (r >= j)
|
|
|
|
{
|
|
|
|
r -= j;
|
|
|
|
j >>= 1;
|
|
|
|
}
|
|
|
|
r |= j;
|
|
|
|
|
|
|
|
k = r >> 1;
|
|
|
|
j = i << 3;
|
|
|
|
BUTTERFLY_10 (X[k], X[k + 1],
|
|
|
|
X[k + n], X[k + n + 1],
|
|
|
|
Y[j], Y[j + 1],
|
|
|
|
Y[j + 2], Y[j + 3],
|
|
|
|
__1, __0);
|
|
|
|
k += n2;
|
|
|
|
j += 4;
|
|
|
|
BUTTERFLY_10 (X[k], X[k + 1],
|
|
|
|
X[k + n], X[k + n + 1],
|
|
|
|
Y[j], Y[j + 1],
|
|
|
|
Y[j + 2], Y[j + 3],
|
|
|
|
__1, __0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
static inline void
|
|
|
|
bitreverse_fft2synthesis (const unsigned int n,
|
|
|
|
const %-6s *X,
|
|
|
|
%-6s *Y)
|
|
|
|
{
|
|
|
|
const unsigned int n2 = n >> 1, n1 = n + n2, max = n >> 2;
|
|
|
|
unsigned int i, r;
|
|
|
|
%s scale = n;
|
|
|
|
|
|
|
|
scale = 1.0 / scale;
|
|
|
|
BUTTERFLY_10scale (X[0], X[1],
|
|
|
|
X[n], X[n + 1],
|
|
|
|
Y[0], Y[1],
|
|
|
|
Y[2], Y[3],
|
|
|
|
scale);
|
|
|
|
BUTTERFLY_10scale (X[n2], X[n2 + 1],
|
|
|
|
X[n1], X[n1 + 1],
|
|
|
|
Y[4], Y[5],
|
|
|
|
Y[6], Y[7],
|
|
|
|
scale);
|
|
|
|
for (i = 1, r = 0; i < max; i++)
|
|
|
|
{
|
|
|
|
unsigned int k, j = n >> 1;
|
|
|
|
|
|
|
|
while (r >= j)
|
|
|
|
{
|
|
|
|
r -= j;
|
|
|
|
j >>= 1;
|
|
|
|
}
|
|
|
|
r |= j;
|
|
|
|
|
|
|
|
k = r >> 1;
|
|
|
|
j = i << 3;
|
|
|
|
BUTTERFLY_10scale (X[k], X[k + 1],
|
|
|
|
X[k + n], X[k + n + 1],
|
|
|
|
Y[j], Y[j + 1],
|
|
|
|
Y[j + 2], Y[j + 3],
|
|
|
|
scale);
|
|
|
|
k += n2;
|
|
|
|
j += 4;
|
|
|
|
BUTTERFLY_10scale (X[k], X[k + 1],
|
|
|
|
X[k + n], X[k + n + 1],
|
|
|
|
Y[j], Y[j + 1],
|
|
|
|
Y[j + 2], Y[j + 3],
|
|
|
|
scale);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
", $ieee_type, $ieee_type, $ieee_type, $ieee_type, $tmp_ieee_type;
|
|
|
|
|
|
|
|
# testing:
|
|
|
|
# define BUTTERFLY_10(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim,T1re,T1im,T2re,T2im) \
|
|
|
|
# { int a = &X1re-X, b = &X2re-X, c = &Y1re-Y; \
|
|
|
|
# printf ("bufferfly: [%u,%u] -> [%u,%u]\n", (a)>>1, (b)>>1, (c)>>1, (c+2)>>1); \
|
|
|
|
# if (((a)|(b)|(c))&1) perror("****eek, extraneous bits!");}
|
|
|
|
# int main (int argc, char *argv[]) {
|
|
|
|
# bitreverse_fft2a (atoi (argv[1]), 0, 0);
|
|
|
|
# return 0;
|
|
|
|
# }
|
|
|
|
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# fft generation routines
|
|
|
|
#
|
|
|
|
sub gen_fft_loop_unoptimized {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $n_points = shift;
|
|
|
|
my $n_rep = shift; # repetitions
|
|
|
|
my $ofs2;
|
|
|
|
|
|
|
|
die "invalid fft size: " . $n_points if ($n_points < 4);
|
|
|
|
|
|
|
|
# incremental coefficient W for this fft
|
|
|
|
my $theta = $PI / ($n_points >> 1);
|
|
|
|
$theta = -$theta if ($negate_sign);
|
|
|
|
my $re = sin (0.5 * $theta);
|
|
|
|
my $im = sin ($theta);
|
|
|
|
$re = $re * $re * -2.;
|
|
|
|
|
|
|
|
# init variables, start loops
|
|
|
|
printf "%sWre = 1.0; Wim = 0.0;\n", $indent;
|
|
|
|
printf "%sfor (butterfly = 0; butterfly < %u; butterfly += 2) {\n", $indent, $n_points;
|
|
|
|
$indent .= " ";
|
|
|
|
printf "%sfor (block = 0; block < %u; block += %u) {\n", $indent, $n_rep * $n_points << 1, $n_points << 1;
|
|
|
|
$indent .= " ";
|
|
|
|
|
|
|
|
# do the butterfly
|
|
|
|
printf "%soffset = butterfly + block;\n", $indent;
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ("XY", "offset", $ofs2, "Wre", "Wim", "Y");
|
|
|
|
|
|
|
|
# close inner loop, update W
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
printf("%sWMULTIPLY (Wre, Wim, %+.15f, %+.15f);\n",
|
|
|
|
$indent, $re, $im);
|
|
|
|
|
|
|
|
# close outer loop, done
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
}
|
|
|
|
sub gen_fft_loop_o10o0x {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $n_points = shift;
|
|
|
|
my $n_rep = shift; # repetitions
|
|
|
|
my $ofs2;
|
|
|
|
|
|
|
|
die "invalid fft size: " . $n_points if ($n_points < 4);
|
|
|
|
|
|
|
|
# coefficient {1,0} loop
|
|
|
|
printf "%sfor (offset = 0; offset < %u; offset += %u) {\n", $indent, $n_rep * $n_points << 1, $n_points << 1;
|
|
|
|
$indent .= " ";
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ("10", "offset", $ofs2, "__1", "__0", "Y");
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
|
|
|
|
# incremental coefficient W for this fft
|
|
|
|
my $theta = $PI / ($n_points >> 1);
|
|
|
|
$theta = -$theta if ($negate_sign);
|
|
|
|
my $re = sin (0.5 * $theta);
|
|
|
|
my $im = sin ($theta);
|
|
|
|
$re = $re * $re * -2.;
|
|
|
|
|
|
|
|
# loop first half
|
|
|
|
if (2 < $n_points >> 1) {
|
|
|
|
printf "%sWre = %+.15f; Wim = %+.15f;\n", $indent, $re + 1.0, $im;
|
|
|
|
printf "%sfor (butterfly = 2; butterfly < %u; butterfly += 2) {\n", $indent, $n_points >> 1;
|
|
|
|
$indent .= " ";
|
|
|
|
printf "%sfor (block = 0; block < %u; block += %u) {\n", $indent, $n_rep * $n_points << 1, $n_points << 1;
|
|
|
|
$indent .= " ";
|
|
|
|
# do the butterfly
|
|
|
|
printf "%soffset = butterfly + block;\n", $indent;
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ("XY", "offset", $ofs2, "Wre", "Wim", "Y");
|
|
|
|
# close inner loop, update W
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
printf("%sWMULTIPLY (Wre, Wim, %+.15f, %+.15f);\n",
|
|
|
|
$indent, $re, $im);
|
|
|
|
# close outer loop, done
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
}
|
|
|
|
|
|
|
|
# coefficient {0,1} loop
|
|
|
|
printf "%sfor (offset = %u; offset < %u; offset += %u) {\n", $indent, $n_points >> 1, $n_rep * $n_points << 1, $n_points << 1;
|
|
|
|
$indent .= " ";
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ($negate_sign ? "0m" : "01", "offset", $ofs2, "__0", "__1", "Y");
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
|
|
|
|
# loop second half
|
|
|
|
if (($n_points >> 1) + 2 < $n_points) {
|
|
|
|
if ($negate_sign) {
|
|
|
|
printf "%sWre = %+.15f; Wim = %+.15f;\n", $indent, $im, -$re - 1.0;
|
|
|
|
} else {
|
|
|
|
printf "%sWre = %+.15f; Wim = %+.15f;\n", $indent, -$im, $re + 1.0;
|
|
|
|
}
|
|
|
|
printf "%sfor (butterfly = %u; butterfly < %u; butterfly += 2) {\n", $indent, ($n_points >> 1) + 2, $n_points;
|
|
|
|
$indent .= " ";
|
|
|
|
printf "%sfor (block = 0; block < %u; block += %u) {\n", $indent, $n_rep * $n_points << 1, $n_points << 1;
|
|
|
|
$indent .= " ";
|
|
|
|
# do the butterfly
|
|
|
|
printf "%soffset = butterfly + block;\n", $indent;
|
|
|
|
$ofs2 = sprintf "offset + %u", $n_points;
|
|
|
|
butterfly ("XY", "offset", $ofs2, "Wre", "Wim", "Y");
|
|
|
|
# close inner loop, update W
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
printf("%sWMULTIPLY (Wre, Wim, %+.15f, %+.15f);\n",
|
|
|
|
$indent, $re, $im);
|
|
|
|
# close outer loop, done
|
|
|
|
$indent =~ s/\ \ $//;
|
|
|
|
printf "%s}\n", $indent;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sub fft_loop_macros {
|
|
|
|
# mul_result = gsl_complex (c1.re * c2.re - c1.im * c2.im, c1.re * c2.im + c1.im * c2.re);
|
|
|
|
print "
|
|
|
|
#define WMULTIPLY(Wre,Wim,Dre,Dim) { \\
|
|
|
|
register $tmp_ieee_type T1re, T1im, T2re, T2im; \\
|
|
|
|
T1re = Wre * Dre; \\
|
|
|
|
T1im = Wim * Dre; \\
|
|
|
|
T2re = Wim * Dim; \\
|
|
|
|
T2im = Wre * Dim; \\
|
|
|
|
T1re -= T2re; \\
|
|
|
|
T1im += T2im; \\
|
|
|
|
Wre += T1re; \\
|
|
|
|
Wim += T1im; \\
|
|
|
|
}";
|
|
|
|
}
|
|
|
|
sub butterfly_macros {
|
|
|
|
# mul_result = gsl_complex (c1.re * c2.re - c1.im * c2.im, c1.re * c2.im + c1.im * c2.re);
|
|
|
|
# add_result = gsl_complex (c1.re + c2.re, c1.im + c2.im);
|
|
|
|
print "
|
|
|
|
#define BUTTERFLY_XY(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim) { \\
|
|
|
|
register $tmp_ieee_type T1re, T1im, T2re, T2im; \\
|
|
|
|
T1re = X2re * Wre; \\
|
|
|
|
T1im = X2im * Wre; \\
|
|
|
|
T2re = X2im * Wim; \\
|
|
|
|
T2im = X2re * Wim; \\
|
|
|
|
T1re -= T2re; \\
|
|
|
|
T1im += T2im; \\
|
|
|
|
T2re = X1re - T1re; \\
|
|
|
|
T2im = X1im - T1im; \\
|
|
|
|
Y1re = X1re + T1re; \\
|
|
|
|
Y1im = X1im + T1im; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_Yx(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim) { \\
|
|
|
|
register $tmp_ieee_type T1re, T1im, T2re, T2im; \\
|
|
|
|
T1re = X2re * Wim; \\
|
|
|
|
T1im = X2im * Wim; \\
|
|
|
|
T2re = X2im * Wre; \\
|
|
|
|
T2im = X2re * Wre; \\
|
|
|
|
T1re += T2re; \\
|
|
|
|
T1im -= T2im; \\
|
|
|
|
T2re = X1re + T1re; \\
|
|
|
|
T2im = X1im + T1im; \\
|
|
|
|
Y1re = X1re - T1re; \\
|
|
|
|
Y1im = X1im - T1im; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_yX(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,Wim) { \\
|
|
|
|
register $tmp_ieee_type T1re, T1im, T2re, T2im; \\
|
|
|
|
T1re = X2re * Wim; \\
|
|
|
|
T1im = X2im * Wim; \\
|
|
|
|
T2re = X2im * Wre; \\
|
|
|
|
T2im = X2re * Wre; \\
|
|
|
|
T1re += T2re; \\
|
|
|
|
T1im -= T2im; \\
|
|
|
|
T2re = X1re - T1re; \\
|
|
|
|
T2im = X1im - T1im; \\
|
|
|
|
Y1re = X1re + T1re; \\
|
|
|
|
Y1im = X1im + T1im; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_10(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,_1,_2) { \\
|
|
|
|
register $tmp_ieee_type T2re, T2im; \\
|
|
|
|
T2re = X1re - X2re; \\
|
|
|
|
T2im = X1im - X2im; \\
|
|
|
|
Y1re = X1re + X2re; \\
|
|
|
|
Y1im = X1im + X2im; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_01(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,_1,_2) { \\
|
|
|
|
register $tmp_ieee_type T2re, T2im; \\
|
|
|
|
T2re = X1re + X2im; \\
|
|
|
|
T2im = X1im - X2re; \\
|
|
|
|
Y1re = X1re - X2im; \\
|
|
|
|
Y1im = X1im + X2re; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_0m(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,_1,_2) { \\
|
|
|
|
register $tmp_ieee_type T2re, T2im; \\
|
|
|
|
T2re = X1re - X2im; \\
|
|
|
|
T2im = X1im + X2re; \\
|
|
|
|
Y1re = X1re + X2im; \\
|
|
|
|
Y1im = X1im - X2re; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_XX(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,_2) { \\
|
|
|
|
register $tmp_ieee_type T1re, T1im, T2re, T2im; \\
|
|
|
|
T1re = X2re * Wre; \\
|
|
|
|
T1im = X2im * Wre; \\
|
|
|
|
T2re = T1im; \\
|
|
|
|
T2im = T1re; \\
|
|
|
|
T1re -= T2re; \\
|
|
|
|
T1im += T2im; \\
|
|
|
|
T2re = X1re - T1re; \\
|
|
|
|
T2im = X1im - T1im; \\
|
|
|
|
Y1re = X1re + T1re; \\
|
|
|
|
Y1im = X1im + T1im; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_yY(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,Wre,_2) { \\
|
|
|
|
register $tmp_ieee_type T1re, T1im, T2re, T2im; \\
|
|
|
|
T1re = X2re * Wre; \\
|
|
|
|
T1im = X2im * Wre; \\
|
|
|
|
T2re = T1im; \\
|
|
|
|
T2im = T1re; \\
|
|
|
|
T1re += T2re; \\
|
|
|
|
T1im -= T2im; \\
|
|
|
|
T2re = X1re - T1re; \\
|
|
|
|
T2im = X1im - T1im; \\
|
|
|
|
Y1re = X1re + T1re; \\
|
|
|
|
Y1im = X1im + T1im; \\
|
|
|
|
Y2re = T2re; \\
|
|
|
|
Y2im = T2im; \\
|
|
|
|
}
|
|
|
|
#define BUTTERFLY_10scale(X1re,X1im,X2re,X2im,Y1re,Y1im,Y2re,Y2im,S) { \\
|
|
|
|
register $tmp_ieee_type T2re, T2im; \\
|
|
|
|
T2re = X1re - X2re; \\
|
|
|
|
T2im = X1im - X2im; \\
|
|
|
|
Y1re = X1re + X2re; \\
|
|
|
|
Y1im = X1im + X2im; \\
|
|
|
|
Y2re = T2re * S; \\
|
|
|
|
Y2im = T2im * S; \\
|
|
|
|
Y1re *= S; \\
|
|
|
|
Y1im *= S; \\
|
|
|
|
}
|
|
|
|
";
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# stage generation
|
|
|
|
#
|
|
|
|
sub gen_stage {
|
|
|
|
my $fft_size = shift;
|
|
|
|
my $n_points = shift;
|
|
|
|
my $kind = shift;
|
|
|
|
my $times = $fft_size / $n_points;
|
|
|
|
|
|
|
|
if ($kind eq 'S') {
|
|
|
|
printf "\n%s/* skipping %u times fft%u */\n", $indent, $times, $n_points;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if ($n_points == 2) { # input stage needs special handling
|
|
|
|
if ($kind eq 'L') {
|
|
|
|
printf "\n%s/* perform fft2 and bitreverse input */\n", $indent;
|
|
|
|
printf "%sbitreverse_fft2%s (%u, X, Y);\n", $indent, $negate_sign ? "synthesis" : "analysis", $fft_size;
|
|
|
|
} elsif ($kind eq 'F') {
|
|
|
|
printf "\n%s/* perform %u times fft2 */\n", $indent, $times;
|
|
|
|
unroll_stage ($fft_size, $n_points, $times, 1);
|
|
|
|
} else {
|
|
|
|
die "need one of 'L' or 'F' for input stage";
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if ($kind eq 'L') {
|
|
|
|
printf "\n%s/* perform %u times fft%u */\n", $indent, $times, $n_points;
|
|
|
|
gen_fft_loop_o10o0x ($fft_size, $n_points, $times);
|
|
|
|
} elsif ($kind eq 'F') {
|
|
|
|
printf "\n%s/* perform %u times fft%u */\n", $indent, $times, $n_points;
|
|
|
|
unroll_stage ($fft_size, $n_points, $times, 1);
|
|
|
|
} elsif ($kind eq 'R') {
|
|
|
|
printf "\n%s/* perform %u times fft%u */\n", $indent, $times, $n_points;
|
|
|
|
unroll_stage ($fft_size, $n_points, $times, $times == 1);
|
|
|
|
} elsif ($kind eq 'T') {
|
|
|
|
printf "\n%s/* perform %u times fft%u */\n", $indent, $times, $n_points;
|
|
|
|
table_stage ($fft_size, $n_points, $times);
|
|
|
|
} elsif ($kind eq 'X') {
|
|
|
|
printf "\n%s/* perform %u times fft%u */\n", $indent, $times, $n_points;
|
|
|
|
for (my $i = 0; $i < $times; $i++) {
|
|
|
|
if ($i) {
|
|
|
|
printf($indent."gsl_power2_fft%u%s_skip2 (X + %u, Y + %u);\n",
|
|
|
|
$n_points, $negate_sign ? "synthesis" : "analysis", $n_points * $i << 1, $n_points * $i << 1);
|
|
|
|
} else {
|
|
|
|
printf($indent."gsl_power2_fft%u%s_skip2 (X, Y);\n",
|
|
|
|
$n_points, $negate_sign ? "synthesis" : "analysis");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
die "unknown kind: $kind";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#
|
|
|
|
# test output
|
|
|
|
#
|
|
|
|
if ($Wtest) {
|
|
|
|
my $fft_size = $max_fft_size;
|
|
|
|
|
|
|
|
for (my $i = 0; $i < $fft_size >> 1; $i++) {
|
|
|
|
my $wk = Wexponent ($fft_size, $fft_size, $i);
|
|
|
|
printf "$fft_size: %4u: %+.15f, %+.15f,\n", $i, Wreal ($fft_size, $wk), Wimag ($fft_size, $wk);
|
|
|
|
}
|
|
|
|
exit 0;
|
|
|
|
}
|
|
|
|
if (0) {
|
|
|
|
# reversal test
|
|
|
|
for (my $i = 0; $i < $fft_size; $i++) {
|
|
|
|
printf STDERR "%-3u <-> %3u %s\n", $i, bitreverse ($fft_size, $i), $i % 4 ? "" : "XXX";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
# main output
|
|
|
|
#
|
|
|
|
if ($gen_macros) {
|
|
|
|
butterfly_macros ();
|
|
|
|
fft_loop_macros ();
|
|
|
|
bitreverse_fft2 ();
|
|
|
|
}
|
|
|
|
exit 0 if (!$max_fft_size);
|
|
|
|
|
|
|
|
my $max_stages = 0;
|
|
|
|
for ($i = $max_fft_size >> 1; $i; $i >>= 1) {
|
|
|
|
$max_stages++;
|
|
|
|
}
|
|
|
|
die "missing stage specifications ($max_stages stages, $#arguments specs)" if ($max_stages > $#arguments);
|
|
|
|
die "too many stage specifications ($max_stages stages, $#arguments specs)" if ($max_stages < $#arguments);
|
|
|
|
print "/**\n";
|
|
|
|
printf(" ** Generated data (by gsl-genfft $max_fft_size");
|
|
|
|
for (my $i = 1; $i < @arguments; $i++) {
|
|
|
|
printf " %s", uc ($arguments[$i]);
|
|
|
|
}
|
|
|
|
printf ")\n";
|
|
|
|
print " **/\n";
|
|
|
|
{
|
|
|
|
my $fft_size = $max_fft_size;
|
|
|
|
my $skip2 = uc ($arguments[1]) eq 'S';
|
|
|
|
|
|
|
|
printf STDERR "FFT-%-5u: ", $fft_size;
|
|
|
|
|
|
|
|
printf "static void\n";
|
|
|
|
printf("gsl_power2_fft%u%s%s (const %s *X, %s *Y)\n{\n",
|
|
|
|
$fft_size, $negate_sign ? "synthesis" : "analysis",
|
|
|
|
$skip2 ? "_skip2" : "",
|
|
|
|
$ieee_type, $ieee_type);
|
|
|
|
printf "%sregister unsigned int butterfly, block, offset;\n", $indent;
|
|
|
|
printf "%sregister %s Wre, Wim;\n\n", $indent, $tmp_ieee_type, $tmp_ieee_type;
|
|
|
|
printf "%sbutterfly = block = offset = 0, Wre = Wim = 0.0; /* silence compiler */\n", $indent;
|
|
|
|
|
|
|
|
my $seen_rule = 0;
|
|
|
|
for (my $i = 1; $i < @arguments && 1 << $i <= $fft_size; $i++) {
|
|
|
|
my $stage = uc ($arguments[$i]);
|
|
|
|
printf STDERR "%u-%s ", 1 << $i, $stage;
|
|
|
|
die "X follows non Skip" if ($stage eq "X" && $seen_rule);
|
|
|
|
gen_stage ($fft_size, 1 << $i, $stage);
|
|
|
|
$seen_rule |= (1 << $i) > 2 && !($stage eq "S");
|
|
|
|
}
|
|
|
|
printf "}\n";
|
|
|
|
printf STDERR "\n";
|
|
|
|
}
|
|
|
|
print "\n/**\n";
|
|
|
|
print " ** Generated data ends here\n";
|
|
|
|
print " **/\n";
|