/* Fast Fourier transform C++ header class for the FFTW3 Library Copyright (C) 2004-2024 John C. Bowman, University of Alberta Malcolm Roberts, University of Strasbourg This program is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifndef __fftwpp_h__ #define __fftwpp_h__ 1 #define __FFTWPP_H_VERSION__ 3.01 #include #include #include #include #include #include #include #include #include "seconds.h" #include "parallel.h" #ifndef __Complex_h__ #include typedef std::complex Complex; #endif #include "statistics.h" #include "align.h" namespace fftwpp { // Return the memory alignment used by FFTW. // Use of this function requires applying patches/fftw-3.3.8-alignment.patch // to the FFTW source, recompiling, and reinstalling the FFW library. extern "C" size_t fftw_alignment(); class fftw; extern "C" fftw_plan Planner(fftw *F, Complex *in, Complex *out); void loadWisdom(); void saveWisdom(); extern std::string wisdomName; extern const char *inout; class ThreadBase { public: size_t threads; size_t innerthreads; ThreadBase(); ThreadBase(size_t threads) : threads(threads) {} void Threads(size_t nthreads) {threads=nthreads;} size_t Threads() {return threads;} size_t Innerthreads() {return innerthreads;} void multithread(size_t n) { if(n >= threads) { innerthreads=1; } else { innerthreads=threads; threads=1; } } }; inline size_t realsize(size_t n, bool inplace) { return inplace ? 2*(n/2+1) : n; } inline size_t Inplace(Complex *in, Complex *out=NULL) { return !out || in == out; } inline size_t Inplace(Complex *in, double *out) { return Inplace(in,(Complex *) out); } inline size_t Inplace(double *in, Complex *out) { return Inplace((Complex *) in,out); } class Doubles { public: size_t rsize,csize; Doubles(size_t nx, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, bool inplace) { rsize=(realsize(nx,inplace)-2)*istride+(M-1)*idist+2; csize=2*(nx/2*ostride+(M-1)*odist+1); if(inplace) rsize=csize=std::max(rsize,csize); } }; // Base clase for fft routines // class fftw : public ThreadBase { protected: size_t doubles; // number of double precision values in output int sign; size_t threads; double norm; fftw_plan plan; bool inplace; size_t Dist(size_t n, size_t stride, size_t dist) { return dist ? dist : ((stride == 1) ? n : 1); } static const double twopi; public: static size_t effort; static size_t maxthreads; static fftw_plan (*planner)(fftw *f, Complex *in, Complex *out); static bool wiser; virtual size_t Threads() {return threads;} static const char *oddshift; // In-place shift of Fourier origin to (nx/2,0) for even nx. static void Shift(Complex *data, size_t nx, size_t ny, size_t threads) { size_t nyp=ny/2+1; size_t stop=nx*nyp; if(nx % 2 == 0) { size_t inc=2*nyp; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=nyp; i < stop; i += inc) { Complex *p=data+i; for(size_t j=0; j < nyp; j++) p[j]=-p[j]; } } else { std::cerr << oddshift << std::endl; exit(1); } } // Out-of-place shift of Fourier origin to (nx/2,0) for even nx. static void Shift(double *data, size_t nx, size_t ny, size_t threads) { if(nx % 2 == 0) { size_t stop=nx*ny; size_t inc=2*ny; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=ny; i < stop; i += inc) { double *p=data+i; for(size_t j=0; j < ny; j++) p[j]=-p[j]; } } else { std::cerr << oddshift << std::endl; exit(1); } } // In-place shift of Fourier origin to (nx/2,ny/2,0) for even nx and ny. static void Shift(Complex *data, size_t nx, size_t ny, size_t nz, size_t threads) { size_t nzp=nz/2+1; size_t nyzp=ny*nzp; if(nx % 2 == 0 && ny % 2 == 0) { size_t pinc=2*nzp; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; i++) { Complex *pstart=data+i*nyzp; Complex *pstop=pstart+nyzp; for(Complex *p=pstart+(1-(i % 2))*nzp; p < pstop; p += pinc) { for(size_t k=0; k < nzp; k++) p[k]=-p[k]; } } } else { std::cerr << oddshift << " or odd ny" << std::endl; exit(1); } } // Out-of-place shift of Fourier origin to (nx/2,ny/2,0) for even nx and ny. static void Shift(double *data, size_t nx, size_t ny, size_t nz, size_t threads) { size_t nyz=ny*nz; if(nx % 2 == 0 && ny % 2 == 0) { size_t pinc=2*nz; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; i++) { double *pstart=data+i*nyz; double *pstop=pstart+nyz; for(double *p=pstart+(1-(i % 2))*nz; p < pstop; p += pinc) { for(size_t k=0; k < nz; k++) p[k]=-p[k]; } } } else { std::cerr << oddshift << " or odd ny" << std::endl; exit(1); } } fftw() : plan(NULL) {} fftw(size_t doubles, int sign, size_t threads, size_t n=0) : doubles(doubles), sign(sign), threads(threads), norm(1.0/(n ? n : doubles/2)), plan(NULL) { #ifndef FFTWPP_SINGLE_THREAD fftw_init_threads(); #endif } virtual ~fftw() { if(plan) fftw_destroy_plan(plan); } virtual fftw_plan Plan(Complex *in, Complex *out) {return NULL;}; inline void CheckAlign(Complex *p, const char *s) { if((size_t) p % sizeof(Complex) == 0) return; std::cerr << "WARNING: " << s << " array is not " << sizeof(Complex) << "-byte aligned: address " << p << std::endl; } void noplan() { std::cerr << "Unable to construct FFTW plan" << std::endl; exit(1); } static void planThreads(size_t threads) { #ifndef FFTWPP_SINGLE_THREAD omp_set_num_threads(threads); fftw_plan_with_nthreads(threads); #endif } inline Complex *CheckAlign(Complex *in, Complex *out, bool constructor=true) { #ifndef NO_CHECK_ALIGN CheckAlign(in,constructor ? "constructor input" : "input"); if(out) CheckAlign(out,constructor ? "constructor output" : "output"); else out=in; #else if(!out) out=in; #endif return out; } void Setup(Complex *in, Complex *out=NULL) { bool alloc=!in; if(alloc) in=utils::ComplexAlign((doubles+1)/2); out=CheckAlign(in,out); inplace=(out==in); parallel::Threshold(threads); if(doubles < 2*threshold) threads=1; planThreads(threads); plan=(*planner)(this,in,out); if(!plan) noplan(); if(alloc) Array::deleteAlign(in,(doubles+1)/2); #ifdef FFTWPP_VERBOSE if(threads > 1) std::cout << "Using " << threads << " threads." << std::endl; #endif } void Setup(Complex *in, double *out) { parallel::Threshold(threads); if(doubles < 4*threshold) threads=1; Setup(in,(Complex *) out); } void Setup(double *in, Complex *out=NULL) { parallel::Threshold(threads); if(doubles < 4*threshold) threads=1; Setup((Complex *) in,out); } virtual void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft(plan,(fftw_complex *) in,(fftw_complex *) out); } Complex *Setout(Complex *in, Complex *out) { out=CheckAlign(in,out,false); if(inplace ^ (out == in)) { std::cerr << "ERROR: fft " << inout << std::endl; exit(1); } return out; } void fft(Complex *in, Complex *out=NULL) { out=Setout(in,out); Execute(in,out); } void fft(double *in, Complex *out=NULL) { fft((Complex *) in,out); } void fft(Complex *in, double *out) { fft(in,(Complex *) out); } void fft0(Complex *in, Complex *out=NULL) { out=Setout(in,out); Execute(in,out,true); } void fft0(double *in, Complex *out=NULL) { fft0((Complex *) in,out); } void fft0(Complex *in, double *out) { fft0(in,(Complex *) out); } void Normalize(Complex *out) { size_t stop=doubles/2; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < stop; i++) out[i] *= norm; } void Normalize(double *out) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < doubles; i++) out[i] *= norm; } void fftNormalized(Complex *in, Complex *out=NULL) { out=Setout(in,out); Execute(in,out); Normalize(out); } void fftNormalized(Complex *in, double *out) { out=(double *) Setout(in,(Complex *) out); Execute(in,(Complex *) out); Normalize(out); } void fftNormalized(double *in, Complex *out) { out=Setout((Complex *) in,out); Execute((Complex *) in,out); Normalize(out); } void fft0Normalized(Complex *in, Complex *out=NULL) { out=Setout(in,out); Execute(in,out,true); Normalize(out); } void fft0Normalized(Complex *in, double *out) { out=(double *) Setout(in,(Complex *) out); Execute(in,(Complex *) out,true); Normalize(out); } void fft0Normalized(double *in, Complex *out) { out=Setout((Complex *) in,out); Execute((Complex *) in,out,true); Normalize(out); } template void Normalize(size_t nx, size_t M, size_t ostride, size_t odist, O *out) { size_t stop=nx*ostride; O *outMdist=out+M*odist; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < stop; i += ostride) { O *pstop=outMdist+i; for(O *p=out+i; p < pstop; p += odist) { *p *= norm; } } } template void fftNormalized(size_t nx, size_t M, size_t ostride, size_t odist, I *in, O *out) { out=(O *) Setout((Complex *) in,(Complex *) out); Execute((Complex *) in,(Complex *) out); Normalize(nx,M,ostride,odist,out); } }; // class fftw class Transpose { fftw_plan plan; bool inplace; public: template Transpose(size_t rows, size_t cols, size_t length, T *in, T *out=NULL, size_t threads=fftw::maxthreads) { size_t size=sizeof(T); if(size % sizeof(double) != 0) { std::cerr << "ERROR: Transpose is not implemented for type of size " << size; exit(1); } plan=NULL; if(!out) out=in; inplace=(out==in); if(rows == 0 || cols == 0) return; size /= sizeof(double); length *= size; parallel::Threshold(threads); if(length*rows*cols/2 < threshold) threads=1; fftw::planThreads(threads); fftw_iodim dims[3]; dims[0].n=rows; dims[0].is=cols*length; dims[0].os=length; dims[1].n=cols; dims[1].is=length; dims[1].os=rows*length; dims[2].n=length; dims[2].is=1; dims[2].os=1; // A plan with rank=0 is a transpose. plan=fftw_plan_guru_r2r(0,NULL,3,dims,(double *) in,(double *) out, NULL,fftw::effort); } ~Transpose() { if(plan) fftw_destroy_plan(plan); } template void transpose(T *in, T *out=NULL) { if(!plan) return; if(!out) out=in; if(inplace ^ (out == in)) { std::cerr << "ERROR: Transpose " << inout << std::endl; exit(1); } fftw_execute_r2r(plan,(double *) in,(double*) out); } }; template class Threadtable { public: typedef std::map Table; size_t Lookup(Table& table, T key) { typename Table::iterator p=table.find(key); return p == table.end() ? 0 : p->second; } void Store(Table& threadtable, T key, size_t t) { threadtable[key]=t; } }; struct keytype { size_t nx; size_t M; size_t threads; bool inplace; keytype(size_t nx, size_t M, size_t threads, bool inplace) : nx(nx), M(M), threads(threads), inplace(inplace) {} }; struct keyless { bool operator()(const keytype& a, const keytype& b) const { return a.nx < b.nx || (a.nx == b.nx && (a.M < b.M || (a.M == b.M && (a.threads < b.threads || (a.threads == b.threads && a.inplace < b.inplace))))); } }; // Compute the complex Fourier transform of n complex values. // Before calling fft(), the arrays in and out (which may coincide) must be // allocated as Complex[n]. // // Out-of-place usage: // // fft1d Forward(n,-1,in,out); // Forward.fft(in,out); // // fft1d Backward(n,1,in,out); // Backward.fft(in,out); // // fft1d Backward(n,1,in,out); // Backward.fftNormalized(in,out); // True inverse of Forward.fft(out,in); // // In-place usage: // // fft1d Forward(n,-1); // Forward.fft(in); // // fft1d Backward(n,1); // Backward.fft(in); // class fft1d : public fftw { size_t nx; public: fft1d(size_t nx, int sign, Complex *in=NULL, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx,sign,threads), nx(nx) {Setup(in,out);} #ifdef __Array_h__ fft1d(int sign, const Array::array1& in, const Array::array1& out=Array::NULL1, size_t threads=maxthreads) : fftw(2*in.Nx(),sign,threads), nx(in.Nx()) {Setup(in,out);} #endif fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_1d(nx,(fftw_complex *) in,(fftw_complex *) out, sign,effort); } }; template class fftwblock : public virtual fftw, public virtual Threadtable { public: int nx; size_t M; size_t istride,ostride; size_t idist,odist; fftw_plan plan1,plan2; size_t T,Q,R; fftwblock() : plan1(NULL), plan2(NULL) {} fftwblock(size_t nx, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist) : fftw(), nx(nx), M(M), istride(istride), ostride(ostride), idist(Dist(nx,istride,idist)), odist(Dist(nx,ostride,odist)), plan1(NULL), plan2(NULL) {} void init(Complex *in, Complex *out, size_t Threads, Table& threadtable) { T=1; Q=M; R=0; if(Threads > M && M > 1) Threads=M; threads=Threads; Setup(in,out); Threads=threads; size_t T0=Threads; if(T0 > 1) { size_t nxp=nx/2+1; size_t olength=0; size_t ilength=0; if(typeid(I) == typeid(double)) { ilength=nx; olength=nxp; } if(typeid(O) == typeid(double)) { ilength=nxp; olength=nx; } if(!inplace || (ostride*olength*sizeof(O) <= idist*sizeof(I) && odist*sizeof(O) >= istride*ilength*sizeof(I))) { T=T0; Q=T > 0 ? M/T : 0; R=M-Q*T; size_t data=Lookup(threadtable,keytype(nx,M,Threads,inplace)); if(data == 1) T0=1; else { fftw_plan planFFTW=plan; threads=1; Setup(in,out); plan1=plan; if(data == T) { plan=NULL; return; } plan=planFFTW; } } else T0=1; } if(T0 == 1 || time(in,out)) { // Use FFTW's multithreading T=1; if(plan1) { fftw_destroy_plan(plan1); plan1=NULL; if(plan2) { fftw_destroy_plan(plan2); plan2=NULL; } threads=Threads; Store(threadtable,keytype(nx,M,Threads,inplace),T); } } else { // Do the multithreading ourselves T=T0; threads=T; Store(threadtable,keytype(nx,M,Threads,inplace),T); } } bool time(Complex *in, Complex *out) { utils::statistics S(true),ST(true); utils::statistics medianS(true),medianST(true); double eps=0.02; size_t T0=T; do { T=1; // FFTW utils::cpuTimer C; inplace ? fftNormalized(in,out) : fft(in,out); S.add(C.nanoseconds()); T=T0; // BLOCK utils::cpuTimer CT; inplace ? fftNormalized(in,out) : fft(in,out); ST.add(CT.nanoseconds()); if(S.count() >= 4 && ST.min() >= S.max()) return true; if(S.count() >= 4 && S.min() >= ST.max()) return false; medianS.add(S.median()); medianST.add(ST.median()); } while(S.count() < 5 || medianS.stderror() > eps*medianS.mean() || medianST.stderror() > eps*medianST.mean()); return S.median() <= ST.median(); } fftw_plan Plan(int Q, fftw_complex *in, fftw_complex *out) { return fftw_plan_many_dft(1,&nx,Q,in,NULL,istride,idist, out,NULL,ostride,odist,sign,effort); } fftw_plan Plan(int Q, double *in, fftw_complex *out) { return fftw_plan_many_dft_r2c(1,&nx,Q,in,NULL,istride,idist, out,NULL,ostride,odist,effort); } fftw_plan Plan(int Q, fftw_complex *in, double *out) { return fftw_plan_many_dft_c2r(1,&nx,Q,in,NULL,istride,idist, out,NULL,ostride,odist,effort); } fftw_plan Plan(Complex *in, Complex *out) { if(R > 0) { plan2=Plan(Q+1,(I *) in,(O *) out); if(!plan2) return NULL; } return Plan(Q,(I *) in,(O *) out); } void Execute(fftw_plan plan, fftw_complex *in, fftw_complex *out) { fftw_execute_dft(plan,in,out); } void Execute(fftw_plan plan, double *in, fftw_complex *out) { fftw_execute_dft_r2c(plan,in,out); } void Execute(fftw_plan plan, fftw_complex *in, double *out) { fftw_execute_dft_c2r(plan,in,out); } void Execute(Complex *in, Complex *out, bool=false) { if(T == 1) Execute(plan,(I *) in,(O *) out); else { size_t extra=T-R; #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(T) #endif for(size_t i=0; i < T; ++i) { size_t iQ=i*Q; if(i < extra) Execute(plan1,(I *) in+iQ*idist,(O *) out+iQ*odist); else { size_t offset=iQ+i-extra; Execute(plan2,(I *) in+offset*idist,(O *) out+offset*odist); } } } } size_t Threads() {return std::max(T,threads);} ~fftwblock() { if(plan1) fftw_destroy_plan(plan1); if(plan2) fftw_destroy_plan(plan2); } }; class Mfft1d : public fftwblock, public virtual Threadtable { static Table threadtable; public: Mfft1d(size_t nx, int sign, size_t M=1, Complex *in=NULL, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*((nx-1)+(M-1)*nx+1),sign,threads,nx), fftwblock(nx,M,1,1,nx,nx) { init(in,out,threads,threadtable); } Mfft1d(size_t nx, int sign, size_t M, size_t stride=1, size_t dist=0, Complex *in=NULL, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*((nx-1)*stride+(M-1)*Dist(nx,stride,dist)+1),sign,threads,nx), fftwblock (nx,M,stride,stride,dist,dist) { init(in,out,threads,threadtable); } Mfft1d(size_t nx, int sign, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in, Complex *out, size_t threads=maxthreads): fftw(std::max(2*((nx-1)*istride+(M-1)*Dist(nx,istride,idist)+1), 2*((nx-1)*ostride+(M-1)*Dist(nx,ostride,odist)+1)),sign, threads,nx), fftwblock(nx,M,istride,ostride,idist,odist) { init(in,out,threads,threadtable); } }; // Compute the complex Fourier transform of M complex vectors, each of // length n. // Before calling fft(), the arrays in and out (which may coincide) must be // allocated as Complex[M*n]. // // Out-of-place usage: // // mfft1d Forward(n,-1,M,stride,dist,in,out); // Forward.fft(in,out); // // mfft1d Forward(n,-1,M,istride,ostride,idist,odist,in,out); // Forward.fft(in,out); // // In-place usage: // // mfft1d Forward(n,-1,M,stride,dist); // Forward.fft(in); // // // // Notes: // stride is the spacing between the elements of each Complex vector; // dist is the spacing between the first elements of the vectors. // // class mfft1d { bool single; fft1d *fft1; Mfft1d *fftm; public: mfft1d(size_t nx, int sign, size_t M=1, Complex *in=NULL, Complex *out=NULL, size_t threads=fftw::maxthreads) : single(M == 1) { if(single) fft1=new fft1d(nx,sign,in,out,threads); else fftm=new Mfft1d(nx,sign,M,in,out,threads); } mfft1d(size_t nx, int sign, size_t M, size_t stride=1, size_t dist=0, Complex *in=NULL, Complex *out=NULL, size_t threads=fftw::maxthreads) : single(M == 1 && stride == 1) { if(single) fft1=new fft1d(nx,sign,in,out,threads); else fftm=new Mfft1d(nx,sign,M,stride,dist,in,out,threads); } mfft1d(size_t nx, int sign, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in, Complex *out, size_t threads=fftw::maxthreads) : single(M == 1 && istride == 1 && ostride == 1) { if(single) fft1=new fft1d(nx,sign,in,out,threads); else fftm=new Mfft1d(nx,sign,M,istride,ostride,idist,odist,in,out,threads); } size_t Threads() { return single ? fft1->Threads() : fftm->Threads(); } template void fft(I in) { single ? fft1->fft(in) : fftm->fft(in); } template void fft(I in, O out) { single ? fft1->fft(in,out) : fftm->fft(in,out); } template void fftNormalized(I in) { single ? fft1->fftNormalized(in) : fftm->fftNormalized(in); } template void fftNormalized(I in, O out) { single ? fft1->fftNormalized(in,out) : fftm->fftNormalized(in,out); } template void Normalize(O out) { single ? fft1->Normalize(out) : fftm->Normalize(out); } ~mfft1d() { if(single) delete fft1; else delete fftm; } }; // Compute the complex Fourier transform of n real values, using phase sign -1. // Before calling fft(), the array in must be allocated as double[n] and // the array out must be allocated as Complex[n/2+1]. The arrays in and out // may coincide, allocated as Complex[n/2+1]. // // Out-of-place usage: // // rcfft1d Forward(n,in,out); // Forward.fft(in,out); // // In-place usage: // // rcfft1d Forward(n); // Forward.fft(out); // // Notes: // in contains the n real values stored as a Complex array; // out contains the first n/2+1 Complex Fourier values. // class rcfft1d : public fftw { size_t nx; public: rcfft1d(size_t nx, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*(nx/2+1),-1,threads,nx), nx(nx) {Setup(out,(double*) NULL);} rcfft1d(size_t nx, double *in, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*(nx/2+1),-1,threads,nx), nx(nx) {Setup(in,out);} fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_r2c_1d(nx,(double *) in,(fftw_complex *) out, effort); } void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out); } }; // Compute the real inverse Fourier transform of the n/2+1 Complex values // corresponding to the non-negative part of the frequency spectrum, using // phase sign +1. // Before calling fft(), the array in must be allocated as Complex[n/2+1] // and the array out must be allocated as double[n]. The arrays in and out // may coincide, allocated as Complex[n/2+1]. // // Out-of-place usage (input destroyed): // // crfft1d Backward(n,in,out); // Backward.fft(in,out); // // In-place usage: // // crfft1d Backward(n); // Backward.fft(in); // // Notes: // in contains the first n/2+1 Complex Fourier values. // out contains the n real values stored as a Complex array; // class crfft1d : public fftw { size_t nx; public: crfft1d(size_t nx, double *out=NULL, size_t threads=maxthreads) : fftw(2*(nx/2+1),1,threads,nx), nx(nx) {Setup(out);} crfft1d(size_t nx, Complex *in, double *out=NULL, size_t threads=maxthreads) : fftw(realsize(nx,Inplace(in,out)),1,threads,nx), nx(nx) {Setup(in,out);} fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_c2r_1d(nx,(fftw_complex *) in,(double *) out,effort); } void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out); } }; class Mrcfft1d : public fftwblock, public virtual Threadtable { static Table threadtable; public: Mrcfft1d(size_t nx, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, double *in=NULL, Complex *out=NULL, size_t threads=maxthreads) : fftw(Doubles(nx,M,istride,ostride,idist,odist,Inplace(in,out)).csize, -1,threads,nx), fftwblock (nx,M,istride,ostride,idist,odist) { init((Complex *) in,out,threads,threadtable); } void Normalize(Complex *out) { fftw::Normalize(nx/2+1,M,ostride,odist,out); } void fftNormalized(double *in, Complex *out=NULL) { fftw::fftNormalized(nx/2+1,M,ostride,odist,in,out); } }; // Compute the real Fourier transform of M real vectors, each of length n, // using phase sign -1. Before calling fft(), the array in must be // allocated as double[M*n] and the array out must be allocated as // Complex[M*(n/2+1)]. The arrays in and out may coincide, // allocated as Complex[M*(n/2+1)]. // // Out-of-place usage: // // mrcfft1d Forward(n,M,istride,ostride,idist,odist,in,out); // Forward.fft(in,out); // // In-place usage: // // mrcfft1d Forward(n,M,istride,ostride,idist,odist); // Forward.fft(out); // // Notes: // istride is the spacing between the elements of each real vector; // ostride is the spacing between the elements of each Complex vector; // idist is the spacing between the first elements of the real vectors; // odist is the spacing between the first elements of the Complex vectors; // in contains the n real values stored as a Complex array; // out contains the first n/2+1 Complex Fourier values. // class mrcfft1d { bool single; rcfft1d *fft1; Mrcfft1d *fftm; public: mrcfft1d(size_t nx, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, double *in=NULL, Complex *out=NULL, size_t threads=fftw::maxthreads) : single(M == 1 && istride == 1 && ostride == 1) { if(single) fft1=new rcfft1d(nx,in,out,threads); else fftm=new Mrcfft1d(nx,M,istride,ostride,idist,odist,in,out,threads); } size_t Threads() { return single ? fft1->Threads() : fftm->Threads(); } template void fft(I in) { single ? fft1->fft(in) : fftm->fft(in); } template void fft(I in, O out) { single ? fft1->fft(in,out) : fftm->fft(in,out); } void Normalize(Complex *out) { single ? fft1->Normalize(out) : fftm->Normalize(out); } template void fftNormalized(I in) { single ? fft1->fftNormalized(in) : fftm->fftNormalized(in); } template void fftNormalized(I in, O out=NULL) { single ? fft1->fftNormalized(in,out) : fftm->fftNormalized(in,out); } ~mrcfft1d() { if(single) delete fft1; else delete fftm; } }; class Mcrfft1d : public fftwblock, public virtual Threadtable { static Table threadtable; public: Mcrfft1d(size_t nx, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in=NULL, double *out=NULL, size_t threads=maxthreads) : fftw(Doubles(nx,M,ostride,istride,odist,idist,Inplace(in,out)).rsize, 1,threads,nx), fftwblock (nx,M,istride,ostride,idist,odist) { init(in,(Complex *) out,threads,threadtable); } void Normalize(double *out) { fftw::Normalize(nx,M,ostride,odist,out); } void fftNormalized(Complex *in, double *out=NULL) { fftw::fftNormalized(nx,M,ostride,odist,in,out); } }; // Compute the real inverse Fourier transform of M complex vectors, each of // length n/2+1, corresponding to the non-negative parts of the frequency // spectra, using phase sign +1. Before calling fft(), the array in must be // allocated as Complex[M*(n/2+1)] and the array out must be allocated as // double[M*n]. The arrays in and out may coincide, // allocated as Complex[M*(n/2+1)]. // // Out-of-place usage (input destroyed): // // mcrfft1d Backward(n,M,istride,ostride,idist,odist,in,out); // Backward.fft(in,out); // // In-place usage: // // mcrfft1d Backward(n,M,istride,ostride,idist,odist); // Backward.fft(out); // // Notes: // stride is the spacing between the elements of each Complex vector; // dist is the spacing between the first elements of the vectors; // in contains the first n/2+1 Complex Fourier values; // out contains the n real values stored as a Complex array. // class mcrfft1d { bool single; crfft1d *fft1; Mcrfft1d *fftm; public: mcrfft1d(size_t nx, size_t M, size_t istride, size_t ostride, size_t idist, size_t odist, Complex *in=NULL, double *out=NULL, size_t threads=fftw::maxthreads) : single(M == 1 && istride == 1 && ostride == 1) { if(single) fft1=new crfft1d(nx,in,out,threads); else fftm=new Mcrfft1d(nx,M,istride,ostride,idist,odist,in,out,threads); } size_t Threads() { return single ? fft1->Threads() : fftm->Threads(); } template void fft(I in) { single ? fft1->fft(in) : fftm->fft(in); } template void fft(I in, O out) { single ? fft1->fft(in,out) : fftm->fft(in,out); } void Normalize(double *out) { single ? fft1->Normalize(out) : fftm->Normalize(out); } template void fftNormalized(I in) { single ? fft1->fftNormalized(in) : fftm->fftNormalized(in); } template void fftNormalized(I in, O out=NULL) { single ? fft1->fftNormalized(in,out) : fftm->fftNormalized(in,out); } ~mcrfft1d() { if(single) delete fft1; else delete fftm; } }; // Compute the complex two-dimensional Fourier transform of nx times ny // complex values. Before calling fft(), the arrays in and out (which may // coincide) must be allocated as Complex[nx*ny]. // // Out-of-place usage: // // fft2d Forward(nx,ny,-1,in,out); // Forward.fft(in,out); // // fft2d Backward(nx,ny,1,in,out); // Backward.fft(in,out); // // fft2d Backward(nx,ny,1,in,out); // Backward.fftNormalized(in,out); // True inverse of Forward.fft(out,in); // // In-place usage: // // fft2d Forward(nx,ny,-1); // Forward.fft(in); // // fft2d Backward(nx,ny,1); // Backward.fft(in); // // Note: // in[ny*i+j] contains the ny Complex values for each i=0,...,nx-1. // class fft2d : public fftw { size_t nx; size_t ny; public: fft2d(size_t nx, size_t ny, int sign, Complex *in=NULL, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx*ny,sign,threads), nx(nx), ny(ny) {Setup(in,out);} #ifdef __Array_h__ fft2d(int sign, const Array::array2& in, const Array::array2& out=Array::NULL2, size_t threads=maxthreads) : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()) { Setup(in,out); } #endif fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_2d(nx,ny,(fftw_complex *) in,(fftw_complex *) out, sign,effort); } void Execute(Complex *in, Complex *out, bool=false) { fftw_execute_dft(plan,(fftw_complex *) in,(fftw_complex *) out); } }; // Compute the complex two-dimensional Fourier transform of nx times ny real // values, using phase sign -1. // Before calling fft(), the array in must be allocated as double[nx*ny] and // the array out must be allocated as Complex[nx*(ny/2+1)]. The arrays in // and out may coincide, allocated as Complex[nx*(ny/2+1)]. // // Out-of-place usage: // // rcfft2d Forward(nx,ny,in,out); // Forward.fft(in,out); // Origin of Fourier domain at (0,0) // Forward.fft0(in,out); // Origin of Fourier domain at (nx/2,0); // input destroyed. // // In-place usage: // // rcfft2d Forward(nx,ny); // Forward.fft(in); // Origin of Fourier domain at (0,0) // Forward.fft0(in); // Origin of Fourier domain at (nx/2,0) // // Notes: // in contains the nx*ny real values stored as a Complex array; // out contains the upper-half portion (ky >= 0) of the Complex transform. // class rcfft2d : public fftw { size_t nx; size_t ny; public: rcfft2d(size_t nx, size_t ny, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} rcfft2d(size_t nx, size_t ny, double *in, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx*(ny/2+1),-1,threads,nx*ny), nx(nx), ny(ny) { Setup(in,out); } fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_r2c_2d(nx,ny,(double *) in,(fftw_complex *) out, effort); } void Execute(Complex *in, Complex *out, bool shift=false) { if(shift) { if(inplace) Shift(in,nx,ny,threads); else Shift((double *) in,nx,ny,threads); } fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out); } // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { size_t nyp=ny/2+1; if(nx % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t j=0; j < nyp; ++j) f[j]=0.0; if(ny % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; ++i) f[(i+1)*nyp-1]=0.0; } }; // Compute the real two-dimensional inverse Fourier transform of the // nx*(ny/2+1) Complex values corresponding to the spectral values in the // half-plane ky >= 0, using phase sign +1. // Before calling fft(), the array in must be allocated as // Complex[nx*(ny/2+1)] and the array out must be allocated as // double[nx*ny]. The arrays in and out may coincide, // allocated as Complex[nx*(ny/2+1)]. // // Out-of-place usage (input destroyed): // // crfft2d Backward(nx,ny,in,out); // Backward.fft(in,out); // Origin of Fourier domain at (0,0) // Backward.fft0(in,out); // Origin of Fourier domain at (nx/2,0) // // In-place usage: // // crfft2d Backward(nx,ny); // Backward.fft(in); // Origin of Fourier domain at (0,0) // Backward.fft0(in); // Origin of Fourier domain at (nx/2,0) // // Notes: // in contains the upper-half portion (ky >= 0) of the Complex transform; // out contains the nx*ny real values stored as a Complex array. // class crfft2d : public fftw { size_t nx; size_t ny; public: crfft2d(size_t nx, size_t ny, double *out=NULL, size_t threads=maxthreads) : fftw(2*nx*(ny/2+1),1,threads,nx*ny), nx(nx), ny(ny) {Setup(out);} crfft2d(size_t nx, size_t ny, Complex *in, double *out=NULL, size_t threads=maxthreads) : fftw(nx*realsize(ny,Inplace(in,out)),1,threads,nx*ny), nx(nx), ny(ny) { Setup(in,out); } fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_c2r_2d(nx,ny,(fftw_complex *) in,(double *) out, effort); } void Execute(Complex *in, Complex *out, bool shift=false) { fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out); if(shift) { if(inplace) Shift(out,nx,ny,threads); else Shift((double *) out,nx,ny,threads); } } // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { size_t nyp=ny/2+1; if(nx % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t j=0; j < nyp; ++j) f[j]=0.0; if(ny % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; ++i) f[(i+1)*nyp-1]=0.0; } }; // Compute the complex three-dimensional Fourier transform of // nx times ny times nz complex values. Before calling fft(), the arrays in // and out (which may coincide) must be allocated as Complex[nx*ny*nz]. // // Out-of-place usage: // // fft3d Forward(nx,ny,nz,-1,in,out); // Forward.fft(in,out); // // fft3d Backward(nx,ny,nz,1,in,out); // Backward.fft(in,out); // // fft3d Backward(nx,ny,nz,1,in,out); // Backward.fftNormalized(in,out); // True inverse of Forward.fft(out,in); // // In-place usage: // // fft3d Forward(nx,ny,nz,-1); // Forward.fft(in); // // fft3d Backward(nx,ny,nz,1); // Backward.fft(in); // // Note: // in[nz*(ny*i+j)+k] contains the (i,j,k)th Complex value, // indexed by i=0,...,nx-1, j=0,...,ny-1, and k=0,...,nz-1. // class fft3d : public fftw { size_t nx; size_t ny; size_t nz; public: fft3d(size_t nx, size_t ny, size_t nz, int sign, Complex *in=NULL, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx*ny*nz,sign,threads), nx(nx), ny(ny), nz(nz) {Setup(in,out);} #ifdef __Array_h__ fft3d(int sign, const Array::array3& in, const Array::array3& out=Array::NULL3, size_t threads=maxthreads) : fftw(2*in.Size(),sign,threads), nx(in.Nx()), ny(in.Ny()), nz(in.Nz()) {Setup(in,out);} #endif fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_3d(nx,ny,nz,(fftw_complex *) in, (fftw_complex *) out, sign, effort); } }; // Compute the complex two-dimensional Fourier transform of // nx times ny times nz real values, using phase sign -1. // Before calling fft(), the array in must be allocated as double[nx*ny*nz] // and the array out must be allocated as Complex[nx*ny*(nz/2+1)]. The // arrays in and out may coincide, allocated as Complex[nx*ny*(nz/2+1)]. // // Out-of-place usage: // // rcfft3d Forward(nx,ny,nz,in,out); // Forward.fft(in,out); // Origin of Fourier domain at (0,0) // Forward.fft0(in,out); // Origin of Fourier domain at (nx/2,ny/2,0); // input destroyed // In-place usage: // // rcfft3d Forward(nx,ny,nz); // Forward.fft(in); // Origin of Fourier domain at (0,0) // Forward.fft0(in); // Origin of Fourier domain at (nx/2,ny/2,0) // // Notes: // in contains the nx*ny*nz real values stored as a Complex array; // out contains the upper-half portion (kz >= 0) of the Complex transform. // class rcfft3d : public fftw { size_t nx; size_t ny; size_t nz; public: rcfft3d(size_t nx, size_t ny, size_t nz, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx*ny*(nz/2+1),-1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) { Setup(out); } rcfft3d(size_t nx, size_t ny, size_t nz, double *in, Complex *out=NULL, size_t threads=maxthreads) : fftw(2*nx*ny*(nz/2+1),-1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) {Setup(in,out);} fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_r2c_3d(nx,ny,nz,(double *) in,(fftw_complex *) out, effort); } void Execute(Complex *in, Complex *out, bool shift=false) { if(shift) { if(inplace) Shift(in,nx,ny,nz,threads); else Shift((double *) in,nx,ny,nz,threads); } fftw_execute_dft_r2c(plan,(double *) in,(fftw_complex *) out); } // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { size_t nzp=nz/2+1; size_t yz=ny*nzp; if(nx % 2 == 0) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t k=0; k < yz; ++k) f[k]=0.0; } if(ny % 2 == 0) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; ++i) { size_t iyz=i*yz; for(size_t k=0; k < nzp; ++k) f[iyz+k]=0.0; } } if(nz % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; ++i) for(size_t j=0; j < ny; ++j) f[i*yz+(j+1)*nzp-1]=0.0; } }; // Compute the real two-dimensional inverse Fourier transform of the // nx*ny*(nz/2+1) Complex values corresponding to the spectral values in the // half-plane kz >= 0, using phase sign +1. // Before calling fft(), the array in must be allocated as // Complex[nx*ny*(nz+1)/2] and the array out must be allocated as // double[nx*ny*nz]. The arrays in and out may coincide, // allocated as Complex[nx*ny*(nz/2+1)]. // // Out-of-place usage (input destroyed): // // crfft3d Backward(nx,ny,nz,in,out); // Backward.fft(in,out); // Origin of Fourier domain at (0,0) // Backward.fft0(in,out); // Origin of Fourier domain at (nx/2,ny/2,0) // // In-place usage: // // crfft3d Backward(nx,ny,nz); // Backward.fft(in); // Origin of Fourier domain at (0,0) // Backward.fft0(in); // Origin of Fourier domain at (nx/2,ny/2,0) // // Notes: // in contains the upper-half portion (kz >= 0) of the Complex transform; // out contains the nx*ny*nz real values stored as a Complex array. // class crfft3d : public fftw { size_t nx; size_t ny; size_t nz; public: crfft3d(size_t nx, size_t ny, size_t nz, double *out=NULL, size_t threads=maxthreads) : fftw(2*nx*ny*(nz/2+1),1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) {Setup(out);} crfft3d(size_t nx, size_t ny, size_t nz, Complex *in, double *out=NULL, size_t threads=maxthreads) : fftw(nx*ny*(realsize(nz,Inplace(in,out))),1,threads,nx*ny*nz), nx(nx), ny(ny), nz(nz) {Setup(in,out);} fftw_plan Plan(Complex *in, Complex *out) { return fftw_plan_dft_c2r_3d(nx,ny,nz,(fftw_complex *) in,(double *) out, effort); } void Execute(Complex *in, Complex *out, bool shift=false) { fftw_execute_dft_c2r(plan,(fftw_complex *) in,(double *) out); if(shift) { if(inplace) Shift(out,nx,ny,nz,threads); else Shift((double *) out,nx,ny,nz,threads); } } // Set Nyquist modes of even shifted transforms to zero. void deNyquist(Complex *f) { size_t nzp=nz/2+1; size_t yz=ny*nzp; if(nx % 2 == 0) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t k=0; k < yz; ++k) f[k]=0.0; } if(ny % 2 == 0) { #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; ++i) { size_t iyz=i*yz; for(size_t k=0; k < nzp; ++k) f[iyz+k]=0.0; } } if(nz % 2 == 0) #ifndef FFTWPP_SINGLE_THREAD #pragma omp parallel for num_threads(threads) #endif for(size_t i=0; i < nx; ++i) for(size_t j=0; j < ny; ++j) f[i*yz+(j+1)*nzp-1]=0.0; } }; } #endif