Commit 1ef5617e authored by Nathanael Schaeffer @lgit-1177's avatar Nathanael Schaeffer @lgit-1177
Browse files

New : introducing sht_gauss_fly to force on-the-fly computations only.

parent c9687149
......@@ -44,11 +44,11 @@ double *ct, *st, *st_1; // cos(theta), sin(theta), 1/sin(theta);
double *wg = NULL; // gauss weights (if current grid is a gauss grid).
int *tm; // start theta value for SH (polar optimization : near the poles the legendre polynomials go to zero for high m's)
double** ylm; // matrix for inverse transform (synthesis)
double** zlm; // matrix for direct transform (analysis)
double** ylm = NULL; // matrix for inverse transform (synthesis)
double** zlm = NULL; // matrix for direct transform (analysis)
#ifndef SHT_SCALAR_ONLY
struct DtDp** dylm; // theta and phi derivative of Ylm matrix
struct DtDp** dzlm;
struct DtDp** dylm = NULL; // theta and phi derivative of Ylm matrix
struct DtDp** dzlm = NULL;
#endif
double** ykm_dct; // matrix for inverse transform (synthesis) using dct.
......@@ -139,6 +139,37 @@ int fft_int(int n, int fmax)
return n;
}
/// \code return (mmax+1)*(lmax+1) - mres*(mmax*(mmax+1))/2; \endcode */
/// \ingroup init
int nlm_calc(int lmax, int mmax, int mres)
{
if (mmax*mres > lmax) mmax = lmax/mres;
return( (mmax+1)*(lmax+1) - mres*(mmax*(mmax+1))/2 ); // this is wrong if lmax < mmax*mres
}
/*
long int nlm_calc_eo(long int lmax, long int mmax, long int mres) {
long int im,l,lm;
for (im=0, lm=0; im<=mmax; im++) {
if (im*mres <= lmax) lm += (lmax+2-im*mres)/2;
}
return lm;
}
*/
/// returns an aproximation of the memory usage in mega bytes.
/// \ingroup init
double sht_mem_size(int lmax, int mmax, int mres, int nlat)
{
double s = 1./(1024*1024);
s *= (nlat+1) * sizeof(double) * nlm_calc(lmax, mmax, mres);
#ifndef SHT_SCALAR_ONLY
s *= 3.0; // scalar + vector arrays.
#endif
return s;
}
/*
SHT FUNCTIONS
*/
......@@ -319,7 +350,7 @@ void SHqst_to_lat(complex double *Qlm, complex double *Slm, complex double *Tlm,
INITIALIZATION FUNCTIONS
*/
void alloc_SHTarrays()
void alloc_SHTarrays(int on_the_fly)
{
long int im,m, l0;
long int lstride;
......@@ -327,6 +358,8 @@ void alloc_SHTarrays()
l0 = ((NLAT+1)>>1)*2; // round up to even
ct = (double *) fftw_malloc(sizeof(double) * l0*3);
st = ct + l0; st_1 = ct + 2*l0;
if (on_the_fly == 0) {
ylm = (double **) fftw_malloc( sizeof(double *) * (MMAX+1)*3 );
zlm = ylm + (MMAX+1); ykm_dct = ylm + (MMAX+1)*2;
#ifndef SHT_SCALAR_ONLY
......@@ -357,43 +390,27 @@ void alloc_SHTarrays()
#if SHT_VERBOSE > 1
printf(" Memory used for Ylm and Zlm matrices = %.3f Mb x2\n",3.0*sizeof(double)*NLM*NLAT_2/(1024.*1024.));
#endif
}
}
void free_SHTarrays()
{
#ifndef SHT_SCALAR_ONLY
fftw_free(dzlm[0]);
#endif
fftw_free(zlm[0]);
if (dzlm != NULL) fftw_free(dzlm[0]);
#endif
if (zlm != NULL) fftw_free(zlm[0]);
#ifndef SHT_SCALAR_ONLY
fftw_free(dylm[0]);
if (dylm != NULL) fftw_free(dylm[0]);
#endif
fftw_free(ylm[0]);
if (ylm != NULL) fftw_free(ylm[0]);
#ifndef SHT_SCALAR_ONLY
fftw_free(dylm);
if (dylm != NULL) fftw_free(dylm);
#endif
fftw_free(ylm); fftw_free(ct);
}
if (ylm != NULL) fftw_free(ylm);
/// \code return (mmax+1)*(lmax+1) - mres*(mmax*(mmax+1))/2; \endcode */
/// \ingroup init
int nlm_calc(int lmax, int mmax, int mres)
{
if (mmax*mres > lmax) mmax = lmax/mres;
return( (mmax+1)*(lmax+1) - mres*(mmax*(mmax+1))/2 ); // this is wrong if lmax < mmax*mres
fftw_free(ct);
}
/*
long int nlm_calc_eo(long int lmax, long int mmax, long int mres) {
long int im,l,lm;
for (im=0, lm=0; im<=mmax; im++) {
if (im*mres <= lmax) lm += (lmax+2-im*mres)/2;
}
return lm;
}
*/
/// Generates an equi-spaced theta grid including the poles, for synthesis only.
void EqualPolarGrid()
{
......@@ -659,6 +676,41 @@ double Find_Optimal_SHT()
return(tsum/tsum0); // returns ratio of "optimal" time over "no_dct" time
}
/// Sets the value tm[im] used for polar optimiation on-the-fly.
void PolarOptimize(double eps)
{
int im, m, l, it;
double v;
double y[LMAX+1];
for (im=0;im<=MMAX;im++) tm[im] = 0;
if (eps > 0.0) {
for (im=1;im<=MMAX;im++) {
m = im*MRES;
it = -1;
do {
it++;
legendre_sphPlm_array(LMAX, im, ct[it], y+m);
v = 0.0;
for (l=m; l<=LMAX; l++) {
double ya = fabs(y[l]);
if ( v < ya ) v = ya;
}
} while (v < eps);
tm[im] = it;
}
#if SHT_VERBOSE > 0
printf(" + polar optimization threshold = %.1e\n",eps);
#endif
#if SHT_VERBOSE > 1
printf(" tm[im]=");
for (im=0;im<=MMAX;im++)
printf(" %d",tm[im]);
printf("\n");
#endif
}
}
/// Perform some optimization on the SHT matrices.
void OptimizeMatrices(double eps)
......@@ -687,6 +739,7 @@ void OptimizeMatrices(double eps)
printf(" %d",tm[im]);
printf("\n");
#endif
for (im=1; im<=MMAX; im++) { // im >= 1
if (tm[im] > 0) { // we can remove the data corresponding to polar values.
m = im*MRES;
......@@ -750,7 +803,6 @@ void OptimizeMatrices(double eps)
#endif
}
/// Precompute the matrix for SH synthesis.
void init_SH_synth()
{
......@@ -778,13 +830,13 @@ void init_SH_synth()
/// Precompute matrices for SH synthesis and analysis, on a Gauss-Legendre grid.
void init_SH_gauss()
void init_SH_gauss(int on_the_fly)
{
double t,tmax;
long int it,im,m,l;
long double iylm_fft_norm;
long double xg[NLAT], wgl[NLAT]; // gauss points and weights.
wg = malloc((NLAT_2 +15) * sizeof(double)); // gauss weights, double precision.
if ((SHT_NORM == sht_fourpi)||(SHT_NORM == sht_schmidt)) {
......@@ -827,6 +879,8 @@ void init_SH_gauss()
}
#endif
if (on_the_fly != 0) return;
init_SH_synth();
// for analysis (decomposition, direct transform) : transpose and multiply by gauss weight and other normalizations.
......@@ -1560,6 +1614,8 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
int im,m,l,lm;
int layout, nspat;
int n_gauss = 0;
int on_the_fly = 0;
int quick_init = 0;
if (nl_order <= 0) nl_order = SHT_DEFAULT_NL_ORDER;
/* shtns.lshift = 0;
......@@ -1571,8 +1627,14 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
layout = flags & 0xFFFF00;
flags = flags & 255; // clear higher bits.
if ((flags == sht_reg_poles)||(flags == sht_quick_init))
fftw_plan_mode = FFTW_ESTIMATE; // quick fftw init
switch (flags) {
case sht_gauss_fly : flags = sht_gauss; on_the_fly = 1;
break;
case sht_quick_init : flags = sht_gauss;
case sht_reg_poles : quick_init = 1; fftw_plan_mode = FFTW_ESTIMATE; // quick fftw init.
break;
default : break;
}
if (*nphi == 0) {
*nphi = fft_int((nl_order+1)*MMAX+1, 7); // required fft nodes
......@@ -1586,6 +1648,16 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
} else *nlat = n_gauss;
}
t = sht_mem_size(shtns.lmax, shtns.mmax, shtns.mres, *nlat);
if ( (t > SHTNS_MAX_MEMORY) && (on_the_fly == 0) ) {
on_the_fly = 1;
if ( (flags == sht_reg_dct) || (flags == sht_reg_fast) ) shtns_runerr("Memory limit exceeded, try using sht_gauss or increase SHTNS_MAX_MEMORY in sht_config.h");
if (flags != sht_reg_poles) {
flags = sht_gauss;
if (n_gauss > 0) *nlat = n_gauss;
}
}
// copy to global variables.
#ifdef SHT_AXISYM
shtns.nphi = 1;
......@@ -1596,7 +1668,7 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
NLAT_2 = (*nlat+1)/2; NLAT = *nlat;
if ((NLAT_2)*2 <= LMAX) shtns_runerr("NLAT_2*2 should be at least LMAX+1");
alloc_SHTarrays(); // allocate dynamic arrays
alloc_SHTarrays(on_the_fly); // allocate dynamic arrays
nspat = planFFT(layout); // initialize fftw
zlm_dct0 = NULL; // used as a flag.
......@@ -1656,7 +1728,7 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
free_SHTarrays();
*nlat = n_gauss;
NLAT_2 = (*nlat+1)/2; NLAT = *nlat;
alloc_SHTarrays();
alloc_SHTarrays(on_the_fly);
nspat = planFFT(layout); // fft must be replanned because NLAT has changed.
}
}
......@@ -1669,8 +1741,8 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
#endif
{
MTR_DCT = -1; // we do not use DCT !!!
init_SH_gauss();
OptimizeMatrices(eps);
init_SH_gauss(on_the_fly);
if (on_the_fly == 0) OptimizeMatrices(eps);
}
if (flags == sht_reg_poles)
{
......@@ -1680,17 +1752,21 @@ int shtns_precompute_auto(enum shtns_type flags, double eps, int nl_order, int *
#endif
fftw_free(zlm[0]); zlm[0] = NULL; // no inverse transform, mark as unused.
EqualPolarGrid();
init_SH_synth();
OptimizeMatrices(0.0);
if (on_the_fly == 0) {
init_SH_synth();
OptimizeMatrices(0.0);
}
}
if (flags == sht_quick_init)
{
MTR_DCT = -1; // we do not use DCT !!!
init_SH_gauss();
OptimizeMatrices(eps);
if (on_the_fly == 1) {
#if SHT_VERBOSE > 0
printf(" + using on-the-fly transforms.\n");
#endif
PolarOptimize(eps);
set_fly(); set_fly_l(); set_fly_m0(); // switch function pointers to "on-the-fly" functions.
}
if ((flags != sht_reg_poles)&&(flags != sht_quick_init)) {
if (quick_init == 0) {
t = SHT_error(); // compute SHT accuracy.
#if SHT_VERBOSE > 0
printf(" + SHT accuracy = %.3g\n",t);
......
......@@ -157,8 +157,8 @@ Q ((v2d*)(((double*)BrF)+k))[j] = re[j]+ro[j];
Q *((v2d*)(((double*)BrF)+NLAT-k-2-2*j)) = vxchg(re[j]-ro[j]);
S ((v2d*)(((double*)BtF)+k))[j] = te[j]+to[j];
S *((v2d*)(((double*)BtF)+NLAT-k-2-2*j)) = vxchg(te[j]-to[j]);
S ((v2d*)(((double*)BpF)+k))[j] = pe[j]+po[j];
S *((v2d*)(((double*)BpF)+NLAT-k-2-2*j)) = vxchg(pe[j]-po[j]);
T ((v2d*)(((double*)BpF)+k))[j] = pe[j]+po[j];
T *((v2d*)(((double*)BpF)+NLAT-k-2-2*j)) = vxchg(pe[j]-po[j]);
}
#endif
k+=2*NWAY;
......@@ -206,8 +206,8 @@ Q v2d re[NWAY], ro[NWAY];
#if _GCC_VEC_
Q v2d rex[NWAY], rox[NWAY];
#endif
S v2d te[NWAY], to[NWAY], dpe[NWAY], dpo[NWAY];
T v2d pe[NWAY], po[NWAY], dte[NWAY], dto[NWAY];
V v2d te[NWAY], to[NWAY], dpe[NWAY], dpo[NWAY];
V v2d pe[NWAY], po[NWAY], dte[NWAY], dto[NWAY];
for (int j=0; j<NWAY; j++) {
cost[j] = ((s2d*)(st+k))[j];
y0[j] = vdup(al[0]);
......
......@@ -53,7 +53,7 @@ void GEN(spat_to_SH_gauss,SUFFIX)(double *Vr, complex double *Qlm SUPARG)
void GEN(spat_to_SH_fly,SUFFIX)(double *Vr, complex double *Qlm SUPARG)
{
if (wg == NULL) return; // no gauss weights defined.
// if (wg == NULL) return; // no gauss weights defined.
#define NWAY 6
#include "spat_to_SH_fly.c"
#undef NWAY
......@@ -110,7 +110,7 @@ void GEN(spat_to_SH,SUFFIX)(double *Vr, complex double *Qlm SUPARG)
//@{
/// Backward \b Vector Spherical Harmonic Transform (synthesis).
void GEN(SHsphtor_to_spat,SUFFIX)(complex double *Slm, complex double *Tlm, double *Vt, double *Vp SUPARG)
void GEN(SHsphtor_to_spat_hyb,SUFFIX)(complex double *Slm, complex double *Tlm, double *Vt, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#include "SHst_to_spat.c"
......@@ -128,42 +128,79 @@ void GEN(SHsphtor_to_spat_fly,SUFFIX)(complex double *Slm, complex double *Tlm,
#ifndef SHT_AXISYM
/// Spheroidal only synthesis.
void GEN(SHsph_to_spat,SUFFIX)(complex double *Slm, double *Vt, double *Vp SUPARG)
void GEN(SHsph_to_spat_hyb,SUFFIX)(complex double *Slm, double *Vt, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#include "SHs_to_spat.c"
#endif
}
void GEN(SHsph_to_spat_fly,SUFFIX)(complex double *Slm, double *Vt, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#define NWAY 2
#include "SHs_to_spat_fly.c"
#undef NWAY
#endif
}
/// Toroidal only synthesis.
void GEN(SHtor_to_spat,SUFFIX)(complex double *Tlm, double *Vt, double *Vp SUPARG)
void GEN(SHtor_to_spat_hyb,SUFFIX)(complex double *Tlm, double *Vt, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#include "SHt_to_spat.c"
#endif
}
void GEN(SHtor_to_spat_fly,SUFFIX)(complex double *Tlm, double *Vt, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#define NWAY 2
#include "SHt_to_spat_fly.c"
#undef NWAY
#endif
}
#else
/// Spheroidal m=0 only synthesis (results in theta component only).
void GEN(SHsph_to_spat,SUFFIX)(complex double *Slm, double *Vt SUPARG)
void GEN(SHsph_to_spat_hyb,SUFFIX)(complex double *Slm, double *Vt SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#include "SHs_to_spat.c"
#endif
}
void GEN(SHsph_to_spat_fly,SUFFIX)(complex double *Slm, double *Vt SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#define NWAY 2
#include "SHs_to_spat_fly.c"
#undef NWAY
#endif
}
/// Toroidal m=0 only synthesis (results in phi component only).
void GEN(SHtor_to_spat,SUFFIX)(complex double *Tlm, double *Vp SUPARG)
void GEN(SHtor_to_spat_hyb,SUFFIX)(complex double *Tlm, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#include "SHt_to_spat.c"
#endif
}
void GEN(SHtor_to_spat_fly,SUFFIX)(complex double *Tlm, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#define NWAY 2
#include "SHt_to_spat_fly.c"
#undef NWAY
#endif
}
#endif
/// \b Vector Spherical Harmonics Transform (analysis) : convert a spatial vector field (theta,phi components) to its spheroidal/toroidal spherical harmonic representation.
void GEN(spat_to_SHsphtor,SUFFIX)(double *Vt, double *Vp, complex double *Slm, complex double *Tlm SUPARG)
void GEN(spat_to_SHsphtor_hyb,SUFFIX)(double *Vt, double *Vp, complex double *Slm, complex double *Tlm SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#include "spat_to_SHst.c"
......@@ -172,6 +209,7 @@ void GEN(spat_to_SHsphtor,SUFFIX)(double *Vt, double *Vp, complex double *Slm, c
void GEN(spat_to_SHsphtor_fly,SUFFIX)(double *Vt, double *Vp, complex double *Slm, complex double *Tlm SUPARG)
{
// if (wg == NULL) return;
#ifndef SHT_SCALAR_ONLY
#define NWAY 2
#include "spat_to_SHst_fly.c"
......@@ -179,6 +217,60 @@ void GEN(spat_to_SHsphtor_fly,SUFFIX)(double *Vt, double *Vp, complex double *Sl
#endif
}
void (*GEN(spat_to_SHsphtor_ptr,SUFFIX))(double*, double*, complex double*, complex double* SUPARG) = &GEN(spat_to_SHsphtor_hyb,SUFFIX);
void (*GEN(SHsphtor_to_spat_ptr,SUFFIX))(complex double*, complex double*, double*, double* SUPARG) = &GEN(SHsphtor_to_spat_hyb,SUFFIX);
#ifndef SHT_AXISYM
void (*GEN(SHsph_to_spat_ptr,SUFFIX))(complex double*, double*, double* SUPARG) = &GEN(SHsph_to_spat_hyb,SUFFIX);
void (*GEN(SHtor_to_spat_ptr,SUFFIX))(complex double*, double*, double* SUPARG) = &GEN(SHtor_to_spat_hyb,SUFFIX);
#else
void (*GEN(SHsph_to_spat_ptr,SUFFIX))(complex double*, double* SUPARG) = &GEN(SHsph_to_spat_hyb,SUFFIX);
void (*GEN(SHtor_to_spat_ptr,SUFFIX))(complex double*, double* SUPARG) = &GEN(SHtor_to_spat_hyb,SUFFIX);
#endif
/// Backward \b Vector Spherical Harmonic Transform (synthesis).
void GEN(SHsphtor_to_spat,SUFFIX)(complex double *Slm, complex double *Tlm, double *Vt, double *Vp SUPARG)
{
(*GEN(SHsphtor_to_spat_ptr,SUFFIX))(Slm, Tlm, Vt, Vp SUPARG2);
return;
}
#ifndef SHT_AXISYM
/// Spheroidal only synthesis.
void GEN(SHsph_to_spat,SUFFIX)(complex double *Slm, double *Vt, double *Vp SUPARG)
{
(*GEN(SHsph_to_spat_ptr,SUFFIX))(Slm, Vt, Vp SUPARG2);
return;
}
/// Toroidal only synthesis.
void GEN(SHtor_to_spat,SUFFIX)(complex double *Tlm, double *Vt, double *Vp SUPARG)
{
(*GEN(SHtor_to_spat_ptr,SUFFIX))(Tlm, Vt, Vp SUPARG2);
return;
}
#else
/// Spheroidal m=0 only synthesis (results in theta component only).
void GEN(SHsph_to_spat,SUFFIX)(complex double *Slm, double *Vt SUPARG)
{
(*GEN(SHsph_to_spat_ptr,SUFFIX))(Slm, Vt SUPARG2);
return;
}
/// Toroidal m=0 only synthesis (results in phi component only).
void GEN(SHtor_to_spat,SUFFIX)(complex double *Tlm, double *Vp SUPARG)
{
(*GEN(SHtor_to_spat_ptr,SUFFIX))(Tlm, Vp SUPARG2);
return;
}
#endif
/// \b Vector Spherical Harmonics Transform (analysis) : convert a spatial vector field (theta,phi components) to its spheroidal/toroidal spherical harmonic representation.
void GEN(spat_to_SHsphtor,SUFFIX)(double *Vt, double *Vp, complex double *Slm, complex double *Tlm SUPARG)
{
(*GEN(spat_to_SHsphtor_ptr,SUFFIX))(Vt, Vp, Slm, Tlm SUPARG2);
return;
}
//@}
......@@ -194,7 +286,7 @@ void GEN(spat_to_SHsphtor_fly,SUFFIX)(double *Vt, double *Vp, complex double *Sl
/// \b 3D Vector Spherical Harmonics Transform (analysis) : convert a 3D vector field (r,theta,phi components) to its radial/spheroidal/toroidal spherical harmonic representation.
/// This is basically a shortcut to call both spat_to_SH* and spat_to_SHsphtor* but may be significantly faster.
void GEN(spat_to_SHqst,SUFFIX)(double *Vr, double *Vt, double *Vp, complex double *Qlm, complex double *Slm, complex double *Tlm SUPARG)
void GEN(spat_to_SHqst_hyb,SUFFIX)(double *Vr, double *Vt, double *Vp, complex double *Qlm, complex double *Slm, complex double *Tlm SUPARG)
{
#ifndef SHT_SCALAR_ONLY
#define SHT_3COMP
......@@ -205,6 +297,7 @@ void GEN(spat_to_SHqst,SUFFIX)(double *Vr, double *Vt, double *Vp, complex doubl
void GEN(spat_to_SHqst_fly,SUFFIX)(double *Vr, double *Vt, double *Vp, complex double *Qlm, complex double *Slm, complex double *Tlm SUPARG)
{
// if (wg == NULL) return;
#ifndef SHT_SCALAR_ONLY
#define SHT_3COMP
#define NWAY 2
......@@ -216,7 +309,7 @@ void GEN(spat_to_SHqst_fly,SUFFIX)(double *Vr, double *Vt, double *Vp, complex d
/// Backward \b 3D Vector Spherical Harmonic Transform (synthesis).
/// This is basically a shortcut to call both SH_to_spat* and SHsphtor_to spat* but may be significantly faster.
void GEN(SHqst_to_spat,SUFFIX)(complex double *Qlm, complex double *Slm, complex double *Tlm, double *Vr, double *Vt, double *Vp SUPARG)
void GEN(SHqst_to_spat_hyb,SUFFIX)(complex double *Qlm, complex double *Slm, complex double *Tlm, double *Vr, double *Vt, double *Vp SUPARG)
{
#ifndef SHT_SCALAR_ONLY
if (MTR_DCT >= 0) {
......@@ -241,8 +334,36 @@ void GEN(SHqst_to_spat_fly,SUFFIX)(complex double *Qlm, complex double *Slm, com
#endif
}
// function pointers.
void (*GEN(spat_to_SHqst_ptr,SUFFIX))(double*, double*, double*, complex double*, complex double*, complex double* SUPARG) = &GEN(spat_to_SHqst_hyb,SUFFIX);
void (*GEN(SHqst_to_spat_ptr,SUFFIX))(complex double*, complex double*, complex double*, double*, double*, double* SUPARG) = &GEN(SHqst_to_spat_hyb,SUFFIX);
void GEN(spat_to_SHqst,SUFFIX)(double *Vr, double *Vt, double *Vp, complex double *Qlm, complex double *Slm, complex double *Tlm SUPARG)
{
(*GEN(spat_to_SHqst_ptr,SUFFIX))(Vr, Vt, Vp, Qlm, Slm, Tlm SUPARG2);
return;
}
void GEN(SHqst_to_spat,SUFFIX)(complex double *Qlm, complex double *Slm, complex double *Tlm, double *Vr, double *Vt, double *Vp SUPARG)
{
(*GEN(SHqst_to_spat_ptr,SUFFIX))(Qlm, Slm, Tlm, Vr, Vt, Vp SUPARG2);
return;
}
//@}
void GEN(set_fly,SUFFIX)()
{
GEN(SH_to_spat_ptr,SUFFIX) = &GEN(SH_to_spat_fly, SUFFIX);
GEN(spat_to_SH_ptr,SUFFIX) = &GEN(spat_to_SH_fly, SUFFIX);
GEN(spat_to_SHsphtor_ptr,SUFFIX) = &GEN(spat_to_SHsphtor_fly,SUFFIX);
GEN(SHsphtor_to_spat_ptr,SUFFIX) = &GEN(SHsphtor_to_spat_fly,SUFFIX);
GEN(SHsph_to_spat_ptr,SUFFIX) = &GEN(SHsph_to_spat_fly,SUFFIX);
GEN(SHtor_to_spat_ptr,SUFFIX) = &GEN(SHtor_to_spat_fly,SUFFIX);
GEN(SHqst_to_spat_ptr,SUFFIX) = &GEN(SHqst_to_spat_fly, SUFFIX);
GEN(spat_to_SHqst_ptr,SUFFIX) = &GEN(spat_to_SHqst_fly, SUFFIX);
}
//@}
// Fortran 77 api
......
......@@ -23,10 +23,10 @@
/// \file sht_config.h compile-time configuration.
/// 0:no output, 1:output info to stdout, 2:more output (debug info), 3:also print fftw plans.
#define SHT_VERBOSE 2
#define SHT_VERBOSE 1
// defines the maximum amount of memory in megabytes that SHTns should use.
#define SHTNS_MAX_MEMORY 1024
#define SHTNS_MAX_MEMORY 2048
// if SHT_SCALAR_ONLY is defined, it will disable the vector transform (which saves some memory)
//#define SHT_SCALAR_ONLY
......
......@@ -48,3 +48,5 @@ c
PARAMETER (SHT_QUICK_INIT=4)
INTEGER SHT_REG_POLES
PARAMETER (SHT_REG_POLES=5)
INTEGER SHT_GAUSS_FLY
PARAMETER (SHT_GAUSS_FLY=6)
......@@ -34,7 +34,8 @@ enum shtns_type {
sht_reg_fast, ///< use fastest algorithm, on a <b>regular grid</b>, mixing dct and regular quadrature.
sht_reg_dct, ///< use pure dct algorithm, on a <b>regular grid</b>.
sht_quick_init, ///< gauss grid, with minimum initialization time (useful for pre/post-processing)
sht_reg_poles ///< use a <b>synthesis only</b> algo <b>including poles</b>, not suitable for computations. Useful for vizualisation.
sht_reg_poles, ///< use a <b>synthesis only</b> algo <b>including poles</b>, not suitable for computations. Useful for vizualisation.
sht_gauss_fly ///< legendre polynomials are recomputed on-the-fly for each transform (may be faster on some machines, saves memory and bandwidth).
};
/// structure containing useful information about the SHT.
......
......@@ -42,6 +42,8 @@ int NPHI = 0;
// number of SH iterations
long int SHT_ITER = 50; // do 50 iterations by default
// access to Gauss weights.
extern double *wg;
void write_vect(char *fn, double *vec, int N)
{
......@@ -176,13 +178,18 @@ int test_SHT_fly()
tcpu = clock() - tcpu;
ts = tcpu / (1000.*SHT_ITER);
tcpu = clock();
spat_to_SH_fly(Sh,Slm);
for (jj=1; jj< SHT_ITER; jj++) {
spat_to_SH_fly(Sh,Tlm);
if (wg == NULL) {
spat_to_SH(Sh,Slm);
ta = 0.0;
} else {
tcpu = clock();
spat_to_SH_fly(Sh,Slm);
for (jj=1; jj< SHT_ITER; jj++) {
spat_to_SH_fly(Sh,Tlm);
}
tcpu = clock() - tcpu;
ta = tcpu / (1000.*SHT_ITER);
}
tcpu = clock() - tcpu;
ta = tcpu / (1000.*SHT_ITER);
printf(" SHT time on-the-fly : \t synthesis = %f ms \t analysis = %f ms\n", ts, ta);