Commit 213cb50b authored by Nathanaël Schaeffer
Browse files

also test for accuracy in quick_init mode for small transforms + some cleanup.

parent d32957e1
......@@ -78,6 +78,9 @@ DOCUMENTATION:
CHANGE LOG:
-----------
* v3.4.2 (28 Jun 2020)
- fix critical bug leading to wrong analysis in some multiple-plan cases.
* v3.4.1 (22 Jun 2020)
- fix several bugs (segfaults and compilation issues), thanks to 3 reporters.
......
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.69 for SHTns 3.4.1.
# Generated by GNU Autoconf 2.69 for SHTns 3.4.2.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
......@@ -577,8 +577,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='SHTns'
PACKAGE_TARNAME='shtns'
PACKAGE_VERSION='3.4.1'
PACKAGE_STRING='SHTns 3.4.1'
PACKAGE_VERSION='3.4.2'
PACKAGE_STRING='SHTns 3.4.2'
PACKAGE_BUGREPORT=''
PACKAGE_URL='https://bitbucket.org/nschaeff/shtns'
......@@ -1253,7 +1253,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures SHTns 3.4.1 to adapt to many kinds of systems.
\`configure' configures SHTns 3.4.2 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
......@@ -1314,7 +1314,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of SHTns 3.4.1:";;
short | recursive ) echo "Configuration of SHTns 3.4.2:";;
esac
cat <<\_ACEOF
......@@ -1422,7 +1422,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
SHTns configure 3.4.1
SHTns configure 3.4.2
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
......@@ -1879,7 +1879,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by SHTns $as_me 3.4.1, which was
It was created by SHTns $as_me 3.4.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
......@@ -5957,7 +5957,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by SHTns $as_me 3.4.1, which was
This file was extended by SHTns $as_me 3.4.2, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
......@@ -6020,7 +6020,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
SHTns config.status 3.4.1
SHTns config.status 3.4.2
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
......
......@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ([2.62])
AC_INIT([SHTns],[3.4.1],[],[shtns],[https://bitbucket.org/nschaeff/shtns])
AC_INIT([SHTns],[3.4.2],[],[shtns],[https://bitbucket.org/nschaeff/shtns])
AC_LANG([C])
AC_CONFIG_SRCDIR([sht_init.c])
AC_CONFIG_HEADERS([sht_config.h])
......
......@@ -174,9 +174,9 @@ In this layout, increasing latitude are stored next to each other for each longi
That is \f$ A(\theta,\phi) \f$ = \c A[ip*NLAT + it] in C or \c A(it,ip) in Fortran.
Use \ref SHT_THETA_CONTIGUOUS to instruct \ref shtns_init to use this spatial data layout.
Additionally, \ref SHT_ALLOW_PADDING instructs to shtns to optimize the layout to avoid cache bank conflicts.
Additionally, \ref SHT_ALLOW_PADDING instructs shtns to optimize the layout to avoid cache bank conflicts.
This can lead to significant performance boost (from 1% to 50% depending on the architecture).
In that case, shtns_info#nlat_padded > shtns_inf#nlat and shtns_info#nspat > shtns_info#nlat * shtns_info*nphi to reflect the data layout.
In that case, shtns_info#nlat_padded > shtns_info#nlat and shtns_info#nspat > shtns_info#nlat * shtns_info#nphi to reflect the data layout.
\section native Native layout
......
......@@ -801,7 +801,7 @@ static void choose_best_sht(shtns_cfg shtns, int* nlp, int vector)
tcpu = clock();
t0 = get_time(shtns, nloop, 2, "", sht_func[SHT_STD][ref_alg][SHT_TYP_SSY], Slm, Tlm, Qlm, Sh, Th, Qh, LMAX);
tcpu = clock() - tcpu; tt = 1.e-6 * tcpu;
if (tt >= SHT_TIME_LIMIT) break; // we should not exceed 1 second
if (tt >= SHT_TIME_LIMIT) break; // we should not exceed some time-limit
t = get_time(shtns, nloop, 2, "", sht_func[SHT_STD][ref_alg][SHT_TYP_SSY], Slm, Tlm, Qlm, Sh, Th, Qh, LMAX);
r = fabs(2.0*(t-t0)/(t+t0));
#if SHT_VERBOSE > 1
......@@ -1289,7 +1289,7 @@ static int choose_nlat(int n)
*/
int shtns_set_grid_auto(shtns_cfg shtns, enum shtns_type flags, double eps, int nl_order, int *nlat, int *nphi)
{
double t, mem;
double t;
int im,m;
int layout;
int nloop = 0;
......@@ -1368,15 +1368,6 @@ int shtns_set_grid_auto(shtns_cfg shtns, enum shtns_type flags, double eps, int
}
}
mem = sht_mem_size(shtns->lmax, shtns->mmax, shtns->mres, *nlat);
t=mem; if (analys) t*=2; if (vector) t*=3;
#if SHT_VERBOSE > 1
if (verbose>1) printf("Memory required for precomputed matrices (estimate) : %.3f Mb\n",t);
#endif
if ( t > SHTNS_MAX_MEMORY ) { // huge transform has been requested
// if (t > 10*SHTNS_MAX_MEMORY) quick_init =1; // do not time such large transforms.
}
if (quick_init == 0) { // do not waste too much time finding optimal fftw.
//shtns->fftw_plan_mode = FFTW_EXHAUSTIVE; // defines the default FFTW planner mode.
shtns->fftw_plan_mode = FFTW_PATIENT; // defines the default FFTW planner mode.
......@@ -1385,14 +1376,6 @@ int shtns_set_grid_auto(shtns_cfg shtns, enum shtns_type flags, double eps, int
if (*nphi > 1024) shtns->fftw_plan_mode = FFTW_MEASURE;
} else {
shtns->fftw_plan_mode = FFTW_ESTIMATE;
if ((mem < 1.0) && (SHT_VERBOSE < 2)) shtns->nthreads = 1; // disable threads for small transforms (in quickinit mode).
}
if (flags == sht_auto) {
if ( ((nl_order>=2)&&(MMAX*MRES > LMAX/2)) || (*nlat < SHT_MIN_NLAT_DCT) || (*nlat & 1) || (*nlat <= LMAX+1) ) {
flags = sht_gauss; // avoid computing DCT stuff when it is not expected to be faster.
if (n_gauss > 0) *nlat = n_gauss;
}
}
if ((flags == sht_gauss)&&(*nlat <= shtns->lmax)) shtns_runerr("Nlat must be larger than Lmax");
......@@ -1443,21 +1426,22 @@ int shtns_set_grid_auto(shtns_cfg shtns, enum shtns_type flags, double eps, int
#endif
if ((layout & SHT_LOAD_SAVE_CFG) && (!cfg_loaded)) cfg_loaded = (config_load(shtns, req_flags) > 0);
if (quick_init == 0) {
if (!cfg_loaded) {
choose_best_sht(shtns, &nloop, vector);
if (layout & SHT_LOAD_SAVE_CFG) config_save(shtns, req_flags);
}
if ((quick_init == 0) && (!cfg_loaded)) {
choose_best_sht(shtns, &nloop, vector);
if (layout & SHT_LOAD_SAVE_CFG) config_save(shtns, req_flags);
}
double t_estimate = 5e-10*LMAX*NLAT*MMAX/VSIZE2; // very rough cost estimate (in seconds for 1 core @ 1Ghz).
if ((t_estimate < 0.3*shtns->nthreads) || ((quick_init == 0) && (!cfg_loaded))) { // don't perform accuracy checks for too large transforms (takes too much time).
t = SHT_error(shtns, vector); // compute SHT accuracy.
#if SHT_VERBOSE > 0
if (verbose) printf(" + SHT accuracy = %.3g\n",t);
#endif
#if SHT_VERBOSE < 2
if ((t > 1.e-3) || isNotFinite(t)) {
shtns_print_cfg(shtns);
#if SHT_VERBOSE > 0
if (verbose) printf(" + SHT accuracy = %.3g\n",t);
#endif
if ((t > 1.e-6) || isNotFinite(t)) {
printf("\033[93m Accuracy test failed. Please file a bug report at https://bitbucket.org/nschaeff/shtns/issues \033[0m\n");
#if SHT_VERBOSE < 2
shtns_runerr("bad SHT accuracy"); // stop if something went wrong (but not in debug mode)
#endif
}
#endif
}
// set_sht_fly(shtns, SHT_TYP_VAN);
......
......@@ -61,15 +61,6 @@ int cushtns_use_gpu(int);
/* BEGIN COMPILE-TIME SETTINGS */
/// defines the maximum amount of memory in megabytes that SHTns should use.
#define SHTNS_MAX_MEMORY 2048
/// Minimum performance improve for DCT in \ref sht_auto mode. If not atained, we may switch back to gauss.
#define MIN_PERF_IMPROVE_DCT 1.05
/// Try to enforce at least this accuracy for DCT in sht_auto mode.
#define MIN_ACCURACY_DCT 1.e-8
/// The default \ref opt_polar threshold (0 disabled, 1.e-6 is aggressive, 1.e-10 is safe, 1.e-14 is VERY safe)
#define SHT_DEFAULT_POLAR_OPT 1.e-10
......@@ -82,9 +73,6 @@ int cushtns_use_gpu(int);
/// must be larger or equal to 1.
#define SHT_DEFAULT_NL_ORDER 1
/// minimum NLAT to consider the use of DCT acceleration.
#define SHT_MIN_NLAT_DCT 64
/// time-limit for timing individual transforms (in seconds)
#define SHT_TIME_LIMIT 0.2
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment