diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ba7898d89434dbd0ca93c6dfe4dc6ef74209d575..65b6e27da23c55e2a55149b7236de01c47b5d4a8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,52 +1,35 @@ + stages: - configure - build - test +before_script: + script: "sh ci/scripts/version.sh > /tmp/host_info" + atrifacts: + paths: + - /tmp/host_info - -config:debian: - image: fperignon/hysop:debian - stage: configure - script: "sh ci/config.sh" - artifacts: - paths: - - build/ - -build:debian: - image: fperignon/hysop:debian - stage: build - script: "sh ci/build_install_clean.sh" - dependencies: - - config:debian - -test:debian: - image: fperignon/hysop:debian - stage: test - script: "sh ci/run_tests.sh" - dependencies: - - build:debian - -config:ubuntu: - image: fperignon/hysop:ubuntu +config:ubuntu_zesty: + image: keckj/hysop:ubuntu_zesty stage: configure - script: "sh ci/config.sh" + script: "sh ci/scripts/config.sh" artifacts: paths: - build/ -build:ubuntu: - image: fperignon/hysop:ubuntu +build:ubuntu_zesty: + image: keckj/hysop:ubuntu_zesty stage: build - script: "sh ci/build_install_clean.sh" + script: "sh ci/scripts/build_install_clean.sh" dependencies: - - config:ubuntu + - config:ubuntu_zesty -test:ubuntu: - image: fperignon/hysop:ubuntu +test:ubuntu_zesty: + image: keckj/hysop:ubuntu_zesty stage: test script: - useradd "hysoprunner" - - su "hysoprunner" -c "sh ci/run_tests.sh" + - su "hysoprunner" -c "sh ci/scripts/run_tests.sh" dependencies: - - build:ubuntu + - build:ubuntu_zesty diff --git a/CMakeLists.txt b/CMakeLists.txt index 25e5e54b022aa6ddb33b5991e2d3d1062d593941..8c4b38fd7e0a24f0f90c009305fe76701ace6c28 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,11 +38,12 @@ option(WITH_SCALES "compile/create scales lib and link it with HySoP. Default = option(WITH_FFTW "Link with fftw library (required for some HySoP solvers), default = ON" ON) option(WITH_EXTRAS "Link with some extra fortran libraries (like arnoldi solver), default = OFF" OFF) option(WITH_GPU "Use of GPU (required for some HySoP solvers), default = ON" ON) -option(WITH_MAIN_FORTRAN "Create an executable (test purpose) from fortran sources in src/main, linked with libhysop, default = ON" OFF) -option(WITH_MAIN_CXX "Create an executable (test purpose) from cxx sources in src/hysop++/main, linked with libhysop, default = ON" ON) -option(DEBUG "Enable debug mode for HySoP (0:disabled, 1:verbose, 2:trace, 3:verbose+trace). Default = 0" 1) +option(WITH_MAIN_FORTRAN "Create an executable (test purpose) from fortran sources in src/main, linked with libhysop, default = OFF" OFF) +option(WITH_MAIN_CXX "Create an executable (test purpose) from cxx sources in src/hysop++/main, linked with libhysop, default = OFF" OFF) +option(PROFILE "Enable profiling mode for HySoP. Default = ON" ON) +option(VERBOSE "Enable verbose mode for HySoP. Default = ON" ON) +option(DEBUG "Enable debug mode for HySoP. Default = OFF" OFF) option(FULL_TEST "Enable all test options (pep8, mpi ...) - Default = OFF" OFF) -option(PROFILE "Enable profiling mode for HySoP. 0:disabled, 1: enabled. Default = 0" 0) option(OPTIM "To allow python -OO run, some packages must be deactivated. Set this option to 'ON' to do so. Default = OFF" OFF) option(WITH_MPI_TESTS "Enable mpi tests. Default = ON if USE_MPI is ON." ON) option(WITH_GOOGLE_TESTS "Enable google tests (c++). Default = OFF." OFF) @@ -131,14 +132,14 @@ find_package(PythonFull REQUIRED) include(FindPythonModule) # - python packages - find_python_module(numpy REQUIRED) -find_python_module(scipy) -find_python_module(matplotlib) -if(NOT matplotlib_FOUND) - find_python_module(Gnuplot) -endif() -find_python_module(scitools) +find_python_module(scipy REQUIRED) +find_python_module(scitools REQUIRED) find_python_module(h5py REQUIRED) -find_python_module(sympy) +find_python_module(sympy REQUIRED) +find_python_module(psutil REQUIRED) +find_python_module(cpuinfo REQUIRED) +find_python_module(gmpy2 REQUIRED) +find_python_module(graph_tool REQUIRED) # --- OpenCL --- find_python_module(pyopencl REQUIRED) # --- MPI --- @@ -146,6 +147,11 @@ if(USE_MPI) find_package(MPI REQUIRED) find_python_module(mpi4py REQUIRED) endif() +# --- PLOT -- +find_python_module(matplotlib) +if(NOT matplotlib_FOUND) + find_python_module(Gnuplot) +endif() # --- Wheel, required for a proper build/install process --- find_python_module(wheel REQUIRED) if(USE_CXX) diff --git a/ci/docker_images/ubuntu/xenial/Dockerfile b/ci/docker_images/ubuntu/xenial/Dockerfile index daae74853c3eb048429373184b588165381b06f7..9dafb4408f0216eca86e15b77413f6bf616c1da3 100644 --- a/ci/docker_images/ubuntu/xenial/Dockerfile +++ b/ci/docker_images/ubuntu/xenial/Dockerfile @@ -38,9 +38,9 @@ RUN apt-get install -y libsparsehash-dev RUN apt-get install -y python RUN apt-get install -y python-dev RUN apt-get install -y python-pip -RUN apt-get install -y python-scitools RUN apt-get install -y opencl-headers -RUN apt-get install -y ocl-icd-opencl-dev +RUN apt-get install -y ocl-icd-libopencl1 +RUN apt-get install -y clinfo # python packages RUN pip install --upgrade pip @@ -60,16 +60,13 @@ RUN pip install psutil RUN pip install py-cpuinfo RUN pip install Mako -# pyopencl +# scitools (python-scitools does not exist on ubuntu:xenial) RUN cd /tmp \ -&& git clone https://github.com/inducer/pyopencl \ -&& cd pyopencl \ -&& git submodule update --init \ -&& ./configure.py \ -&& make \ -&& make install \ -&& cd - \ -&& rm -Rf /tmp/pyopencl + && git clone https://github.com/hplgit/scitools \ + && cd scitools \ + && pip install . \ + && cd - \ + && rm -Rf /tmp/scitools # patchelf RUN cd /tmp \ @@ -82,30 +79,56 @@ RUN cd /tmp \ && cd - \ && rm -Rf /tmp/patchelf -# clang 6.0 +# precompiled python graphtools (need to be patched with patchelf) +RUN echo 'deb http://downloads.skewed.de/apt/xenial xenial universe' >> /etc/apt/sources.list \ + && echo 'deb-src http://downloads.skewed.de/apt/xenial xenial universe' >> /etc/apt/sources.list \ + && apt-get update \ + && apt-get install --assume-yes --allow-unauthenticated python-graph-tool \ + && patchelf --add-needed libboost_context.so $(find /usr/ -name 'libgraph_tool_search.so') + RUN echo 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial main' >> /etc/apt/sources.list \ && echo 'deb-src http://apt.llvm.org/xenial/ llvm-toolchain-xenial main' >> /etc/apt/sources.list \ - && apt-get update \ - && apt-get install --assume-yes --allow-unauthenticated clang-6.0 libclang-6.0-dev + && apt-get update \ + && apt-get install --assume-yes --allow-unauthenticated llvm-3.8 clang-3.8 libllvm3.8 libclang-3.8-dev -# oclgrind +# POCL +RUN cd /tmp \ +&& git clone http://github.com/pocl/pocl \ +&& cd pocl \ +&& mkdir build \ +&& cd build \ +&& cmake -DCMAKE_BUILD_TYPE=Release .. \ +&& make \ +&& make install \ +&& cd -- \ +&& rm -Rf /tmp/pocl + +# pyopencl RUN cd /tmp \ - && git clone https://github.com/jrprice/Oclgrind \ - && cd Oclgrind \ - && mkdir build \ - && cd build \ - && cmake -DCMAKE_BUILD_TYPE=Release .. \ - && make \ - && make install \ - && cd - \ - && rm -Rf /tmp/Oclgrind +&& rm -f /usr/lib/x86_64-linux-gnu/libOpenCL.so.* \ +&& ldconfig \ +&& git clone https://github.com/inducer/pyopencl \ +&& cd pyopencl \ +&& git submodule update --init \ +&& ./configure.py \ +&& echo 'CL_PRETEND_VERSION="1.2"' >> siteconf.py \ +&& make \ +&& pip install --upgrade . \ +&& cd - \ +&& rm -Rf /tmp/pyopencl -# precompiled python graphtools (need to be patched with patchelf) -RUN echo 'deb http://downloads.skewed.de/apt/xenial xenial universe' >> /etc/apt/sources.list \ - && echo 'deb-src http://downloads.skewed.de/apt/xenial xenial universe' >> /etc/apt/sources.list \ - && apt-get update \ - && apt-get install --assume-yes --allow-unauthenticated python-graph-tool \ - && patchelf --add-needed libboost_context.so $(find /usr/ -name 'libgraph_tool_search.so') +# oclgrind +RUN apt-get install --assume-yes --allow-unauthenticated llvm-6.0 clang-6.0 libllvm6.0 libclang-6.0-dev +RUN cd /tmp \ + && git clone https://github.com/jrprice/Oclgrind \ + && cd Oclgrind \ + && mkdir build \ + && cd build \ + && cmake -DCMAKE_BUILD_TYPE=Release .. \ + && make \ + && make install \ + && cd - \ + && rm -Rf /tmp/Oclgrind # clean cached packages RUN rm -rf /var/lib/apt/lists/* diff --git a/ci/docker_images/ubuntu/zesty/Dockerfile b/ci/docker_images/ubuntu/zesty/Dockerfile index 501321cf88fb7a005f98dc609dea1b4ceda369b3..a04d4e5087131c5b0e28ee2cc94aa6f1a783fc2b 100644 --- a/ci/docker_images/ubuntu/zesty/Dockerfile +++ b/ci/docker_images/ubuntu/zesty/Dockerfile @@ -38,11 +38,14 @@ RUN apt-get install -y libsparsehash-dev RUN apt-get install -y python RUN apt-get install -y python-dev RUN apt-get install -y python-pip -RUN apt-get install -y python-pyopencl +RUN apt-get install -y opencl-headers +RUN apt-get install -y ocl-icd-libopencl1 +RUN apt-get install -y clinfo # python packages RUN pip install --upgrade pip RUN pip install --upgrade setuptools +RUN pip install cffi RUN pip install wheel RUN pip install pytest RUN pip install numpy @@ -55,6 +58,7 @@ RUN pip install h5py RUN pip install gmpy2 RUN pip install psutil RUN pip install py-cpuinfo +RUN pip install Mako # scitools (python-scitools does not exist on ubuntu:zesty) RUN cd /tmp \ @@ -75,24 +79,6 @@ RUN cd /tmp \ && cd - \ && rm -Rf /tmp/patchelf -# clang -RUN echo 'deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty main' >> /etc/apt/sources.list \ - && echo 'deb-src http://apt.llvm.org/zesty/ llvm-toolchain-zesty main' >> /etc/apt/sources.list \ - && apt-get update \ - && apt-get install --assume-yes --allow-unauthenticated clang-6.0 libclang-6.0-dev - -# oclgrind -RUN cd /tmp \ - && git clone https://github.com/jrprice/Oclgrind \ - && cd Oclgrind \ - && mkdir build \ - && cd build \ - && cmake -DCMAKE_BUILD_TYPE=Release .. \ - && make \ - && make install \ - && cd - \ - && rm -Rf /tmp/Oclgrind - # precompiled python graphtools (need to be patched with patchelf) RUN echo 'deb http://downloads.skewed.de/apt/zesty zesty universe' >> /etc/apt/sources.list \ && echo 'deb-src http://downloads.skewed.de/apt/zesty zesty universe' >> /etc/apt/sources.list \ @@ -100,6 +86,50 @@ RUN echo 'deb http://downloads.skewed.de/apt/zesty zesty universe' >> /etc/a && apt-get install --assume-yes --allow-unauthenticated python-graph-tool \ && patchelf --add-needed libboost_context.so $(find /usr/ -name 'libgraph_tool_search.so') +RUN echo 'deb http://apt.llvm.org/zesty/ llvm-toolchain-zesty main' >> /etc/apt/sources.list \ + && echo 'deb-src http://apt.llvm.org/zesty/ llvm-toolchain-zesty main' >> /etc/apt/sources.list \ + && apt-get update \ + && apt-get install --assume-yes --allow-unauthenticated llvm-3.8 clang-3.8 libllvm3.8 libclang-3.8-dev + +# POCL +RUN cd /tmp \ +&& git clone http://github.com/pocl/pocl \ +&& cd pocl \ +&& mkdir build \ +&& cd build \ +&& cmake -DCMAKE_BUILD_TYPE=Release .. \ +&& make \ +&& make install \ +&& cd -- \ +&& rm -Rf /tmp/pocl + +# pyopencl +RUN cd /tmp \ +&& rm -f /usr/lib/x86_64-linux-gnu/libOpenCL.so.* \ +&& ldconfig \ +&& git clone https://github.com/inducer/pyopencl \ +&& cd pyopencl \ +&& git submodule update --init \ +&& ./configure.py \ +&& echo 'CL_PRETEND_VERSION="1.2"' >> siteconf.py \ +&& make \ +&& pip install --upgrade . \ +&& cd - \ +&& rm -Rf /tmp/pyopencl + +# oclgrind +RUN apt-get install --assume-yes --allow-unauthenticated llvm-6.0 clang-6.0 libllvm6.0 libclang-6.0-dev +RUN cd /tmp \ + && git clone https://github.com/jrprice/Oclgrind \ + && cd Oclgrind \ + && mkdir build \ + && cd build \ + && cmake -DCMAKE_BUILD_TYPE=Release .. \ + && make \ + && make install \ + && cd - \ + && rm -Rf /tmp/Oclgrind + # clean cached packages RUN rm -rf /var/lib/apt/lists/* RUN rm -rf $HOME/.cache/pip/* diff --git a/ci/build_install_clean.sh b/ci/scripts/build_install_clean.sh similarity index 90% rename from ci/build_install_clean.sh rename to ci/scripts/build_install_clean.sh index d2304fb9c62a7cdd5fb81028bee54fa39ab07483..a0663d04a595d0431d4a4add64a9df9df33f31b5 100644 --- a/ci/build_install_clean.sh +++ b/ci/scripts/build_install_clean.sh @@ -2,11 +2,10 @@ cd build make make install -cd .. +cd - python -c 'import hysop' cd build echo $'y\n'|make uninstall make clean -cd .. +cd - rm -rf build - diff --git a/ci/config.sh b/ci/scripts/config.sh similarity index 100% rename from ci/config.sh rename to ci/scripts/config.sh diff --git a/ci/run_tests.sh b/ci/scripts/run_tests.sh similarity index 100% rename from ci/run_tests.sh rename to ci/scripts/run_tests.sh diff --git a/ci/scripts/version.sh b/ci/scripts/version.sh new file mode 100644 index 0000000000000000000000000000000000000000..2e8fdc77edc8a85f9d22a1c2e1d8e8183886bc76 --- /dev/null +++ b/ci/scripts/version.sh @@ -0,0 +1,16 @@ +echo "HOST" +uname -a +echo +echo "BUILD TOOLS:" +echo $(cmake --version | head -1) +echo $(make --version | head -1) +echo $(gcc --version | head -1) +echo $(swig -version | head -2 | tail -1) +echo "f2py version $(f2py -v)" +echo +echo "PYTHON PACKAGES:" +pip list --format=columns +echo +echo "CL_INFO" +clinfo +echo diff --git a/examples/scalar_advection/scalar_advection.py b/examples/scalar_advection/scalar_advection.py index 6580c2a12cf9e8ad6558ecbdf12eeff5a6bc17ae..5ed1d7cfa87d5e5fe3f71823013541e9e230adb8 100644 --- a/examples/scalar_advection/scalar_advection.py +++ b/examples/scalar_advection/scalar_advection.py @@ -40,8 +40,8 @@ def run(npts=64+1, cfl=0.5): if impl is Implementation.OPENCL_CODEGEN: autotuner_config = OpenClKernelAutotunerConfig( - autotuner_flag=AutotunerFlags.ESTIMATE, - prune_threshold=1.2, override_cache=True, verbose=0) + autotuner_flag=AutotunerFlags.ESTIMATE, max_candidates=10, + prune_threshold=1.2, override_cache=True, verbose=0,) kernel_config = OpenClKernelConfig(autotuner_config=autotuner_config) method = { OpenClKernelConfig : kernel_config } else: @@ -60,11 +60,10 @@ def run(npts=64+1, cfl=0.5): ) splitting = StrangSplitting(splitting_dim=dim, - order=StrangOrder.STRANG_SECOND_ORDER, - method=method) + order=StrangOrder.STRANG_SECOND_ORDER) splitting.push_operators(advec) - problem = Problem() + problem = Problem(method=method) problem.insert(splitting) io_params = IOParams(filename='S0', frequency=1) diff --git a/hysop/__init__.py b/hysop/__init__.py index 279e86abeb02be2bfb61b0e3665f0412e9c2564c..da7a5a741cc7440e3168119b618ab1f056fa6148 100644 --- a/hysop/__init__.py +++ b/hysop/__init__.py @@ -16,12 +16,12 @@ __FFTW_ENABLED__ = "ON" is "ON" __SCALES_ENABLED__ = "ON" is "ON" __OPTIMIZE__ = not __debug__ -__VERBOSE__ = False -__DEBUG__ = False +__VERBOSE__ = True +__DEBUG__ = "OFF" is "ON" +__PROFILE__ = "ON" is "ON" __TRACE__ = False __TRACE_WARNINGS__ = False -__KERNEL_DEBUG__ = True -__PROFILE__ = True +__KERNEL_DEBUG__ = False __ENABLE_LONG_TESTS__ = "OFF" is "ON" diff --git a/hysop/__init__.py.in b/hysop/__init__.py.in index 1a7bd799e2c874ef17bb64e939dba42a170db50e..6666b268fe21d75d24d131e72e641a000d472d31 100644 --- a/hysop/__init__.py.in +++ b/hysop/__init__.py.in @@ -16,12 +16,12 @@ __FFTW_ENABLED__ = "@WITH_FFTW@" is "ON" __SCALES_ENABLED__ = "@WITH_SCALES@" is "ON" __OPTIMIZE__ = not __debug__ -__VERBOSE__ = "@DEBUG@" in ["1", "3"] -__DEBUG__ = "@DEBUG@" in ["2", "3"] +__VERBOSE__ = "@VERBOSE@" is "ON" +__DEBUG__ = "@DEBUG@" is "ON" +__PROFILE__ = "@PROFILE@" is "ON" __TRACE__ = False __TRACE_WARNINGS__ = False __KERNEL_DEBUG__ = False -__PROFILE__ = True __ENABLE_LONG_TESTS__ = "@ENABLE_LONG_TESTS@" is "ON" diff --git a/hysop/backend/device/kernel_autotuner.py b/hysop/backend/device/kernel_autotuner.py index a5e0fb0efae54999a66ffff340eda6b979cc6495..d92437fc6c97929dd1210fb3939b3ee9270a33ed 100644 --- a/hysop/backend/device/kernel_autotuner.py +++ b/hysop/backend/device/kernel_autotuner.py @@ -112,7 +112,8 @@ class KernelAutotuner(object): force_verbose, force_debug) result_keys = ('extra_parameters', 'work_load', 'global_work_size', 'local_work_size', - 'program', 'kernel', 'kernel_statistics', 'kernel_src', 'kernel_name', 'src_hash') + 'program', 'kernel', 'kernel_statistics', 'kernel_src', 'kernel_name', + 'src_hash') assert len(result_keys) == len(best_candidate) return dict(zip(result_keys, best_candidate)) @@ -180,6 +181,7 @@ class KernelAutotuner(object): force_verbose, force_debug): autotuner_config = self.autotuner_config nruns = autotuner_config.nruns + max_candidates = autotuner_config.max_candidates bench_results = {} best_stats = None @@ -190,6 +192,7 @@ class KernelAutotuner(object): with Timer() as timer: params = tkernel.compute_parameters(extra_kwds=extra_kwds) total_count, pruned_count, kept_count, failed_count = 0,0,0,0 + abort = False for extra_parameters in params.iter_parameters(): work_bounds = tkernel.compute_work_bounds(extra_parameters=extra_parameters, @@ -287,9 +290,20 @@ class KernelAutotuner(object): statistics = None from_cache=False total_count += 1 - - self._print_candidate((statistics is None), from_cache, total_count) + abort = (max_candidates is not None) and \ + ((pruned_count + kept_count) >= max_candidates) + self._print_candidate((statistics is None), from_cache, total_count, abort) + if abort: + break + if abort: + break self._dump_cache(silent=True) + if abort: + break + if abort: + msg='>Achieved maximum number of configured candidates: {}'.format(max_candidates) + if self.verbose>1: + print msg assert total_count == (kept_count+pruned_count+failed_count) if (kept_count == 0): @@ -480,7 +494,7 @@ class KernelAutotuner(object): self._print_separator() print msg.upper() - def _print_candidate(self, failed, from_cache, total_count): + def _print_candidate(self, failed, from_cache, total_count, abort): if self.verbose==2: if total_count==1: sys.stdout.write(self.indent(2)) @@ -490,6 +504,8 @@ class KernelAutotuner(object): sys.stdout.write(':') else: sys.stdout.write('.') + if abort: + sys.stdout.write('|') if total_count % 40 == 0: sys.stdout.write('\n'+self.indent(2)) elif total_count % 5 == 0: diff --git a/hysop/backend/device/kernel_autotuner_config.py b/hysop/backend/device/kernel_autotuner_config.py index 0cb7733313d4be6ff58d60e055be7f468a973fc0..6fbf1a015dc64bf2f7b61a538cd0d6829d6941bd 100644 --- a/hysop/backend/device/kernel_autotuner_config.py +++ b/hysop/backend/device/kernel_autotuner_config.py @@ -19,6 +19,7 @@ class KernelAutotunerConfig(object): def __init__(self, dump_folder = None, autotuner_flag = DEFAULT_AUTOTUNER_FLAG, prune_threshold = DEFAULT_AUTOTUNER_PRUNE_THRESHOLD, + max_candidates = None, verbose = None, debug = __KERNEL_DEBUG__, dump_kernels = True, @@ -38,6 +39,7 @@ class KernelAutotunerConfig(object): check_instance(verbose, int) check_instance(debug, int) check_instance(nruns, int) + check_instance(max_candidates, int, allow_none=True) self.autotuner_flag = autotuner_flag self.prune_threshold = prune_threshold @@ -47,6 +49,7 @@ class KernelAutotunerConfig(object): self.nruns = nruns self.dump_folder = dump_folder self.dump_kernels = dump_kernels + self.max_candidates = max_candidates @abstractmethod def default_dump_folder(self): diff --git a/hysop/backend/device/opencl/opencl_env.py b/hysop/backend/device/opencl/opencl_env.py index da81b51c678b5868afc9324a6e372b66f40d8ad8..da75933a9eac55eba84dd765f790ade19caa05af 100644 --- a/hysop/backend/device/opencl/opencl_env.py +++ b/hysop/backend/device/opencl/opencl_env.py @@ -573,7 +573,7 @@ Dumped OpenCL Kernel '{}' build_opts = ' '.join(set(self.default_build_opts).update(build_options)) else: build_opts = ' '.join(self.default_build_opts) + ' ' + build_options - VERBOSE = __VERBOSE__ if (force_verbose is None) else force_verbose + VERBOSE = False if (force_verbose is None) else force_verbose DEBUG = __KERNEL_DEBUG__ if (force_debug is None) else force_debug gpu_src = src diff --git a/hysop/backend/device/opencl/opencl_operator.py b/hysop/backend/device/opencl/opencl_operator.py index 1419a725f8728ab6f958a8798cfc5a32a5f4306e..ff074ab31ab0f459df9f0fd956535d418cab06e7 100644 --- a/hysop/backend/device/opencl/opencl_operator.py +++ b/hysop/backend/device/opencl/opencl_operator.py @@ -126,7 +126,7 @@ class OpenClOperator(ComputationalGraphOperator): super(OpenClOperator,self).handle_method(method) assert OpenClKernelConfig in method - + kernel_config = method.pop(OpenClKernelConfig) autotuner_config = kernel_config.autotuner_config diff --git a/hysop/backend/device/opencl/operator/transpose.py b/hysop/backend/device/opencl/operator/transpose.py index 6c20bda7d6a45d0810f0a5b9b9a1d395c0417b2d..238ec7a35db2e885ed0485c27a654c786c4be315 100644 --- a/hysop/backend/device/opencl/operator/transpose.py +++ b/hysop/backend/device/opencl/operator/transpose.py @@ -40,7 +40,7 @@ class OpenClTranspose(TransposeOperatorBase, OpenClOperator): compute_inplace = False hardcode_arrays = (compute_inplace or not is_inplace) - transpose, _ = kernel.autotune(axes=axes, force_debug=True, + transpose, _ = kernel.autotune(axes=axes, hardcode_arrays=hardcode_arrays, is_inplace=compute_inplace, input_field=input_field, output_field=output_field) diff --git a/hysop/constants.py.in b/hysop/constants.py.in index 13d8a48574a322af63d820d50e73e086262f8bb7..a402100c3a3bedcfd5bf86d17a63554ef3c6af0a 100644 --- a/hysop/constants.py.in +++ b/hysop/constants.py.in @@ -152,7 +152,7 @@ SpaceDiscretization = EnumFactory.create('SpaceDiscretization', AutotunerFlags = EnumFactory.create('AutotunerFlags', - ['ESTIMATE', 'MEASURE', 'PATIENT', 'EXHAUSTIVE' ]) + ['ESTIMATE', 'MEASURE', 'PATIENT', 'EXHAUSTIVE']) """Configuration flags for kernel autotuner (automatic runtime parameters tuning for cuda and opencl).""" DEFAULT_AUTOTUNER_FLAG = AutotunerFlags.MEASURE diff --git a/hysop/core/graph/computational_graph.py b/hysop/core/graph/computational_graph.py index 17a40b22191e4e2e37fc4bb03295cd353b17a8e2..a8a409edc699581c43800af4009e1d9041fdb46a 100644 --- a/hysop/core/graph/computational_graph.py +++ b/hysop/core/graph/computational_graph.py @@ -240,7 +240,7 @@ class ComputationalGraph(ComputationalGraphNode): if is_root: field_requirements = self.get_and_set_field_requirements() field_requirements.build_topologies() - + self._build_graph(outputs_are_inputs=outputs_are_inputs, current_level=0) # fix for auto generated nodes @@ -291,7 +291,7 @@ class ComputationalGraph(ComputationalGraphNode): builder = GraphBuilder(node=self) builder.configure(current_level=current_level, outputs_are_inputs=outputs_are_inputs, **kwds) - + builder.build_graph() # keep variables diff --git a/hysop/core/graph/computational_operator.py b/hysop/core/graph/computational_operator.py index e5ec4ec59303a087f567b05ff21a0b21805b1cd2..53493d07cae0b03878027c79bf77a81cb74b35a5 100644 --- a/hysop/core/graph/computational_operator.py +++ b/hysop/core/graph/computational_operator.py @@ -495,7 +495,7 @@ class ComputationalGraphOperator(ComputationalGraphNode): fields, io_params, op_kwds = self._input_fields_to_dump variables = {k: self.input_vars[k] for k in fields} op = HDF_Writer(io_params=io_params, variables=variables, **op_kwds) - op.initialize() + op.initialize(topgraph_method=self.method) op.get_and_set_field_requirements() ops.append(op) @@ -505,7 +505,7 @@ class ComputationalGraphOperator(ComputationalGraphNode): fields, io_params, op_kwds = self._output_fields_to_dump variables = {k: self.output_vars[k] for k in fields} op = HDF_Writer(io_params=io_params, variables=variables, **op_kwds) - op.initialize() + op.initialize(topgraph_method=self.method) op.get_and_set_field_requirements() ops.append(op) diff --git a/hysop/core/graph/graph_builder.py b/hysop/core/graph/graph_builder.py index 4fa00ddb91221b441be214f4191b5cb1f18f2193..565ddc46d5be584f1c1ccd6d4e6a99200f09592f 100644 --- a/hysop/core/graph/graph_builder.py +++ b/hysop/core/graph/graph_builder.py @@ -133,7 +133,8 @@ class GraphBuilder(object): return self.__ContinuousFieldState(field, self.op_input_topology_states, self.op_output_topology_states, - self.target_node._input_fields_to_dump) + self.target_node._input_fields_to_dump, + self.target_node.method) def new_node(self, opname, op, subgraph, current_level, node, node_id, @@ -415,7 +416,7 @@ class GraphBuilder(object): io_leader=io_params.io_leader) op = HDF_Writer(io_params=io_params, variables=variables, **op_kwds) - op.initialize() + op.initialize(topgraph_method=self.target_node.method) op.get_and_set_field_requirements() opnode = self.new_node(opname, op, None, current_level, None, None, None, None) @@ -640,7 +641,7 @@ class GraphBuilder(object): def __init__(self, field, op_input_topology_states, op_output_topology_states, - input_fields_to_dump): + input_fields_to_dump, topgraph_method): # all states are related to this continuous field self.field = field @@ -669,6 +670,8 @@ class GraphBuilder(object): # dictionnary (topology -> TopologyState) self.discrete_topology_states = {} + + self.method = topgraph_method def add_vertex(self, graph, vertex_properties, operator): vertex = graph.add_vertex() @@ -708,7 +711,7 @@ class GraphBuilder(object): op.name = '{}_{}'.format(op_name_prefix, field.name) if len(op_generator.nodes)>1: op.name += '__{}'.format(i) - op.initialize() + op.initialize(topgraph_method=self.method) assert len(op.input_vars) == 1 assert len(op.output_vars) == 1 @@ -910,7 +913,7 @@ class GraphBuilder(object): variables = {ifield: target_topo} writer_op = HDF_Writer(io_params=io_params, variables=variables, **op_kwds) - writer_op.initialize() + writer_op.initialize(topgraph_method=self.method) writer_op.get_and_set_field_requirements() writer_opnode = self.add_vertex(graph, vertex_properties, writer_op) self.add_edge(graph, edge_properties,