diff --git a/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl b/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl new file mode 100644 index 0000000000000000000000000000000000000000..9ecd87b49b58065c3d27c0d0dcb866f24645ebe9 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection/velocity_cache.cl @@ -0,0 +1,395 @@ +void fill_velocity_cache(__global const float* gvelo, + uint gidX, uint gidY, uint gidZ, + float4 dx, float4 v_dx, + __local float* gvelo_loc); + + +inline float alpha_l2_1(float y){ + return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);} +inline float beta_l2_1(float y){ + return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);} +inline float gamma_l2_1(float y){ + return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);} +inline float delta_l2_1(float y){ + return ((y * y * (y - 1.0)) / 2.0);} + + +inline float alpha_l4_2(float y){ + return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);} +inline float beta_l4_2(float y){ + return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);} +inline float gamma_l4_2(float y){ + return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);} +inline float delta_l4_2(float y){ + return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);} +inline float eta_l4_2(float y){ + return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);} +inline float zeta_l4_2(float y){ + return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);} + + +inline float alpha_l4_4(float y){ + return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);} +inline float beta_l4_4(float y){ + return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);} +inline float gamma_l4_4(float y){ + return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);} +inline float delta_l4_4(float y){ + return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);} +inline float eta_l4_4(float y){ + return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);} +inline float zeta_l4_4(float y){ + return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);} + +/*** TODO: correct this file to work properly with vector enable remehsing weights ***/ + +void fill_velocity_cache(__global const float* gvelo, + uint gidX, uint gidY, uint gidZ, + float4 dx, float4 v_dx, + __local float* velocity_cache) +{ + uint i; + float__N__ v; +#if V_NB_I == NB_I + // Single scale : Velocity and scalar grids are identical : cache is just read from global + uint line_index = gidY*NB_I + gidZ*NB_I*NB_II; /* Current 1D problem index */ + for(i=gidX*__N__; i<NB_I; i+=(WI_NB*__N__)) + { + /* Read velocity */ + v = vload__N__((i+line_index)/__N__, gvelo); + /* Fill the cache */ + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + } +#else + // Multi-scale: Velocity cache is interpolated from global + +#if NB_III == 1 + // 2D case + + + float line_posY, hY; + int indY; +#if MS_FORMULA == MS_LINEAR + int2 v_line_index; + float2 wY; +#elif MS_FORMULA == MS_L2_1 + int4 v_line_index; + float4 wY; +#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4 + // Only the 6 first elements will be used + int8 v_line_index; + float8 wY; +#endif + + line_posY = (gidY * dx.y) / v_dx.y; + indY = convert_int_rtn(line_posY); + hY = line_posY - convert_float(indY); + +#if MS_FORMULA == MS_LINEAR + wY.s1 = hY; + wY.s0 = 1.0 - wY.s1; +#elif MS_FORMULA == MS_L2_1 + wY.s0 = alpha_l2_1(hY); + wY.s1 = beta_l2_1(hY); + wY.s2 = gamma_l2_1(hY); + wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2; +#elif MS_FORMULA == MS_L4_2 + wY.s0 = alpha_l4_2(hY); + wY.s1 = beta_l4_2(hY); + wY.s2 = gamma_l4_2(hY); + wY.s3 = delta_l4_2(hY); + wY.s4 = eta_l4_2(hY); + wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4; +#elif MS_FORMULA == MS_l4_4 + wY.s0 = alpha_l4_4(hY); + wY.s1 = beta_l4_4(hY); + wY.s2 = gamma_l4_4(hY); + wY.s3 = delta_l4_4(hY); + wY.s4 = eta_l4_4(hY); + wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4; +#endif + + indY = indY + V_GHOSTS_NB; + +#if MS_FORMULA == MS_LINEAR + v_line_index.s0 = indY * V_NB_I; + v_line_index.s1 = (indY + 1) * V_NB_I; +#elif MS_FORMULA == MS_L2_1 + v_line_index.s0 = (indY - 1) * V_NB_I; + v_line_index.s1 = (indY) * V_NB_I; + v_line_index.s2 = (indY + 1) * V_NB_I; + v_line_index.s3 = (indY + 2) * V_NB_I; +#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4 + v_line_index.s0 = (indY - 2) * V_NB_I; + v_line_index.s1 = (indY - 1) * V_NB_I; + v_line_index.s2 = (indY) * V_NB_I; + v_line_index.s3 = (indY + 1) * V_NB_I; + v_line_index.s4 = (indY + 2) * V_NB_I; + v_line_index.s5 = (indY + 3) * V_NB_I; +#endif + + + for(i=gidX*__N__; i<V_NB_I; i+=(WI_NB*__N__)) + { +#if MS_FORMULA == MS_LINEAR + v = vload__N__((i+v_line_index.s0)/__N__, gvelo) * wY.s0; + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + v = vload__N__((i+v_line_index.s1)/__N__, gvelo) * wY.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; +#elif MS_FORMULA == MS_L2_1 + v = vload__N__((i+v_line_index.s0)/__N__, gvelo) * wY.s0; + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + v = vload__N__((i+v_line_index.s1)/__N__, gvelo) * wY.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i+v_line_index.s2)/__N__, gvelo) * wY.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i+v_line_index.s3)/__N__, gvelo) * wY.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; +#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4 + v = vload__N__((i+v_line_index.s0)/__N__, gvelo) * wY.s0; + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + v = vload__N__((i+v_line_index.s1)/__N__, gvelo) * wY.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i+v_line_index.s2)/__N__, gvelo) * wY.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i+v_line_index.s3)/__N__, gvelo) * wY.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i+v_line_index.s4)/__N__, gvelo) * wY.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i+v_line_index.s5)/__N__, gvelo) * wY.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; +#endif + } + +#else + // 3D case + + + float line_posY, hY; + float line_posZ, hZ; + int indY, indZ; +#if MS_FORMULA == MS_LINEAR + int2 v_line_indexY, v_line_indexZ; + float2 wY, wZ; +#elif MS_FORMULA == MS_L2_1 + int4 v_line_indexY, v_line_indexZ; + float4 wY, wZ; +#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4 + int8 v_line_indexY, v_line_indexZ; + float8 wY, wZ; +#endif + + line_posY = (gidY * dx.y) / v_dx.y; + line_posZ = (gidZ * dx.z) / v_dx.z; + indY = convert_int_rtn(line_posY); + indZ = convert_int_rtn(line_posZ); + hY = line_posY - convert_float(indY); + hZ = line_posZ - convert_float(indZ); + +#if MS_FORMULA == MS_LINEAR + wY.s1 = hY; + wY.s0 = 1.0 - wY.s1; + wZ.s1 = hZ; + wZ.s0 = 1.0 - wZ.s1; +#elif MS_FORMULA == MS_L2_1 + wY.s0 = alpha_l2_1(hY); + wY.s1 = beta_l2_1(hY); + wY.s2 = gamma_l2_1(hY); + wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2; + wZ.s0 = alpha_l2_1(hZ); + wZ.s1 = beta_l2_1(hZ); + wZ.s2 = gamma_l2_1(hZ); + wZ.s3 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2; +#elif MS_FORMULA == MS_L4_2 + wY.s0 = alpha_l4_2(hY); + wY.s1 = beta_l4_2(hY); + wY.s2 = gamma_l4_2(hY); + wY.s3 = delta_l4_2(hY); + wY.s4 = eta_l4_2(hY); + wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4; + wZ.s0 = alpha_l4_2(hZ); + wZ.s1 = beta_l4_2(hZ); + wZ.s2 = gamma_l4_2(hZ); + wZ.s3 = delta_l4_2(hZ); + wZ.s4 = eta_l4_2(hZ); + wZ.s5 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2 - wZ.s3 - wZ.s4; +#elif MS_FORMULA == MS_L4_4 + wY.s0 = alpha_l4_4(hY); + wY.s1 = beta_l4_4(hY); + wY.s2 = gamma_l4_4(hY); + wY.s3 = delta_l4_4(hY); + wY.s4 = eta_l4_4(hY); + wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4; + wZ.s0 = alpha_l4_4(hZ); + wZ.s1 = beta_l4_4(hZ); + wZ.s2 = gamma_l4_4(hZ); + wZ.s3 = delta_l4_4(hZ); + wZ.s4 = eta_l4_4(hZ); + wZ.s5 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2 - wZ.s3 - wZ.s4; +#endif + + indY = indY + V_GHOSTS_NB; + indZ = indZ + V_GHOSTS_NB; + +#if MS_FORMULA == MS_LINEAR + v_line_indexY.s0 = indY * V_NB_I; + v_line_indexY.s1 = (indY + 1) * V_NB_I; + v_line_indexZ.s0 = indZ * V_NB_I * V_NB_II; + v_line_indexZ.s1 = (indZ + 1) * V_NB_I * V_NB_II; +#elif MS_FORMULA == MS_L2_1 + v_line_indexY.s0 = (indY - 1) * V_NB_I; + v_line_indexY.s1 = (indY) * V_NB_I; + v_line_indexY.s2 = (indY + 1) * V_NB_I; + v_line_indexY.s3 = (indY + 2) * V_NB_I; + v_line_indexZ.s0 = (indZ - 1) * V_NB_I * V_NB_II; + v_line_indexZ.s1 = (indZ) * V_NB_I * V_NB_II; + v_line_indexZ.s2 = (indZ + 1) * V_NB_I * V_NB_II; + v_line_indexZ.s3 = (indZ + 2) * V_NB_I * V_NB_II; +#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4 + v_line_indexY.s0 = (indY - 2) * V_NB_I; + v_line_indexY.s1 = (indY - 1) * V_NB_I; + v_line_indexY.s2 = (indY) * V_NB_I; + v_line_indexY.s3 = (indY + 1) * V_NB_I; + v_line_indexY.s4 = (indY + 2) * V_NB_I; + v_line_indexY.s5 = (indY + 3) * V_NB_I; + v_line_indexZ.s0 = (indZ - 2) * V_NB_I * V_NB_II; + v_line_indexZ.s1 = (indZ - 1) * V_NB_I * V_NB_II; + v_line_indexZ.s2 = (indZ) * V_NB_I * V_NB_II; + v_line_indexZ.s3 = (indZ + 1) * V_NB_I * V_NB_II; + v_line_indexZ.s4 = (indZ + 2) * V_NB_I * V_NB_II; + v_line_indexZ.s5 = (indZ + 3) * V_NB_I * V_NB_II; +#endif + + + for(i=gidX*__N__; i<V_NB_I; i+=(WI_NB*__N__)) + { +#if MS_FORMULA == MS_LINEAR + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s0)/__N__, gvelo) * wY.s0 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s1)/__N__, gvelo) * wY.s0 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s0)/__N__, gvelo) * wY.s1 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s1)/__N__, gvelo) * wY.s1 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + +#elif MS_FORMULA == MS_L2_1 + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s0)/__N__, gvelo) * wY.s0 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s1)/__N__, gvelo) * wY.s0 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s2)/__N__, gvelo) * wY.s0 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s3)/__N__, gvelo) * wY.s0 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s0)/__N__, gvelo) * wY.s1 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s1)/__N__, gvelo) * wY.s1 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s2)/__N__, gvelo) * wY.s1 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s3)/__N__, gvelo) * wY.s1 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s0)/__N__, gvelo) * wY.s2 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s1)/__N__, gvelo) * wY.s2 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s2)/__N__, gvelo) * wY.s2 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s3)/__N__, gvelo) * wY.s2 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s0)/__N__, gvelo) * wY.s3 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s1)/__N__, gvelo) * wY.s3 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s2)/__N__, gvelo) * wY.s3 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s3)/__N__, gvelo) * wY.s3 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + +#elif MS_FORMULA == MS_L4_2 || MS_FORMULA == MS_L4_4 + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s0)/__N__, gvelo) * wY.s0 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] = v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s1)/__N__, gvelo) * wY.s0 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s2)/__N__, gvelo) * wY.s0 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s3)/__N__, gvelo) * wY.s0 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s4)/__N__, gvelo) * wY.s0 * wZ.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s0 + v_line_indexZ.s5)/__N__, gvelo) * wY.s0 * wZ.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s0)/__N__, gvelo) * wY.s1 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s1)/__N__, gvelo) * wY.s1 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s2)/__N__, gvelo) * wY.s1 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s3)/__N__, gvelo) * wY.s1 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s4)/__N__, gvelo) * wY.s1 * wZ.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s1 + v_line_indexZ.s5)/__N__, gvelo) * wY.s1 * wZ.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s0)/__N__, gvelo) * wY.s2 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s1)/__N__, gvelo) * wY.s2 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s2)/__N__, gvelo) * wY.s2 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s3)/__N__, gvelo) * wY.s2 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s4)/__N__, gvelo) * wY.s2 * wZ.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s2 + v_line_indexZ.s5)/__N__, gvelo) * wY.s2 * wZ.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s0)/__N__, gvelo) * wY.s3 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s1)/__N__, gvelo) * wY.s3 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s2)/__N__, gvelo) * wY.s3 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s3)/__N__, gvelo) * wY.s3 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s4)/__N__, gvelo) * wY.s3 * wZ.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s3 + v_line_indexZ.s5)/__N__, gvelo) * wY.s3 * wZ.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s0)/__N__, gvelo) * wY.s4 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s1)/__N__, gvelo) * wY.s4 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s2)/__N__, gvelo) * wY.s4 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s3)/__N__, gvelo) * wY.s4 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s4)/__N__, gvelo) * wY.s4 * wZ.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s4 + v_line_indexZ.s5)/__N__, gvelo) * wY.s4 * wZ.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + + v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s0)/__N__, gvelo) * wY.s5 * wZ.s0; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s1)/__N__, gvelo) * wY.s5 * wZ.s1; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s2)/__N__, gvelo) * wY.s5 * wZ.s2; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s3)/__N__, gvelo) * wY.s5 * wZ.s3; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s4)/__N__, gvelo) * wY.s5 * wZ.s4; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; + v = vload__N__((i + v_line_indexY.s5 + v_line_indexZ.s5)/__N__, gvelo) * wY.s5 * wZ.s5; + velocity_cache[noBC_id(i+__NN__)] += v.s__NN__; +#endif + } +#endif +#endif +} diff --git a/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl b/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl new file mode 100644 index 0000000000000000000000000000000000000000..60a19c0cafa30017b4a40884a145786effd7eff8 --- /dev/null +++ b/HySoP/hysop/gpu/cl_src/advection/velocity_cache_noVec.cl @@ -0,0 +1,217 @@ +void fill_velocity_cache(__global const float* gvelo, + uint gidX, uint gidY, uint gidZ, + float4 dx, float4 v_dx, + __local float* gvelo_loc); + +void fill_velocity_cache(__global const float* gvelo, + uint gidX, uint gidY, uint gidZ, + float4 dx, float4 v_dx, + __local float* gvelo_loc) +{ + uint i; + + // ******************************** + // ** Single Scale + // ******************************** +#if V_NB_I == NB_I + // Single scale : Velocity and scalar grids are identical : cache is just read from global + uint line_index = gidY*NB_I + gidZ*NB_I*NB_II; /* Current 1D problem index */ + for(i=gidX; i<NB_I; i+=(WI_NB)) + { + /* Read velocity */ + /* Fill velocity cache */ + gvelo_loc[noBC_id(i)] = gvelo[i+line_index]; + } + + + // ******************************** + // ** Multi-Scale + // ******************************** + // Velocity cache is interpolated from global memory +#else + + +#if NB_III == 1 + // Multi-Scale (2D) + + float line_posY, hY; + int indY; +#if MS_FORMULA == LINEAR + int2 v_line_index; + float2 wY; +#elif MS_FORMULA == L2_1 + int4 v_line_index; + float4 wY; +#elif MS_FORMULA == L4_2 || MS_FORMULA == L4_4 + // Only the 6 first elements will be used + int8 v_line_index; + float8 wY; +#endif + + line_posY = (gidY * dx.y) / v_dx.y; + indY = convert_int_rtn(line_posY); + hY = line_posY - convert_float(indY); + + +#if MS_FORMULA == LINEAR + wY.s1 = hY; + wY.s0 = 1.0 - wY.s1; +#else + wY.s0 = MS_INTERPOL(alpha)(hY); + wY.s1 = MS_INTERPOL(beta)(hY); + wY.s2 = MS_INTERPOL(gamma)(hY); +#if MS_INTERPOL_SHIFT > 1 + wY.s3 = MS_INTERPOL(delta)(hY); + wY.s4 = MS_INTERPOL(eta)(hY); + wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4; +#else + wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2; +#endif +#endif + + indY = indY + V_GHOSTS_NB - MS_INTERPOL_SHIFT; + + v_line_index.s0 = indY * V_NB_I; + v_line_index.s1 = (indY + 1) * V_NB_I; +#if MS_INTERPOL_SHIFT > 0 + v_line_index.s2 = (indY + 2) * V_NB_I; + v_line_index.s3 = (indY + 3) * V_NB_I; +#elif MS_INTERPOL_SHIFT > 1 + v_line_index.s4 = (indY + 4) * V_NB_I; + v_line_index.s5 = (indY + 5) * V_NB_I; +#endif + + + for(i=gidX; i<V_NB_I; i+=(WI_NB)){ + gvelo_loc[noBC_id(i)] = wY.s0 * gvelo[i + v_line_index.s0]; + gvelo_loc[noBC_id(i)] += wY.s1 * gvelo[i + v_line_index.s1]; +#if MS_INTERPOL_SHIFT > 0 + gvelo_loc[noBC_id(i)] += wY.s2 * gvelo[i + v_line_index.s2]; + gvelo_loc[noBC_id(i)] += wY.s3 * gvelo[i + v_line_index.s3]; +#elif MS_INTERPOL_SHIFT > 1 + gvelo_loc[noBC_id(i)] += wY.s4 * gvelo[i + v_line_index.s4]; + gvelo_loc[noBC_id(i)] += wY.s5 * gvelo[i + v_line_index.s5]; +#endif + } + + +#else + // Multi-Scale (3D) + + float line_posY, hY; + float line_posZ, hZ; + int indY, indZ; +#if MS_FORMULA == LINEAR + int2 v_line_indexY, v_line_indexZ; + float2 wY, wZ; +#elif MS_FORMULA == L2_1 + int4 v_line_indexY, v_line_indexZ; + float4 wY, wZ; +#elif MS_FORMULA == L4_2 || MS_FORMULA == L4_4 + int8 v_line_indexY, v_line_indexZ; + float8 wY, wZ; +#endif + + line_posY = (gidY * dx.y) / v_dx.y; + line_posZ = (gidZ * dx.z) / v_dx.z; + indY = convert_int_rtn(line_posY); + indZ = convert_int_rtn(line_posZ); + hY = line_posY - convert_float(indY); + hZ = line_posZ - convert_float(indZ); + +#if MS_FORMULA == LINEAR + wY.s1 = hY; + wY.s0 = 1.0 - wY.s1; + wZ.s1 = hZ; + wZ.s0 = 1.0 - wZ.s1; +#else + wY.s0 = MS_INTERPOL(alpha)(hY); + wY.s1 = MS_INTERPOL(beta)(hY); + wY.s2 = MS_INTERPOL(gamma)(hY); + wZ.s0 = MS_INTERPOL(alpha)(hZ); + wZ.s1 = MS_INTERPOL(beta)(hZ); + wZ.s2 = MS_INTERPOL(gamma)(hZ); +#if MS_INTERPOL_SHIFT > 1 + wY.s3 = MS_INTERPOL(delta)(hY); + wY.s4 = MS_INTERPOL(eta)(hY); + wY.s5 = 1.0 - wY.s0 - wY.s1 - wY.s2 - wY.s3 - wY.s4; + wZ.s3 = MS_INTERPOL(delta)(hZ); + wZ.s4 = MS_INTERPOL(eta)(hZ); + wZ.s5 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2 - wZ.s3 - wZ.s4; +#else + wY.s3 = 1.0 - wY.s0 - wY.s1 - wY.s2; + wZ.s3 = 1.0 - wZ.s0 - wZ.s1 - wZ.s2; +#endif +#endif + + indY = indY + V_GHOSTS_NB - MS_INTERPOL_SHIFT; + indZ = indZ + V_GHOSTS_NB - MS_INTERPOL_SHIFT; + + v_line_indexY.s0 = indY * V_NB_I; + v_line_indexY.s1 = (indY + 1) * V_NB_I; + v_line_indexZ.s0 = indZ * V_NB_I * V_NB_II; + v_line_indexZ.s1 = (indZ + 1) * V_NB_I * V_NB_II; +#if MS_INTERPOL_SHIFT > 0 + v_line_indexY.s2 = (indY + 2) * V_NB_I; + v_line_indexY.s3 = (indY + 3) * V_NB_I; + v_line_indexZ.s2 = (indZ + 2) * V_NB_I * V_NB_II; + v_line_indexZ.s3 = (indZ + 3) * V_NB_I * V_NB_II; +#elif MS_INTERPOL_SHIFT > 1 + v_line_indexY.s4 = (indY + 4) * V_NB_I; + v_line_indexY.s5 = (indY + 5) * V_NB_I; + v_line_indexZ.s4 = (indZ + 4) * V_NB_I * V_NB_II; + v_line_indexZ.s5 = (indZ + 5) * V_NB_I * V_NB_II; +#endif + + for(i=gidX; i<V_NB_I; i+=(WI_NB)){ + gvelo_loc[noBC_id(i)] = wY.s0 * wZ.s0 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s0]; + gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s1 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s1]; + gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s0 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s0]; + gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s1 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s1]; +#if MS_INTERPOL_SHIFT > 0 + gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s2 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s2]; + gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s3 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s3]; + + gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s2 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s2]; + gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s3 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s3]; + + gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s0 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s0]; + gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s1 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s1]; + gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s2 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s2]; + gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s3 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s3]; + + gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s0 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s0]; + gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s1 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s1]; + gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s2 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s2]; + gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s3 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s3]; +#elif MS_INTERPOL_SHIFT > 1 + gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s4 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s4]; + gvelo_loc[noBC_id(i)] += wY.s0 * wZ.s5 * gvelo[i + v_line_indexY.s0 + v_line_indexZ.s5]; + + gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s4 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s4]; + gvelo_loc[noBC_id(i)] += wY.s1 * wZ.s5 * gvelo[i + v_line_indexY.s1 + v_line_indexZ.s5]; + + gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s4 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s4]; + gvelo_loc[noBC_id(i)] += wY.s2 * wZ.s5 * gvelo[i + v_line_indexY.s2 + v_line_indexZ.s5]; + + gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s4 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s4]; + gvelo_loc[noBC_id(i)] += wY.s3 * wZ.s5 * gvelo[i + v_line_indexY.s3 + v_line_indexZ.s5]; + + gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s0 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s0]; + gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s1 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s1]; + gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s2 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s2]; + gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s3 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s3]; + gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s4 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s4]; + gvelo_loc[noBC_id(i)] += wY.s4 * wZ.s5 * gvelo[i + v_line_indexY.s4 + v_line_indexZ.s5]; + + gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s0 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s0]; + gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s1 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s1]; + gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s2 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s2]; + gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s3 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s3]; + gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s4 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s4]; + gvelo_loc[noBC_id(i)] += wY.s5 * wZ.s5 * gvelo[i + v_line_indexY.s5 + v_line_indexZ.s5]; +#endif + } +#endif +#endif +} diff --git a/HySoP/hysop/gpu/cl_src/common.cl b/HySoP/hysop/gpu/cl_src/common.cl index d767cc798fca10cc8125dc545cef46db282b0974..d482af6bb012b35139ef6bcd2d119ecb09cbd55b 100644 --- a/HySoP/hysop/gpu/cl_src/common.cl +++ b/HySoP/hysop/gpu/cl_src/common.cl @@ -59,40 +59,69 @@ inline uint noBC_id(int id){ #define L6_6 11 #define L8_4 12 #define M8PRIME 13 -#define MS_LINEAR 14 -#define MS_L2_1 15 +#define LINEAR 14 /** - * Shift to left grid point + * Remeshing configuration */ #if FORMULA == L2_1 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_1 #elif FORMULA == L2_2 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_2 #elif FORMULA == L2_3 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_3 #elif FORMULA == L2_4 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_4 #elif FORMULA == L4_2 #define REMESH_SHIFT 2 +#define REMESH(greek) greek##_l4_2 #elif FORMULA == L4_3 #define REMESH_SHIFT 2 +#define REMESH(greek) greek##_l4_3 #elif FORMULA == L4_4 #define REMESH_SHIFT 2 +#define REMESH(greek) greek##_l4_4 #elif FORMULA == M8PRIME #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_M8p #elif FORMULA == L6_3 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_3 #elif FORMULA == L6_4 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_4 #elif FORMULA == L6_5 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_5 #elif FORMULA == L6_6 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_6 #elif FORMULA == L8_4 #define REMESH_SHIFT 4 +#define REMESH(greek) greek##_l8_4 #endif + +/** + * Multi-scale configuration + */ +#if MS_FORMULA == LINEAR +#define MS_INTERPOL_SHIFT 0 +// MS_INTERPOL not used +#elif MS_FORMULA == L2_1 +#define MS_INTERPOL_SHIFT 1 +#define MS_INTERPOL(greek) greek##_l2_1 +#elif MS_FORMULA == L4_2 +#define MS_INTERPOL_SHIFT 2 +#define MS_INTERPOL(greek) greek##_l4_2 +#elif MS_FORMULA == L4_4 +#define MS_INTERPOL_SHIFT 2 +#define MS_INTERPOL(greek) greek##_l4_4 +#endif diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic.cl index e24e8befb733026ecf7022de1580d96d33923af8..71cf2163d3487b3d6e48a886d8c5cd8f5985bc2c 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__ * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,45 +44,45 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w__NN__ = alpha(y.s__NN__); + w__NN__ = REMESH(alpha)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = beta(y.s__NN__); + w__NN__ = REMESH(beta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = gamma(y.s__NN__); + w__NN__ = REMESH(gamma)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = delta(y.s__NN__); + w__NN__ = REMESH(delta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w__NN__ = eta(y.s__NN__); + w__NN__ = REMESH(eta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = zeta(y.s__NN__); + w__NN__ = REMESH(zeta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); #endif #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w__NN__ = theta(y.s__NN__); + w__NN__ = REMESH(theta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = iota(y.s__NN__); + w__NN__ = REMESH(iota)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); @@ -89,12 +90,12 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w__NN__ = kappa(y.s__NN__); + w__NN__ = REMESH(kappa)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = mu(y.s__NN__); + w__NN__ = REMESH(mu)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl index 3ba843f5c6cb8cc11c549eb0db5dbc6d810b07ff..61b3cf0b91a0607d8e1409d39d9a4dd8be4d9eeb 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __R * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -45,57 +46,57 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #endif #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #endif #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #endif diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl index 822a3d0499db57b13b1f5d49997d237e7245b869..abb672668533bcc6ba6bd534f3c683b8baf3d230 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,38 +44,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); @@ -82,13 +83,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); @@ -96,13 +97,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl index 531ee957922d3ce989c5adbfdbc248b14af0aba3..c912769d9fb96fbce7284933bc8a46df117599a7 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,28 +44,28 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); @@ -72,14 +73,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); @@ -88,14 +89,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); @@ -104,14 +105,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl index d024d83847c63efce98a7f2e0bd6b11b464395a9..da8d9234bea16158cf5d215410ca0cec2cbc3fe1 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,38 +44,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w__NN__ = alpha(y.s__NN__); + w__NN__ = REMESH(alpha)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = beta(y.s__NN__); + w__NN__ = REMESH(beta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = gamma(y.s__NN__); + w__NN__ = REMESH(gamma)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = delta(y.s__NN__); + w__NN__ = REMESH(delta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w__NN__ = eta(y.s__NN__); + w__NN__ = REMESH(eta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = zeta(y.s__NN__); + w__NN__ = REMESH(zeta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); @@ -82,13 +83,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w__NN__ = theta(y.s__NN__); + w__NN__ = REMESH(theta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = iota(y.s__NN__); + w__NN__ = REMESH(iota)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); @@ -96,13 +97,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w__NN__ = kappa(y.s__NN__); + w__NN__ = REMESH(kappa)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = mu(y.s__NN__); + w__NN__ = REMESH(mu)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl index e7e4b5a320de4b13d0c91b960cc5190ddd8acbdf..ed3f4a397583984ef7b1b65a9ce6fa03b4e2e461 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,28 +44,28 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w__NN__ = alpha(y.s__NN__); + w__NN__ = REMESH(alpha)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = beta(y.s__NN__); + w__NN__ = REMESH(beta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = gamma(y.s__NN__); + w__NN__ = REMESH(gamma)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = delta(y.s__NN__); + w__NN__ = REMESH(delta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); @@ -72,14 +73,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w__NN__ = eta(y.s__NN__); + w__NN__ = REMESH(eta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = zeta(y.s__NN__); + w__NN__ = REMESH(zeta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); @@ -88,14 +89,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w__NN__ = theta(y.s__NN__); + w__NN__ = REMESH(theta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = iota(y.s__NN__); + w__NN__ = REMESH(iota)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); @@ -104,14 +105,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w__NN__ = kappa(y.s__NN__); + w__NN__ = REMESH(kappa)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = mu(y.s__NN__); + w__NN__ = REMESH(mu)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private.cl b/HySoP/hysop/gpu/cl_src/remeshing/private.cl index 816225c899b6270e959e982555215c1c0a653277..bce6c17900ac11af584af475985a2d83af6e8e39 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__ * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -45,38 +46,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -84,13 +85,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -98,13 +99,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl index 325edc0a05f193611660454fe522f9a6e5a78d86..e1afa6a3a34fe4771f1bf215c35c77982616f92d 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __R * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -45,38 +46,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); @@ -84,13 +85,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); @@ -98,13 +99,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl index 366163fa2983c54c7ad040ebf049f1b72617ee7a..fcb1e443c40e568e7ba9c665659f6936343509c1 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -44,38 +45,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -83,13 +84,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -97,13 +98,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl index 0c2c7fd0dfa18f6fe0e725d591ac283dc357796a..dabd8e5d8dc285696a5129933adf1da1b759f4b3 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -44,28 +45,28 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; @@ -73,14 +74,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; @@ -89,14 +90,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; @@ -105,14 +106,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights.cl index 7c15e1c4dc92da344f94677c6ea7742a856c81df..0b102c36964697ab7240e58a6dd401bf8750b4fc 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights.cl @@ -4,226 +4,197 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_1(float__N__ y){ return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_1(float__N__ y){ return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_1(float__N__ y){ return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_1(float__N__ y){ return ((y * y * (y - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_2(float__N__ y){ return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_2(float__N__ y){ return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_2(float__N__ y){ return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_2(float__N__ y){ return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_3(float__N__ y){ return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_3(float__N__ y){ return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_3(float__N__ y){ return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_3(float__N__ y){ return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_4(float__N__ y){ return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_4(float__N__ y){ return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_4(float__N__ y){ return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) / 2.0);} - -#elif FORMULA == L4_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_2(float__N__ y){ return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_2(float__N__ y){ return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_2(float__N__ y){ return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_2(float__N__ y){ return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_2(float__N__ y){ return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_2(float__N__ y){ return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_3(float__N__ y){ return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_3(float__N__ y){ return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_4(float__N__ y){ return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);} - -#elif FORMULA == M8PRIME - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) / 3360.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) / 3360.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) / 3360.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_M8p(float__N__ y){ return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) / 672.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) / 672.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) / 3360.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_M8p(float__N__ y){ return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) / 3360.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_M8p(float__N__ y){ return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_3(float__N__ y){ return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_3(float__N__ y){ return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_4(float__N__ y){ return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_5(float__N__ y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_5(float__N__ y){ return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_6(float__N__ y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_6(float__N__ y){ return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) / 720.0);} - -#elif FORMULA == L8_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) / 40320.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) / 40320.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) / 40320.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) / 40320.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l8_4(float__N__ y){ return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) / 40320.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) / 40320.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) / 40320.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) / 40320.0);} -inline float__N__ kappa(float__N__ y){ +inline float__N__ kappa_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) / 40320.0);} -inline float__N__ mu(float__N__ y){ +inline float__N__ mu_l8_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) / 40320.0);} diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl index 04fb0093718121cf2c41e06d791631e541aa229d..35f45164f79d1c092637a5486cd6139a83bf29e8 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl @@ -4,224 +4,198 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_1(float__N__ y){ return (y*fma(y,fma(y,-1.0, 2.0), - 1.0)/2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_1(float__N__ y){ return (fma(y*y, fma(y, 3.0, -5.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_1(float__N__ y){ return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_1(float__N__ y){ return ((y * y * fma(1.0, y, - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_2(float__N__ y){ return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_2(float__N__ y){ return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_3(float__N__ y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_3(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_3(float__N__ y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_3(float__N__ y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_4(float__N__ y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_4(float__N__ y){ return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_4(float__N__ y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) / 2.0);} -#elif FORMULA == L4_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_2(float__N__ y){ return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_2(float__N__ y){ return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_3(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_3(float__N__ y){ return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_4(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) / 24.0);} -#elif FORMULA == M8PRIME - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) / 3360.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) / 3360.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) / 3360.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_M8p(float__N__ y){ return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) / 672.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) / 672.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) / 3360.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_M8p(float__N__ y){ return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) / 3360.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_M8p(float__N__ y){ return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_3(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_3(float__N__ y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_4(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_5(float__N__ y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_5(float__N__ y){ return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_6(float__N__ y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_6(float__N__ y){ return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) / 720.0);} -#elif FORMULA == L8_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) / 40320.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) / 40320.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) / 40320.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) / 40320.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l8_4(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) / 40320.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) / 40320.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) / 40320.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) / 40320.0);} -inline float__N__ kappa(float__N__ y){ +inline float__N__ kappa_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) / 40320.0);} -inline float__N__ mu(float__N__ y){ +inline float__N__ mu_l8_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) / 40320.0);} diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl index 133ce6db0fc753ad95c615ec61adbc58a0c5e77a..e023dc5e87c75e83bb22efed1422e416440daa1b 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl @@ -4,228 +4,198 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float alpha(float y){ +inline float alpha_l2_1(float y){ return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_1(float y){ return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_1(float y){ return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_1(float y){ return ((y * y * (y - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float alpha(float y){ +inline float alpha_l2_2(float y){ return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_2(float y){ return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_2(float y){ return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_2(float y){ return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float alpha(float y){ +inline float alpha_l2_3(float y){ return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_3(float y){ return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_3(float y){ return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_3(float y){ return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float alpha(float y){ +inline float alpha_l2_4(float y){ return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_4(float y){ return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_4(float y){ return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) / 2.0);} - -#elif FORMULA == L4_2 - -inline float alpha(float y){ +inline float alpha_l4_2(float y){ return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_2(float y){ return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_2(float y){ return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_2(float y){ return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_2(float y){ return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_2(float y){ return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float alpha(float y){ +inline float alpha_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_3(float y){ return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_3(float y){ return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float alpha(float y){ +inline float alpha_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_4(float y){ return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);} - -#elif FORMULA == M8PRIME - -inline float alpha(float y){ +inline float alpha_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) / 3360.0);} -inline float beta(float y){ +inline float beta_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) / 3360.0);} -inline float gamma(float y){ +inline float gamma_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) / 3360.0);} -inline float delta(float y){ +inline float delta_M8p(float y){ return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) / 672.0);} -inline float eta(float y){ +inline float eta_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) / 672.0);} -inline float zeta(float y){ +inline float zeta_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) / 3360.0);} -inline float theta(float y){ +inline float theta_M8p(float y){ return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) / 3360.0);} -inline float iota(float y){ +inline float iota_M8p(float y){ return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float alpha(float y){ +inline float alpha_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_3(float y){ return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_3(float y){ return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float alpha(float y){ +inline float alpha_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_4(float y){ return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float alpha(float y){ +inline float alpha_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_5(float y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_5(float y){ return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float alpha(float y){ +inline float alpha_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_6(float y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_6(float y){ return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) / 720.0);} - -#elif FORMULA == L8_4 - -inline float alpha(float y){ +inline float alpha_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) / 40320.0);} -inline float beta(float y){ +inline float beta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) / 40320.0);} -inline float gamma(float y){ +inline float gamma_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) / 40320.0);} -inline float delta(float y){ +inline float delta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) / 40320.0);} -inline float eta(float y){ +inline float eta_l8_4(float y){ return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) / 40320.0);} -inline float zeta(float y){ +inline float zeta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) / 40320.0);} -inline float theta(float y){ +inline float theta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) / 40320.0);} -inline float iota(float y){ +inline float iota_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) / 40320.0);} -inline float kappa(float y){ +inline float kappa_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) / 40320.0);} -inline float mu(float y){ +inline float mu_l8_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) / 40320.0);} - #endif diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl index e218b66fd571eb645a9eba348f7ed8173fc52ab5..e1dc7d29e23baf3415ddc1801bf9ca3a0f846aa1 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl @@ -4,225 +4,196 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float alpha(float y){ +inline float alpha_l2_1(float y){ return (y*fma(y,fma(y,-1.0, 2.0), - 1.0)/2.0);} -inline float beta(float y){ +inline float beta_l2_1(float y){ return (fma(y*y, fma(y, 3.0, -5.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_1(float y){ return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_1(float y){ return ((y * y * fma(1.0, y, - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float alpha(float y){ +inline float alpha_l2_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_2(float y){ return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_2(float y){ return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float alpha(float y){ +inline float alpha_l2_3(float y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_3(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_3(float y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_3(float y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float alpha(float y){ +inline float alpha_l2_4(float y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_4(float y){ return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_4(float y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) / 2.0);} -#elif FORMULA == L4_2 - -inline float alpha(float y){ +inline float alpha_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_2(float y){ return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_2(float y){ return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float alpha(float y){ +inline float alpha_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_3(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_3(float y){ return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float alpha(float y){ +inline float alpha_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_4(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) / 24.0);} -#elif FORMULA == M8PRIME - -inline float alpha(float y){ +inline float alpha_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) / 3360.0);} -inline float beta(float y){ +inline float beta_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) / 3360.0);} -inline float gamma(float y){ +inline float gamma_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) / 3360.0);} -inline float delta(float y){ +inline float delta_M8p(float y){ return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) / 672.0);} -inline float eta(float y){ +inline float eta_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) / 672.0);} -inline float zeta(float y){ +inline float zeta_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) / 3360.0);} -inline float theta(float y){ +inline float theta_M8p(float y){ return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) / 3360.0);} -inline float iota(float y){ +inline float iota_M8p(float y){ return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float alpha(float y){ +inline float alpha_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_3(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_3(float y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float alpha(float y){ +inline float alpha_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_4(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float alpha(float y){ +inline float alpha_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_5(float y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_5(float y){ return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float alpha(float y){ +inline float alpha_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_6(float y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_6(float y){ return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) / 720.0);} - -#elif FORMULA == L8_4 - -inline float alpha(float y){ +inline float alpha_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) / 40320.0);} -inline float beta(float y){ +inline float beta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) / 40320.0);} -inline float gamma(float y){ +inline float gamma_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) / 40320.0);} -inline float delta(float y){ +inline float delta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) / 40320.0);} -inline float eta(float y){ +inline float eta_l8_4(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) / 40320.0);} -inline float zeta(float y){ +inline float zeta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) / 40320.0);} -inline float theta(float y){ +inline float theta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) / 40320.0);} -inline float iota(float y){ +inline float iota_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) / 40320.0);} -inline float kappa(float y){ +inline float kappa_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) / 40320.0);} -inline float mu(float y){ +inline float mu_l8_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) / 40320.0);} - -#endif diff --git a/HySoP/hysop/gpu/gpu_particle_advection.py b/HySoP/hysop/gpu/gpu_particle_advection.py index d1c756ddb070527dacbfa5867ea54340d2001634..3ec20fe332291e2d1135604a60616c1888a018d5 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection.py +++ b/HySoP/hysop/gpu/gpu_particle_advection.py @@ -37,11 +37,7 @@ class GPUParticleAdvection(ParticleAdvection): part_position=None, part_advectedFields=None, platform_id=0, device_id=0, device_type='gpu', - method={TimeIntegrator: RK2, - Interpolation: Linear, - Remesh: L2_1, - Support: 'gpu_2k', - Splitting: 'o2'}, + method=None, src=None, precision=PARMES_REAL, batch_nb=None, isMultiScale=False): """ @@ -65,6 +61,12 @@ class GPUParticleAdvection(ParticleAdvection): @param splittingConfig : Directional splitting configuration (parmepy.numerics.splitting.Splitting.__init__) """ + if method is None: + method = {TimeIntegrator: RK2, + Interpolation: Linear, + Remesh: L2_1, + Support: 'gpu_2k', + Splitting: 'o2'} ParticleAdvection.__init__(self, velocity, advectedFields, d, part_position, part_advectedFields, method, isMultiScale=isMultiScale) diff --git a/HySoP/hysop/gpu/gpu_particle_advection_1k.py b/HySoP/hysop/gpu/gpu_particle_advection_1k.py index 4ef0ac522a963caccd4d6fb7516e1393d1570243..94c3a0bf57cac4ffd1cf346b7268c2116a1e6677 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_1k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_1k.py @@ -26,11 +26,7 @@ class GPUParticleAdvection1k(GPUParticleAdvection): part_position=None, part_advectedFields=None, platform_id=0, device_id=0, device_type='gpu', - method={TimeIntegrator: RK2, - Interpolation: Linear, - Remesh: L2_1, - Support: 'gpu_1k', - Splitting: 'o2'}, + method=None, src=None, precision=PARMES_REAL, batch_nb=None, isMultiScale=False): """ @@ -56,6 +52,12 @@ class GPUParticleAdvection1k(GPUParticleAdvection): @param isMultiScale : Flag to specify if the velocity and advected fields are on different grids. """ + if method is None: + method = {TimeIntegrator: RK2, + Interpolation: Linear, + Remesh: L2_1, + Support: 'gpu_1k', + Splitting: 'o2'} GPUParticleAdvection.__init__(self, velocity, advectedFields, d, part_position, part_advectedFields, platform_id, device_id, @@ -151,7 +153,7 @@ class GPUParticleAdvection1k(GPUParticleAdvection): WINb = lwi[0] build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() if self._isMultiScale: - build_options += " -D MS_FORMULA=MS_" + build_options += " -D MS_FORMULA=" build_options += self.method[MultiScale].__name__.upper() if is_noBC: build_options += " -D WITH_NOBC=1" @@ -159,9 +161,11 @@ class GPUParticleAdvection1k(GPUParticleAdvection): build_options += " -D PART_NB_PER_WI=" build_options += str(self.resol_dir[0] / WINb) build_options += self._constants[self.dir] + print build_options ## Build code src = [s.replace('RKN', self.method[TimeIntegrator].__name__.lower()) for s in src] + print src prg = self.cl_env.build_src( src, build_options, vec, nb_remesh_components=self.part_advectedFields[0].nbComponents) diff --git a/HySoP/hysop/gpu/gpu_particle_advection_2k.py b/HySoP/hysop/gpu/gpu_particle_advection_2k.py index c004f741f218b5ae0e85813e79803820c5db4762..ade1d26d081ce1afd7eff59798c011ba52e2ebf6 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_2k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_2k.py @@ -25,11 +25,7 @@ class GPUParticleAdvection2k(GPUParticleAdvection): part_position=None, part_advectedFields=None, platform_id=0, device_id=0, device_type='gpu', - method={TimeIntegrator: RK2, - Interpolation: Linear, - Remesh: L2_1, - Support: 'gpu_2k', - Splitting: 'o2'}, + method=None, src=None, precision=PARMES_REAL, batch_nb=None, isMultiScale=False): """ @@ -55,6 +51,12 @@ class GPUParticleAdvection2k(GPUParticleAdvection): @param isMultiScale : Flag to specify if the velocity and advected fields are on different grids. """ + if method is None: + method = {TimeIntegrator: RK2, + Interpolation: Linear, + Remesh: L2_1, + Support: 'gpu_2k', + Splitting: 'o2'} GPUParticleAdvection.__init__(self, velocity, advectedFields, d, part_position, part_advectedFields, platform_id, device_id, @@ -169,7 +171,7 @@ class GPUParticleAdvection2k(GPUParticleAdvection): build_options += " -D WITH_NOBC=1" build_options += " -D WI_NB=" + str(WINb) if self._isMultiScale: - build_options += " -D MS_FORMULA=MS_" + build_options += " -D MS_FORMULA=" build_options += self.method[MultiScale].__name__.upper() build_options += self._constants[self.dir] ## Build code diff --git a/HySoP/hysop/operator/advection.py b/HySoP/hysop/operator/advection.py index af1d91033db0fdbe59e35bdbc7de1e2153c496d7..ee976271b076ae55be77768ce226a537abc93537 100644 --- a/HySoP/hysop/operator/advection.py +++ b/HySoP/hysop/operator/advection.py @@ -50,12 +50,7 @@ class Advection(Operator): @debug def __init__(self, velocity, advectedFields, resolutions, - method={TimeIntegrator: RK2, - Interpolation: Linear, - Remesh: L2_1, - Support: '', - Splitting: 'o2'}, - topo=None, ghosts=None, **other_config): + method=None, topo=None, ghosts=None, **other_config): """ Create a Transport operator from given variables velocity and scalar. @@ -68,6 +63,13 @@ class Advection(Operator): @param other_config : Method specific parameters @param topo : a predefined topology to discretize variables """ + if method is None: + method = {TimeIntegrator: RK2, + Interpolation: Linear, + Remesh: L2_1, + Support: '', + Splitting: 'o2', + MultiScale: L2_1} v = [velocity] if isinstance(advectedFields, list): self.advectedFields = advectedFields diff --git a/HySoP/hysop/operator/advection_dir.py b/HySoP/hysop/operator/advection_dir.py index 79581f7998eeac243d5c1f40ee43d870dabbe646..dbf6d1c958327cc8cc5478d83d7cc964d109572c 100644 --- a/HySoP/hysop/operator/advection_dir.py +++ b/HySoP/hysop/operator/advection_dir.py @@ -8,7 +8,7 @@ from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh, \ Support, Splitting, MultiScale from parmepy.numerics.integrators.runge_kutta2 import RK2 from parmepy.numerics.interpolation import Linear -from parmepy.numerics.remeshing import L2_1 +from parmepy.numerics.remeshing import L2_1, L4_2, L4_4 from parmepy.operator.continuous import Operator from parmepy.mpi import main_size from parmepy.tools.timers import Timer @@ -31,13 +31,16 @@ class AdvectionDir(Operator): @debug def __init__(self, velocity, advectedFields, d, particle_fields, particle_positions, resolutions, - method={TimeIntegrator: RK2, - Interpolation: Linear, - Remesh: L2_1, - Support: '', - Splitting: 'o2'}, + method=None, topo=None, ghosts=None, isMultiScale=False, **other_config): + if method is None: + method = {TimeIntegrator: RK2, + Interpolation: Linear, + Remesh: L2_1, + Support: '', + Splitting: 'o2', + MultiScale: L2_1} v = [velocity] [v.append(f) for f in advectedFields] Operator.__init__(self, v, method, topo=topo, @@ -54,16 +57,22 @@ class AdvectionDir(Operator): ## resolutions[velocity] may return [32,32,32]. self.resolutions = resolutions self._isMultiScale = isMultiScale - self._v_ghosts = [0, ]*self.domain.dimension + self._v_ghosts = [0, ] * self.domain.dimension if self._isMultiScale: - self._v_ghosts = [2, ]*self.domain.dimension - if MultiScale in method.keys(): - if method[MultiScale] == Linear: - self._v_ghosts = [1, ]*self.domain.dimension - else: - assert method[MultiScale] == L2_1 + if self.method[Support].find('gpu') < 0: + raise ValueError("Multiscale advection is not supported in " + "Python yet, user should use Scales or GPU.") + if not MultiScale in method.keys(): + method[MultiScale] == L2_1 + if method[MultiScale] == Linear: + self._v_ghosts = [1, ] * self.domain.dimension + elif method[MultiScale] == L2_1: + self._v_ghosts = [2, ] * self.domain.dimension + elif method[MultiScale] == L4_2 or \ + method[MultiScale] == L4_4: + self._v_ghosts = [3, ] * self.domain.dimension else: - assert method[MultiScale] == L2_1 + raise ValueError("Unknown multiscale method") ## Extra parameters (depend on the method) self.config = other_config diff --git a/HySoP/hysop/operator/discrete/particle_advection.py b/HySoP/hysop/operator/discrete/particle_advection.py index f79b8da1f7ee0d54d18f43d9dbc6753b3a2f0109..3f4261b8e0a2098f1d80b05b0aa52f469691b6e9 100644 --- a/HySoP/hysop/operator/discrete/particle_advection.py +++ b/HySoP/hysop/operator/discrete/particle_advection.py @@ -6,7 +6,7 @@ Advection solver, particular method, pure-python version. """ from parmepy.constants import debug, WITH_GUESS, PARMES_INDEX from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh, \ - Support, Splitting + Support, Splitting, MultiScale from parmepy.numerics.integrators.runge_kutta2 import RK2 from parmepy.numerics.interpolation import Linear from parmepy.numerics.remeshing import L2_1 @@ -26,11 +26,7 @@ class ParticleAdvection(DiscreteOperator): @debug def __init__(self, velocity, advectedFields, d, part_position=None, part_advectedFields=None, - method={TimeIntegrator: RK2, - Interpolation: Linear, - Remesh: L2_1, - Support: '', - Splitting: 'o2'}, + method=None, isMultiScale=False): """ Constructor. @@ -56,6 +52,13 @@ class ParticleAdvection(DiscreteOperator): - 'l8_4' : Labmda8,4 : 10 point formula, C4 regularity - 'm8prime' : M8prime formula """ + if method is None: + method={TimeIntegrator: RK2, + Interpolation: Linear, + Remesh: L2_1, + Support: '', + Splitting: 'o2', + MultiScale: L2_1} ## Advection velocity self.velocity = velocity variables = [self.velocity] @@ -149,7 +152,8 @@ class ParticleAdvection(DiscreteOperator): self._synchroGhosts = None if self._isMultiScale: - self._synchroGhosts = UpdateGhosts(self.velocity.topology, self.velocity.nbComponents) + self._synchroGhosts = UpdateGhosts(self.velocity.topology, + self.velocity.nbComponents) self._isUpToDate = True diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py index bf48da5445c93fb0efb2f57b3c93f2d669902c59..da18b50790fe62417fe6bf43b5cc6d59cae6a546 100644 --- a/HySoP/hysop/problem/problem.py +++ b/HySoP/hysop/problem/problem.py @@ -21,9 +21,6 @@ class Problem(object): To solve the problem, a loop over time-steps is launched. A step consists in calling the apply method of each operators and monitors.\n To finish, a finalize method is called.\ - - \remark: In case of GPU real-time rendering (i.e. use of an - OpenGLRendering object), The loop over time-steps is passed to Qt4 """ @debug @@ -31,7 +28,7 @@ class Problem(object): return object.__new__(cls, *args, **kw) @debug - def __init__(self, operators, simulation, monitors=[], + def __init__(self, operators, simulation, monitors=None, dumpFreq=100, name=None): """ Create a transport problem instance. @@ -48,6 +45,8 @@ class Problem(object): self.name = name ## Problem operators self.operators = operators + if monitors is None: + monitors = [] for m in monitors: self.operators.append(m) ## Computes time step and manage iterations @@ -124,13 +123,6 @@ class Problem(object): if __VERBOSE__ and main_rank == 0: print "====" - for op in self.operators: - try: - if op.isGLRender: - self.glRenderer = op - op.setMainLoop(self) - except AttributeError: - pass def pre_setUp(self): """ @@ -186,24 +178,19 @@ class Problem(object): """ if main_rank == 0: print "\n\n Start solving ..." - try: - ## Pass the main loop to the OpenGL viewer - if not self.glRenderer is None: - self.glRenderer.startMainLoop() - except AttributeError: - while not self.simulation.isOver: - self.simulation.printState() - - for op in self.operators: - if __VERBOSE__: - print main_rank, op.__class__.__name__ - op.apply(self.simulation) - - testdump = \ - self.simulation.currentIteration % self.dumpFreq is 0 - self.simulation.advance() - if self._doDump and testdump: - self.dump() + while not self.simulation.isOver: + self.simulation.printState() + + for op in self.operators: + if __VERBOSE__: + print main_rank, op.__class__.__name__ + op.apply(self.simulation) + + testdump = \ + self.simulation.currentIteration % self.dumpFreq is 0 + self.simulation.advance() + if self._doDump and testdump: + self.dump() @debug def finalize(self):