diff --git a/HySoP/hysop/gpu/cl_src/common.cl b/HySoP/hysop/gpu/cl_src/common.cl index d767cc798fca10cc8125dc545cef46db282b0974..d482af6bb012b35139ef6bcd2d119ecb09cbd55b 100644 --- a/HySoP/hysop/gpu/cl_src/common.cl +++ b/HySoP/hysop/gpu/cl_src/common.cl @@ -59,40 +59,69 @@ inline uint noBC_id(int id){ #define L6_6 11 #define L8_4 12 #define M8PRIME 13 -#define MS_LINEAR 14 -#define MS_L2_1 15 +#define LINEAR 14 /** - * Shift to left grid point + * Remeshing configuration */ #if FORMULA == L2_1 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_1 #elif FORMULA == L2_2 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_2 #elif FORMULA == L2_3 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_3 #elif FORMULA == L2_4 #define REMESH_SHIFT 1 +#define REMESH(greek) greek##_l2_4 #elif FORMULA == L4_2 #define REMESH_SHIFT 2 +#define REMESH(greek) greek##_l4_2 #elif FORMULA == L4_3 #define REMESH_SHIFT 2 +#define REMESH(greek) greek##_l4_3 #elif FORMULA == L4_4 #define REMESH_SHIFT 2 +#define REMESH(greek) greek##_l4_4 #elif FORMULA == M8PRIME #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_M8p #elif FORMULA == L6_3 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_3 #elif FORMULA == L6_4 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_4 #elif FORMULA == L6_5 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_5 #elif FORMULA == L6_6 #define REMESH_SHIFT 3 +#define REMESH(greek) greek##_l6_6 #elif FORMULA == L8_4 #define REMESH_SHIFT 4 +#define REMESH(greek) greek##_l8_4 #endif + +/** + * Multi-scale configuration + */ +#if MS_FORMULA == LINEAR +#define MS_INTERPOL_SHIFT 0 +// MS_INTERPOL not used +#elif MS_FORMULA == L2_1 +#define MS_INTERPOL_SHIFT 1 +#define MS_INTERPOL(greek) greek##_l2_1 +#elif MS_FORMULA == L4_2 +#define MS_INTERPOL_SHIFT 2 +#define MS_INTERPOL(greek) greek##_l4_2 +#elif MS_FORMULA == L4_4 +#define MS_INTERPOL_SHIFT 2 +#define MS_INTERPOL(greek) greek##_l4_4 +#endif diff --git 
a/HySoP/hysop/gpu/cl_src/remeshing/basic.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic.cl index e24e8befb733026ecf7022de1580d96d33923af8..71cf2163d3487b3d6e48a886d8c5cd8f5985bc2c 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, float__ * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,45 +44,45 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w__NN__ = alpha(y.s__NN__); + w__NN__ = REMESH(alpha)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = beta(y.s__NN__); + w__NN__ = REMESH(beta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = gamma(y.s__NN__); + w__NN__ = REMESH(gamma)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = delta(y.s__NN__); + w__NN__ = REMESH(delta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w__NN__ = eta(y.s__NN__); + w__NN__ = REMESH(eta)(y.s__NN__); 
__RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = zeta(y.s__NN__); + w__NN__ = REMESH(zeta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); #endif #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w__NN__ = theta(y.s__NN__); + w__NN__ = REMESH(theta)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = iota(y.s__NN__); + w__NN__ = REMESH(iota)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); @@ -89,12 +90,12 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w__NN__ = kappa(y.s__NN__); + w__NN__ = REMESH(kappa)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = mu(y.s__NN__); + w__NN__ = REMESH(mu)(y.s__NN__); __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += (w__NN__ * s__ID__.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl index 3ba843f5c6cb8cc11c549eb0db5dbc6d810b07ff..61b3cf0b91a0607d8e1409d39d9a4dd8be4d9eeb 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __R * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. 
+ * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -45,57 +46,57 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #endif #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #endif #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); __RCOMP_Igscal_loc__ID__[noBC_id(index)] += (w * s__ID__); barrier(CLK_LOCAL_MEM_FENCE); #endif diff --git 
a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl index 822a3d0499db57b13b1f5d49997d237e7245b869..abb672668533bcc6ba6bd534f3c683b8baf3d230 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_2d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,38 +44,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 
1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); @@ -82,13 +83,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); @@ -96,13 +97,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl index 531ee957922d3ce989c5adbfdbc248b14af0aba3..c912769d9fb96fbce7284933bc8a46df117599a7 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_noVec_vector_3d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. 
* @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,28 +44,28 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); @@ -72,14 +73,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); @@ -88,14 +89,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index)] += (w * 
v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); @@ -104,14 +105,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index)] += (w * v_X); gvec_Y_loc[noBC_id(index)] += (w * v_Y); gvec_Z_loc[noBC_id(index)] += (w * v_Z); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl index d024d83847c63efce98a7f2e0bd6b11b464395a9..da8d9234bea16158cf5d215410ca0cec2cbc3fe1 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_2d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. 
+ * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,38 +44,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w__NN__ = alpha(y.s__NN__); + w__NN__ = REMESH(alpha)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = beta(y.s__NN__); + w__NN__ = REMESH(beta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = gamma(y.s__NN__); + w__NN__ = REMESH(gamma)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = delta(y.s__NN__); + w__NN__ = REMESH(delta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w__NN__ = eta(y.s__NN__); + w__NN__ = REMESH(eta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = zeta(y.s__NN__); + w__NN__ = REMESH(zeta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); @@ -82,13 +83,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w__NN__ = theta(y.s__NN__); + 
w__NN__ = REMESH(theta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = iota(y.s__NN__); + w__NN__ = REMESH(iota)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); @@ -96,13 +97,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w__NN__ = kappa(y.s__NN__); + w__NN__ = REMESH(kappa)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = mu(y.s__NN__); + w__NN__ = REMESH(mu)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl index e7e4b5a320de4b13d0c91b960cc5190ddd8acbdf..ed3f4a397583984ef7b1b65a9ce6fa03b4e2e461 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/basic_vector_3d.cl @@ -26,6 +26,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. 
+ * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -43,28 +44,28 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w__NN__ = alpha(y.s__NN__); + w__NN__ = REMESH(alpha)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = beta(y.s__NN__); + w__NN__ = REMESH(beta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = gamma(y.s__NN__); + w__NN__ = REMESH(gamma)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = delta(y.s__NN__); + w__NN__ = REMESH(delta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); @@ -72,14 +73,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w__NN__ = eta(y.s__NN__); + w__NN__ = REMESH(eta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = 
zeta(y.s__NN__); + w__NN__ = REMESH(zeta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); @@ -88,14 +89,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w__NN__ = theta(y.s__NN__); + w__NN__ = REMESH(theta)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = iota(y.s__NN__); + w__NN__ = REMESH(iota)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); @@ -104,14 +105,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w__NN__ = kappa(y.s__NN__); + w__NN__ = REMESH(kappa)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w__NN__ = mu(y.s__NN__); + w__NN__ = REMESH(mu)(y.s__NN__); gvec_X_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_X.s__NN__); gvec_Y_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Y.s__NN__); gvec_Z_loc[noBC_id(index.s__NN__)] += (w__NN__ * v_Z.s__NN__); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private.cl b/HySoP/hysop/gpu/cl_src/remeshing/private.cl index 816225c899b6270e959e982555215c1c0a653277..bce6c17900ac11af584af475985a2d83af6e8e39 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float__N__ s__ID__, 
float__ * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -45,38 +46,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -84,13 +85,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % 
NB_I; - w = theta(y); + w = REMESH(theta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -98,13 +99,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index.s__NN__)] += temp__ID__.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl index 325edc0a05f193611660454fe522f9a6e5a78d86..e1afa6a3a34fe4771f1bf215c35c77982616f92d 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private_noVec.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, __RCOMP_P float s__ID__, float p, __R * @remark <code>__RCOMP_I</code> flag is for instruction expansion for the different remeshed components. * @remark <code>__RCOMP_P</code> flag is for function parameter expansion for the different remeshed components. * @remark <code>__ID__</code> is replaced by the remeshed component id in an expansion. 
+ * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -45,38 +46,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); @@ -84,13 +85,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); @@ -98,13 +99,13 @@ void remesh(uint i, float dx, float invdx, #if 
REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); __RCOMP_Itemp__ID__ = w * s__ID__; __RCOMP_Igscal_loc__ID__[noBC_id(index)] += temp__ID__; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl index 366163fa2983c54c7ad040ebf049f1b72617ee7a..fcb1e443c40e568e7ba9c665659f6936343509c1 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_2d.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -44,38 +45,38 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); 
gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -83,13 +84,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); @@ -97,13 +98,13 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); diff --git a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl 
b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl index 0c2c7fd0dfa18f6fe0e725d591ac283dc357796a..dabd8e5d8dc285696a5129933adf1da1b759f4b3 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/private_vector_3d.cl @@ -27,6 +27,7 @@ void remesh(uint i, float dx, float invdx, * @remark <code>__N__</code> is expanded at compilation time by vector width. * @remark <code>__NN__</code> is expanded at compilation time by a sequence of integer for each vector component. * @remark <code>FORMULA</code> : remeshing formula flag {<code>M4PRIME</code>, <code>M6PRIME</code>, <code>M8PRIME</code>, <code>L6STAR</code>} + * @remark <code>REMESH</code> is a function-like macro expanding to the proper remeshing formula (i.e.: <code>REMESH(alpha)</code> -> <code>alpha_l2_1</code>) * @see parmepy.gpu.tools.parse_file * @see parmepy.gpu.cl_src.common */ @@ -44,28 +45,28 @@ void remesh(uint i, float dx, float invdx, index = convert_uint__N__((ind - REMESH_SHIFT + NB_I) % NB_I); - w = alpha(y); + w = REMESH(alpha)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = beta(y); + w = REMESH(beta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = gamma(y); + w = REMESH(gamma)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = delta(y); + w = REMESH(delta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; 
gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; @@ -73,14 +74,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 1 index = (index + 1) % NB_I; - w = eta(y); + w = REMESH(eta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = zeta(y); + w = REMESH(zeta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; @@ -89,14 +90,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 2 index = (index + 1) % NB_I; - w = theta(y); + w = REMESH(theta)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = iota(y); + w = REMESH(iota)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; @@ -105,14 +106,14 @@ void remesh(uint i, float dx, float invdx, #if REMESH_SHIFT > 3 index = (index + 1) % NB_I; - w = kappa(y); + w = REMESH(kappa)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; barrier(CLK_LOCAL_MEM_FENCE); index = (index + 1) % NB_I; - w = mu(y); + w = REMESH(mu)(y); gvec_X_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_X.s__NN__; gvec_Y_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Y.s__NN__; gvec_Z_loc[noBC_id(index.s__NN__)] += w.s__NN__ * v_Z.s__NN__; 
diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights.cl index 7c15e1c4dc92da344f94677c6ea7742a856c81df..0b102c36964697ab7240e58a6dd401bf8750b4fc 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights.cl @@ -4,226 +4,197 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_1(float__N__ y){ return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_1(float__N__ y){ return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_1(float__N__ y){ return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_1(float__N__ y){ return ((y * y * (y - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_2(float__N__ y){ return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_2(float__N__ y){ return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_2(float__N__ y){ return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_2(float__N__ y){ return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_3(float__N__ y){ return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_3(float__N__ y){ return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ 
gamma_l2_3(float__N__ y){ return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_3(float__N__ y){ return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_4(float__N__ y){ return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_4(float__N__ y){ return ((y * y * (y * y * y * (y * (y * (y * (-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_4(float__N__ y){ return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) / 2.0);} - -#elif FORMULA == L4_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_2(float__N__ y){ return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_2(float__N__ y){ return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_2(float__N__ y){ return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_2(float__N__ y){ return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_2(float__N__ y){ return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ 
zeta_l4_2(float__N__ y){ return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_3(float__N__ y){ return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_3(float__N__ y){ return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_4(float__N__ y){ return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_4(float__N__ y){ return 
((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);} - -#elif FORMULA == M8PRIME - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) / 3360.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) / 3360.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 910.0) + 2765.0) - 2520.0) + 255.0) / 3360.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_M8p(float__N__ y){ return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) / 672.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) / 672.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_M8p(float__N__ y){ return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) / 3360.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_M8p(float__N__ y){ return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) / 3360.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_M8p(float__N__ y){ return ((y * y * y * y * y * (y * (10.0 * y - 
49.0) + 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_3(float__N__ y){ return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_3(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_3(float__N__ y){ return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * 
(-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_4(float__N__ y){ return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} 
-inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_5(float__N__ y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_5(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_5(float__N__ y){ return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * 
(-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_6(float__N__ y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_6(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_6(float__N__ y){ return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 34869.0) - 6587.0)) / 720.0);} - -#elif FORMULA == L8_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l8_4(float__N__ y){ return ((y * (y * 
(y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) / 40320.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) / 40320.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) / 40320.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) / 40320.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l8_4(float__N__ y){ return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) / 40320.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) / 40320.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) / 40320.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) / 40320.0);} -inline float__N__ kappa(float__N__ y){ +inline float__N__ kappa_l8_4(float__N__ y){ return ((y * (y * (y * (y * (y * (y * (y 
* (y * (-32121.0 * y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) / 40320.0);} -inline float__N__ mu(float__N__ y){ +inline float__N__ mu_l8_4(float__N__ y){ return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) / 40320.0);} diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl index 04fb0093718121cf2c41e06d791631e541aa229d..35f45164f79d1c092637a5486cd6139a83bf29e8 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_builtin.cl @@ -4,224 +4,198 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_1(float__N__ y){ return (y*fma(y,fma(y,-1.0, 2.0), - 1.0)/2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_1(float__N__ y){ return (fma(y*y, fma(y, 3.0, -5.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_1(float__N__ y){ return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_1(float__N__ y){ return ((y * y * fma(1.0, y, - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_2(float__N__ y){ return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_2(float__N__ y){ return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float__N__ alpha(float__N__ y){ +inline 
float__N__ alpha_l2_3(float__N__ y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_3(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_3(float__N__ y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_3(float__N__ y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l2_4(float__N__ y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) / 2.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l2_4(float__N__ y){ return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) / 2.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l2_4(float__N__ y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) / 2.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l2_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) / 2.0);} -#elif FORMULA == L4_2 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_2(float__N__ y){ return (fma(y * y, fma(y, fma(y, 
fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_2(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_2(float__N__ y){ return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_3(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_3(float__N__ y){ return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) / 24.0);} -inline 
float__N__ beta(float__N__ y){ +inline float__N__ beta_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l4_4(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) / 24.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), -4.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l4_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l4_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) / 24.0);} -#elif FORMULA == M8PRIME - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) / 3360.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) / 3360.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) / 3360.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_M8p(float__N__ y){ return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) / 672.0);} -inline float__N__ 
eta(float__N__ y){ +inline float__N__ eta_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) / 672.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_M8p(float__N__ y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) / 3360.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_M8p(float__N__ y){ return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) / 3360.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_M8p(float__N__ y){ return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_3(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, 
-6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_3(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_3(float__N__ y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_4(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ 
theta_l6_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_5(float__N__ y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 
164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_5(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_5(float__N__ y){ return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, -491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l6_6(float__N__ y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) / 720.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 
fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l6_6(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l6_6(float__N__ y){ return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) / 720.0);} -#elif FORMULA == L8_4 - -inline float__N__ alpha(float__N__ y){ +inline float__N__ alpha_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) / 40320.0);} -inline float__N__ beta(float__N__ y){ +inline float__N__ beta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), -1536.0)) / 40320.0);} -inline float__N__ gamma(float__N__ y){ +inline float__N__ gamma_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) / 40320.0);} -inline float__N__ delta(float__N__ y){ +inline float__N__ delta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 
2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) / 40320.0);} -inline float__N__ eta(float__N__ y){ +inline float__N__ eta_l8_4(float__N__ y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) / 40320.0);} -inline float__N__ zeta(float__N__ y){ +inline float__N__ zeta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) / 40320.0);} -inline float__N__ theta(float__N__ y){ +inline float__N__ theta_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) / 40320.0);} -inline float__N__ iota(float__N__ y){ +inline float__N__ iota_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) / 40320.0);} -inline float__N__ kappa(float__N__ y){ +inline float__N__ kappa_l8_4(float__N__ y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) / 40320.0);} -inline float__N__ mu(float__N__ y){ +inline float__N__ mu_l8_4(float__N__ y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) / 40320.0);} diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl index 133ce6db0fc753ad95c615ec61adbc58a0c5e77a..e023dc5e87c75e83bb22efed1422e416440daa1b 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec.cl @@ -4,228 +4,198 @@ * Polynomials under Horner form. 
*/ -#if FORMULA == L2_1 - -inline float alpha(float y){ +inline float alpha_l2_1(float y){ return ((y * (y * (-y + 2.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_1(float y){ return ((y * y * (3.0 * y - 5.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_1(float y){ return ((y * (y * (-3.0 * y + 4.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_1(float y){ return ((y * y * (y - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float alpha(float y){ +inline float alpha_l2_2(float y){ return ((y * (y * (y * (y * (2.0 * y - 5.0) + 3.0) + 1.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_2(float y){ return ((y * y * (y * (y * (-6.0 * y + 15.0) - 9.0) - 2.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_2(float y){ return ((y * (y * (y * (y * (6.0 * y - 15.0) + 9.0) + 1.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_2(float y){ return ((y * y * y * (y * (-2.0 * y + 5.0) - 3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float alpha(float y){ +inline float alpha_l2_3(float y){ return ((y * (y * (y * y * (y * (y * (-6.0 * y + 21.0) - 25.0) + 10.0) + 1.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_3(float y){ return ((y * y * (y * y * (y * (y * (18.0 * y - 63.0) + 75.0) - 30.0) - 2.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_3(float y){ return ((y * (y * (y * y * (y * (y * (-18.0 * y + 63.0) - 75.0) + 30.0) + 1.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_3(float y){ return ((y * y * y * y * (y * (y * (6.0 * y - 21.0) + 25.0) - 10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float alpha(float y){ +inline float alpha_l2_4(float y){ return ((y * (y * (y * y * y * (y * (y * (y * (20.0 * y - 90.0) + 154.0) - 119.0) + 35.0) + 1.0) - 1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_4(float y){ return ((y * y * (y * y * y * (y * (y * (y * 
(-60.0 * y + 270.0) - 462.0) + 357.0) - 105.0) - 2.0) + 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_4(float y){ return ((y * (y * (y * y * y * (y * (y * (y * (60.0 * y - 270.0) + 462.0) - 357.0) + 105.0) + 1.0) + 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (-20.0 * y + 90.0) - 154.0) + 119.0) - 35.0)) / 2.0);} - -#elif FORMULA == L4_2 - -inline float alpha(float y){ +inline float alpha_l4_2(float y){ return ((y * (y * (y * (y * (-5.0 * y + 13.0) - 9.0) - 1.0) + 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_2(float y){ return ((y * (y * (y * (y * (25.0 * y - 64.0) + 39.0) + 16.0) - 16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_2(float y){ return ((y * y * (y * (y * (-50.0 * y + 126.0) - 70.0) - 30.0) + 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_2(float y){ return ((y * (y * (y * (y * (50.0 * y - 124.0) + 66.0) + 16.0) + 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_2(float y){ return ((y * (y * (y * (y * (-25.0 * y + 61.0) - 33.0) - 1.0) - 2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_2(float y){ return ((y * y * y * (y * (5.0 * y - 12.0) + 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float alpha(float y){ +inline float alpha_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (14.0 * y - 49.0) + 58.0) - 22.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (-70.0 * y + 245.0) - 290.0) + 111.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_3(float y){ return ((y * y * (y * y * (y * (y * (140.0 * y - 490.0) + 580.0) - 224.0) - 30.0) + 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (-140.0 * y + 490.0) - 580.0) + 226.0) - 4.0) + 16.0) + 16.0)) / 24.0);} 
-inline float eta(float y){ +inline float eta_l4_3(float y){ return ((y * (y * (y * (y * (y * (y * (70.0 * y - 245.0) + 290.0) - 114.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_3(float y){ return ((y * y * y * y * (y * (y * (-14.0 * y + 49.0) - 58.0) + 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float alpha(float y){ +inline float alpha_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-46.0 * y + 207.0) - 354.0) + 273.0) - 80.0) + 1.0) - 2.0) - 1.0) + 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (230.0 * y - 1035.0) + 1770.0) - 1365.0) + 400.0) - 4.0) + 4.0) + 16.0) - 16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_4(float y){ return ((y * y * (y * y * (y * (y * (y * (y * (-460.0 * y + 2070.0) - 3540.0) + 2730.0) - 800.0) + 6.0) - 30.0) + 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (460.0 * y - 2070.0) + 3540.0) - 2730.0) + 800.0) - 4.0) - 4.0) + 16.0) + 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-230.0 * y + 1035.0) - 1770.0) + 1365.0) - 400.0) + 1.0) + 2.0) - 1.0) - 2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (46.0 * y - 207.0) + 354.0) - 273.0) + 80.0)) / 24.0);} - -#elif FORMULA == M8PRIME - -inline float alpha(float y){ +inline float alpha_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(-10.0*y + 21.0) + 28.0) - 105.0) + 70.0) + 35.0) - 56.0) + 17.0) / 3360.0);} -inline float beta(float y){ +inline float beta_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(70.0*y - 175.0) - 140.0) + 770.0) - 560.0) - 350.0) + 504.0) - 102.0) / 3360.0);} -inline float gamma(float y){ +inline float gamma_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(-210.0*y + 609.0) + 224.0) - 2135.0) + 
910.0) + 2765.0) - 2520.0) + 255.0) / 3360.0);} -inline float delta(float y){ +inline float delta_M8p(float y){ return ((y*y* (y*y* (y*y* (70.0*y - 231.0) + 588.0) - 980.0) + 604.0) / 672.0);} -inline float eta(float y){ +inline float eta_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(-70.0*y+ 259.0) - 84.0) - 427.0) - 182.0)+ 553.0) + 504.0)+ 51.0) / 672.0);} -inline float zeta(float y){ +inline float zeta_M8p(float y){ return ((y*(y*(y*(y*(y*(y*(210.0*y- 861.0) + 532.0) + 770.0) + 560.0) - 350.0) - 504.0) - 102.0) / 3360.0);} -inline float theta(float y){ +inline float theta_M8p(float y){ return ((y* (y* (y* (y* (y* (y* (-70.0* y+ 315.0) -280.0) -105.0) -70.0) +35.0)+ 56.0) +17.0) / 3360.0);} -inline float iota(float y){ +inline float iota_M8p(float y){ return ((y * y * y * y * y * (y * (10.0 * y - 49.0) + 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float alpha(float y){ +inline float alpha_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-89.0 * y + 312.0) - 370.0) + 140.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (623.0 * y - 2183.0) + 2581.0) - 955.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-1869.0 * y + 6546.0) - 7722.0) + 2850.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_3(float y){ return ((y * y * (y * y * (y * (y * (3115.0 * y - 10905.0) + 12845.0) - 4795.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (-3115.0 * y + 10900.0) - 12830.0) + 4880.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_3(float y){ return ((y * (y * (y * (y * (y * (y * (1869.0 * y - 6537.0) + 7695.0) - 2985.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_3(float 
y){ return ((y * (y * (y * (y * (y * (y * (-623.0 * y + 2178.0) - 2566.0) + 1010.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_3(float y){ return ((y * y * y * y * (y * (y * (89.0 * y - 311.0) + 367.0) - 145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float alpha(float y){ +inline float alpha_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (290.0 * y - 1305.0) + 2231.0) - 1718.0) + 500.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-2030.0 * y + 9135.0) - 15617.0) + 12027.0) - 3509.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (6090.0 * y - 27405.0) + 46851.0) - 36084.0) + 10548.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_4(float y){ return ((y * y * (y * y * (y * (y * (y * (y * (-10150.0 * y + 45675.0) - 78085.0) + 60145.0) - 17605.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (10150.0 * y - 45675.0) + 78085.0) - 60150.0) + 17620.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-6090.0 * y + 27405.0) - 46851.0) + 36093.0) - 10575.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (2030.0 * y - 9135.0) + 15617.0) - 12032.0) + 3524.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (-290.0 * y + 1305.0) - 2231.0) + 1719.0) - 503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float alpha(float y){ 
+inline float alpha_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-1006.0 * y + 5533.0) - 12285.0) + 13785.0) - 7829.0) + 1803.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (7042.0 * y - 38731.0) + 85995.0) - 96495.0) + 54803.0) - 12620.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-21126.0 * y + 116193.0) - 257985.0) + 289485.0) - 164409.0) + 37857.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_5(float y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (35210.0 * y - 193655.0) + 429975.0) - 482475.0) + 274015.0) - 63090.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-35210.0 * y + 193655.0) - 429975.0) + 482475.0) - 274015.0) + 63085.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (21126.0 * y - 116193.0) + 257985.0) - 289485.0) + 164409.0) - 37848.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_5(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-7042.0 * y + 38731.0) - 85995.0) + 96495.0) - 54803.0) + 12615.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_5(float y){ return ((y * y * y * y * y * y * (y * (y * (y * (y * (1006.0 * y - 5533.0) + 12285.0) - 13785.0) + 7829.0) - 1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float alpha(float y){ +inline float alpha_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y 
* (y * (y * (y * (3604.0 * y - 23426.0) + 63866.0) - 93577.0) + 77815.0) - 34869.0) + 6587.0) + 1.0) - 3.0) - 5.0) + 15.0) + 4.0) - 12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-25228.0 * y + 163982.0) - 447062.0) + 655039.0) - 544705.0) + 244083.0) - 46109.0) - 6.0) + 12.0) + 60.0) - 120.0) - 54.0) + 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (75684.0 * y - 491946.0) + 1341186.0) - 1965117.0) + 1634115.0) - 732249.0) + 138327.0) + 15.0) - 15.0) - 195.0) + 195.0) + 540.0) - 540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_6(float y){ return ((y * y * (y * y * (y * y * (y * (y * (y * (y * (y * (y * (-126140.0 * y + 819910.0) - 2235310.0) + 3275195.0) - 2723525.0) + 1220415.0) - 230545.0) - 20.0) + 280.0) - 980.0) + 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (126140.0 * y - 819910.0) + 2235310.0) - 3275195.0) + 2723525.0) - 1220415.0) + 230545.0) + 15.0) + 15.0) - 195.0) - 195.0) + 540.0) + 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (-75684.0 * y + 491946.0) - 1341186.0) + 1965117.0) - 1634115.0) + 732249.0) - 138327.0) - 6.0) - 12.0) + 60.0) + 120.0) - 54.0) - 108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_6(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (y * (25228.0 * y - 163982.0) + 447062.0) - 655039.0) + 544705.0) - 244083.0) + 46109.0) + 1.0) + 3.0) - 5.0) - 15.0) + 4.0) + 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_6(float y){ return ((y * y * y * y * y * y * y * (y * (y * (y * (y * (y * (-3604.0 * y + 23426.0) - 63866.0) + 93577.0) - 77815.0) + 
34869.0) - 6587.0)) / 720.0);} - -#elif FORMULA == L8_4 - -inline float alpha(float y){ +inline float alpha_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-3569.0 * y + 16061.0) - 27454.0) + 21126.0) - 6125.0) + 49.0) - 196.0) - 36.0) + 144.0)) / 40320.0);} -inline float beta(float y){ +inline float beta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (32121.0 * y - 144548.0) + 247074.0) - 190092.0) + 55125.0) - 672.0) + 2016.0) + 512.0) - 1536.0)) / 40320.0);} -inline float gamma(float y){ +inline float gamma_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-128484.0 * y + 578188.0) - 988256.0) + 760312.0) - 221060.0) + 4732.0) - 9464.0) - 4032.0) + 8064.0)) / 40320.0);} -inline float delta(float y){ +inline float delta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (299796.0 * y - 1349096.0) + 2305856.0) - 1774136.0) + 517580.0) - 13664.0) + 13664.0) + 32256.0) - 32256.0)) / 40320.0);} -inline float eta(float y){ +inline float eta_l8_4(float y){ return ((y * y * (y * y * (y * (y * (y * (y * (-449694.0 * y + 2023630.0) - 3458700.0) + 2661540.0) - 778806.0) + 19110.0) - 57400.0) + 40320.0) / 40320.0);} -inline float zeta(float y){ +inline float zeta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (449694.0 * y - 2023616.0) + 3458644.0) - 2662016.0) + 780430.0) - 13664.0) - 13664.0) + 32256.0) + 32256.0)) / 40320.0);} -inline float theta(float y){ +inline float theta_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-299796.0 * y + 1349068.0) - 2305744.0) + 1775032.0) - 520660.0) + 4732.0) + 9464.0) - 4032.0) - 8064.0)) / 40320.0);} -inline float iota(float y){ +inline float iota_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (128484.0 * y - 578168.0) + 988176.0) - 760872.0) + 223020.0) - 672.0) - 2016.0) + 512.0) + 1536.0)) / 40320.0);} -inline float kappa(float y){ +inline float kappa_l8_4(float y){ return ((y * (y * (y * (y * (y * (y * (y * (y * (-32121.0 * 
y + 144541.0) - 247046.0) + 190246.0) - 55685.0) + 49.0) + 196.0) - 36.0) - 144.0)) / 40320.0);} -inline float mu(float y){ +inline float mu_l8_4(float y){ return ((y * y * y * y * y * (y * (y * (y * (3569.0 * y - 16060.0) + 27450.0) - 21140.0) + 6181.0)) / 40320.0);} - #endif diff --git a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl index e218b66fd571eb645a9eba348f7ed8173fc52ab5..e1dc7d29e23baf3415ddc1801bf9ca3a0f846aa1 100644 --- a/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl +++ b/HySoP/hysop/gpu/cl_src/remeshing/weights_noVec_builtin.cl @@ -4,225 +4,196 @@ * Polynomials under Horner form. */ -#if FORMULA == L2_1 - -inline float alpha(float y){ +inline float alpha_l2_1(float y){ return (y*fma(y,fma(y,-1.0, 2.0), - 1.0)/2.0);} -inline float beta(float y){ +inline float beta_l2_1(float y){ return (fma(y*y, fma(y, 3.0, -5.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_1(float y){ return ((y * fma(y , fma(-3.0, y, 4.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_1(float y){ return ((y * y * fma(1.0, y, - 1.0)) / 2.0);} -#elif FORMULA == L2_2 - -inline float alpha(float y){ +inline float alpha_l2_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 2.0, -5.0), 3.0), 1.0), -1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_2(float y){ return (fma(y * y, fma(y, fma(y, fma(y, -6.0, 15.0), -9.0), -2.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 6.0, -15.0), 9.0), 1.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_2(float y){ return ((y * y * y * fma(y, fma(y, -2.0, 5.0), -3.0)) / 2.0);} -#elif FORMULA == L2_3 - -inline float alpha(float y){ +inline float alpha_l2_3(float y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -6.0, 21.0), -25.0), 10.0), 1.0), -1.0)) / 2.0);} -inline float beta(float y){ +inline float 
beta_l2_3(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 18.0, -63.0), 75.0), -30.0), -2.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_3(float y){ return ((y * fma(y, fma(y * y, fma(y, fma(y, fma(y, -18.0, 63.0), -75.0), 30.0), 1.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_3(float y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 6.0, -21.0), 25.0), -10.0)) / 2.0);} -#elif FORMULA == L2_4 - -inline float alpha(float y){ +inline float alpha_l2_4(float y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 20.0, -90.0), 154.0), -119.0), 35.0), 1.0), -1.0)) / 2.0);} -inline float beta(float y){ +inline float beta_l2_4(float y){ return (fma(y * y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, -60.0, 270.0), -462.0), 357.0), -105.0), -2.0), 2.0) / 2.0);} -inline float gamma(float y){ +inline float gamma_l2_4(float y){ return ((y * fma(y, fma(y * y * y, fma(y, fma(y, fma(y, fma(y, 60.0, -270.0), 462.0), -357.0), 105.0), 1.0), 1.0)) / 2.0);} -inline float delta(float y){ +inline float delta_l2_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -20.0, 90.0), -154.0), 119.0), -35.0)) / 2.0);} -#elif FORMULA == L4_2 - -inline float alpha(float y){ +inline float alpha_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, -5.0, 13.0), -9.0), -1.0), 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 25.0, -64.0), 39.0), 16.0), -16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_2(float y){ return (fma(y * y, fma(y, fma(y, fma(y, -50.0, 126.0), -70.0), -30.0), 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, 50.0, -124.0), 66.0), 16.0), 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_2(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, -25.0, 61.0), -33.0), -1.0), -2.0)) / 24.0);} -inline float zeta(float y){ 
+inline float zeta_l4_2(float y){ return ((y * y * y * fma(y, fma(y, 5.0, -12.0), 7.0)) / 24.0);} -#elif FORMULA == L4_3 - -inline float alpha(float y){ +inline float alpha_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 14.0, -49.0), 58.0), -22.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -70.0, 245.0), -290.0), 111.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_3(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 140.0, -490.0), 580.0), -224.0), -30.0), 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -140.0, 490.0), -580.0), 226.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 70.0, -245.0), 290.0), -114.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_3(float y){ return ((y * y * y * y * fma(y, fma(y, fma(y, -14.0, 49.0), -58.0), 23.0)) / 24.0);} -#elif FORMULA == L4_4 - -inline float alpha(float y){ +inline float alpha_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -46.0, 207.0), -354.0), 273.0), -80.0), 1.0), -2.0), -1.0), 2.0)) / 24.0);} -inline float beta(float y){ +inline float beta_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 230.0, -1035.0), 1770.0), -1365.0), 400.0), -4.0), 4.0), 16.0), -16.0)) / 24.0);} -inline float gamma(float y){ +inline float gamma_l4_4(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -460.0, 2070.0), -3540.0), 2730.0), -800.0), 6.0), -30.0), 24.0) / 24.0);} -inline float delta(float y){ +inline float delta_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 460.0, -2070.0), 3540.0), -2730.0), 800.0), 
-4.0), -4.0), 16.0), 16.0)) / 24.0);} -inline float eta(float y){ +inline float eta_l4_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -230.0, 1035.0), -1770.0), 1365.0), -400.0), 1.0), 2.0), -1.0), -2.0)) / 24.0);} -inline float zeta(float y){ +inline float zeta_l4_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 46.0, -207.0), 354.0), -273.0), 80.0)) / 24.0);} -#elif FORMULA == M8PRIME - -inline float alpha(float y){ +inline float alpha_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-10.0,y, + 21.0), + 28.0), - 105.0), + 70.0), + 35.0), - 56.0), + 17.0) / 3360.0);} -inline float beta(float y){ +inline float beta_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(70.0,y, - 175.0), - 140.0), + 770.0), - 560.0), - 350.0), + 504.0), - 102.0) / 3360.0);} -inline float gamma(float y){ +inline float gamma_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-210.0,y, + 609.0), + 224.0), - 2135.0), + 910.0), + 2765.0), - 2520.0), + 255.0) / 3360.0);} -inline float delta(float y){ +inline float delta_M8p(float y){ return (fma(y*y, fma(y*y, fma(y*y, fma(70.0,y, - 231.0), + 588.0), - 980.0), + 604.0) / 672.0);} -inline float eta(float y){ +inline float eta_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(-70.0,y, 259.0), - 84.0), - 427.0), - 182.0), + 553.0), + 504.0), + 51.0) / 672.0);} -inline float zeta(float y){ +inline float zeta_M8p(float y){ return (fma(y,fma(y,fma(y,fma(y,fma(y,fma(y,fma(210.0,y,- 861.0), + 532.0), + 770.0), + 560.0), - 350.0), - 504.0), - 102.0) / 3360.0);} -inline float theta(float y){ +inline float theta_M8p(float y){ return (fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(-70.0, y, 315.0), -280.0), -105.0), -70.0), 35.0), 56.0), 17.0) / 3360.0);} -inline float iota(float y){ +inline float iota_M8p(float y){ return ((y * y * y * y * y * fma(y , fma(10.0 , y ,- 49.0) , 56.0)) / 3360.0);} -#elif FORMULA == L6_3 - -inline float alpha(float y){ +inline 
float alpha_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -89.0, 312.0), -370.0), 140.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 623.0, -2183.0), 2581.0), -955.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1869.0, 6546.0), -7722.0), 2850.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_3(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, 3115.0, -10905.0), 12845.0), -4795.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3115.0, 10900.0), -12830.0), 4880.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 1869.0, -6537.0), 7695.0), -2985.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_3(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -623.0, 2178.0), -2566.0), 1010.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_3(float y){ return ((y * y * y * y * fma(y, fma(y, fma(y, 89.0, -311.0), 367.0), -145.0)) / 720.0);} -#elif FORMULA == L6_4 - -inline float alpha(float y){ +inline float alpha_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 290.0, -1305.0), 2231.0), -1718.0), 500.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -2030.0, 9135.0), -15617.0), 12027.0), -3509.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, 
fma(y, fma(y, fma(y, fma(y, fma(y, 6090.0, -27405.0), 46851.0), -36084.0), 10548.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_4(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -10150.0, 45675.0), -78085.0), 60145.0), -17605.0), 280.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 10150.0, -45675.0), 78085.0), -60150.0), 17620.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -6090.0, 27405.0), -46851.0), 36093.0), -10575.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 2030.0, -9135.0), 15617.0), -12032.0), 3524.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, -290.0, 1305.0), -2231.0), 1719.0), -503.0)) / 720.0);} -#elif FORMULA == L6_5 - -inline float alpha(float y){ +inline float alpha_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -1006.0, 5533.0), -12285.0), 13785.0), -7829.0), 1803.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 7042.0, -38731.0), 85995.0), -96495.0), 54803.0), -12620.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -21126.0, 116193.0), -257985.0), 289485.0), -164409.0), 37857.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} 
-inline float delta(float y){ +inline float delta_l6_5(float y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, 35210.0, -193655.0), 429975.0), -482475.0), 274015.0), -63090.0), 280.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -35210.0, 193655.0), -429975.0), 482475.0), -274015.0), 63085.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 21126.0, -116193.0), 257985.0), -289485.0), 164409.0), -37848.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_5(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -7042.0, 38731.0), -85995.0), 96495.0), -54803.0), 12615.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_5(float y){ return ((y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, 1006.0, -5533.0), 12285.0), -13785.0), 7829.0), -1802.0)) / 720.0);} -#elif FORMULA == L6_6 - -inline float alpha(float y){ +inline float alpha_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 3604.0, -23426.0), 63866.0), -93577.0), 77815.0), -34869.0), 6587.0), 1.0), -3.0), -5.0), 15.0), 4.0), -12.0)) / 720.0);} -inline float beta(float y){ +inline float beta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -25228.0, 163982.0), -447062.0), 655039.0), -544705.0), 244083.0), -46109.0), -6.0), 12.0), 60.0), -120.0), -54.0), 108.0)) / 720.0);} -inline float gamma(float y){ +inline float gamma_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 75684.0, 
-491946.0), 1341186.0), -1965117.0), 1634115.0), -732249.0), 138327.0), 15.0), -15.0), -195.0), 195.0), 540.0), -540.0)) / 720.0);} -inline float delta(float y){ +inline float delta_l6_6(float y){ return (fma(y * y, fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -126140.0, 819910.0), -2235310.0), 3275195.0), -2723525.0), 1220415.0), -230545.0), -20.0), 280.0), -980.0), 720.0) / 720.0);} -inline float eta(float y){ +inline float eta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 126140.0, -819910.0), 2235310.0), -3275195.0), 2723525.0), -1220415.0), 230545.0), 15.0), 15.0), -195.0), -195.0), 540.0), 540.0)) / 720.0);} -inline float zeta(float y){ +inline float zeta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -75684.0, 491946.0), -1341186.0), 1965117.0), -1634115.0), 732249.0), -138327.0), -6.0), -12.0), 60.0), 120.0), -54.0), -108.0)) / 720.0);} -inline float theta(float y){ +inline float theta_l6_6(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 25228.0, -163982.0), 447062.0), -655039.0), 544705.0), -244083.0), 46109.0), 1.0), 3.0), -5.0), -15.0), 4.0), 12.0)) / 720.0);} -inline float iota(float y){ +inline float iota_l6_6(float y){ return ((y * y * y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3604.0, 23426.0), -63866.0), 93577.0), -77815.0), 34869.0), -6587.0)) / 720.0);} - -#elif FORMULA == L8_4 - -inline float alpha(float y){ +inline float alpha_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -3569.0, 16061.0), -27454.0), 21126.0), -6125.0), 49.0), -196.0), -36.0), 144.0)) / 40320.0);} -inline float beta(float y){ +inline float beta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 32121.0, -144548.0), 247074.0), -190092.0), 55125.0), -672.0), 2016.0), 512.0), 
-1536.0)) / 40320.0);} -inline float gamma(float y){ +inline float gamma_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -128484.0, 578188.0), -988256.0), 760312.0), -221060.0), 4732.0), -9464.0), -4032.0), 8064.0)) / 40320.0);} -inline float delta(float y){ +inline float delta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 299796.0, -1349096.0), 2305856.0), -1774136.0), 517580.0), -13664.0), 13664.0), 32256.0), -32256.0)) / 40320.0);} -inline float eta(float y){ +inline float eta_l8_4(float y){ return (fma(y * y, fma(y * y, fma(y, fma(y, fma(y, fma(y, fma(y, -449694.0, 2023630.0), -3458700.0), 2661540.0), -778806.0), 19110.0), -57400.0), 40320.0) / 40320.0);} -inline float zeta(float y){ +inline float zeta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 449694.0, -2023616.0), 3458644.0), -2662016.0), 780430.0), -13664.0), -13664.0), 32256.0), 32256.0)) / 40320.0);} -inline float theta(float y){ +inline float theta_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -299796.0, 1349068.0), -2305744.0), 1775032.0), -520660.0), 4732.0), 9464.0), -4032.0), -8064.0)) / 40320.0);} -inline float iota(float y){ +inline float iota_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, 128484.0, -578168.0), 988176.0), -760872.0), 223020.0), -672.0), -2016.0), 512.0), 1536.0)) / 40320.0);} -inline float kappa(float y){ +inline float kappa_l8_4(float y){ return ((y * fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, fma(y, -32121.0, 144541.0), -247046.0), 190246.0), -55685.0), 49.0), 196.0), -36.0), -144.0)) / 40320.0);} -inline float mu(float y){ +inline float mu_l8_4(float y){ return ((y * y * y * y * y * fma(y, fma(y, fma(y, fma(y, 3569.0, -16060.0), 27450.0), -21140.0), 6181.0)) / 40320.0);} - -#endif diff --git a/HySoP/hysop/gpu/gpu_particle_advection_1k.py b/HySoP/hysop/gpu/gpu_particle_advection_1k.py 
index 5291647aaf4f0b15c5a15045fe41cf3ecdf4f2a2..113701c20c20d7c24c8502eb9f1a0fd4a6e6536a 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_1k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_1k.py @@ -153,7 +153,7 @@ class GPUParticleAdvection1k(GPUParticleAdvection): WINb = lwi[0] build_options += " -D FORMULA=" + self.method[Remesh].__name__.upper() if self._isMultiScale: - build_options += " -D MS_FORMULA=MS_" + build_options += " -D MS_FORMULA=" build_options += self.method[MultiScale].__name__.upper() if is_noBC: build_options += " -D WITH_NOBC=1" diff --git a/HySoP/hysop/gpu/gpu_particle_advection_2k.py b/HySoP/hysop/gpu/gpu_particle_advection_2k.py index b66a79bec3c948a317f41bc2673a2119d37e0e49..ade1d26d081ce1afd7eff59798c011ba52e2ebf6 100644 --- a/HySoP/hysop/gpu/gpu_particle_advection_2k.py +++ b/HySoP/hysop/gpu/gpu_particle_advection_2k.py @@ -171,7 +171,7 @@ class GPUParticleAdvection2k(GPUParticleAdvection): build_options += " -D WITH_NOBC=1" build_options += " -D WI_NB=" + str(WINb) if self._isMultiScale: - build_options += " -D MS_FORMULA=MS_" + build_options += " -D MS_FORMULA=" build_options += self.method[MultiScale].__name__.upper() build_options += self._constants[self.dir] ## Build code diff --git a/HySoP/hysop/operator/advection_dir.py b/HySoP/hysop/operator/advection_dir.py index 44042b140f8b847f23cd3d4f8329be5cb61d27f8..f4f987f2cf868a9ece7b54f4635fffa1e83b5b1f 100644 --- a/HySoP/hysop/operator/advection_dir.py +++ b/HySoP/hysop/operator/advection_dir.py @@ -8,7 +8,7 @@ from parmepy.methods_keys import TimeIntegrator, Interpolation, Remesh, \ Support, Splitting, MultiScale from parmepy.numerics.integrators.runge_kutta2 import RK2 from parmepy.numerics.interpolation import Linear -from parmepy.numerics.remeshing import L2_1 +from parmepy.numerics.remeshing import L2_1, L4_2, L4_4 from parmepy.operator.continuous import Operator from parmepy.tools.timers import Timer @@ -61,16 +61,20 @@ class AdvectionDir(Operator): self._v_ghosts = 
np.array([0, ] * self.domain.dimension, dtype=PARMES_INDEX) if self._isMultiScale: - self._v_ghosts = np.array([2, ] * self.domain.dimension, - dtype=PARMES_INDEX) - if MultiScale in method.keys(): - if method[MultiScale] == Linear: - self._v_ghosts = np.array([1, ] * self.domain.dimension, - dtype=PARMES_INDEX) - else: - assert method[MultiScale] == L2_1 + if self.method[Support].find('gpu') < 0: + raise ValueError("Multiscale advection is not supported in " + "Python yet, user should use Scales or GPU.") + if MultiScale not in method.keys(): + method[MultiScale] = L2_1 + if method[MultiScale] == Linear: + self._v_ghosts = [1, ] * self.domain.dimension + elif method[MultiScale] == L2_1: + self._v_ghosts = [2, ] * self.domain.dimension + elif method[MultiScale] == L4_2 or \ + method[MultiScale] == L4_4: + self._v_ghosts = [3, ] * self.domain.dimension else: - assert method[MultiScale] == L2_1 + raise ValueError("Unknown multiscale method") ## Extra parameters (depend on the method) self.config = other_config diff --git a/HySoP/hysop/problem/problem.py b/HySoP/hysop/problem/problem.py index 46c05d89d3ff7f86968b6dec331de8b15111be5e..6e4dea7dde00e408041b134b05cce4b599136d4c 100644 --- a/HySoP/hysop/problem/problem.py +++ b/HySoP/hysop/problem/problem.py @@ -185,7 +185,7 @@ class Problem(object): if __VERBOSE__: print main_rank, op.__class__.__name__ - op.apply(self.simulation) + op.apply(self.simulation) testdump = \ self.simulation.currentIteration % self.dumpFreq is 0