/* * Copyright (C) 2017, UChicago Argonne, LLC * All Rights Reserved * * Hardware/Hybrid Cosmology Code (HACC), Version 1.0 * * Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann, * Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel * (Argonne National Laboratory) * * David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens * (Los Alamos National Laboratory) * * George Zagaris * (Kitware) * * OPEN SOURCE LICENSE * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. Software changes, * modifications, or derivative works, should be noted with comments and * the author and organization’s name. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. Neither the names of UChicago Argonne, LLC or the Department of Energy * nor the names of its contributors may be used to endorse or promote * products derived from this software without specific prior written * permission. * * 4. The software and the end-user documentation included with the * redistribution, if any, must include the following acknowledgment: * * "This product includes software produced by UChicago Argonne, LLC under * Contract No. DE-AC02-06CH11357 with the Department of Energy." * * ***************************************************************************** * DISCLAIMER * THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE * UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR * UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, * EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE * ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS, * PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE * PRIVATELY OWNED RIGHTS. * * ***************************************************************************** */ #include "HACCKernels.h" #include <cmath> extern const float PolyCoefficients4[] = { 0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f }; extern const float PolyCoefficients5[] = { 0.269327f, -0.0750978f, 0.0114808f, -0.00109313f, 0.0000605491f, -0.00000147177f }; extern const float PolyCoefficients6[] = { 0.271431f, -0.0783394f, 0.0133122f, -0.00159485f, 0.000132336f, -0.00000663394f, 0.000000147305f }; // HACC's gravity short-range-force kernel represents the part of the 1/r^2 // gravitational force that is not computed by the long-range grid solver. This // kernel computes the acceleration of a target particle from all of the other // particles in the provided interaction lists. It is assumed that the target // particle has unit mass while the interaction-list can contain pseudo // particles with larger mass values. Beyond a distance of MaxSep, the // inter-particle force should be completely accounted for by the long-range // grid solver (and thus we filter out such interactions here). Closer than // MaxSep, we directly compute the inter-particle force, subtracting the // long-range part of the force (as fit to a polynomial of the specified // degree). A softening length, SofteningLen, is also used, as is standard in // N-body codes. template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]> static void GravityForceKernel(int n, float *RESTRICT x, float *RESTRICT y, float *RESTRICT z, float *RESTRICT mass, float x0, float y0, float z0, float MaxSepSqrd, float SofteningLenSqrd, float &RESTRICT ax, float &RESTRICT ay, float &RESTRICT az) { float lax = 0.0f, lay = 0.0f, laz = 0.0f; // As written below, the mass array is conditionally accessed (i.e. accessed // only if the interaction is not filtered by the distance checks). This will // tend to inhibit vectorization on architectures without masked vector loads. // With OpenMP 4+, we can explicitly inform the compiler that vectorization is // safe. #if _OPENMP >= 201307 #pragma omp simd reduction(+:lax,lay,laz) #endif for (int i = 0; i < n; ++i) { float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0; float r2 = dx * dx + dy * dy + dz * dz; if (r2 >= MaxSepSqrd || r2 == 0.0f) continue; float r2s = r2 + SofteningLenSqrd; float f = PolyCoefficients[PolyOrder]; for (int p = 1; p <= PolyOrder; ++p) f = PolyCoefficients[PolyOrder-p] + r2*f; f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i]; lax += f * dx; lay += f * dy; laz += f * dz; } ax += lax; ay += lay; az += laz; } void GravityForceKernel4(int n, float *RESTRICT x, float *RESTRICT y, float *RESTRICT z, float *RESTRICT mass, float x0, float y0, float z0, float MaxSepSqrd, float SofteningLenSqrd, float &RESTRICT ax, float &RESTRICT ay, float &RESTRICT az) { GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0, MaxSepSqrd, SofteningLenSqrd, ax, ay, az); } void GravityForceKernel5(int n, float *RESTRICT x, float *RESTRICT y, float *RESTRICT z, float *RESTRICT mass, float x0, float y0, float z0, float MaxSepSqrd, float SofteningLenSqrd, float &RESTRICT ax, float &RESTRICT ay, float &RESTRICT az) { GravityForceKernel<5, PolyCoefficients5>(n, x, y, z, mass, x0, y0, z0, MaxSepSqrd, SofteningLenSqrd, ax, ay, az); } void GravityForceKernel6(int n, float *RESTRICT x, float *RESTRICT y, float *RESTRICT z, float *RESTRICT mass, float x0, float y0, float z0, float MaxSepSqrd, float SofteningLenSqrd, float &RESTRICT ax, float &RESTRICT ay, float &RESTRICT az) { GravityForceKernel<6, PolyCoefficients6>(n, x, y, z, mass, x0, y0, z0, MaxSepSqrd, SofteningLenSqrd, ax, ay, az); }