/*
 *                 Copyright (C) 2017, UChicago Argonne, LLC
 *                            All Rights Reserved
 *
 *           Hardware/Hybrid Cosmology Code (HACC), Version 1.0
 *
 * Salman Habib, Adrian Pope, Hal Finkel, Nicholas Frontiere, Katrin Heitmann,
 *      Vitali Morozov, Jeffrey Emberson, Thomas Uram, Esteban Rangel
 *                        (Argonne National Laboratory)
 *
 *  David Daniel, Patricia Fasel, Chung-Hsing Hsu, Zarija Lukic, James Ahrens
 *                      (Los Alamos National Laboratory)
 *
 *                               George Zagaris
 *                                 (Kitware)
 *
 *                            OPEN SOURCE LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice,
 *      this list of conditions and the following disclaimer. Software changes,
 *      modifications, or derivative works, should be noted with comments and
 *      the author and organization’s name.
 *
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 *   3. Neither the names of UChicago Argonne, LLC or the Department of Energy
 *      nor the names of its contributors may be used to endorse or promote
 *      products derived from this software without specific prior written
 *      permission.
 *
 *   4. The software and the end-user documentation included with the
 *      redistribution, if any, must include the following acknowledgment:
 *
 *     "This product includes software produced by UChicago Argonne, LLC under
 *      Contract No. DE-AC02-06CH11357 with the Department of Energy."
 *
 * *****************************************************************************
 *                                DISCLAIMER
 * THE SOFTWARE IS SUPPLIED "AS IS" WITHOUT WARRANTY OF ANY KIND. NEITHER THE
 * UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT OF ENERGY, NOR 
 * UCHICAGO ARGONNE, LLC, NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, 
 * EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE
 * ACCURARY, COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, DATA, APPARATUS,
 * PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
 * PRIVATELY OWNED RIGHTS.
 *
 * *****************************************************************************
 */

#include "HACCKernels.h"
#include <cmath>

extern const float PolyCoefficients4[] = {
  0.263729f, -0.0686285f, 0.00882248f, -0.000592487f, 0.0000164622f
};

extern const float PolyCoefficients5[] = {
  0.269327f, -0.0750978f, 0.0114808f, -0.00109313f, 0.0000605491f,
  -0.00000147177f
};

extern const float PolyCoefficients6[] = {
  0.271431f, -0.0783394f, 0.0133122f, -0.00159485f, 0.000132336f,
  -0.00000663394f, 0.000000147305f
};

// HACC's gravity short-range-force kernel represents the part of the 1/r^2
// gravitational force that is not computed by the long-range grid solver. This
// kernel computes the acceleration of a target particle from all of the other
// particles in the provided interaction lists. It is assumed that the target
// particle has unit mass while the interaction-list can contain pseudo
// particles with larger mass values. Beyond a distance of MaxSep, the
// inter-particle force should be completely accounted for by the long-range
// grid solver (and thus we filter out such interactions here). Closer than
// MaxSep, we directly compute the inter-particle force, subtracting the
// long-range part of the force (as fit to a polynomial of the specified
// degree). A softening length, SofteningLen, is also used, as is standard in
// N-body codes.

template <int PolyOrder, const float (&PolyCoefficients)[PolyOrder+1]>
static void GravityForceKernel(int n, float *RESTRICT x, float *RESTRICT y,
                               float *RESTRICT z, float *RESTRICT mass,
                               float x0, float y0, float z0,
                               float MaxSepSqrd, float SofteningLenSqrd,
                               float &RESTRICT ax, float &RESTRICT ay,
                               float &RESTRICT az) {
  float lax = 0.0f, lay = 0.0f, laz = 0.0f;

// As written below, the mass array is conditionally accessed (i.e. accessed
// only if the interaction is not filtered by the distance checks). This will
// tend to inhibit vectorization on architectures without masked vector loads.
// With OpenMP 4+, we can explicitly inform the compiler that vectorization is
// safe.
#if _OPENMP >= 201307
#pragma omp simd reduction(+:lax,lay,laz)
#endif
  for (int i = 0; i < n; ++i) {
    float dx = x[i] - x0, dy = y[i] - y0, dz = z[i] - z0;
    float r2 = dx * dx + dy * dy + dz * dz;

    if (r2 >= MaxSepSqrd || r2 == 0.0f)
      continue;

    float r2s = r2 + SofteningLenSqrd;
    float f = PolyCoefficients[PolyOrder];
    for (int p = 1; p <= PolyOrder; ++p)
      f = PolyCoefficients[PolyOrder-p] + r2*f;

    f = (1.0f / (r2s * std::sqrt(r2s)) - f) * mass[i];

    lax += f * dx;
    lay += f * dy;
    laz += f * dz; 
  }

  ax += lax;
  ay += lay;
  az += laz;
}

void GravityForceKernel4(int n, float *RESTRICT x, float *RESTRICT y,
                         float *RESTRICT z, float *RESTRICT mass,
                         float x0, float y0, float z0,
                         float MaxSepSqrd, float SofteningLenSqrd,
                         float &RESTRICT ax, float &RESTRICT ay,
                         float &RESTRICT az) {
  GravityForceKernel<4, PolyCoefficients4>(n, x, y, z, mass, x0, y0, z0,
                                           MaxSepSqrd, SofteningLenSqrd,
                                           ax, ay, az);
}

void GravityForceKernel5(int n, float *RESTRICT x, float *RESTRICT y,
                         float *RESTRICT z, float *RESTRICT mass,
                         float x0, float y0, float z0,
                         float MaxSepSqrd, float SofteningLenSqrd,
                         float &RESTRICT ax, float &RESTRICT ay,
                         float &RESTRICT az) {
  GravityForceKernel<5, PolyCoefficients5>(n, x, y, z, mass, x0, y0, z0,
                                           MaxSepSqrd, SofteningLenSqrd,
                                           ax, ay, az);
}

void GravityForceKernel6(int n, float *RESTRICT x, float *RESTRICT y,
                         float *RESTRICT z, float *RESTRICT mass,
                         float x0, float y0, float z0,
                         float MaxSepSqrd, float SofteningLenSqrd,
                         float &RESTRICT ax, float &RESTRICT ay,
                         float &RESTRICT az) {
  GravityForceKernel<6, PolyCoefficients6>(n, x, y, z, mass, x0, y0, z0,
                                           MaxSepSqrd, SofteningLenSqrd,
                                           ax, ay, az);
}