#include <unistd.h> // for getopt()

#include <algorithm> // for std::min()
#include <cmath> // to include sqrt(), etc.
#include <cstdlib> // for atoi() and atof()
#include <iostream>

#include <mpi.h> // include the MPI library

#include "pipe.h" // the MPI pipe routines
using namespace std;
// "real" as a general name for the standard floating-point data type
typedef double real;

// Debug-print helpers (practical for debugging); all of them write to cerr.
//   PRI(x): prints x blanks (indentation)
//   PR(x) : prints "x = <value> "   (no newline)
//   PRC(x): prints "x = <value>, "  (no newline)
//   PRL(x): prints "x = <value>" followed by a newline
// The argument is parenthesized so that expressions such as a ? b : c expand
// correctly, and std:: is spelled out so the macros do not depend on a
// using-directive being in effect at the point of use.
#define PRI(x) {for (int __pri__ = 0; __pri__ < (x); __pri__++) std::cerr << " ";}
#define PR(x) std::cerr << #x << " = " << (x) << " "
#define PRC(x) std::cerr << #x << " = " << (x) << ", "
#define PRL(x) std::cerr << #x << " = " << (x) << std::endl

const int NDIM = 3;                     // number of spatial dimensions
const real VERY_LARGE_NUMBER = 1e300;   // initial value for minimum searches
const int root = 0;                     // identity of the root processor
// The Particle structure: the complete state of one body.
//
// NOTE: get_snapshot() describes this layout to MPI as one int followed by a
// contiguous run of doubles that starts at `mass`, so the order and the types
// of the members must not be changed without updating that description.
struct Particle {
    int  id;          // particle identity (get_snapshot() assigns the input index)
    real mass;        // mass
    real pos[NDIM];   // position
    real vel[NDIM];   // velocity
    real acc[NDIM];   // acceleration
    real jerk[NDIM];  // jerk (time derivative of the acceleration)
};
void correct_step(Particle p[], Particle po[], int n, real dt);
void evolve(Particle p[],
int n, real & t, real dt_param, real dt_dia, real dt_out,
real dt_tot, bool init_out, bool x_flag, void *pipe,
MPI_Datatype particletype);
void evolve_step(Particle p[], int n, real & t,
real dt, real & epot, real & coll_time,
void *pipe);
void get_acc_jerk_pot_coll(Particle pl[], int nl,
Particle po[], int no,
real & epot, real & coll_time);
Particle * get_snapshot(int &n, real &t, MPI_Datatype &particletype);
void predict_step(Particle p[], int n, real dt);
void put_snapshot(Particle p[], int n, real t, MPI_Datatype particletype);
bool read_options(int argc, char *argv[], real & dt_param, real & dt_dia,
real & dt_out, real & dt_tot, bool & i_flag, bool &
x_flag);
void write_diagnostics(Particle p[], int n, real t, real epot,
int nsteps, real & einit, bool init_flag,
bool x_flag, real &tcpu);
#define loop(idx,last) for (idx = 0; idx < last ; idx++)
void uniform(real a, real *x);
void uniform(Particle p[], int n);
void get_acc_jerk_pot_coll(Particle p[], int n, real &epot, real
&coll_time,
void *pipe);
int main(int argc, char *argv[])
{
// initialize MPI
int rank, size;
MPI_Init( &argc, &argv );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
MPI_Comm_size( MPI_COMM_WORLD, &size );
real dt_param = 0.03; // control parameter to determine time step
size
real dt_dia = 1; // time interval between diagnostics output
real dt_out = 1; // time interval between output of
snapshots
real dt_tot = 10; // duration of the integration
bool init_out = false; // if true: snapshot output with start at t
= 0
// with an echo of the input
snapshot
bool x_flag = false; // if true: extra debugging diagnostics
output
if (! read_options(argc, argv, dt_param, dt_dia, dt_out, dt_tot,
init_out,
x_flag))
return 1; // halt criterion detected by
read_options()
int n;
real t;
MPI_Datatype particletype;
Particle *p = get_snapshot(n, t, particletype);
real noutp = 1;
real dt;
put_snapshot(p, n, t, particletype);
void * pipe; // Create a MPI pipe for a 1-dimensional ring topology
MPE_Pipe_create( MPI_COMM_WORLD, particletype, n, &pipe );
evolve(p, n, t, dt_param, dt_dia, dt_out, dt_tot, init_out,
x_flag, pipe, particletype);
delete []p;
MPE_Pipe_free( &pipe ); // Clean up MPI
MPI_Type_free( &particletype );
MPI_Finalize();
bool read_options(int argc, char *argv[], real & dt_param, real & dt_dia,
real & dt_out, real & dt_tot, bool & i_flag, bool &
x_flag)
{
int c;
while ((c = getopt(argc, argv, "hd:e:o:t:ix")) != -1)
switch(c){
case 'h': cerr << "usage: " << argv[0]
<< " [-h (for help)]"
<< " [-d step_size_control_parameter]\n"
<< " [-e diagnostics_interval]"
<< " [-o output_interval]\n"
<< " [-t total_duration]"
<< " [-i (start output at t = 0)]\n"
<< " [-x (extra debugging
diagnostics)]"
<< endl;
return false; // execution should stop after
help
case 'd': dt_param = atof(optarg);
break;
case 'e': dt_dia = atof(optarg);
break;
case 'i': i_flag = true;
break;
case 'o': dt_out = atof(optarg);
break;
case 't': dt_tot = atof(optarg);
break;
case 'x': x_flag = true;
break;
case '?': cerr << "usage: " << argv[0]
<< " [-h (for help)]"
<< " [-d step_size_control_parameter]\n"
<< " [-e diagnostics_interval]"
<< " [-o output_interval]\n"
<< " [-t total_duration]"
<< " [-i (start output at t = 0)]\n"
<< " [-x (extra debugging
diagnostics)]"
<< endl;
return false; // execution should stop after
error
}
return true; // ready to continue program
execution
}
// Reads the initial snapshot on the root rank and distributes it.
//
// The root reads from cin: the particle number, the time, and then for every
// particle its mass, position and velocity; the particle id is set to the
// input index. The particles are scattered in equal shares over all ranks;
// when the particle number is not a multiple of the number of ranks the
// surplus particles at the end of the input are dropped (with a warning).
//
// Parameters (all outputs):
//   n            number of particles held by the calling rank
//   t            time of the snapshot, broadcast from the root
//   particletype committed MPI datatype describing one Particle; the caller
//                is responsible for releasing it with MPI_Type_free()
//
// Returns the local particles in an array allocated with new[]; the caller
// releases it with delete[].
//
// If the input cannot be read the job is terminated with MPI_Abort().
Particle *get_snapshot(int &n, real &t,
                       MPI_Datatype &particletype)
{
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    // Full snapshot; only the root allocates it. It must be NULL elsewhere
    // because every rank runs the delete[] at the end of this function.
    Particle *p_tmp = NULL;
    if (rank == root) {
        cerr << "Reading snapshot" << endl;
        cin >> n;
        cin >> t;
        if (!cin || n < 0) {
            cerr << "ERROR: cannot read particle number and time from input."
                 << endl;
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
        p_tmp = new Particle[n]();      // value-initialized: acc and jerk start at 0
        for (int i = 0; i < n; i++) {
            p_tmp[i].id = i;
            cin >> p_tmp[i].mass;                   // mass of particle i
            for (int k = 0; k < NDIM; k++)
                cin >> p_tmp[i].pos[k];             // position of particle i
            for (int k = 0; k < NDIM; k++)
                cin >> p_tmp[i].vel[k];             // velocity of particle i
        }
        if (!cin) {
            cerr << "ERROR: cannot read the particle data from input." << endl;
            MPI_Abort(MPI_COMM_WORLD, 1);
        }
    }
    MPI_Bcast(&n, 1, MPI_INT, root, MPI_COMM_WORLD);    // broadcasts particle number

    int n_local = n / size;             // equal share per rank, rounded down
    if (n != n_local*size && rank == root) {
        cerr << "WARNING: Paticle number in input is not a mulitple of the number of processors."
             << endl;
        cerr << " Action: Reduce particle number to n = "
             << n_local*size << "." << endl;
    }
    Particle *p = new Particle[n_local];

    // defining the particletype: one int (id) followed by the doubles
    // mass, pos, vel, acc and jerk. MPI_DOUBLE matches `real` as long as
    // real is a typedef for double.
    int inputblockcounts[2] = {1, 1 + 4*NDIM};
    MPI_Datatype ntypes[2] = {MPI_INT, MPI_DOUBLE};
    MPI_Aint displs[2];
    Particle probe;                     // only used to measure member offsets
    MPI_Get_address(&probe.id, &displs[0]);
    MPI_Get_address(&probe.mass, &displs[1]);
    displs[1] -= displs[0];             // make them relative
    displs[0] = 0;
    MPI_Type_create_struct(2, inputblockcounts, displs, ntypes, &particletype);
    MPI_Type_commit(&particletype);

    // Distribute the particles over the processors
    MPI_Scatter(p_tmp, n_local, particletype,
                p, n_local, particletype, root, MPI_COMM_WORLD);
    n = n_local;
    MPI_Bcast(&t, 1, MPI_DOUBLE, root, MPI_COMM_WORLD);

    delete [] p_tmp;                    // no-op on ranks where p_tmp is NULL
    return p;
}
// Gathers the particles of all ranks on the root and writes a snapshot to
// cout: the total particle number, the time, and one line per particle with
// its mass, position and velocity, using 16 digits of precision.
//
// Parameters:
//   p            local particles of the calling rank
//   n            number of local particles (the gather uses the same count
//                for every rank)
//   t            current time
//   particletype MPI datatype describing one Particle
//
// Collective: every rank of MPI_COMM_WORLD has to call it; only the root
// writes output.
void put_snapshot(Particle p[], int n, real t,
                  MPI_Datatype particletype)
{
    int rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    int ntot;
    MPI_Allreduce(&n, &ntot, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );

    // Receive buffer: only the root needs one. The other ranks pass NULL
    // instead of an indeterminate pointer value.
    Particle *p_all = NULL;
    if (rank == root) {
        p_all = new Particle[ntot];
    }
    MPI_Gather(p, n, particletype, p_all, n, particletype, root, MPI_COMM_WORLD);

    cout.precision(16);                         // full double precision
    if (rank == root) {
        cout << ntot << endl;                   // N, total particle number
        cout << t << endl;                      // current time
        for (int i = 0; i < ntot ; i++){
            cout << p_all[i].mass;              // mass of particle i
            for (int k = 0; k < NDIM; k++)
                cout << ' ' << p_all[i].pos[k]; // position of particle i
            for (int k = 0; k < NDIM; k++)
                cout << ' ' << p_all[i].vel[k]; // velocity of particle i
            cout << endl;
        }
        delete [] p_all;
    }
}
// Computes the global energies and reports them on cerr.
//
// Parameters:
//   p, n        local particles of the calling rank and their number
//   t           current time
//   epot_local  local potential energy sum; the global sum is halved
//               "against double counting"
//   nsteps      number of integration steps completed so far
//   einit       initial total energy: written when init_flag is true, read
//               otherwise to form the energy errors
//   init_flag   true on the first call
//   x_flag      true to dump the internal data of every local particle
//   tcpu        reference time as returned by MPI_Wtime(); the time elapsed
//               since then is reported as "CPU". The value is left unchanged
//               so that every call measures from the same reference.
//
// Collective: every rank of MPI_COMM_WORLD has to call it. The energy report
// is written by the root only; the x_flag dump is written by every rank.
void write_diagnostics(Particle p[], int n, real t, real epot_local,
                       int nsteps, real & einit, bool init_flag,
                       bool x_flag, real &tcpu)
{
    int rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    real ekin_local = 0;                // kinetic energy of the local particles
    for (int i = 0; i < n ; i++)
        for (int k = 0; k < NDIM ; k++)
            ekin_local += 0.5 * p[i].mass * p[i].vel[k] * p[i].vel[k];

    real ekin;                          // kinetic energy of the n-body system
    MPI_Allreduce(&ekin_local, &ekin, 1, MPI_DOUBLE, MPI_SUM,
                  MPI_COMM_WORLD );
    real epot;
    MPI_Allreduce(&epot_local, &epot, 1, MPI_DOUBLE, MPI_SUM,
                  MPI_COMM_WORLD );
    epot *= 0.5;                        // against double counting

    real etot = ekin + epot;            // total energy of the n-body system

    if (init_flag)                      // at first pass, pass the initial
        einit = etot;                   // energy back to the calling function

    // Elapsed time since the reference. Storing the difference back into
    // tcpu would destroy the reference, and every later call would then
    // subtract an elapsed time from a time stamp.
    const real elapsed = MPI_Wtime() - tcpu;

    if (rank == root) {
        cerr << "at time t = " << t << ", after " << nsteps
             << " steps (CPU = " << elapsed << "): \n E_kin = " << ekin
             << " , E_pot = " << epot
             << " , E_tot = " << etot << endl;
        cerr << " "
             << "absolute energy error: E_tot - E_init = "
             << etot - einit << endl;
        cerr << " "
             << "relative energy error: (E_tot - E_init) / E_init = "
             << (etot - einit) / einit << endl;
    }

    if (x_flag){
        cerr << " for debugging purposes, here is the internal data "
             << "representation:\n";
        for (int i = 0; i < n ; i++){
            cerr << " internal data for particle " << i+1 << " : " << endl;
            cerr << " ";
            cerr << p[i].id << " " << p[i].mass;
            for (int k = 0; k < NDIM; k++)
                cerr << ' ' << p[i].pos[k];
            for (int k = 0; k < NDIM; k++)
                cerr << ' ' << p[i].vel[k];
            for (int k = 0; k < NDIM; k++)
                cerr << ' ' << p[i].acc[k];
            for (int k = 0; k < NDIM; k++)
                cerr << ' ' << p[i].jerk[k];
            cerr << endl;
        }
    }
}
// Integrates the system from time t until t + dt_tot.
//
// Parameters:
//   p, n         local particles of the calling rank and their number
//   t            current time; advanced by every integration step
//   dt_param     control parameter: the local time step is dt_param times
//                the collision time scale
//   dt_dia       time interval between diagnostics output
//   dt_out       time interval between snapshot output
//   dt_tot       duration of the integration
//   init_out     true to write a snapshot before the first step
//   x_flag       passed on to write_diagnostics() (extra debugging output)
//   pipe         MPE pipe handle, passed on to the force calculation
//   particletype MPI datatype describing one Particle
//
// Collective: every rank of MPI_COMM_WORLD has to call it. All ranks use the
// same time step, the minimum of the local time steps.
void evolve(Particle p[],
            int n, real & t, real dt_param, real dt_dia, real dt_out,
            real dt_tot, bool init_out, bool x_flag,
            void *pipe, MPI_Datatype particletype)
{
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    if (rank == root)
        cerr << "Starting a Hermite integration for a " << n*size
             << "-body system,\n from time t = " << t
             << " with time step control parameter dt_param = " << dt_param
             << " until time " << t + dt_tot
             << " ,\n with diagnostics output interval dt_dia = "
             << dt_dia << ",\n and snapshot output interval dt_out = "
             << dt_out << "." << endl;

    real tcpu = MPI_Wtime();            // check CPU usage

    real epot = 0;                      // potential energy of the n-body system
    real coll_time = VERY_LARGE_NUMBER; // collision (close encounter) time scale

    get_acc_jerk_pot_coll(p, n, epot, coll_time, pipe);

    int nsteps = 0;                     // number of integration time steps completed
    real einit = 0;                     // initial total energy of the system;
                                        // set by the call below (init_flag = true)

    write_diagnostics(p, n, t, epot, nsteps, einit,
                      true, x_flag, tcpu);
    if (init_out)                       // flag for initial output
        put_snapshot(p, n, t, particletype);

    real t_dia = t + dt_dia;            // next time for diagnostics output
    real t_out = t + dt_out;            // next time for snapshot output
    real t_end = t + dt_tot;            // final time, to finish the integration

    while (true){
        // step until the next diagnostics, snapshot or end time is reached
        while (t < t_dia && t < t_out && t < t_end){
            real dt_local = dt_param * coll_time;
            real dt;
            MPI_Allreduce(&dt_local, &dt, 1, MPI_DOUBLE, MPI_MIN,
                          MPI_COMM_WORLD);      // synchronize time step
            evolve_step(p, n, t, dt, epot, coll_time, pipe);
            nsteps++;
        }
        if (t >= t_dia){
            write_diagnostics(p, n, t, epot, nsteps,
                              einit, false, x_flag, tcpu);
            t_dia += dt_dia;
        }
        if (t >= t_out){
            put_snapshot(p, n, t, particletype);
            t_out += dt_out;
        }
        if (t >= t_end)
            break;
    }
}
// Advances the local particles by one time step dt: saves the old state,
// predicts, recomputes the forces through the pipe, corrects, and finally
// adds dt to t.
void evolve_step(Particle p[], int n, real & t,
                 real dt, real & epot, real & coll_time,
                 void *pipe)
{
    // The corrector needs the values from before the prediction, so keep a
    // copy of position, velocity, acceleration and jerk of every particle.
    Particle *saved = new Particle[n];
    int idx = 0;
    while (idx < n) {
        const Particle &src = p[idx];
        Particle &dst = saved[idx];
        for (int d = 0; d < NDIM; ++d) {
            dst.pos[d] = src.pos[d];
            dst.vel[d] = src.vel[d];
            dst.acc[d] = src.acc[d];
            dst.jerk[d] = src.jerk[d];
        }
        ++idx;
    }

    predict_step(p, n, dt);
    get_acc_jerk_pot_coll(p, n, epot, coll_time, pipe);
    correct_step(p, saved, n, dt);

    t += dt;

    delete [] saved;
}
// Predictor: advances position and velocity of every particle over dt with a
// Taylor series that uses the current acceleration and jerk. The position is
// updated first, from the velocity of before the step.
void predict_step(Particle p[], int n, real dt)
{
    for (int body = 0; body < n; ++body) {
        Particle &q = p[body];
        for (int d = 0; d < NDIM; ++d) {
            const real a = q.acc[d];
            const real j = q.jerk[d];
            q.pos[d] += q.vel[d]*dt + a*dt*dt/2 + j*dt*dt*dt/6;
            q.vel[d] += a*dt + j*dt*dt/2;
        }
    }
}
// Corrector: recomputes velocity and position of every particle from the
// old values in po[] and the new acceleration and jerk in p[]. The velocity
// is corrected first because the position update uses the new velocity.
void correct_step(Particle p[], Particle po[], int n, real dt)
{
    for (int body = 0; body < n; ++body) {
        Particle &now = p[body];
        const Particle &old = po[body];
        for (int d = 0; d < NDIM; ++d) {
            now.vel[d] = old.vel[d] + (old.acc[d] + now.acc[d])*dt/2
                                    + (old.jerk[d] - now.jerk[d])*dt*dt/12;
            now.pos[d] = old.pos[d] + (old.vel[d] + now.vel[d])*dt/2
                                    + (old.acc[d] - now.acc[d])*dt*dt/12;
        }
    }
}
// Pairwise force kernel: adds the contributions of the particles po[0..no)
// to the particles pl[0..nl).
//
// For every pair (i, j) with different ids it
//   - adds the acceleration and jerk exerted by po[j] to pl[i]
//     (po[] is not modified),
//   - subtracts m_i * m_j / r from epot,
//   - updates the collision time estimate.
//
// Parameters:
//   pl, nl     particles that receive acceleration and jerk, and their number
//   po, no     particles that exert the force, and their number
//   epot       potential energy sum, accumulated (not reset here)
//   coll_time  collision time scale: set to the minimum of its value on
//              entry and the smallest estimate found here
//
// acc and jerk of pl[] are accumulated; the caller has to reset them.
void get_acc_jerk_pot_coll(Particle pl[], int nl,
                           Particle po[], int no,
                           real & epot, real & coll_time)
{
    real coll_time_q = VERY_LARGE_NUMBER;   // collision time to 4th power

    for (int i = 0; i < nl ; i++){
        for (int j = 0; j < no ; j++){
            if (pl[i].id == po[j].id)       // same particle: no self-interaction
                continue;

            real rji[NDIM];                 // rji[] is the vector from
                                            // particle i to particle j
            real vji[NDIM];                 // vji[] = d rji[] / d t
            for (int k = 0; k < NDIM ; k++){
                rji[k] = po[j].pos[k] - pl[i].pos[k];
                vji[k] = po[j].vel[k] - pl[i].vel[k];
            }
            real r2 = 0;                    // | rji |^2
            real v2 = 0;                    // | vji |^2
            real rv_r2 = 0;                 // ( rij . vij ) / | rji |^2
            for (int k = 0; k < NDIM ; k++){
                r2 += rji[k] * rji[k];
                v2 += vji[k] * vji[k];
                rv_r2 += rji[k] * vji[k];
            }
            rv_r2 /= r2;
            real r = std::sqrt(r2);         // | rji |
            real r3 = r * r2;               // | rji |^3

            // add the {i,j} contribution to the total potential energy for
            // the system:
            epot -= pl[i].mass * po[j].mass / r;

            // add the {j} contribution to the {i} values of acceleration
            // and jerk:
            real da[NDIM];                  // main terms in pairwise
            real dj[NDIM];                  // acceleration and jerk
            for (int k = 0; k < NDIM ; k++){
                da[k] = rji[k] / r3;                            // see equations
                dj[k] = (vji[k] - 3 * rv_r2 * rji[k]) / r3;     // in the header
            }
            for (int k = 0; k < NDIM ; k++){
                pl[i].acc[k] += po[j].mass * da[k];
                pl[i].jerk[k] += po[j].mass * dj[k];
                // in the original version pij = -pji for acc and jerk
                // (using symmetry to find pairwise acceleration and jerk).
                // in this parallel version this is unpractical.
                //po[j].acc[k] -= pl[i].mass * da[k];
                //po[j].jerk[k] -= pl[i].mass * dj[k];
            }

            // first collision time estimate, based on unaccelerated linear
            // motion:
            real coll_est_q = (r2*r2) / (v2*v2);    // estimate to 4th power (quartic)
            if (coll_time_q > coll_est_q)
                coll_time_q = coll_est_q;

            // second collision time estimate, based on free fall:
            real da2 = 0;                   // da2 becomes the square of the
            for (int k = 0; k < NDIM ; k++) // pair-wise acceleration between
                da2 += da[k] * da[k];       // particles i and j
            real mij = pl[i].mass + po[j].mass;
            da2 *= mij * mij;

            coll_est_q = r2/da2;
            if (coll_time_q > coll_est_q)
                coll_time_q = coll_est_q;
        }
    }
    // from q for quartic back to linear collision time and taking the minimum
    coll_time = std::min(coll_time, std::sqrt(std::sqrt(coll_time_q)));
}
// Force calculation for the local particles against the particles of all
// ranks.
//
// Resets acc and jerk of the local particles, epot and coll_time, computes
// the interactions among the local particles, and then performs size-1 pipe
// steps; in every step the buffer delivered by MPE_Pipe_push() is used as
// the set of particles that exert the force.
//
// Parameters:
//   p, n       local particles and their number; acc and jerk are overwritten
//   epot       output: potential energy sum over all pairs (local i, any j)
//   coll_time  output: smallest collision time estimate found
//   pipe       MPE pipe handle created with MPE_Pipe_create()
//
// NOTE(review): presumably every push hands over the particle block of a
// different rank, so that after size-1 pushes all other ranks have been
// seen - confirm against pipe.h, which is not visible here.
void get_acc_jerk_pot_coll(Particle p[], int n, real &epot, real &coll_time,
                           void *pipe)
{
    int size;
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    for (int i = 0; i < n ; i++)
        for (int k = 0; k < NDIM ; k++) {
            p[i].acc[k] = p[i].jerk[k] = 0;
        }

    MPE_Pipe_start( pipe, p, n, 1 );    // load the initial sendbuffer

    epot = 0;                           // initialize epot and coll_time
    coll_time = VERY_LARGE_NUMBER;

    // interactions among the local particles
    get_acc_jerk_pot_coll(p, n, p, n, epot, coll_time);

    // compute forces for other particles
    for (int step = 1; step < size; step++) {
        Particle *recvbuf = NULL;       // set by MPE_Pipe_push()
        int rlen = 0;                   // set by MPE_Pipe_push()
        MPE_Pipe_push( pipe, reinterpret_cast<void**>(&recvbuf), &rlen ); // get new data
        // Compute forces
        get_acc_jerk_pot_coll(p, n, recvbuf, rlen, epot, coll_time);
    }
}