This article collects typical usage examples of the C++ MPI_Gather function. If you have been wondering how to use MPI_Gather in C++, what it does, or what real calls look like, the curated code examples below may help.
The article presents 20 code examples of MPI_Gather, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better C++ code examples.
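For quick reference before the examples: MPI_Gather collects a fixed-size block of data from every rank of a communicator into one contiguous receive buffer on the root rank. The receive arguments are only significant on the root, and recvcount is the amount received from each rank, not the total. The following minimal sketch was written for this article (it is not taken from any of the projects below) and simply gathers one int per rank to rank 0:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char *argv[]) {
    int rank, size;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    int value = rank * rank;                      /* each rank contributes a single int */
    int *all = NULL;
    if (rank == 0) {
        all = (int*) malloc(size * sizeof(int));  /* receive buffer matters only at the root */
    }
    /* recvcount (here 1) is the count received from EACH rank, not the total */
    MPI_Gather(&value, 1, MPI_INT, all, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if (rank == 0) {
        for (int i = 0; i < size; i++)
            printf("rank %d sent %d\n", i, all[i]);
        free(all);
    }
    MPI_Finalize();
    return 0;
}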
Example 1: main
int main ( int argc, char *argv[] ) {
// Auxiliary variables
int rank;
int npcs;
int step;
dmn domain;
double wtime;
// Solution arrays
double *g_u; /* will be allocated in ROOT only */
double *t_u;
double *t_un;
// Initialize MPI
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &npcs);
// Manage Domain sizes
domain = Manage_Domain(rank,npcs);
// Allocate Memory
Manage_Memory(0,domain,&g_u,&t_u,&t_un);
// Root mode: Build Initial Condition and scatter it to the rest of processors
if (domain.rank==ROOT) Call_IC(2,g_u);
MPI_Scatter(g_u, domain.size, MPI_DOUBLE, t_u+NX*NY, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);
// Exchange Halo regions
Manage_Comms(domain,&t_u); MPI_Barrier(MPI_COMM_WORLD);
// ROOT mode: Record the starting time.
if (rank==ROOT) wtime=MPI_Wtime();
// Asynchronous MPI Solver
for (step = 0; step < NO_STEPS; step+=2) {
// print iteration in ROOT mode
if (rank==ROOT && step%10000==0) printf(" Step %d of %d\n",step,(int)NO_STEPS);
// Exchange Boundaries and compute stencil
Call_Laplace(domain,&t_u,&t_un); Manage_Comms(domain,&t_un); // 1st iter
Call_Laplace(domain,&t_un,&t_u); Manage_Comms(domain,&t_u ); // 2nd iter
}
MPI_Barrier(MPI_COMM_WORLD);
// ROOT mode: Record the final time.
if (rank==ROOT) {
wtime = MPI_Wtime()-wtime;
printf ("\n Wall clock elapsed seconds = %f\n\n", wtime );
}
// Gather solutions to ROOT and write solution in ROOT mode
MPI_Gather(t_u+NX*NY, domain.size, MPI_DOUBLE, g_u, domain.size, MPI_DOUBLE, ROOT, MPI_COMM_WORLD);
if (rank==ROOT) Save_Results(g_u);
// Free Memory
Manage_Memory(1,domain,&g_u,&t_u,&t_un); MPI_Barrier(MPI_COMM_WORLD);
// Terminate MPI.
MPI_Finalize();
// ROOT mode: Terminate.
if (rank==ROOT) {
printf ("HEAT_MPI:\n" );
printf (" Normal end of execution.\n\n" );
}
return 0;
}
Developer: Haider-BA, Project: Matlab2CPP, Lines: 70, Source: main.cpp
Example 2: main
int main (int argc, char *argv[])
{
int err;
double time, time_limit, time_maxMsg;
int iter, iter_limit;
size_t size, messStart, messStop, mem_limit;
int testFlags, ndims, partsize;
int k;
char hostname[256];
char* hostnames;
int root = 0;
struct argList args;
/* process the command-line arguments, printing usage info on error */
if (!processArgs(argc, argv, &args)) { usage(); }
iter = args.iters;
messStart = args.messStart;
messStop = args.messStop;
mem_limit = args.memLimit;
time_limit = args.timeLimit;
testFlags = args.testFlags;
check_buffers = args.checkBuffers;
ndims = args.ndims;
partsize = args.partSize;
/* initialize MPI */
err = MPI_Init(&argc, &argv);
if (err) { printf("Error in MPI_Init\n"); exit(1); }
/* determine who we are in the MPI world */
MPI_Comm_rank(MPI_COMM_WORLD, &rank_local);
MPI_Comm_size(MPI_COMM_WORLD, &rank_count);
#ifdef PRINT_ENV
/* Print environment as part of Sequoia SOW MPI requirements */
extern void printEnv(void);
if (rank_local == 0) { printEnv(); }
#endif
/* mark start of mpiBench output */
if (rank_local == 0) { printf("START mpiBench_Bcast v%s\n", VERS); }
/* collect hostnames of all the processes and print rank layout */
gethostname(hostname, sizeof(hostname));
hostnames = (char*) _ALLOC_MAIN_(sizeof(hostname)*rank_count, "Hostname array");
MPI_Gather(hostname, sizeof(hostname), MPI_CHAR, hostnames, sizeof(hostname), MPI_CHAR, 0, MPI_COMM_WORLD);
if (rank_local == 0) {
for(k=0; k<rank_count; k++) {
printf("%d : %s\n", k, &hostnames[k*sizeof(hostname)]);
}
}
/* allocate message buffers and initialize timing functions */
while(messStop*((size_t)rank_count)*2 > mem_limit && messStop > 0) messStop /= 2;
buffer_size = messStop * rank_count;
sbuffer = (char*) _ALLOC_MAIN_(messStop * rank_count, "Send Buffer");
rbuffer = (char*) _ALLOC_MAIN_(messStop * rank_count, "Receive Buffer");
sendcounts = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Send Counts");
sdispls = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Send Displacements");
recvcounts = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Recv Counts");
rdispls = (int*) _ALLOC_MAIN_(sizeof(int) * rank_count, "Recv Displacements");
/*time_maxMsg = 2*time_limit; */
time_maxMsg = 0.0;
/* if partsize was specified, calculate the number of partitions we need */
int partitions = 0;
if (partsize > 0) {
/* keep dividing comm in half until we get to partsize */
int currentsize = rank_count;
while (currentsize >= partsize) {
partitions++;
currentsize >>= 1;
}
}
Developer: 8l, Project: insieme, Lines: 78, Source: bCast.c
Example 3: main
//......... part of the code omitted here .........
{
for (i=0; i<ARRAY_SIZE; i++)
{
for (j=ARRAY_SIZE; j>=0; j--)
{
p[0] = player[i];
p[1] = player[j];
for(q=0; q<NUM_GAMES; q++)
{
/* NOTE: the second assignment overwrites the first, so both Strategy() calls
   below see p[1]'s encoded history; kept as in the original source */
b2d = ((p[0].history[0]*8) + (p[0].history[1]*4) + (p[0].history[2]*2) + p[0].history[3]);
b2d = ((p[1].history[0]*8) + (p[1].history[1]*4) + (p[1].history[2]*2) + p[1].history[3]);
Strategy(p[0], b2d);
Strategy(p[1], b2d);
Fitness(p);
for (s=4; s>0; s--)
{
p[0].history[s] = p[0].history[s-1];
p[1].history[s] = p[1].history[s-1];
}
p[0].history[0] = p[0].move;
p[1].history[0] = p[1].move;
}
player[i] = p[0];
player[j] = p[1];
}
}
}
MPI_Barrier(MPI_COMM_WORLD);
MPI_Gather(sub_arrays, ARRAY_SIZE, mpi_pop, player, ARRAY_SIZE, mpi_pop, 0, MPI_COMM_WORLD);
/*-----------------------Perform Selection-----------------------------*/
if (world_rank == 0)
{
for(count=0; count<2; count++)
{
int sumFitness = 0;
for (i=0; i<POPSIZE; i++)
{
sumFitness += p[i].fitness;
int RANDOM = lrand48() % sumFitness;
if (sumFitness >= RANDOM)
{
p[count] = player[i];
}
}
}
/*------------------------Crossover-------------------------------------*/
if (RANDOM2 < CROSSOVER)
{
temp [0] = p[0].history[2];
temp [1] = p[0].history[3];
temp2[0] = p[1].history[2];
temp2[1] = p[1].history[3];
p[0].history[2] = temp2[0];
p[0].history[3] = temp2[1];
p[1].history[2] = temp[ 0];
p[1].history[3] = temp[ 1];
}
Developer: CaillaRose, Project: GeneticAlg, Lines: 66, Source: dilemma.c
Example 4: main
int main(int argc, char * argv[])
{
int rank, np;
int * D;
int * a;
int i;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &np);
int res=-1;
int * results;
srand(rank + time(0));
for(i = 20; i<100; i+=2)
{
// the matrix that contains the compatibilities
D = (int*) malloc( sizeof(int)*i*i );
// the array that contains a solution
a = (int*) malloc( sizeof(int)*i );
initArray(a, -1, i);
if(rank==0)
{
//initialize the matrix
genMatrix(D, i);
// allocate the array that receives each process's result
results = (int*) malloc( sizeof(int)*np );
}
// generate a solution
genSolution(a, i);
//send compatibility matrix and initial solution to other processes
MPI_Bcast(D, sizeof(int)*i*i, MPI_BYTE, 0, MPI_COMM_WORLD);
//MPI_Bcast(a, sizeof(int)*i, MPI_BYTE, 0, MPI_COMM_WORLD);
res = alg2(i, D, a, rank);
//MPI_Barrier(MPI_COMM_WORLD);
MPI_Gather(&res, 1, MPI_INT, results, 1, MPI_INT, 0, MPI_COMM_WORLD);
if(rank==0)
{
printf("%d\t%d\n", i, getMin(results, np) );
// clean
free(results);
}
free(D);
free(a);
}
MPI_Finalize();
return 0;
}
Developer: Onumis, Project: Simulated-Annealing, Lines: 65, Source: main.c
Example 5: main
int main(int argc, char* argv[]){
int rank, size, n, i, j, elementiXproc, stage, length, next;
orderedAfterSwap *m;
char *binary;
FILE *file;
float *elementi, *mieiElementi, *result;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(argc<2) {
printf("Not enough arguments: %d given, %d required", argc-1, 1);
MPI_Abort(MPI_COMM_WORLD, 0);
return 1;
}
if(rank==0) {
writeFile();
file = fopen(argv[1],"rb");
if(file==NULL) {
printf("Could not open file: %s", argv[1]);
MPI_Abort(MPI_COMM_WORLD, 0);
return 1;
}
fread(&n, sizeof(int), 1, file);
elementiXproc = n/size;
mieiElementi = malloc(sizeof(float)*elementiXproc);
elementi = malloc(sizeof(float)*elementiXproc);
fread(mieiElementi, sizeof(float), elementiXproc, file);
for(i=1; i<size; i++){
MPI_Send (&elementiXproc, 1, MPI_INT, i, 0, MPI_COMM_WORLD);
fread(elementi, sizeof(float), elementiXproc, file);
MPI_Send (elementi, elementiXproc, MPI_FLOAT, i, 0, MPI_COMM_WORLD);
}
fclose(file);
result = malloc(sizeof(float)*n);
}
else {
MPI_Recv (&elementiXproc, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
mieiElementi = malloc(sizeof(float)*elementiXproc);
MPI_Recv (mieiElementi, elementiXproc, MPI_FLOAT, 0, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
elementi = malloc(sizeof(float)*elementiXproc);
}
qsort(mieiElementi, elementiXproc, sizeof(float), floatcomp);
length = log(size)/log(2);
binary = intToBinary(rank,length);
for(stage=0; stage<length; stage++) {
if(binary[stage]=='0'){
binary[stage] = '1';
next = binaryToInt(binary, length);
binary[stage] = '0';
MPI_Send (mieiElementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD);
MPI_Recv (elementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
m = swapMin(mieiElementi,elementi,elementiXproc);
mieiElementi = m->mieiElementi;
}
else {
binary[stage] = '0';
next = binaryToInt(binary, length);
binary[stage] = '1';
MPI_Recv (elementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Send (mieiElementi, elementiXproc, MPI_FLOAT, next, 0, MPI_COMM_WORLD);
m = swapMax(mieiElementi,elementi,elementiXproc);
mieiElementi = m->mieiElementi;
}
}
MPI_Gather(mieiElementi, elementiXproc, MPI_FLOAT, result, elementiXproc, MPI_FLOAT, 0, MPI_COMM_WORLD);
if(rank==0){
printf("[ ");
for(j=0; j<n; j++) {
printf("%f ", result[j]);
}
printf("] \n");
free(result);
}
free(m);
free(binary);
free(mieiElementi);
free(elementi);
MPI_Finalize();
return 0;
}
Developer: Dani7B, Project: CPD, Lines: 92, Source: Bitonic.c
Example 6: DisplayGoL
int DisplayGoL(int N, int effective_cols_size, int matrix[N][effective_cols_size], int rank)
{
int realColumnSize = effective_cols_size-2;
int arraySize = N * realColumnSize;
int tempArray[arraySize];
int count = 0;
int r, c;
int displaymatrix[N][N];
int tempTempArray[N*N];
int currentGatherTime = 0;
struct timeval send1s, send1e;
int tSend;
//printf("\nEFFECTIVE COL SIZE :%d",effective_cols_size);
for(c=1;c<effective_cols_size-1;c++){
for(r=0;r<N;r++){
tempArray[count] = matrix[r][c];
count++;
//printf("SETTING RANK:%d, INDEX: %d and %d, VALUE: %d\n", rank, r,c, tempArray[count-1]);
}
}
gettimeofday(&send1s, NULL);
if(rank==0)
{
MPI_Gather(tempArray, N * (realColumnSize), MPI_INT, tempTempArray,N * (realColumnSize), MPI_INT, 0, MPI_COMM_WORLD);
}
else
{
MPI_Gather(tempArray, N * (realColumnSize), MPI_INT, NULL,0, MPI_INT, 0, MPI_COMM_WORLD);
}
gettimeofday(&send1e, NULL);
currentGatherTime += (send1e.tv_sec-send1s.tv_sec)*1000 + (send1e.tv_usec-send1s.tv_usec)/1000;
//printf("%d", currentGatherTime);
int q = 0;
// for(q=0; q< N*realColumnSize; q++){
// printf("RANK: %d, INDEX: %d, VALUE: %d\n", rank, q, tempArray[q]);
// }
if(rank==0){
// If the rank is 0 we will need to gather from the array
// put it into a matrix and
for(c=0;c<N*N;c++){
displaymatrix[c%N][c/N] = tempTempArray[c];
//printf("INDEX 22: %d, VALUE: %d\n", c, tempTempArray[c]);
}
// printf("\n \n GATHER AT RANK %d\n",rank);
for (r = 0; r < N; r++) {
for (c = 0; c < N; c++)
printf("V_G-%d-%d = %d ",r,c, displaymatrix[r][c]);
printf("\n");
}
}
return currentGatherTime;
//return;
}
Developer: purohitsumit, Project: GameOfLife, Lines: 65, Source: Game_Of_Life.c
Example 7: main
//......... part of the code omitted here .........
col_matrix = (int*)malloc((lngth*lngth) * sizeof(int));
res_matrix = (int*)malloc((lngth*lngth) * sizeof(int));
if(q>1)
chnkd_MPI_Recv(local_matrix, lngth*lngth, MPI_INT, 0);
else
local_matrix = d_graph;
p_row = ( rank / q );
p_col = ( rank % q );
//CREATE COMMUNICATORS
MPI_Group MPI_GROUP_WORLD;
MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);
MPI_Group row_group, col_group;
MPI_Comm row_comm, col_comm, grid_comm;
int tmp_row, tmp_col, proc;
int row_process_ranks[q], col_process_ranks[q];
for(proc = 0; proc < q; proc++){
row_process_ranks[proc] = (p_row * q) + proc;
col_process_ranks[proc] = ((p_col + proc*q) %(q*q));
}
radixsort(col_process_ranks, q);
radixsort(row_process_ranks, q);
MPI_Group_incl(MPI_GROUP_WORLD, q, row_process_ranks, &row_group);
MPI_Group_incl(MPI_GROUP_WORLD, q, col_process_ranks, &col_group);
MPI_Comm_create(MPI_COMM_WORLD, row_group, &row_comm);
MPI_Comm_create(MPI_COMM_WORLD, col_group, &col_comm);
if ((rank / q) == (rank % q)) {
memcpy(row_matrix, local_matrix, (lngth*lngth) * sizeof(int));
}
int ln,d,flag;
int step, rotation_src, rotation_dest, src;
int count = 0;
memcpy(res_matrix, local_matrix, (lngth*lngth) * sizeof(int));
rotation_src = (p_row + 1) % q;
rotation_dest = ((p_row - 1) + q) % q;
ln = (lngth*q) << 1;
start = MPI_Wtime();
for (d = 2; d < ln; d = d << 1) {
memcpy(col_matrix, local_matrix, (lngth*lngth) * sizeof(int));
for ( step = 0; step < q; step++) {
src = (p_row + step) % q;
count++;
if (src == p_col) {
MPI_Bcast(local_matrix, lngth*lngth, MPI_INT, src, row_comm);
floyd_warshall( local_matrix, col_matrix, res_matrix, lngth);
} else {
MPI_Bcast(row_matrix, lngth*lngth, MPI_INT, src, row_comm);
floyd_warshall( row_matrix, col_matrix, res_matrix, lngth);
}
if( step < q-1)
MPI_Sendrecv_replace(col_matrix, lngth*lngth, MPI_INT, rotation_dest, STD_TAG,rotation_src, STD_TAG, col_comm, MPI_STATUS_IGNORE);
}
memcpy(local_matrix, res_matrix, (lngth*lngth) * sizeof(int));
}
int *sol;
sol = malloc(N*N*sizeof(int));
MPI_Gather(res_matrix, lngth*lngth, MPI_INT, sol, lngth*lngth, MPI_INT, 0, MPI_COMM_WORLD);
if (rank == 0) {
finish = MPI_Wtime();
printf("Execution time %f\n",finish - start);
}
if (rank == 0) {
int row, col, pos_x, pos_y, pos, tmp_y, tmp_x;
for (i = 0; i < P; i++) {
pos_x = i / q;
pos_y = i % q;
pos = i * lngth*lngth;
for (row = 0; row < lngth; row++) {
for (col = 0; col < lngth; col++) {
tmp_x = GET_MTRX_POS(pos_x,row,lngth);
tmp_y = GET_MTRX_POS(pos_y,col,lngth);
if (sol[GET_MTRX_POS(row,col,lngth) + pos] == INF)
d_graph[GET_MTRX_POS(tmp_x,tmp_y,N)] = 0;
else
d_graph[GET_MTRX_POS(tmp_x,tmp_y,N)] = sol[GET_MTRX_POS(row,col,lngth) + pos];
}
}
}
prints_matrix(d_graph,N);
}
MPI_Finalize();
return 0;
}
Developer: LopesManuel, Project: MPI-Floyd-Warshall-C, Lines: 101, Source: floyd.c
Example 8: AllgatherDomains
void AllgatherDomains(std::set<int> &setOfDomain){
int i = 0;
int numLDomains = (int)setOfDomain.size();
int domainsarray[numLDomains];
std::set<int>::iterator iter = setOfDomain.begin();
for (;iter != setOfDomain.end(); iter++) domainsarray[i++] = *iter;
int numGDomains[P_size()];
MPI_Gather(&numLDomains,1,MPI_INT,numGDomains,1,MPI_INT,0,MPI_COMM_WORLD);
// if (!P_pid()){
// for(i=0; i<P_size(); i++) printf("rank %d receives %d domains from rank %d\n",P_pid(),numGDomains[i],i);
// }
// allocate enough space to receive nodes from all processors
int *recv_buffer2, *displacements;
int totalDoms = 0;
if ( !P_pid() ){
for(i=0; i<P_size(); i++) totalDoms += numGDomains[i];
recv_buffer2 = new int[totalDoms]; // only root processor allocates memory
displacements = new int[P_size()];
displacements[0] = 0;
for (int i=1; i<P_size(); i++) displacements[i] = displacements[i-1] + numGDomains[i-1];
}
// now it's time to send nodes to root processor
MPI_Gatherv(domainsarray,numLDomains,MPI_INT,
recv_buffer2,numGDomains,displacements,MPI_INT,
0,MPI_COMM_WORLD);
// if (!P_pid()){
// for(i=0; i<totalDoms; i++) printf("rank %d domains %d\n",P_pid(),recv_buffer2[i]);
// }
// let's filter domains flags to avoid repeated values
setOfDomain.clear();
if (!P_pid()){
for(i=0; i<totalDoms; i++) setOfDomain.insert( recv_buffer2[i] );
}
// printf("rank %d setOfDomain.size() = %d\n",P_pid(),setOfDomain.size());
// if (!P_pid()){
// for (iter = setOfDomain.begin(); iter != setOfDomain.end(); iter++) printf("rank %d domains %d\n",P_pid(),*iter);
// }
// Send these domains flags to all processes
i = 0;
int numGDomains2 = (int)setOfDomain.size();
numGDomains2 = P_getSumInt(numGDomains2);
int domainsGarray[numGDomains2];
// if (!P_pid()){
for (iter = setOfDomain.begin(); iter != setOfDomain.end(); iter++) domainsGarray[i++] = *iter;
// }
MPI_Bcast(domainsGarray,numGDomains2,MPI_INT,0,MPI_COMM_WORLD);
for(i=0; i<numGDomains2; i++) setOfDomain.insert( domainsGarray[i] );
//printf("rank %d numGDomains2 %d\n",P_pid(),numGDomains2);
// for(i=0; i<numGDomains2; i++) printf("rank %d domains %d\n",P_pid(),domainsGarray[i]);
}
Developer: andreabduque, Project: padmec-amr, Lines: 63, Source: EBFV1__pre-processors.cpp
Example 9: trainOneEpochDenseCPU
void trainOneEpochDenseCPU(int itask, float *data, float *numerator,
float *denominator, float *codebook,
unsigned int nSomX, unsigned int nSomY,
unsigned int nDimensions, unsigned int nVectors,
unsigned int nVectorsPerRank, float radius,
float scale, string mapType, int *globalBmus)
{
unsigned int p1[2] = {0, 0};
unsigned int *bmus = new unsigned int[nVectorsPerRank*2];
#pragma omp parallel default(shared) private(p1)
{
#pragma omp for
for (unsigned int n = 0; n < nVectorsPerRank; n++) {
if (itask*nVectorsPerRank+n<nVectors) {
/// get the best matching unit
get_bmu_coord(codebook, data, nSomY, nSomX,
nDimensions, p1, n);
bmus[2*n] = p1[0]; bmus[2*n+1] = p1[1];
}
}
}
float *localNumerator = new float[nSomY*nSomX*nDimensions];
float *localDenominator = new float[nSomY*nSomX];
#pragma omp parallel default(shared)
{
#pragma omp for
for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
localDenominator[som_y*nSomX + som_x] = 0.0;
for (unsigned int d = 0; d < nDimensions; d++)
localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] = 0.0;
}
}
/// Accumulate denoms and numers
#pragma omp for
for (unsigned int som_y = 0; som_y < nSomY; som_y++) {
for (unsigned int som_x = 0; som_x < nSomX; som_x++) {
for (unsigned int n = 0; n < nVectorsPerRank; n++) {
if (itask*nVectorsPerRank+n<nVectors) {
float dist = 0.0f;
if (mapType == "planar") {
dist = euclideanDistanceOnPlanarMap(som_x, som_y, bmus[2*n], bmus[2*n+1]);
} else if (mapType == "toroid") {
dist = euclideanDistanceOnToroidMap(som_x, som_y, bmus[2*n], bmus[2*n+1], nSomX, nSomY);
}
float neighbor_fuct = getWeight(dist, radius, scale);
for (unsigned int d = 0; d < nDimensions; d++) {
localNumerator[som_y*nSomX*nDimensions + som_x*nDimensions + d] +=
1.0f * neighbor_fuct
* (*(data + n*nDimensions + d));
}
localDenominator[som_y*nSomX + som_x] += neighbor_fuct;
}
}
}
}
}
#ifdef HAVE_MPI
MPI_Reduce(localNumerator, numerator,
nSomY*nSomX*nDimensions, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce(localDenominator, denominator,
nSomY*nSomX, MPI_FLOAT, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Gather(bmus, nVectorsPerRank*2, MPI_INT, globalBmus, nVectorsPerRank*2, MPI_INT, 0, MPI_COMM_WORLD);
#else
for (unsigned int i=0; i < nSomY*nSomX*nDimensions; ++i) {
numerator[i] = localNumerator[i];
}
for (unsigned int i=0; i < nSomY*nSomX; ++i) {
denominator[i] = localDenominator[i];
}
for (unsigned int i=0; i < 2*nVectorsPerRank; ++i) {
globalBmus[i]=bmus[i];
}
#endif
delete [] bmus;
delete [] localNumerator;
delete [] localDenominator;
}
Developer: xgdgsc, Project: somoclu, Lines: 83, Source: denseCpuKernels.cpp
Example 10: main
int main(int argc, char** argv) {
// Initialize the MPI environment
MPI_Init(&argc, &argv);
// Get the number of processes
int world_size;
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
// Get the rank of the process
int world_rank;
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
char string_buffer[LEN];
char* rbuf = NULL;
if(world_rank == 0)
rbuf = malloc(world_size * LEN * sizeof(char));
// Get the name of the processor
char processor_name[50];
int name_len = 50;
if(gethostname(processor_name, name_len) != 0) {
printf("Error with hostname");
exit(1);
}
// Get current time on host
struct timeval time;
if(gettimeofday(&time, NULL) != 0) {
printf("Error with time");
exit(1);
}
// Generate output
time_t curtime = time.tv_sec;
char time_buffer[30];
strftime(time_buffer, 30, "%Y-%m-%d %T.", localtime(&curtime));
sprintf(string_buffer, "%s: %s%li", processor_name, time_buffer, time.tv_usec);
// Gather output
int rc = MPI_Gather(string_buffer, LEN, MPI_CHAR, rbuf, LEN, MPI_CHAR, 0, MPI_COMM_WORLD);
if(rc != MPI_SUCCESS) {
printf("Error while gathering, rc is: %d", rc);
exit(1);
}
// Print output
if(world_rank == 0) {
for(int i = 0; i < world_size; ++i)
printf("%.*s\n", LEN, rbuf + LEN * i);
}
// Get microseconds
int usec = time.tv_usec;
int * rbuf_usec;
if(world_rank == 0)
rbuf_usec = malloc(world_size * sizeof(int));
// Reduce microseconds
if(MPI_Reduce(&usec, rbuf_usec, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD) != MPI_SUCCESS){
printf("Error in MPI_Reduce\n");
exit(1);
}
// Print the reduced (minimum) microseconds on the root
if(world_rank == 0)
printf("%d\n", rbuf_usec[0]);
if(MPI_Barrier(MPI_COMM_WORLD) != MPI_SUCCESS){
printf("Error with barrier");
exit(1);
}
printf("Rank %d is terminating now!\n", world_rank);
// Finalize the MPI environment.
MPI_Finalize();
}
Developer: sKeLeTr0n, Project: uni, Lines: 78, Source: timempi2.c
Example 11: fprintf
//-----------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------
void Image_Exchanger::sync_fragment_info(OverLap_FootPrint* ofp,
ImageFragment_Tile* ift,
int nviewer)
{
#ifdef _DEBUG7
fprintf(stderr, "**** %s:%s() ****\n", __FILE__, __func__);
#endif
std::vector<int> infobuf;
int count = ofp->save_overlap_info(infobuf);
#ifdef _DEBUG6
fprintf(stderr, "%d: %s: olcount=%d, olbuffer size=%ld\n",
m_rank, __func__, count, infobuf.size());
#endif
int c = infobuf.size();
// fprintf(stderr, "%d: nviewer=%d, gather MPI_INT %d\n",
// m_rank, nviewer, c);
memset(m_rcounts, 0, m_runsize*sizeof(unsigned int));
if(nviewer == 1)
{
MPI_Gather(&c, 1, MPI_INT,
m_rcounts, 1, MPI_INT,
0, MPI_COMM_WORLD);
}
else
{
MPI_Allgather(&c, 1, MPI_INT,
m_rcounts, 1, MPI_INT,
MPI_COMM_WORLD);
}
// vector throws a length_error if resized above max_size
//terminate called after throwing an instance of 'std::length_error'
//what(): vector::_M_fill_insert
std::vector<int> ainfobuf(1, 0);
memset(m_rdispls, 0, m_runsize*sizeof(unsigned int));
if( (nviewer == 1 && m_rank==0) || (nviewer > 1) )
{
int total = 0;
for(int i=0; i<m_runsize; i++) total += m_rcounts[i];
// fprintf(stderr, "std::vector max size=%ld, resize to %d\n",
// ainfobuf.max_size(), total);
assert(total > 0);
ainfobuf.resize(total, 0);
}
for(int i=0; i<m_runsize-1; i++)
m_rdispls[i+1] = m_rdispls[i] + m_rcounts[i];
//to make &infobuf[0] a legal call
if(c == 0) infobuf.resize(1);
if(nviewer == 1)
{
MPI_Gatherv(&infobuf[0], c, MPI_INT,
&ainfobuf[0], m_rcounts, m_rdispls,
MPI_INT,
0, MPI_COMM_WORLD);
}
else
{
MPI_Allgatherv(&infobuf[0], c, MPI_INT,
&ainfobuf[0], (int*)m_rcounts, (int*)m_rdispls,
MPI_INT,
MPI_COMM_WORLD);
}
//fprintf(stderr, "MPI_SUCCESS on sync frag info\n");
//only viewer need to have all fragments and count for recv
//non-viewer only need count send for its own fragments
if(m_rank < nviewer)
{
ift->retrieve_fragments(ainfobuf);
}
else if(c > 0)
{
ift->retrieve_fragments(infobuf);
}
//......... part of the code omitted here .........
Developer: jinghuage, Project: pcaster, Lines: 101, Source: image_exchanger.cpp
Example 12: main
//......... part of the code omitted here .........
/* Scatter data to local ranks */
MPI_Scatter(data, chunkSize*N, MPI_FLOAT,
local_data, chunkSize*N, MPI_FLOAT,
0, taskcomm);
/* Compute time for distributing data */
if(task_id == 0){
time[2] = MPI_Wtime();
printf("Group 1: Scattering 1_im1(row) to each processor takes %f s.\n", time[2] - time[1]);
}
/* Do 1_im1 2d FFT */
/* Row FFT */
for(i = 0; i < chunkSize; i++){
for(j = 0; j < N; j++){
/* FFT each row for im1 */
temp_data[j].r = local_data[i][j];
temp_data[j].i = 0;
}
c_fft1d(temp_data, N, -1);
for(j = 0; j < N; j++)
local_data[i][j] = temp_data[j].r;
}
/* Gather all the data and distribute in columns */
if(task_id == 0){
time[3] = MPI_Wtime();
printf("Group 1: FFT each row for 1_im1 takes %f s.\n", time[3] - time[2]);
}
/* Gather all the data of 1_im1 */
MPI_Gather(local_data, chunkSize*N, MPI_FLOAT,
data, chunkSize*N, MPI_FLOAT,
0, taskcomm);
if(task_id == 0){
time[4] = MPI_Wtime();
printf("Group 1: Gathering all the data of 1_im1(row) takes %f s.\n", time[4] - time[3]);
}
/* Scatter all the data to column local data */
MPI_Scatter(data, chunkSize, column,
local_data, chunkSize, column,
0, taskcomm);
if(task_id == 0){
time[5] = MPI_Wtime();
printf("Group 1: Scattering 1_im1(column) to each processor takes %f s.\n", time[5] - time[4]);
}
/* Column FFT */
for(i = 0; i < chunkSize; i++){
for(j = 0; j < N; j++){
/* FFT each column for im1 */
temp_data[j].r = local_data[j][i];
temp_data[j].i = 0;
}
c_fft1d(temp_data, N, -1);
for(j = 0; j < N; j++)
local_data[j][i] = temp_data[j].r;
}
Developer: ttang10, Project: FFT_2D_CONVOLUTION, Lines: 66, Source: task_para.c
Example 13: online_measurement
//......... part of the code omitted here .........
/* now we bring it to normal format */
/* here we use implicitly DUM_MATRIX and DUM_MATRIX+1 */
convert_eo_to_lexic(g_spinor_field[DUM_MATRIX], g_spinor_field[2], g_spinor_field[3]);
/* now we sum only over local space for every t */
for(t = 0; t < T; t++) {
j = g_ipt[t][0][0][0];
res = 0.;
respa = 0.;
resp4 = 0.;
for(i = j; i < j+LX*LY*LZ; i++) {
/* accumulate over the local spatial volume of time slice t using the running index i */
res += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], g_spinor_field[DUM_MATRIX][i]);
_gamma0(phi, g_spinor_field[DUM_MATRIX][i]);
respa += _spinor_prod_re(g_spinor_field[DUM_MATRIX][i], phi);
_gamma5(phi, phi);
resp4 += _spinor_prod_im(g_spinor_field[DUM_MATRIX][i], phi);
}
#if defined MPI
MPI_Reduce(&res, &mpi_res, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
res = mpi_res;
MPI_Reduce(&respa, &mpi_respa, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
respa = mpi_respa;
MPI_Reduce(&resp4, &mpi_resp4, 1, MPI_DOUBLE, MPI_SUM, 0, g_mpi_time_slices);
resp4 = mpi_resp4;
sCpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
sCpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
sCp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
#else
Cpp[t] = +res/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
Cpa[t] = -respa/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
Cp4[t] = +resp4/(g_nproc_x*LX)/(g_nproc_y*LY)/(g_nproc_z*LZ)*2.;
#endif
}
#ifdef MPI
/* some gymnastics needed in case of parallelisation */
if(g_mpi_time_rank == 0) {
MPI_Gather(sCpp, T, MPI_DOUBLE, Cpp, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
MPI_Gather(sCpa, T, MPI_DOUBLE, Cpa, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
MPI_Gather(sCp4, T, MPI_DOUBLE, Cp4, T, MPI_DOUBLE, 0, g_mpi_SV_slices);
}
#endif
/* and write everything into a file */
if(g_mpi_time_rank == 0 && g_proc_coords[0] == 0) {
ofs = fopen(filename, "w");
fprintf( ofs, "1 1 0 %e %e\n", Cpp[t0], 0.);
for(t = 1; t < g_nproc_t*T/2; t++) {
tt = (t0+t)%(g_nproc_t*T);
fprintf( ofs, "1 1 %d %e ", t, Cpp[tt]);
tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
fprintf( ofs, "%e\n", Cpp[tt]);
}
tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
fprintf( ofs, "1 1 %d %e %e\n", t, Cpp[tt], 0.);
fprintf( ofs, "2 1 0 %e %e\n", Cpa[t0], 0.);
for(t = 1; t < g_nproc_t*T/2; t++) {
tt = (t0+t)%(g_nproc_t*T);
fprintf( ofs, "2 1 %d %e ", t, Cpa[tt]);
tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
fprintf( ofs, "%e\n", Cpa[tt]);
}
tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
fprintf( ofs, "2 1 %d %e %e\n", t, Cpa[tt], 0.);
fprintf( ofs, "6 1 0 %e %e\n", Cp4[t0], 0.);
for(t = 1; t < g_nproc_t*T/2; t++) {
tt = (t0+t)%(g_nproc_t*T);
fprintf( ofs, "6 1 %d %e ", t, Cp4[tt]);
tt = (t0+g_nproc_t*T-t)%(g_nproc_t*T);
fprintf( ofs, "%e\n", Cp4[tt]);
}
tt = (t0+g_nproc_t*T/2)%(g_nproc_t*T);
fprintf( ofs, "6 1 %d %e %e\n", t, Cp4[tt], 0.);
fclose(ofs);
}
#ifdef MPI
if(g_mpi_time_rank == 0) {
free(Cpp);
free(Cpa);
free(Cp4);
}
free(sCpp);
free(sCpa);
free(sCp4);
#else
free(Cpp);
free(Cpa);
free(Cp4);
#endif
etime = gettime();
if(g_proc_id == 0 && g_debug_level > 0) {
printf("ONLINE: measurement done int t/s = %1.4e\n", etime - atime);
}
return;
}
Developer: ggscorzato, Project: tmLQCD, Lines: 101, Source: online_measurement.c
Example 14: cuda_mpi_send_forces
void cuda_mpi_send_forces(float *host_forces,
float *host_torques,
CUDA_fluid_composition * host_composition){
int n_part;
int g, pnode;
Cell *cell;
int c;
int i;
int *sizes;
sizes = (int *) Utils::malloc(sizeof(int)*n_nodes);
n_part = cells_get_n_particles();
/* first collect number of particles on each node */
MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);
/* call slave functions to provide the slave data */
if(this_node > 0) {
cuda_mpi_send_forces_slave();
}
else{
/* fetch particle informations into 'result' */
g = 0;
for (pnode = 0; pnode < n_nodes; pnode++) {
if (sizes[pnode] > 0) {
if (pnode == 0) {
for (c = 0; c < local_cells.n; c++) {
int npart;
cell = local_cells.cell[c];
npart = cell->n;
for (i=0;i<npart;i++) {
cell->part[i].f.f[0] += (double)host_forces[(i+g)*3+0];
cell->part[i].f.f[1] += (double)host_forces[(i+g)*3+1];
cell->part[i].f.f[2] += (double)host_forces[(i+g)*3+2];
#ifdef ROTATION
cell->part[i].f.torque[0] += (double)host_torques[(i+g)*3+0];
cell->part[i].f.torque[1] += (double)host_torques[(i+g)*3+1];
cell->part[i].f.torque[2] += (double)host_torques[(i+g)*3+2];
#endif
#ifdef SHANCHEN
for (int ii=0;ii<LB_COMPONENTS;ii++) {
cell->part[i].r.composition[ii] = (double)host_composition[i+g].weight[ii];
}
#endif
}
g += npart;
}
}
else {
/* and send it back to the slave node */
MPI_Send(&host_forces[3*g], 3*sizes[pnode]*sizeof(float), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#ifdef ROTATION
MPI_Send(&host_torques[3*g], 3*sizes[pnode]*sizeof(float), MPI_BYTE, pnode, REQ_CUDAGETFORCES, comm_cart);
#endif
#ifdef SHANCHEN
MPI_Send(&host_composition[g], sizes[pnode]*sizeof(CUDA_fluid_composition), MPI_BYTE, pnode, REQ_CUDAGETPARTS, comm_cart);
#endif
g += sizes[pnode];
}
}
}
}
COMM_TRACE(fprintf(stderr, "%d: finished send\n", this_node));
free(sizes);
}
Developer: Haider-BA, Project: espresso, Lines: 66, Source: cuda_interface.cpp
Example 15: main
int main(int argc, char **argv)
{
int num1, num2, proc_num, proc_rank, comp_result, i;
int buf1[10], buf2[10], buf_result[10];
MPI_Status status;
MPI_Init( &argc, &argv );
MPI_Comm_size( MPI_COMM_WORLD, &proc_num );
MPI_Comm_rank( MPI_COMM_WORLD, &proc_rank );
if( 10 != proc_num ) // require exactly 10 processes
{
if( 0 == proc_rank ) printf("Wrong number of processes!\n");
MPI_Finalize();
return 0;
}
if( 0 == proc_rank ) // read the decimal numbers on process 0; only it knows them
{
scanf("%d%d", &num1, &num2 );
if( num1>1000 ) // clamp to the allowed range
num1 = MAX_NUM;
if( num2>1000 )
num2 = MAX_NUM;
MPI_Send( &num1, 1, MPI_INT, 1, 0, MPI_COMM_WORLD ); // could use a separate communicator and send only to these ranks?
MPI_Send( &num2, 1, MPI_INT, 2, 0, MPI_COMM_WORLD ); // send the numbers to processes 1 and 2
}
if( 1 == proc_rank ) // convert to binary in these processes
{
MPI_Recv( &num1, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status );
for( i=0;i<10;i++)
buf1[i] = *( dec_to_bin( num1 ) + i );
printf("The first number is: ");
for( i=0;i<10;i++)
printf("%d ", buf1[i]);
printf("\n");
}
if( 2 == proc_rank)
{
MPI_Recv( &num2, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status );
for(i=0;i<10;i++)
buf2[i] = *( dec_to_bin( num2 ) + i );
printf("The second number is: ");
for(i=0;i<10;i++)
printf("%d ", buf2[i] );
printf("\n");
}
MPI_Bcast( buf1, 10, MPI_INT, 1, MPI_COMM_WORLD ); // now every process has both binary representations
MPI_Bcast( buf2, 10, MPI_INT, 2, MPI_COMM_WORLD );
comp_result = ( buf1[proc_rank] == buf2[proc_rank] ) ? 0 : 1; // each process compares its own bit position
MPI_Barrier( MPI_COMM_WORLD );
MPI_Gather( &comp_result, 1, MPI_INT, buf_result, 1, MPI_INT, 0, MPI_COMM_WORLD ); // comparison results end up on process 0
if ( 0 == proc_rank )
{
for(i=0;i<10;i++)
printf("%d ", buf_result[i] );
i = 0;
int flag = 1;
while( 0 == buf_result[i] ) // scan the bit positions in order to find where the numbers differ
{
i++;
if( 10 == i)
{
printf("Numbers are equal!\n");
flag = 0;
}
}
if(flag)
{
if( buf1[i] > buf2[i] )
printf("\nFirst number is bigger!\n");
else printf("\nSecond number is bigger!\n");
}
}
MPI_Finalize();
return 0;
}
Developer: SkySpecial, Project: MPI-studying, Lines: 98, Source: MPI_Bin_comparator2.c
Example 16: cuda_mpi_get_particles
/*************** REQ_GETPARTS ************/
void cuda_mpi_get_particles(CUDA_particle_data *particle_data_host)
{
int n_part;
int g, pnode;
Cell *cell;
int c;
MPI_Status status;
int i;
int *sizes;
sizes = (int*) Utils::malloc(sizeof(int)*n_nodes);
n_part = cells_get_n_particles();
/* first collect number of particles on each node */
MPI_Gather(&n_part, 1, MPI_INT, sizes, 1, MPI_INT, 0, comm_cart);
/* just check if the number of particles is correct */
if(this_node > 0){
/* call slave functions to provide the slave datas */
cuda_mpi_get_particles_slave();
}
else {
/* master: fetch particle informations into 'result' */
g = 0;
for (pnode = 0; pnode < n_nodes; pnode++) {
if (sizes[pnode] > 0) {
if (pnode == 0) {
for (c = 0; c < local_cells.n; c++) {
Particle *part;
int npart;
int dummy[3] = {0,0,0};
double pos[3];
cell = local_cells.cell[c];
part = cell->part;
npart = cell->n;
for (i=0;i<npart;i++) {
memmove(pos, part[i].r.p, 3*sizeof(double));
fold_position(pos, dummy);
particle_data_host[i+g].p[0] = (float)pos[0];
particle_data_host[i+g].p[1] = (float)pos[1];
particle_data_host[i+g].p[2] = (float)pos[2];
particle_data_host[i+g].v[0] = (float)part[i].m.v[0];
particle_data_host[i+g].v[1] = (float)part[i].m.v[1];
particle_data_host[i+g].v[2] = (float)part[i].m.v[2];
#ifdef IMMERSED_BOUNDARY
particle_data_host[i+g].isVirtual = part[i].p.isVirtual;
#endif
#ifdef DIPOLES
particle_data_host[i+g].dip[0] = (float)part[i].r.dip[0];
particle_data_host[i+g].dip[1] = (float)part[i].r.dip[1];
particle_data_host[i+g].dip[2] = (float)part[i].r.dip[2];
#endif
#ifdef SHANCHEN
// SAW TODO: does this really need to be copied every time?
int ii;
for(ii=0;ii<2*LB_COMPONENTS;ii++){
particle_data_host[i+g].solvation[ii] = (float)part[i].p.solvation[ii];
}
#endif
#ifdef LB_ELECTROHYDRODYNAMICS
particle_data_host[i+g].mu_E[0] = (float)part[i].p.mu_E[0];
particle_data_host[i+g].mu_E[1] = (float)part[i].p.mu_E[1];
particle_data_host[i+g].mu_E[2] = (float)part[i].p.mu_E[2];
#endif
#ifdef ELECTROSTATICS
particle_data_host[i+g].q = (float)part[i].p.q;
#endif
#ifdef ROTATION
particle_data_host[i+g].quatu[0] = (float)part[i].r.quatu[0];
particle_data_host[i+g].quatu[1] = (float)part[i].r.quatu[1];
particle_data_host[i+g].quatu[2] = (float)part[i].r.quatu[2];
#endif
#ifdef ENGINE
particle_data_host[i+g].swim.v_swim = (float)part[i].swim.v_swim;
particle_data_host[i+g].swim.f_swim = (float)part[i].swim.f_swim;
particle_data_host[i+g].swim.quatu[0] = (float)part[i].r.quatu[0];
particle_data_host[i+g].swim.quatu[1] = (float)part[i].r.quatu[1];
particle_data_host[i+g].swim.quatu[2] = (float)part[i].r.quatu[2];
#if defined(LB) || defined(LB_GPU)
particle_data_host[i+g].swim.push_pull = part[i].swim.push_pull;
particle_data_host[i+g].swim.dipole_length = (float)part[i].swim.dipole_length;
#endif
particle_data_host[i+g].swim.swimming = part[i].swim.swimming;
#endif
}
g += npart;
}
}
else {
//........