This article collects typical usage examples of the MPI_Allreduce function in C/C++. If you are wondering exactly how MPI_Allreduce is called, how to use it, or what real-world examples look like, the hand-picked code samples here should help.
A total of 20 MPI_Allreduce code examples are shown below, sorted by popularity by default.
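Before the project examples, here is a minimal, self-contained sketch of the basic call pattern: every rank contributes a local value and every rank receives the reduced result (a sum in this case). It is illustrative only and is not taken from any of the projects below.
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    int rank, nprocs;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    /* each rank contributes its own rank number */
    int local = rank;
    int global_sum = 0;
    MPI_Allreduce(&local, &global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

    /* unlike MPI_Reduce, every rank now holds the same result */
    printf("rank %d: global_sum = %d (expected %d)\n",
           rank, global_sum, nprocs * (nprocs - 1) / 2);

    MPI_Finalize();
    return 0;
}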
Example 1: main
//......... some code omitted here .........
MPI_Bcast( &nprows, 1, MPI_INT, 0, MPI_COMM_WORLD );
MPI_Bcast( &npcols, 1, MPI_INT, 0, MPI_COMM_WORLD );
if ( nprows * npcols != nprocs ){
printf( "mesh not of right size\n" );
exit( 0 );
}
/* Figure out what my index is */
mycol = me / nprows;
myrow = me % nprows;
/* create a communicator for the row of which I am part */
MPI_Comm_split( MPI_COMM_WORLD, myrow, mycol, &comm_row );
/* create a communicator for the column of which I am part */
MPI_Comm_split( MPI_COMM_WORLD, mycol, myrow, &comm_col );
/* create buffers to hold the global A, B, C (everyone will have a copy) */
global_A = ( double * ) malloc ( sizeof( double ) * n * n );
global_B = ( double * ) malloc ( sizeof( double ) * n * n );
global_C = ( double * ) malloc ( sizeof( double ) * n * n );
/* create random matrices on node zero and share with all nodes */
if ( me == 0 ){
random_matrix( n, n, global_A, n );
random_matrix( n, n, global_B, n );
random_matrix( n, n, global_C, n );
}
MPI_Bcast( global_A, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
MPI_Bcast( global_B, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
MPI_Bcast( global_C, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
/* compute local matrix sizes */
local_m = n / nprows + ( myrow < n % nprows ? 1 : 0 );
local_n = n / npcols + ( mycol < n % npcols ? 1 : 0 );
/* create buffers to hold the local A, B, C */
local_A = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
local_B = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
local_C = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
/* copy the local parts */
CopyMatrixGlobalToLocal( n, n,
global_A, n,
local_A, local_m,
comm_row, comm_col );
CopyMatrixGlobalToLocal( n, n,
global_B, n,
local_B, local_m,
comm_row, comm_col );
CopyMatrixGlobalToLocal( n, n,
global_C, n,
local_C, local_m,
comm_row, comm_col );
/* Compute parallel matrix-matrix multiply */
ParallelMMult( n, n, n,
local_A, local_m,
local_B, local_m,
local_C, local_m,
comm_row, comm_col );
/* Compute sequential matrix-matrix multiply on all nodes */
dgemm_( "N", "N", &n, &n, &n,
&d_one, global_A, &n, global_B, &n,
&d_one, global_C, &n );
local_C_ref = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
CopyMatrixGlobalToLocal( n, n,
global_C, n,
local_C_ref, local_m,
comm_row, comm_col );
local_diff = compare_matrices( local_m, local_n,
local_C, local_m,
local_C_ref, local_m );
MPI_Allreduce( &local_diff, &diff, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );
if ( me == 0 )
printf("\ndiff = %le\n", diff );
free( global_A );
free( global_B );
free( global_C );
free( local_A );
free( local_B );
free( local_C );
free( local_C_ref);
MPI_Comm_free( &comm_row );
MPI_Comm_free( &comm_col );
/* Cleanup up the MPI environment */
MPI_Finalize();
exit( 0 );
}
Developer: scottenriquez, Project: parallel-matrix-multiply, Lines: 101, Source: driver.c
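Example 1 accumulates the per-rank differences with MPI_SUM, so the reported diff grows with the number of ranks. A common variant reduces with MPI_MAX instead, so the reported value is the single worst per-rank discrepancy. A hedged sketch of that variant, reusing Example 1's variables (local_m, local_n, local_C, local_C_ref, me); it is not part of the original driver.c:
/* same local comparison as in Example 1 */
double local_diff = compare_matrices( local_m, local_n,
                                      local_C, local_m,
                                      local_C_ref, local_m );
double max_diff = 0.0;
/* MPI_MAX keeps only the largest per-rank difference */
MPI_Allreduce( &local_diff, &max_diff, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD );
if ( me == 0 )
    printf( "max per-rank diff = %le\n", max_diff );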
Example 2: triangulateAndFormProl
static PetscErrorCode triangulateAndFormProl(IS selected_2,PetscInt data_stride,PetscReal coords[],PetscInt nselected_1,const PetscInt clid_lid_1[],const PetscCoarsenData *agg_lists_1,
const PetscInt crsGID[],PetscInt bs,Mat a_Prol,PetscReal *a_worst_best)
{
#if defined(PETSC_HAVE_TRIANGLE)
PetscErrorCode ierr;
PetscInt jj,tid,tt,idx,nselected_2;
struct triangulateio in,mid;
const PetscInt *selected_idx_2;
PetscMPIInt rank;
PetscInt Istart,Iend,nFineLoc,myFine0;
int kk,nPlotPts,sid;
MPI_Comm comm;
PetscReal tm;
PetscFunctionBegin;
ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr);
ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr);
if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */
*a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
} else *a_worst_best = 0.0;
ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr);
if (tm > 0.0) {
*a_worst_best = 100.0;
PetscFunctionReturn(0);
}
ierr = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr);
nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs;
nPlotPts = nFineLoc; /* locals */
/* triangle */
/* Define input points - in*/
in.numberofpoints = nselected_2;
in.numberofpointattributes = 0;
/* get nselected points */
ierr = PetscMalloc1(2*nselected_2, &in.pointlist);CHKERRQ(ierr);
ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);
for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) {
PetscInt lid = selected_idx_2[kk];
in.pointlist[sid] = coords[lid];
in.pointlist[sid+1] = coords[data_stride + lid];
if (lid>=nFineLoc) nPlotPts++;
}
if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,2*nselected_2);
in.numberofsegments = 0;
in.numberofedges = 0;
in.numberofholes = 0;
in.numberofregions = 0;
in.trianglelist = 0;
in.segmentmarkerlist = 0;
in.pointattributelist = 0;
in.pointmarkerlist = 0;
in.triangleattributelist = 0;
in.trianglearealist = 0;
in.segmentlist = 0;
in.holelist = 0;
in.regionlist = 0;
in.edgelist = 0;
in.edgemarkerlist = 0;
in.normlist = 0;
/* triangulate */
mid.pointlist = 0; /* Not needed if -N switch used. */
/* Not needed if -N switch used or number of point attributes is zero: */
mid.pointattributelist = 0;
mid.pointmarkerlist = 0; /* Not needed if -N or -B switch used. */
mid.trianglelist = 0; /* Not needed if -E switch used. */
/* Not needed if -E switch used or number of triangle attributes is zero: */
mid.triangleattributelist = 0;
mid.neighborlist = 0; /* Needed only if -n switch used. */
/* Needed only if segments are output (-p or -c) and -P not used: */
mid.segmentlist = 0;
/* Needed only if segments are output (-p or -c) and -P and -B not used: */
mid.segmentmarkerlist = 0;
mid.edgelist = 0; /* Needed only if -e switch used. */
mid.edgemarkerlist = 0; /* Needed if -e used and -B not used. */
mid.numberoftriangles = 0;
/* Triangulate the points. Switches are chosen to read and write a */
/* PSLG (p), preserve the convex hull (c), number everything from */
/* zero (z), assign a regional attribute to each element (A), and */
/* produce an edge list (e), a Voronoi diagram (v), and a triangle */
/* neighbor list (n). */
if (nselected_2 != 0) { /* inactive processor */
char args[] = "npczQ"; /* c is needed ? */
triangulate(args, &in, &mid, (struct triangulateio*) NULL);
/* output .poly files for 'showme' */
if (!PETSC_TRUE) {
static int level = 1;
FILE *file; char fname[32];
sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w");
/*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
fprintf(file, "%d %d %d %d\n",in.numberofpoints,2,0,0);
/*Following lines: <vertex #> <x> <y> */
for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) {
fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
}
/*One line: <# of segments> <# of boundary markers (0 or 1)> */
//......... some code omitted here .........
Developer: pombredanne, Project: petsc, Lines: 101, Source: geo.c
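The MPI_Allreduce in Example 2 exists so that every rank takes the same early-return branch: a rank whose local selection is degenerate sets a sentinel, the MPIU_MAX reduction makes that sentinel visible everywhere, and all ranks return together instead of some of them blocking in a later collective. The same idea in plain MPI, as a hedged self-contained sketch (the helper name is an invention):
#include <mpi.h>

/* Returns nonzero on every rank if any rank reported a local failure.
 * Every rank must call this before branching; otherwise some ranks could
 * skip a later collective and the program would deadlock. */
static int any_rank_failed(int local_failure, MPI_Comm comm)
{
    int global_failure = 0;
    MPI_Allreduce(&local_failure, &global_failure, 1, MPI_INT, MPI_MAX, comm);
    return global_failure;
}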
Example 3: isFinished
bool isFinished() {
int ret;
MPI_Allreduce(&active, &ret, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
return ret == 0;
}
Developer: qinglan233, Project: PDC-2016-project, Lines: 5, Source: Communicator.hpp
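Example 3 uses the reduction as a vote: each rank contributes its active flag and the loop is finished once the sum is zero. MPI_LOR expresses the same test directly and cannot overflow however many ranks participate. A hedged sketch of that alternative; active is assumed to be a member of the surrounding class, as in Example 3:
bool isFinished() {
    int any_active = 0;
    /* nonzero iff at least one rank still has active != 0 */
    MPI_Allreduce(&active, &any_active, 1, MPI_INT, MPI_LOR, MPI_COMM_WORLD);
    return any_active == 0;
}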
Example 4: poisson_main
//......... some code omitted here .........
double solve_time = fei::utils::cpu_time() - start_solve_time;
if (localProc == 0) {
FEI_COUT << "FEI cpu-times:" << FEI_ENDL
<< " init. phase: " << iTime << FEI_ENDL
<< " load phase: " << lTime << FEI_ENDL
<< " solve time: " << sTime << FEI_ENDL;
}
double norm = 0.0;
FEI_COUT.setf(IOS_SCIENTIFIC, IOS_FLOATFIELD);
CHK_ERR( fei->residualNorm(1, 1, &fieldIDs[0], &norm) );
if (localProc == 0) {
FEI_COUT << "returned residual norm: " << norm << FEI_ENDL;
}
int itersTaken = 0;
CHK_ERR( fei->iterations(itersTaken) );
//
//We oughtta make sure the solution we just computed is correct...
//
int numNodes = 0;
CHK_ERR( fei->getNumLocalNodes(numNodes) );
double maxErr = 0.0;
if (numNodes > 0) {
int lenNodeIDs = numNodes;
GlobalID* nodeIDs = new GlobalID[lenNodeIDs];
double* soln = new double[lenNodeIDs];
if (nodeIDs != NULL && soln != NULL) {
CHK_ERR( fei->getLocalNodeIDList(numNodes, nodeIDs, lenNodeIDs) );
int fieldID = 1;
CHK_ERR( fei->getNodalFieldSolution(fieldID, numNodes, nodeIDs, soln));
for(int i=0; i<numNodes; i++) {
int nID = (int)nodeIDs[i];
double x = (1.0* ((nID-1)%(L+1)))/L;
double y = (1.0* ((nID-1)/(L+1)))/L;
double exactSoln = x*x + y*y;
double error = std::abs(exactSoln - soln[i]);
if (maxErr < error) maxErr = error;
}
delete [] nodeIDs;
delete [] soln;
}
else {
fei::console_out() << "allocation of nodeIDs or soln failed." << FEI_ENDL;
}
}
#ifndef FEI_SER
double globalMaxErr = 0.0;
MPI_Allreduce(&maxErr, &globalMaxErr, 1, MPI_DOUBLE, MPI_MAX, comm);
maxErr = globalMaxErr;
#endif
bool testPassed = true;
if (maxErr > 1.e-6) testPassed = false;
if (testPassed && localProc == 0) {
FEI_COUT << "poisson: TEST PASSED, maxErr = " << maxErr << ", iterations: "
<< itersTaken << FEI_ENDL;
//This is something the SIERRA runtest tool looks for in test output...
FEI_COUT << "SIERRA execution successful" << FEI_ENDL;
}
if (testPassed == false && localProc == 0) {
FEI_COUT << "maxErr = " << maxErr << ", TEST FAILED" << FEI_ENDL;
FEI_COUT << "(Test is deemed to have passed if the maximum difference"
<< " between the exact and computed solutions is 1.e-6 or less.)"
<< FEI_ENDL;
}
double elapsed_cpu_time = fei::utils::cpu_time() - start_time;
//The following IOS_... macros are defined in base/fei_macros.h
FEI_COUT.setf(IOS_FIXED, IOS_FLOATFIELD);
if (localProc==0) {
FEI_COUT << "Proc0 cpu times (seconds):" << FEI_ENDL
<< " FEI initialize: " << fei_init_time << FEI_ENDL
<< " FEI load: " << fei_load_time << FEI_ENDL
<< " solve: " << solve_time << FEI_ENDL
<< "Total program time: " << elapsed_cpu_time << FEI_ENDL;
}
wrapper.reset();
fei.reset();
factory.reset();
//If Prometheus is being used, we need to make sure that the
//LibraryWrapper inside factory is deleted before MPI_Finalize() is called.
//(That's why we call the 'reset' methods on these shared-pointers rather
//than just letting them destroy themselves when they go out of scope.)
return(0);
}
Developer: cakeisalie, Project: oomphlib_003, Lines: 101, Source: poisson_main.cpp
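Example 4 reduces a single scalar (maxErr) with MPI_MAX. When several quantities need the same reduction, they can be packed into one buffer and reduced with a single call, which keeps the number of collective operations down. The hedged sketch below reuses maxErr, solve_time and comm from Example 4; combining them this way is an illustration, not part of the original poisson_main.cpp:
/* pack the values that all need a MAX reduction into one buffer */
double local_vals[2];
local_vals[0] = maxErr;      /* maximum nodal error, as in Example 4 */
local_vals[1] = solve_time;  /* slowest per-rank solve time (illustrative) */

double global_vals[2];
MPI_Allreduce(local_vals, global_vals, 2, MPI_DOUBLE, MPI_MAX, comm);

double globalMaxErr       = global_vals[0];
double globalMaxSolveTime = global_vals[1];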
Example 5: cell_mindt
double cell_mindt( struct Cell *** theCells, struct Sim * theSim, struct GravMass * theGravMasses ){
int i_m,j_m,k_m;
double dt_m = 1.e100;//HUGE_VAL;
double a_m,r_m,dx_m;
double mag_vel_m;
int i,j,k;
for( k=sim_Nghost_min(theSim,Z_DIR) ; k<sim_N(theSim,Z_DIR)-sim_Nghost_max(theSim,Z_DIR) ; ++k ){
double zm = sim_FacePos(theSim,k-1,Z_DIR);
double zp = sim_FacePos(theSim,k,Z_DIR);
double dz = zp-zm;
for( i=sim_Nghost_min(theSim,R_DIR) ; i<sim_N(theSim,R_DIR)-sim_Nghost_max(theSim,R_DIR) ; ++i ){
double rm = sim_FacePos(theSim,i-1,R_DIR);
double rp = sim_FacePos(theSim,i,R_DIR);
double dr = rp-rm;
double r = .5*(rp+rm);
for( j=0 ; j<sim_N_p(theSim,i) ; ++j ){
int jm = j-1;
if( j==0 ) jm = sim_N_p(theSim,i)-1;
double w = .5*(theCells[k][i][j].wiph+theCells[k][i][jm].wiph);
double dx = dr;
double rdphi = .5*(rp+rm)*theCells[k][i][j].dphi;
if( rdphi<dr ) {
dx = rdphi;
}
if( dx>dz ) {
dx = dz;
}
double a = maxvel( theCells[k][i][j].prim , w , r ,theSim);
double rho = theCells[k][i][j].prim[RHO];
double Pp = theCells[k][i][j].prim[PPP];
double dt = sim_CFL(theSim)*dx/a;
if( sim_EXPLICIT_VISCOSITY(theSim)>0.0 ){
double nu;
if (sim_VISC_CONST(theSim)==1){
nu = sim_EXPLICIT_VISCOSITY(theSim);
} else{
double tiph = theCells[k][i][j].tiph - 0.5*theCells[k][i][j].dphi;
if (sim_InitialDataType(theSim)==SHEAR){
double HoR = 0.1;
//nu = sim_EXPLICIT_VISCOSITY(theSim)*HoR*HoR*pow(fabs((r*cos(tiph))),1.5);
nu = sim_EXPLICIT_VISCOSITY(theSim)*sim_GAMMALAW(theSim)*Pp/rho*pow(fabs(r*cos(tiph)),2.0);
if (r*cos(tiph)>20.) nu=0.000000001;
} else{
double M0 = gravMass_M(theGravMasses,0);
double M1 = gravMass_M(theGravMasses,1);
double dist_bh0 = gravMass_dist(theGravMasses,0,r,tiph,0.);
double dist_bh1 = gravMass_dist(theGravMasses,1,r,tiph,0.);
double alpha = sim_EXPLICIT_VISCOSITY(theSim);
double eps = sim_G_EPS(theSim);
//nu = sim_EXPLICIT_VISCOSITY(theSim)*sim_GAMMALAW(theSim)*Pp/rho*pow(r,1.5);
nu = alpha*Pp/rho/sqrt(pow(dist_bh0*dist_bh0+eps*eps,-1.5)*M0+pow(dist_bh1*dist_bh1+eps*eps,-1.5)*M1);
}
}
double dt_visc = .25*dx*dx/nu;
dt = dt/( 1. + dt/dt_visc );
}
if( dt_m > dt ) {
dt_m = dt;
}
}
}
}
double dt2;
MPI_Allreduce( &dt_m , &dt2 , 1 , MPI_DOUBLE , MPI_MIN , sim_comm );
return( dt2 );
}
Developer: brianfarris, Project: Disco2, Lines: 69, Source: cell_mindt.c
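Example 5 only needs the global minimum, so MPI_MIN on a plain double is enough. When it also matters which rank owns the limiting cell (for diagnostics, say), the predefined MPI_MINLOC operation on a value/rank pair returns both in one call. A hedged sketch, reusing dt_m and sim_comm from Example 5; it is not part of the original cell_mindt.c:
struct { double value; int rank; } dt_local, dt_global;
dt_local.value = dt_m;                    /* local minimum time step */
MPI_Comm_rank(sim_comm, &dt_local.rank);  /* tag it with this rank   */

/* keep the smallest value together with the rank that supplied it */
MPI_Allreduce(&dt_local, &dt_global, 1, MPI_DOUBLE_INT, MPI_MINLOC, sim_comm);

/* dt_global.value is the global dt; dt_global.rank identifies the limiting rank */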
Example 6: gmx_check_thread_affinity_set
/* Check the process affinity mask and if it is found to be non-zero,
* will honor it and disable mdrun internal affinity setting.
* Note that this will only work on Linux as we use a GNU feature.
*/
void
gmx_check_thread_affinity_set(FILE *fplog,
const t_commrec *cr,
gmx_hw_opt_t *hw_opt,
int gmx_unused nthreads_hw_avail,
gmx_bool bAfterOpenmpInit)
{
GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");
if (!bAfterOpenmpInit)
{
/* Check for externally set OpenMP affinity and turn off internal
* pinning if any is found. We need to do this check early to tell
* thread-MPI whether it should do pinning when spawning threads.
* TODO: the above no longer holds, we should move these checks later
*/
if (hw_opt->thread_affinity != threadaffOFF)
{
char *message;
if (!gmx_omp_check_thread_affinity(&message))
{
/* TODO: with -pin auto we should only warn when using all cores */
md_print_warn(cr, fplog, "%s", message);
sfree(message);
hw_opt->thread_affinity = threadaffOFF;
}
}
/* With thread-MPI this is needed as pinning might get turned off,
* which needs to be known before starting thread-MPI.
* With thread-MPI hw_opt is processed here on the master rank
* and passed to the other ranks later, so we only do this on master.
*/
if (!SIMMASTER(cr))
{
return;
}
#ifndef GMX_THREAD_MPI
return;
#endif
}
#ifdef HAVE_SCHED_AFFINITY
int ret;
cpu_set_t mask_current;
if (hw_opt->thread_affinity == threadaffOFF)
{
/* internal affinity setting is off, don't bother checking process affinity */
return;
}
CPU_ZERO(&mask_current);
if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
{
/* failed to query affinity mask, will just return */
if (debug)
{
fprintf(debug, "Failed to query affinity mask (error %d)", ret);
}
return;
}
/* Before proceeding with the actual check, make sure that the number of
* detected CPUs is >= the CPUs in the current set.
* We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#ifdef CPU_COUNT
if (nthreads_hw_avail < CPU_COUNT(&mask_current))
{
if (debug)
{
fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
nthreads_hw_avail, CPU_COUNT(&mask_current));
}
return;
}
#endif /* CPU_COUNT */
gmx_bool bAllSet = TRUE;
for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
{
bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
}
#ifdef GMX_LIB_MPI
gmx_bool bAllSet_All;
MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
bAllSet = bAllSet_All;
#endif
if (!bAllSet)
{
if (hw_opt->thread_affinity == threadaffAUTO)
{
if (!bAfterOpenmpInit)
//......... some code omitted here .........
Developer: pjohansson, Project: gromacs, Lines: 101, Source: thread_affinity.cpp
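Example 6 reduces a gmx_bool (an int) with MPI_LAND to learn whether the affinity mask is fully set on every rank. Stripped of the GROMACS specifics, the all-ranks-agree pattern looks like the hedged sketch below (the helper name is an invention):
#include <mpi.h>

/* Returns nonzero on every rank iff local_ok was nonzero on every rank. */
static int all_ranks_agree(int local_ok, MPI_Comm comm)
{
    int global_ok = 0;
    /* MPI_LAND treats any nonzero int as logical true */
    MPI_Allreduce(&local_ok, &global_ok, 1, MPI_INT, MPI_LAND, comm);
    return global_ok;
}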
Example 7: hunt_problem
//......... some code omitted here .........
#endif
}
/*
* PRINT OUT VALUES OF EXTRA UNKNOWNS
* FROM AUGMENTING CONDITIONS
*/
if (nAC > 0)
{
DPRINTF(stderr, "\n------------------------------\n");
DPRINTF(stderr, "Augmenting Conditions: %4d\n", nAC);
DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC);
for (iAC = 0; iAC < nAC; iAC++)
{
if (augc[iAC].Type == AC_USERBC)
{
DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n",
augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
}
else if (augc[iAC].Type == AC_USERMAT ||
augc[iAC].Type == AC_FLUX_MAT )
{
DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n",
augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]);
}
else if(augc[iAC].Type == AC_VOLUME)
{
evol_local = augc[iAC].evol;
#ifdef PARALLEL
if( Num_Proc > 1 ) {
MPI_Allreduce( &evol_local, &evol_global, 1,
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}
evol_local = evol_global;
#endif
DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n",
augc[iAC].MTID, augc[iAC].VOLID, evol_local,
x_AC[iAC]);
}
else if(augc[iAC].Type == AC_POSITION)
{
evol_local = augc[iAC].evol;
#ifdef PARALLEL
if( Num_Proc > 1 ) {
MPI_Allreduce( &evol_local, &evol_global, 1,
MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
}
evol_local = evol_global;
#endif
DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n",
augc[iAC].MTID, augc[iAC].VOLID, evol_local,
x_AC[iAC]);
}
else if(augc[iAC].Type == AC_FLUX)
{
DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n",
augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
}
}
}
/* Check element quality */
good_mesh = element_quality(exo, x, ams[0]->proc_config);
Developer: gomatestbot, Project: goma, Lines: 67, Source: ac_hunt.c
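Example 7 wraps each reduction in #ifdef PARALLEL plus an if (Num_Proc > 1) guard so the same source also builds and runs serially. That pattern is easier to keep consistent behind a small helper; the following is a hedged sketch of such a helper (global_sum_double is an invented name, not part of Goma):
/* Sum a double over all ranks; degrades to a plain copy in serial builds. */
static double global_sum_double(double local_val)
{
    double global_val = local_val;
#ifdef PARALLEL
    if (Num_Proc > 1) {
        MPI_Allreduce(&local_val, &global_val, 1,
                      MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
    }
#endif
    return global_val;
}
/* usage, replacing the inline blocks above:  evol_local = global_sum_double(augc[iAC].evol); */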
Example 8: main
int main( int argc, char **argv )
{
MPI_Comm comm;
int *sbuf, *rbuf;
int rank, size;
int *sendcounts, *recvcounts, *rdispls, *sdispls;
int i, j, *p, err, toterr;
MPI_Init( &argc, &argv );
err = 0;
comm = MPI_COMM_WORLD;
/* Create the buffer */
MPI_Comm_size( comm, &size );
MPI_Comm_rank( comm, &rank );
sbuf = (int *)malloc( size * size * sizeof(int) );
rbuf = (int *)malloc( size * size * sizeof(int) );
if (!sbuf || !rbuf) {
fprintf( stderr, "Could not allocated buffers!\n" );
MPI_Abort( comm, 1 );
}
/* Load up the buffers */
for (i=0; i<size*size; i++) {
sbuf[i] = i + 100*rank;
rbuf[i] = -i;
}
/* Create and load the arguments to alltoallv */
sendcounts = (int *)malloc( size * sizeof(int) );
recvcounts = (int *)malloc( size * sizeof(int) );
rdispls = (int *)malloc( size * sizeof(int) );
sdispls = (int *)malloc( size * sizeof(int) );
if (!sendcounts || !recvcounts || !rdispls || !sdispls) {
fprintf( stderr, "Could not allocate arg items!\n" );
MPI_Abort( comm, 1 );
}
for (i=0; i<size; i++) {
sendcounts[i] = i;
recvcounts[i] = rank;
rdispls[i] = i * rank;
sdispls[i] = (i * (i+1))/2;
}
MPI_Alltoallv( sbuf, sendcounts, sdispls, MPI_INT,
rbuf, recvcounts, rdispls, MPI_INT, comm );
/* Check rbuf */
for (i=0; i<size; i++) {
p = rbuf + rdispls[i];
for (j=0; j<rank; j++) {
if (p[j] != i * 100 + (rank*(rank+1))/2 + j) {
fprintf( stderr, "[%d] got %d expected %d for %dth\n",
rank, p[j], i * 100 + (rank*(rank+1))/2 + j, j );
err++;
}
}
}
free( sdispls );
free( rdispls );
free( recvcounts );
free( sendcounts );
free( rbuf );
free( sbuf );
MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
if (rank == 0) {
if (toterr > 0)
fprintf( stderr, "Test FAILED with %d errors\n", toterr );
else
fprintf( stderr, "Test passed\n" );
}
MPI_Finalize();
return 0;
}
Developer: MartinLidh, Project: tddc78, Lines: 79, Source: alltoallv.c
Example 9: LU_decomp
void LU_decomp(struct problem *info, struct fmatrix *X, int *reorder, MPI_Datatype pivot_type, MPI_Op best_pivot_op)
{
MPI_Request req_spiv, req_sa, req_sm;
MPI_Status status;
number_type *m = malloc(info->blksz * sizeof(*m));
int diag;
for (diag = 0; diag < info->n; diag++) {
/* we do partial pivoting, so the proc with the pivot is on this column: */
int pivot_h = diag / info->blksz;
int r, c, i;
double start_time = MPI_Wtime();
double start_time2;
struct pivot pivot = { -1, 0. };
/* choose pivot across the column */
if (info->coords[HDIM] == pivot_h) {
/* column with pivot in block */
int pivot_c = diag % info->blksz;
/* Argo doesn't want aliasing in allreduce */
struct pivot pivot_cand = { -1, 0. };
for (i = 0; i < info->blksz; i++) {
if (reorder[i] > diag && fabs(CELL(X, i, pivot_c)) > fabs(pivot_cand.value)) {
pivot_cand.row = info->blksz*info->coords[VDIM] + i;
pivot_cand.value = CELL(X, i, pivot_c);
}
}
start_time2 = MPI_Wtime();
MPI_Allreduce(&pivot_cand, &pivot, 1, pivot_type, best_pivot_op, info->vcomm);
pivot_allr_time += MPI_Wtime() - start_time2;
}
/* broadcast pivot choice across row towards the right */
start_time2 = MPI_Wtime();
pipeline_right(info, pivot_h, &pivot, 1, pivot_type, 45, &req_spiv);
pivot_bcast_time += MPI_Wtime() - start_time2;
pivot_time += MPI_Wtime() - start_time;
/* find rank of proc with pivot on the vertical communicator */
int pivot_v = pivot.row / info->blksz;
/* fill in reorder */
if (info->coords[VDIM] == pivot_v) {
reorder[pivot.row % info->blksz] = diag;
}
/* calculate and distribute the ms */
for (r = 0; r < info->blksz; r++) {
if (reorder[r] > diag) {
if (info->coords[HDIM] == pivot_h) {
int pivot_c = diag % info->blksz;
m[r] = CELL(X, r, pivot_c) / pivot.value;
CELL(X, r, pivot_c) = m[r];
}
/* broadcast m towards right */
start_time = MPI_Wtime();
pipeline_right(info, pivot_h, &m[r], 1, MPI_number_type, 64, &req_sm);
m_bcast_time += MPI_Wtime() - start_time;
}
}
/* distribute the pivot row and eliminate */
int startc = 0;
if (info->coords[HDIM] == pivot_h) startc = (diag+1) % info->blksz;
if (info->coords[HDIM] < pivot_h) startc = info->blksz;
/* elimination */
for (c = startc; c < info->blksz; c++) {
number_type a;
if (info->coords[VDIM] == pivot_v) {
a = CELL(X, pivot.row % info->blksz, c);
}
start_time = MPI_Wtime();
int up = (info->coords[VDIM]+info->sqp-1)%info->sqp;
int down = (info->coords[VDIM]+1)%info->sqp;
if (info->coords[VDIM] != pivot_v) {
MPI_Recv(&a, 1, MPI_number_type, up, 78, info->vcomm, &status);
}
if (down != pivot_v) {
MPI_Isend(&a, 1, MPI_number_type, down, 78, info->vcomm, &req_sa);
}
a_bcast_time += MPI_Wtime() - start_time;
for (r = 0; r < info->blksz; r++) {
if (reorder[r] > diag) {
CELL(X, r,c) -= m[r]*a;
}
}
if (down != pivot_v) MPI_Wait(&req_sa, &status);
}
}
}
Developer: cstroe, Project: PP-MM-A03, Lines: 95, Source: lu2d.c
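Example 9 passes a user-defined datatype (pivot_type) and reduction operator (best_pivot_op) so that the allreduce picks the pivot candidate with the largest magnitude and carries its row index along, much like a generalized MPI_MAXLOC. The example does not show how that pair is built; the sketch below is one hedged way to do it. The field order mirrors the struct pivot used above, but the helper names are assumptions:
#include <mpi.h>
#include <math.h>

struct pivot { int row; double value; };

/* user-defined reduction: keep the candidate with the larger |value| */
static void best_pivot_fn(void *invec, void *inoutvec, int *len, MPI_Datatype *dtype)
{
    struct pivot *in    = (struct pivot *)invec;
    struct pivot *inout = (struct pivot *)inoutvec;
    int i;
    (void)dtype; /* unused */
    for (i = 0; i < *len; i++) {
        if (fabs(in[i].value) > fabs(inout[i].value)) inout[i] = in[i];
    }
}

/* build the matching MPI datatype and operator once at start-up */
static void make_pivot_type_and_op(MPI_Datatype *pivot_type, MPI_Op *best_pivot_op)
{
    int          blocklens[2] = { 1, 1 };
    MPI_Datatype types[2]     = { MPI_INT, MPI_DOUBLE };
    MPI_Aint     displs[2], base;
    struct pivot probe;

    MPI_Get_address(&probe, &base);
    MPI_Get_address(&probe.row,   &displs[0]);
    MPI_Get_address(&probe.value, &displs[1]);
    displs[0] -= base;
    displs[1] -= base;

    MPI_Type_create_struct(2, blocklens, displs, types, pivot_type);
    MPI_Type_commit(pivot_type);
    MPI_Op_create(best_pivot_fn, 1 /* commutative */, best_pivot_op);
}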
Example 10: DMPlexPreallocateOperator
PetscErrorCode DMPlexPreallocateOperator(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix)
{
MPI_Comm comm;
MatType mtype;
PetscSF sf, sfDof, sfAdj;
PetscSection leafSectionAdj, rootSectionAdj, sectionAdj, anchorSectionAdj;
PetscInt nroots, nleaves, l, p;
const PetscInt *leaves;
const PetscSFNode *remotes;
PetscInt dim, pStart, pEnd, numDof, globalOffStart, globalOffEnd, numCols;
PetscInt *tmpAdj = NULL, *adj, *rootAdj, *anchorAdj = NULL, *cols, *remoteOffsets;
PetscInt adjSize;
PetscLayout rLayout;
PetscInt locRows, rStart, rEnd, r;
PetscMPIInt size;
PetscBool doCommLocal, doComm, debug = PETSC_FALSE, isSymBlock, isSymSeqBlock, isSymMPIBlock;
PetscBool useAnchors;
PetscErrorCode ierr;
PetscFunctionBegin;
PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
PetscValidHeaderSpecific(section, PETSC_SECTION_CLASSID, 3);
PetscValidHeaderSpecific(sectionGlobal, PETSC_SECTION_CLASSID, 4);
PetscValidHeaderSpecific(A, MAT_CLASSID, 9);
if (dnz) PetscValidPointer(dnz,5);
if (onz) PetscValidPointer(onz,6);
if (dnzu) PetscValidPointer(dnzu,7);
if (onzu) PetscValidPointer(onzu,8);
ierr = PetscLogEventBegin(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr);
ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
ierr = PetscOptionsGetBool(NULL, "-dm_view_preallocation", &debug, NULL);CHKERRQ(ierr);
ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
doCommLocal = (size > 1) && (nroots >= 0) ? PETSC_TRUE : PETSC_FALSE;
ierr = MPI_Allreduce(&doCommLocal, &doComm, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr);
/* Create dof SF based on point SF */
if (debug) {
ierr = PetscPrintf(comm, "Input Section for Preallocation:\n");CHKERRQ(ierr);
ierr = PetscSectionView(section, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
ierr = PetscPrintf(comm, "Input Global Section for Preallocation:\n");CHKERRQ(ierr);
ierr = PetscSectionView(sectionGlobal, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
ierr = PetscPrintf(comm, "Input SF for Preallocation:\n");CHKERRQ(ierr);
ierr = PetscSFView(sf, NULL);CHKERRQ(ierr);
}
ierr = PetscSFCreateRemoteOffsets(sf, section, section, &remoteOffsets);CHKERRQ(ierr);
ierr = PetscSFCreateSectionSF(sf, section, remoteOffsets, section, &sfDof);CHKERRQ(ierr);
if (debug) {
ierr = PetscPrintf(comm, "Dof SF for Preallocation:\n");CHKERRQ(ierr);
ierr = PetscSFView(sfDof, NULL);CHKERRQ(ierr);
}
/* Create section for dof adjacency (dof ==> # adj dof) */
ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
ierr = PetscSectionGetStorageSize(section, &numDof);CHKERRQ(ierr);
ierr = PetscSectionCreate(comm, &leafSectionAdj);CHKERRQ(ierr);
ierr = PetscSectionSetChart(leafSectionAdj, 0, numDof);CHKERRQ(ierr);
ierr = PetscSectionCreate(comm, &rootSectionAdj);CHKERRQ(ierr);
ierr = PetscSectionSetChart(rootSectionAdj, 0, numDof);CHKERRQ(ierr);
/* Fill in the ghost dofs on the interface */
ierr = PetscSFGetGraph(sf, NULL, &nleaves, &leaves, &remotes);CHKERRQ(ierr);
/* use constraints in finding adjacency in this routine */
ierr = DMPlexGetAdjacencyUseAnchors(dm,&useAnchors);CHKERRQ(ierr);
ierr = DMPlexSetAdjacencyUseAnchors(dm,PETSC_TRUE);CHKERRQ(ierr);
/*
section - maps points to (# dofs, local dofs)
sectionGlobal - maps points to (# dofs, global dofs)
leafSectionAdj - maps unowned local dofs to # adj dofs
rootSectionAdj - maps owned local dofs to # adj dofs
adj - adj global dofs indexed by leafSectionAdj
rootAdj - adj global dofs indexed by rootSectionAdj
sf - describes shared points across procs
sfDof - describes shared dofs across procs
sfAdj - describes shared adjacent dofs across procs
** The bootstrapping process involves six rounds with similar structure of visiting neighbors of each point.
(0). If there are point-to-point constraints, add the adjacencies of constrained points to anchors in anchorAdj
(This is done in DMPlexComputeAnchorAdjacencies())
1. Visit unowned points on interface, count adjacencies placing in leafSectionAdj
Reduce those counts to rootSectionAdj (now redundantly counting some interface points)
2. Visit owned points on interface, count adjacencies placing in rootSectionAdj
Create sfAdj connecting rootSectionAdj and leafSectionAdj
3. Visit unowned points on interface, write adjacencies to adj
Gather adj to rootAdj (note that there is redundancy in rootAdj when multiple procs find the same adjacencies)
4. Visit owned points on interface, write adjacencies to rootAdj
Remove redundancy in rootAdj
** The last two traversals use transitive closure
5. Visit all owned points in the subdomain, count dofs for each point (sectionAdj)
Allocate memory addressed by sectionAdj (cols)
6. Visit all owned points in the subdomain, insert dof adjacencies into cols
** Knowing all the column adjacencies, check ownership and sum into dnz and onz
*/
ierr = DMPlexComputeAnchorAdjacencies(dm,section,sectionGlobal,&anchorSectionAdj,&anchorAdj);CHKERRQ(ierr);
for (l = 0; l < nleaves; ++l) {
PetscInt dof, off, d, q, anDof;
PetscInt p = leaves[l], numAdj = PETSC_DETERMINE;
if ((p < pStart) || (p >= pEnd)) continue;
//......... some code omitted here .........
Developer: PeiLiu90, Project: petsc, Lines: 101, Source: plexpreallocate.c
Example 11: MPI_Comm_rank
struct fft_plan_3d *fft_3d_create_plan(
MPI_Comm comm, int nfast, int nmid, int nslow,
int in_ilo, int in_ihi, int in_jlo, int in_jhi,
int in_klo, int in_khi,
int out_ilo, int out_ihi, int out_jlo, int out_jhi,
int out_klo, int out_khi,
int scaled, int permute, int *nbuf)
{
struct fft_plan_3d *plan;
int me,nprocs;
int /*i,num,*/flag,remapflag;//,fftflag;
int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
int out_size,first_size,second_size,third_size,copy_size,scratch_size;
int np1,np2,ip1,ip2;
// int list[50];
/* system specific variables */
#ifdef FFT_INTEL
FFT_DATA dummy;
#endif
#ifdef FFT_T3E
FFT_DATA dummy[5];
int isign,isys;
double scalef;
#endif
/* query MPI info */
MPI_Comm_rank(comm,&me);
MPI_Comm_size(comm,&nprocs);
/* compute division of procs in 2 dimensions not on-processor */
bifactor(nprocs,&np1,&np2);
ip1 = me % np1;
ip2 = me/np1;
/* allocate memory for plan data struct */
plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d));
if (plan == NULL) return NULL;
/* remap from initial distribution to layout needed for 1st set of 1d FFTs
not needed if all procs own entire fast axis initially
first indices = distribution after 1st set of FFTs */
if (in_ilo == 0 && in_ihi == nfast-1)
flag = 0;
else
flag = 1;
MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);
if (remapflag == 0) {
first_ilo = in_ilo;
first_ihi = in_ihi;
first_jlo = in_jlo;
first_jhi = in_jhi;
first_klo = in_klo;
first_khi = in_khi;
plan->pre_plan = NULL;
}
else {
first_ilo = 0;
first_ihi = nfast - 1;
first_jlo = ip1*nmid/np1;
first_jhi = (ip1+1)*nmid/np1 - 1;
first_klo = ip2*nslow/np2;
first_khi = (ip2+1)*nslow/np2 - 1;
plan->pre_plan =
remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,
FFT_PRECISION,0,0,2);
if (plan->pre_plan == NULL) return NULL;
}
/* 1d FFTs along fast axis */
plan->length1 = nfast;
plan->total1 = nfast * (first_jhi-first_jlo+1) * (first_khi-first_klo+1);
/* remap from 1st to 2nd FFT
choose which axis is split over np1 vs np2 to minimize communication
second indices = distribution after 2nd set of FFTs */
second_ilo = ip1*nfast/np1;
second_ihi = (ip1+1)*nfast/np1 - 1;
second_jlo = 0;
second_jhi = nmid - 1;
second_klo = ip2*nslow/np2;
second_khi = (ip2+1)*nslow/np2 - 1;
plan->mid1_plan =
remap_3d_create_plan(comm,
first_ilo,first_ihi,first_jlo,first_jhi,
first_klo,first_khi,
//.........这里部分代码省略.........
Developer: darien0, Project: Mara, Lines: 101, Source: fft_3d.c
Example 12: run_bfs
//......... some code omitted here .........
int64_t global_queue_summary_size = g_global_queue_summary_size;
int64_t global_queue_size = g_global_queue_size;
#define SWIZZLE_VERTEX(c) (((int64_t)(VERTEX_OWNER(c)) << lg_local_queue_size) | (int64_t)(VERTEX_LOCAL(c)))
#if 0
int64_t* restrict column_swizzled = (int64_t*)xmalloc(nlocaledges * sizeof(int64_t));
{
size_t i;
for (i = 0; i < nlocaledges; ++i) {
int64_t c = column[i];
column_swizzled[i] = SWIZZLE_VERTEX(c);
}
}
#endif
unsigned long* restrict in_queue = g_in_queue;
memset(in_queue, 0, global_queue_size * sizeof(unsigned long));
unsigned long* restrict in_queue_summary = g_in_queue_summary;
memset(in_queue_summary, 0, global_queue_summary_size * sizeof(unsigned long));
unsigned long* restrict out_queue = g_out_queue;
unsigned long* restrict out_queue_summary = g_out_queue_summary;
unsigned long* restrict visited = g_visited;
memset(visited, 0, local_queue_size * sizeof(unsigned long));
#define SET_IN(v) do {int64_t vs = SWIZZLE_VERTEX(v); size_t word_idx = vs / ulong_bits; int bit_idx = vs % ulong_bits; unsigned long mask = (1UL << bit_idx); in_queue_summary[word_idx / ulong_bits] |= (1UL << (word_idx % ulong_bits)); in_queue[word_idx] |= mask;} while (0)
#define TEST_IN(vs) (((in_queue_summary[vs / ulong_bits / ulong_bits] & (1UL << ((vs / ulong_bits) % ulong_bits))) != 0) && ((in_queue[vs / ulong_bits] & (1UL << (vs % ulong_bits))) != 0))
#define TEST_VISITED_LOCAL(v) ((visited[(v) / ulong_bits] & (1UL << ((v) % ulong_bits))) != 0)
// #define SET_VISITED_LOCAL(v) do {size_t word_idx = (v) / ulong_bits; int bit_idx = (v) % ulong_bits; unsigned long mask = (1UL << bit_idx); __sync_fetch_and_or(&visited[word_idx], mask); __sync_fetch_and_or(&out_queue[word_idx], mask);} while (0)
#define SET_VISITED_LOCAL(v) do {size_t word_idx = (v) / ulong_bits; int bit_idx = (v) % ulong_bits; unsigned long mask = (1UL << bit_idx); visited[word_idx] |= mask; out_queue[word_idx] |= mask;} while (0)
SET_IN(root);
{ptrdiff_t i; _Pragma("omp parallel for schedule(static)") for (i = 0; i < nlocalverts; ++i) pred[i] = -1;}
if (VERTEX_OWNER(root) == rank) {
pred[VERTEX_LOCAL(root)] = root;
SET_VISITED_LOCAL(VERTEX_LOCAL(root));
}
uint16_t cur_level = 0;
while (1) {
++cur_level;
#if 0
if (rank == 0) fprintf(stderr, "BFS level %" PRIu16 "\n", cur_level);
#endif
memset(out_queue, 0, (nlocalverts + ulong_bits - 1) / ulong_bits * sizeof(unsigned long));
// memset(out_queue_summary, 0, (nlocalverts + ulong_bits_squared - 1) / ulong_bits_squared * sizeof(unsigned long));
ptrdiff_t i, ii;
#if 0
#pragma omp parallel for schedule(static)
for (i = 0; i < global_queue_summary_size; ++i) {
unsigned long val = 0UL;
int j;
unsigned long mask = 1UL;
for (j = 0; j < ulong_bits; ++j, mask <<= 1) {
if (in_queue[i * ulong_bits + j]) val |= mask;
}
in_queue_summary[i] = val;
}
#endif
unsigned long not_done = 0;
#pragma omp parallel for schedule(static) reduction(|:not_done)
for (ii = 0; ii < nlocalverts; ii += ulong_bits) {
size_t i, i_end = ii + ulong_bits;
if (i_end > nlocalverts) i_end = nlocalverts;
for (i = ii; i < i_end; ++i) {
if (!TEST_VISITED_LOCAL(i)) {
size_t j, j_end = rowstarts[i + 1];
for (j = rowstarts[i]; j < j_end; ++j) {
int64_t v1 = column[j];
int64_t v1_swizzled = SWIZZLE_VERTEX(v1);
if (TEST_IN(v1_swizzled)) {
pred[i] = (v1 & INT64_C(0xFFFFFFFFFFFF)) | ((int64_t)cur_level << 48);
not_done |= 1;
SET_VISITED_LOCAL(i);
break;
}
}
}
}
}
#if 1
#pragma omp parallel for schedule(static)
for (i = 0; i < local_queue_summary_size; ++i) {
unsigned long val = 0UL;
int j;
unsigned long mask = 1UL;
for (j = 0; j < ulong_bits; ++j, mask <<= 1) {
unsigned long full_val = out_queue[i * ulong_bits + j];
visited[i * ulong_bits + j] |= full_val;
if (full_val) val |= mask;
}
out_queue_summary[i] = val;
// not_done |= val;
}
#endif
MPI_Allreduce(MPI_IN_PLACE, &not_done, 1, MPI_UNSIGNED_LONG, MPI_BOR, MPI_COMM_WORLD);
if (not_done == 0) break;
MPI_Allgather(out_queue, local_queue_size, MPI_UNSIGNED_LONG, in_queue, local_queue_size, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
MPI_Allgather(out_queue_summary, local_queue_summary_size, MPI_UNSIGNED_LONG, in_queue_summary, local_queue_summary_size, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
}
deallocate_memory();
}
Developer: ShiZhan, Project: graph500-win32, Lines: 101, Source: bfs_replicated.c
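Example 12 reduces the not_done word in place with a bitwise OR, so no separate send buffer is needed. The MPI_IN_PLACE form extends naturally to whole bitmaps; below is a minimal hedged sketch (merge_bitmaps is an invented helper, not part of the graph500 code):
#include <mpi.h>

/* OR per-rank bitmaps together; every rank ends up with the union */
static void merge_bitmaps(unsigned long *bitmap, int nwords, MPI_Comm comm)
{
    /* MPI_IN_PLACE: bitmap serves as both send and receive buffer */
    MPI_Allreduce(MPI_IN_PLACE, bitmap, nwords,
                  MPI_UNSIGNED_LONG, MPI_BOR, comm);
}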
Example 13: star_density
//.........这里部分代码省略.........
}
/* do local particles and prepare export list */
for(nexport = 0; i >= 0; i = NextActiveParticle[i])
{
if(P[i].Type == 4)
{
if(star_density_evaluate(i, 0, &nexport, Send_count) < 0)
break;
}
}
#ifdef MYSORT
mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare);
#else
qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare);
#endif
MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD);
for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
{
Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask];
nimport += Recv_count[j];
if(j > 0)
{
Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
}
}
StarDataGet = (struct stardata_in *) mymalloc(nimport * sizeof(struct stardata_in));
StarDataIn = (struct stardata_in *) mymalloc(nexport * sizeof(struct stardata_in));
/* prepare particle data for export */
for(j = 0; j < nexport; j++)
{
place = DataIndexTable[j].Index;
StarDataIn[j].Pos[0] = P[place].Pos[0];
StarDataIn[j].Pos[1] = P[place].Pos[1];
StarDataIn[j].Pos[2] = P[place].Pos[2];
StarDataIn[j].Hsml = PPP[place].Hsml;
StarDataIn[j].Density = P[place].DensAroundStar;
StarDataIn[j].Mass = P[place].Mass;
memcpy(StarDataIn[j].NodeList,
DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int));
}
/* exchange particle data */
for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
{
sendTask = ThisTask;
recvTask = ThisTask ^ ngrp;
if(recvTask < NTask)
{
if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
{
/* get the particles */
MPI_Sendrecv(&StarDataIn[Send_offset[recvTask]],
Send_count[recvTask] * sizeof(struct stardata_in), MPI_BYTE,
recvTask, TAG_DENS_A,
&StarDataGet[Recv_offset[recvTask]],
Recv_count[recvTask] * sizeof(struct stardata_in), MPI_BYTE,
recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}
}
}
myfree(StarDataIn);
/* now do the particles that were sent to us */
for(j = 0; j < nimport; j++)
star_density_evaluate(j, 1, &dummy, &dummy);
/* check whether this is the last iteration */
if(i < 0)
ndone_flag = 1;
else
ndone_flag = 0;
MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
myfree(StarDataGet);
}
while(ndone < NTask);
myfree(DataNodeList);
myfree(DataIndexTable);
myfree(Ngblist);
#endif //for EDDINGTON_TENSOR_STARS
}
Developer: surftour, Project: astrotools, Lines: 101, Source: get_Je.c
Example 14: heavyEdgeMatchAgg
//......... some code omitted here .........
n /= 2;
ierr = PetscMalloc((2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
/* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */
/* save requests */
sbuffs1[nSend1] = sbuff;
request = (MPI_Request*)sbuff;
sbuff = pt = (PetscInt*)(request+1);
*pt++ = n; *pt++ = rank;
ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr);
while(pos){
PetscInt lid0, cpid, gid;
ierr = PetscLLNGetID(pos, &cpid);CHKERRQ(ierr);
gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
ierr = PetscLLNGetID(pos, &lid0);CHKERRQ(ierr);
ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
*pt++ = gid; *pt++ = lid0;
}
/* send request tag1 [n, proc, n*[gid1,lid0] ] */
ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);CHKERRQ(ierr);
/* post receive */
request = (MPI_Request*)pt;
rreqs2[nSend1] = request; /* cache recv request */
pt = (PetscInt*)(request+1);
ierr = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
/* clear list */
ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr);
nSend1++;
}
}
/* receive requests, send response, clear lists */
kk = nactive_edges;
ierr =