This article collects and organizes typical usage examples of the MPI_Isend function in C/C++. If you are wondering what exactly MPI_Isend does, how to call it, or what real-world uses look like, the curated examples below may help.
The following presents 20 code examples of MPI_Isend, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better C++ code examples.
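Before diving into the collected examples, here is a minimal, self-contained sketch of the basic MPI_Isend / MPI_Irecv / MPI_Waitall pattern that most of them build on: each rank posts a non-blocking send to its right neighbor and a non-blocking receive from its left neighbor, does other work, and only then waits for completion. This sketch was written for this article; the int payload, the tag value 0, and the ring topology are illustrative choices, not taken from any of the projects below.

#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
  int rank, size;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  int send_val = rank;                  /* payload: this rank's id */
  int recv_val = -1;
  int next = (rank + 1) % size;         /* right neighbor in the ring */
  int prev = (rank + size - 1) % size;  /* left neighbor in the ring */

  MPI_Request reqs[2];
  MPI_Status  stats[2];

  /* Post the non-blocking send and receive, then overlap other work. */
  MPI_Isend(&send_val, 1, MPI_INT, next, 0, MPI_COMM_WORLD, &reqs[0]);
  MPI_Irecv(&recv_val, 1, MPI_INT, prev, 0, MPI_COMM_WORLD, &reqs[1]);

  /* ... computation that touches neither send_val nor recv_val goes here ... */

  /* Both buffers are safe to reuse only after the requests complete. */
  MPI_Waitall(2, reqs, stats);
  printf("rank %d received %d from rank %d\n", rank, recv_val, prev);

  MPI_Finalize();
  return 0;
}

The point shared by all of the examples that follow is that the buffer handed to MPI_Isend must not be modified, and the MPI_Request must not be discarded, until the request completes via MPI_Wait, MPI_Waitall, or a successful MPI_Test.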
Example 1: relax
void relax(double *phi, double *b, double *tmp, param_t p)
{
  int i, x, y;

  // A little trick to index phi as expected.
  double* phi_s = phi + p.L;

  // Prepare for async send/recv
  MPI_Request request[4];
  int requests;
  MPI_Status status[4];

  for(i=0; i<p.niter; i++)
  {
    requests = 0;

    // Send the higher-memory component to the next rank.
    MPI_Isend(phi_s + p.L*(p.y-1), p.L, MPI_DOUBLE,
              (p.my_rank+1)%p.world_size, 1,
              MPI_COMM_WORLD, request + requests++);
    MPI_Irecv(phi_s - p.L, p.L, MPI_DOUBLE,
              (p.my_rank+p.world_size-1)%p.world_size, 1,
              MPI_COMM_WORLD, request + requests++);

    // Send the lower-memory component to the previous rank.
    MPI_Isend(phi_s, p.L, MPI_DOUBLE,
              (p.my_rank+p.world_size-1)%p.world_size, 0,
              MPI_COMM_WORLD, request + requests++);
    MPI_Irecv(phi_s + p.L*p.y, p.L, MPI_DOUBLE,
              (p.my_rank+1)%p.world_size, 0,
              MPI_COMM_WORLD, request + requests++);

    // Do some other work while we wait!
    // Update everything that doesn't depend on buffers.
    for(x = 0; x < p.L; x++)
    {
      for(y = 1; y < p.y-1; y++)
      {
        tmp[x + y*p.L] = (1 - p.dt)* phi_s[x + y*p.L]
            + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
                + phi_s[x + ((y+1)%p.L)*p.L] + phi_s[x + ((y-1+p.L)%p.L)*p.L])
            + p.dt*p.scale* b[x + y*p.L];
      }
    }

    // Wait if the halo exchange hasn't finished yet.
    MPI_Waitall ( requests, request, status );

    // Update the remaining boundary rows.
    for(x = 0; x < p.L; x++)
    {
      y = 0;
      tmp[x + y*p.L] = (1 - p.dt)* phi_s[x + y*p.L]
          + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
              + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L])
          + p.dt*p.scale* b[x + y*p.L];

      y = p.y-1;
      tmp[x + y*p.L] = (1 - p.dt)* phi_s[x + y*p.L]
          + p.dt* p.scale* (phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
              + phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L])
          + p.dt*p.scale* b[x + y*p.L];
    }

    for(x = 0; x < p.L; x++)
    {
      for(y = 0; y < p.y; y++)
      {
        phi_s[x + y*p.L] = tmp[x + y*p.L];
      }
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);
  return;
}
Developer: BU-EC-HPC-S16, Project: EC500-High-Performance-Computing, Lines of code: 79, Source file: Laplace2D_mpi.c
Example 2: Stg_MPI_Isend
int Stg_MPI_Isend( char* file, int line, void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request ) {
   Stream* stream = Journal_Register( Info_Type, "mpi" );
   Journal_Printf( stream, "%s %d, rank %d MPI_Isend: tag = %d, count = %d, datatype = %d, dest = %d\n", file, line, Stg_Messaging_GetRank( comm ), tag, count, datatype, dest );
   return MPI_Isend( buf, count, datatype, dest, tag, comm, request );
}
Developer: bmi-forum, Project: bmi-pyre, Lines of code: 5, Source file: stgmessaging.c
Example 3: exchsolution_gmrfData_1
//......... part of the code is omitted here .........
for (; (i2<=3); i2 += 2) {
yPos = posEnd[1];
yPos = posEnd[1];
}
for (; (i2<=4); i2 += 1) {
yPos = posEnd[1];
}
}
}
{
double* fieldData_Solution_GMRF_1_p1 = (&fieldData_Solution_GMRF[1][0]);
int i2 = 2;
for (; (i2<=3); i2 += 2) {
fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00;
fieldData_Solution_GMRF_1_p1[(i2+19)] = 0.000000e+00;
}
for (; (i2<=4); i2 += 1) {
fieldData_Solution_GMRF_1_p1[(i2+18)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(&fieldData_Solution_GMRF[1][10], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(&fieldData_Solution_GMRF[1][8], 1, mpiDatatype_3_1_6, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
;
}
}
;
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Send[1]) {
waitForMPIReq(&mpiRequest_Send[1]);
Developer: kel85uk, Project: GMRF_Exa, Lines of code: 67, Source file: CommunicationFunctions_41.cpp
Example 4: exchlaplacecoeff_gmrfData_0
//......... part of the code is omitted here .........
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+109)] = 0.000000e+00;
}
for (; (i2<=3); i2 += 1) {
fieldData_LaplaceCoeff_GMRF_0_p1[(i2+108)] = 0.000000e+00;
}
}
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
/* Statements in this Scop: S1094 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Send_1_p1 = (&buffer_Send[1][(i0*2)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 1;
for (; (i1<=1); i1 += 2) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
buffer_Send_1_p1[i1] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+9)];
}
for (; (i1<=2); i1 += 1) {
buffer_Send_1_p1[(i1-1)] = fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)+3)];
}
}
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][1]&&neighbor_isRemote[0][1])) {
MPI_Isend(buffer_Send[1], 18, MPI_DOUBLE, neighbor_remoteRank[0][1], ((unsigned int)commId << 16) + ((unsigned int)(neighbor_fragCommId[0][1]) & 0x0000ffff), mpiCommunicator, &mpiRequest_Send[1]);
reqOutstanding_Send[1] = true;
}
}
}
;
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
MPI_Irecv(buffer_Recv[0], 18, MPI_DOUBLE, neighbor_remoteRank[0][0], ((unsigned int)(neighbor_fragCommId[0][0]) << 16) + ((unsigned int)commId & 0x0000ffff), mpiCommunicator, &mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = true;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if (reqOutstanding_Recv[0]) {
waitForMPIReq(&mpiRequest_Recv[0]);
reqOutstanding_Recv[0] = false;
}
}
}
for (int fragmentIdx = 0; fragmentIdx < 1; ++fragmentIdx) {
if (isValidForSubdomain[0]) {
if ((neighbor_isValid[0][0]&&neighbor_isRemote[0][0])) {
/* Statements in this Scop: S1095 */
for (int i0 = 0; (i0<=8); i0 += 1) {
double* buffer_Recv_0_p1 = (&buffer_Recv[0][(i0*2)]);
double* fieldData_LaplaceCoeff_GMRF_0_p1 = (&fieldData_LaplaceCoeff_GMRF[0][(i0*24)]);
int i1 = 3;
for (; (i1<=3); i1 += 2) {
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-10)] = buffer_Recv_0_p1[(i1-3)];
fieldData_LaplaceCoeff_GMRF_0_p1[((i1*6)-4)] = buffer_Recv_0_p1[(i1-2)];
Developer: kel85uk, Project: GMRF_Exa, Lines of code: 67, Source file: CommunicationFunctions_78.cpp
Example 5: ADIOI_W_Exchange_data
//......... part of the code is omitted here .........
if (fd->atomicity) {
/* bug fix from Wei-keng Liao and Kenin Coloma */
requests = (MPI_Request *)
ADIOI_Malloc((nprocs_send+1)*sizeof(MPI_Request));
send_req = requests;
}
else {
requests = (MPI_Request *)
ADIOI_Malloc((nprocs_send+nprocs_recv+1)*sizeof(MPI_Request));
/* +1 to avoid a 0-size malloc */
/* post receives */
j = 0;
for (i=0; i<nprocs; i++) {
if (recv_size[i]) {
MPI_Irecv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter,
fd->comm, requests+j);
j++;
}
}
send_req = requests + nprocs_recv;
}
/* post sends. if buftype_is_contig, data can be directly sent from
user buf at location given by buf_idx. else use send_buf. */
#ifdef AGGREGATION_PROFILE
MPE_Log_event (5032, 0, NULL);
#endif
if (buftype_is_contig) {
j = 0;
for (i=0; i < nprocs; i++)
if (send_size[i]) {
MPI_Isend(((char *) buf) + buf_idx[i], send_size[i],
MPI_BYTE, i, myrank+i+100*iter, fd->comm,
send_req+j);
j++;
buf_idx[i] += send_size[i];
}
}
else if (nprocs_send) {
/* buftype is not contig */
send_buf = (char **) ADIOI_Malloc(nprocs*sizeof(char*));
for (i=0; i < nprocs; i++)
if (send_size[i])
send_buf[i] = (char *) ADIOI_Malloc(send_size[i]);
ADIOI_Fill_send_buffer(fd, buf, flat_buf, send_buf,
offset_list, len_list, send_size,
send_req,
sent_to_proc, nprocs, myrank,
contig_access_count,
min_st_offset, fd_size, fd_start, fd_end,
send_buf_idx, curr_to_proc, done_to_proc, iter,
buftype_extent);
/* the send is done in ADIOI_Fill_send_buffer */
}
if (fd->atomicity) {
/* bug fix from Wei-keng Liao and Kenin Coloma */
j = 0;
for (i=0; i<nprocs; i++) {
MPI_Status wkl_status;
if (recv_size[i]) {
MPI_Recv(MPI_BOTTOM, 1, recv_types[j], i, myrank+i+100*iter,
fd->comm, &wkl_status);
Developer: Slbomber, Project: ompi, Lines of code: 67, Source file: ad_write_coll.c
Example 6: main
//......... part of the code is omitted here .........
for(i=0; i<dataPerProc; i++)
{
value = bBucket[i];
sAssigned = (int) (value * numprocs);
if(sAssigned == numprocs) sAssigned--; /* Resolves a bug that crops up due to numerical errors;
* e.g. (int)(0.99 * 1) can evaluate to 1, but the first bucket is index 0 */
assignedIndex = sSize[sAssigned];
sBucket[sAssigned][assignedIndex] = value;
sSize[sAssigned] += 1;
}
// for(i=0; i<sTotal; i++) printf("\nRank %i: sBucket=%i, Size=%i\n", rank, i, sSize[i]);
displaySmallBuckets(sBucket, sSize, rank, numprocs, n);
/* Pour each rank's small bucket back into the correct big bucket.
* Non-blocking communication is used to prevent deadlock when the
* problem size is rather large (although there is a solution
* that doesn't involve non-blocking communication). */
/* Step 2 to Step 3 of Lecture 8 */
for(i=0; i<bMaxSize; i++) bBucket[i] = 0.0;
bSize = 0;
for(p=0; p<sTotal; p++)
{
if(p==rank)
{
for(i=0; i<sSize[p]; i++)
bBucket[bSize + i] = sBucket[p][i];
bSize += sSize[p];
}
else
{
MPI_Isend(sBucket[p], sSize[p], MPI_FLOAT, p, 0, MPI_COMM_WORLD, &sendRequest);
MPI_Irecv(tempArray, maxTempSize, MPI_FLOAT, p, 0, MPI_COMM_WORLD, &recvRequest);
MPI_Wait(&recvRequest, &recvStatus);
MPI_Get_count(&recvStatus, MPI_FLOAT, &size);
for(i=0; i<size; i++) bBucket[bSize + i] = tempArray[i];
bSize += size;
}
}
/* All small buckets should pour their entire contents into the big buckets
* just before the serial sorting of the big buckets */
MPI_Barrier(MPI_COMM_WORLD);
displayBigBuckets(bBucket, bSize, rank, numprocs, n);
/* Swirl each rank's big bucket until sorted */
/* Step 3 to Step 4 of Lecture 8 */
serialQuicksort(bBucket, 0, bSize);
displayBigBuckets(bBucket, bSize, rank, numprocs, n);
/* Concatenate each rank's big bucket */
/* Step 4 to Step 5 of Lecture 8 */
if(rank!=0) MPI_Send(bBucket, bSize, MPI_FLOAT, 0, 0, MPI_COMM_WORLD);
else
{
for(p=1; p<bTotal; p++)
{
MPI_Recv(tempArray, maxTempSize, MPI_FLOAT, p, 0, MPI_COMM_WORLD, &status);
MPI_Get_count(&status, MPI_FLOAT, &size);
for(i=0; i<size; i++) bBucket[bSize + i] = tempArray[i];
Developer: OthmanEmpire, Project: university_code, Lines of code: 67, Source file: coursework.c
Example 7: abs
/*
* Performs sparse matrix-vector multiplication.
*/
void
pdgsmv
(
int_t abs, /* Input. Do abs(A)*abs(x). */
SuperMatrix *A_internal, /* Input. Matrix A permuted by columns.
The column indices are translated into
the relative positions in the gathered x-vector.
The type of A can be:
Stype = NR_loc; Dtype = SLU_D; Mtype = GE. */
gridinfo_t *grid, /* Input */
pdgsmv_comm_t *gsmv_comm, /* Input. The data structure for communication. */
double x[], /* Input. The distributed source vector */
double ax[] /* Output. The distributed destination vector */
)
{
NRformat_loc *Astore;
int iam, procs;
int_t i, j, p, m, m_loc, n, fst_row, jcol;
int_t *colind, *rowptr;
int *SendCounts, *RecvCounts;
int_t *ind_tosend, *ind_torecv, *ptr_ind_tosend, *ptr_ind_torecv;
int_t *extern_start, TotalValSend;
double *nzval, *val_tosend, *val_torecv;
double zero = 0.0;
MPI_Request *send_req, *recv_req;
MPI_Status status;
#if ( DEBUGlevel>=1 )
CHECK_MALLOC(grid->iam, "Enter pdgsmv()");
#endif
/* ------------------------------------------------------------
INITIALIZATION.
------------------------------------------------------------*/
iam = grid->iam;
procs = grid->nprow * grid->npcol;
Astore = (NRformat_loc *) A_internal->Store;
m = A_internal->nrow;
n = A_internal->ncol;
m_loc = Astore->m_loc;
fst_row = Astore->fst_row;
colind = Astore->colind;
rowptr = Astore->rowptr;
nzval = (double *) Astore->nzval;
extern_start = gsmv_comm->extern_start;
ind_torecv = gsmv_comm->ind_torecv;
ptr_ind_tosend = gsmv_comm->ptr_ind_tosend;
ptr_ind_torecv = gsmv_comm->ptr_ind_torecv;
SendCounts = gsmv_comm->SendCounts;
RecvCounts = gsmv_comm->RecvCounts;
val_tosend = (double *) gsmv_comm->val_tosend;
val_torecv = (double *) gsmv_comm->val_torecv;
TotalValSend = gsmv_comm->TotalValSend;
/* ------------------------------------------------------------
COPY THE X VALUES INTO THE SEND BUFFER.
------------------------------------------------------------*/
for (i = 0; i < TotalValSend; ++i) {
j = ind_torecv[i] - fst_row; /* Relative index in x[] */
val_tosend[i] = x[j];
}
/* ------------------------------------------------------------
COMMUNICATE THE X VALUES.
------------------------------------------------------------*/
if ( !(send_req = (MPI_Request *)
SUPERLU_MALLOC(2*procs *sizeof(MPI_Request))))
ABORT("Malloc fails for recv_req[].");
recv_req = send_req + procs;
for (p = 0; p < procs; ++p) {
if ( RecvCounts[p] ) {
MPI_Isend(&val_tosend[ptr_ind_torecv[p]], RecvCounts[p],
MPI_DOUBLE, p, iam,
grid->comm, &send_req[p]);
}
if ( SendCounts[p] ) {
MPI_Irecv(&val_torecv[ptr_ind_tosend[p]], SendCounts[p],
MPI_DOUBLE, p, p,
grid->comm, &recv_req[p]);
}
}
/* ------------------------------------------------------------
PERFORM THE ACTUAL MULTIPLICATION.
------------------------------------------------------------*/
if ( abs ) { /* Perform abs(A)*abs(x) */
/* Multiply the local part. */
for (i = 0; i < m_loc; ++i) { /* Loop through each row */
ax[i] = 0.0;
for (j = rowptr[i]; j < extern_start[i]; ++j) {
jcol = colind[j];
ax[i] += fabs(nzval[j]) * fabs(x[jcol]);
}
}
for (p = 0; p < procs; ++p) {
if ( RecvCounts[p] ) MPI_Wait(&send_req[p], &status);
//......... part of the code is omitted here .........
Developer: DBorello, Project: OpenSees, Lines of code: 101, Source file: pdgsmv.c
Example 8: main
//......... part of the code is omitted here .........
// const char scratchdir[]="/scratch/gpfs/cbkeller/";
const char scratchdir[]="/scratch/";
// Variables that determine how much memory to allocate to imported results
const int maxMinerals=100, maxSteps=1700/abs(deltaT), maxColumns=50;
/***********************************************************/
// Malloc space for the imported melts array
double **rawMatrix=mallocDoubleArray(maxMinerals*maxSteps,maxColumns);
double ***melts=malloc(maxMinerals*sizeof(double**));
char **names=malloc(maxMinerals*sizeof(char*));
char ***elements=malloc(maxMinerals*sizeof(char**));
int *meltsrows=malloc(maxMinerals*sizeof(int)), *meltscolumns=malloc(maxMinerals*sizeof(int));
for (i=0; i<maxMinerals; i++){
names[i]=malloc(30*sizeof(char));
elements[i]=malloc(maxColumns*sizeof(char*));
for (k=0; k<maxColumns; k++){
elements[i][k]=malloc(30*sizeof(char));
}
}
int minerals;
// Variables for finding saturation temperature
int row, col, P, T, mass, SiO2, TiO2, Al2O3, Fe2O3, Cr2O3, FeO, MnO, MgO, NiO, CoO, CaO, Na2O, K2O, P2O5, CO2, H2O;
int fspCaO, fspNa2O, fspK2O, oxideTiO2, oxideFe2O3, oxideFeO, oxideMnO;
double M, Tf, Tsat, Tsatbulk, Ts, Tsmax, Zrf, Zrsat, MZr, MZrnow, Tcryst;
double AnKd, AbKd, OrKd, IlmKd, MtKd;
while (1) {
// Ask root node for new task
// *buf, count, datatype, dest, tag, comm, *request
MPI_Isend(&world_rank, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &sReq);
// *buf, count, datatype, source, tag, comm, *status
MPI_Recv(&ic, 18, MPI_DOUBLE, 0, 1, MPI_COMM_WORLD, &sStat);
// Exit the loop if a stop signal is received
if (ic[0]<0) break;
//Configure working directory
sprintf(prefix,"%sout%i_%.0f/", scratchdir, world_rank, ic[17]);
sprintf(cmd_string,"mkdir -p %s", prefix);
system(cmd_string);
// //Set water
// ic[15]=3.0;
// //Set CO2
// ic[14]=0.1;
//Run MELTS
runmelts(prefix,ic,version,"isobaric",fo2Buffer,fo2Delta,"1\nsc.melts\n10\n1\n3\n1\nliquid\n1\n0.99\n1\n10\n0\n4\n0\n","","!",Ti,Pi,deltaT,deltaP,0.005);
// If simulation failed, clean up scratch directory and move on to next simulation
sprintf(cmd_string,"%sPhase_main_tbl.txt", prefix);
if ((fp = fopen(cmd_string, "r")) == NULL) {
fprintf(stderr, "%s : MELTS equilibration failed to produce output.\n", prefix);
sprintf(cmd_string,"rm -r %s", prefix);
system(cmd_string);
continue;
}
// Import results, if they exist. Format:
// Pressure Temperature mass S H V Cp viscosity SiO2 TiO2 Al2O3 Fe2O3 Cr2O3 FeO MnO MgO NiO CoO CaO Na2O K2O P2O5 H2O
minerals=maxMinerals;
importmelts(maxSteps, maxColumns, prefix, melts, rawMatrix, meltsrows, meltscolumns, names, elements, &minerals);
Developer: brenhinkeller, Project: meltstzirc, Lines of code: 67, Source file: meltsTzircParallel.c
Example 9: main
int main( int argc, char **argv )
{
int locId ;
int data [i_ntotin] ;
MPI_Init(&argc, &argv) ;
MPI_Comm_rank(MPI_COMM_WORLD, &locId) ;
if(locId == 0) {
/* The server... */
MPI_Status status[2] ;
MPI_Request events [2] ;
int eventId ;
int dstId = 1 ;
int i ;
for(i = 0 ; i < i_ntotin ; i++)
data [i] = i + 1 ;
events [0] = MPI_REQUEST_NULL ;
events [1] = MPI_REQUEST_NULL ;
MPI_Isend(data, i_ntotin, MPI_INT, dstId, DAR,
MPI_COMM_WORLD, events + 1) ;
/* enable send of data */
/*_begin_trace_code */
/* printf("locId = %d: MPI_Isend(%x, %d, %x, %d, %d, %x, %x)\n",
locId, data, i_ntotin, MPI_INT, dstId, DAR, MPI_COMM_WORLD, events [1]);
*/
/*_end_trace_code */
/*_begin_trace_code */
/* printf("locId = %d: MPI_Waitany(%d, [%x, %x], %x %x)...",
locId, 2, events [0], events [1], &eventId, &status) ; */
/*_end_trace_code */
MPI_Waitany(2, events, &eventId, status) ;
/*_begin_trace_code */
printf("done. eventId = %d\n", eventId) ;
/*_end_trace_code */
}
if(locId == 1) {
/* The Client... */
MPI_Status status ;
int srcId = MPI_ANY_SOURCE ;
/*_begin_trace_code */
/*
printf("locId = %d: MPI_Recv(%x, %d, %x, %d, %d, %x, %x)...",
locId, data, i_ntotin, MPI_INT, srcId, DAR, MPI_COMM_WORLD, &status) ;
*/
/*_end_trace_code */
MPI_Recv(data, i_ntotin, MPI_INT, srcId, DAR,
MPI_COMM_WORLD, &status) ;
/*_begin_trace_code */
/*printf("done.\n") ;*/
/*_end_trace_code */
/*
printf("locId = %d: data [0] = %d, data [%d] = %d\n",
locId, data [0], i_ntotin - 1, data [i_ntotin - 1]) ;
*/
}
MPI_Barrier( MPI_COMM_WORLD );
if (locId == 0)
printf( "Test complete\n" );
MPI_Finalize() ;
return 0;
}
Developer: Shurakai, Project: SimGrid, Lines of code: 83, Source file: waitany.c
Example 10: MPI_Send
void peanoclaw::records::RepositoryStatePacked::send(int destination, int tag, bool exchangeOnlyAttributesMarkedWithParallelise, bool communicateBlocking) {
_senderDestinationRank = destination;
if (communicateBlocking) {
const int result = MPI_Send(this, 1, exchangeOnlyAttributesMarkedWithParallelise ? Datatype : FullDatatype, destination, tag, tarch::parallel::Node::getInstance().getCommunicator());
if (result!=MPI_SUCCESS) {
std::ostringstream msg;
msg << "was not able to send message peanoclaw::records::RepositoryStatePacked "
<< toString()
<< " to node " << destination
<< ": " << tarch::parallel::MPIReturnValueToString(result);
_log.error( "send(int)",msg.str() );
}
}
else {
MPI_Request* sendRequestHandle = new MPI_Request();
MPI_Status status;
int flag = 0;
int result;
clock_t timeOutWarning = -1;
clock_t timeOutShutdown = -1;
bool triggeredTimeoutWarning = false;
if (exchangeOnlyAttributesMarkedWithParallelise) {
result = MPI_Isend(
this, 1, Datatype, destination,
tag, tarch::parallel::Node::getInstance().getCommunicator(),
sendRequestHandle
);
}
else {
result = MPI_Isend(
this, 1, FullDatatype, destination,
tag, tarch::parallel::Node::getInstance().getCommunicator(),
sendRequestHandle
);
}
if (result!=MPI_SUCCESS) {
std::ostringstream msg;
msg << "was not able to send message peanoclaw::records::RepositoryStatePacked "
<< toString()
<< " to node " << destination
<< ": " << tarch::parallel::MPIReturnValueToString(result);
_log.error( "send(int)",msg.str() );
}
result = MPI_Test( sendRequestHandle, &flag, &status );
while (!flag) {
if (timeOutWarning==-1) timeOutWarning = tarch::parallel::Node::getInstance().getDeadlockWarningTimeStamp();
if (timeOutShutdown==-1) timeOutShutdown = tarch::parallel::Node::getInstance().getDeadlockTimeOutTimeStamp();
result = MPI_Test( sendRequestHandle, &flag, &status );
if (result!=MPI_SUCCESS) {
std::ostringstream msg;
msg << "testing for finished send task for peanoclaw::records::RepositoryStatePacked "
<< toString()
<< " sent to node " << destination
<< " failed: " << tarch::parallel::MPIReturnValueToString(result);
_log.error("send(int)", msg.str() );
}
// deadlock aspect
if (
tarch::parallel::Node::getInstance().isTimeOutWarningEnabled() &&
(clock()>timeOutWarning) &&
(!triggeredTimeoutWarning)
) {
tarch::parallel::Node::getInstance().writeTimeOutWarning(
"peanoclaw::records::RepositoryStatePacked",
"send(int)", destination,tag,1
);
triggeredTimeoutWarning = true;
}
if (
tarch::parallel::Node::getInstance().isTimeOutDeadlockEnabled() &&
(clock()>timeOutShutdown)
) {
tarch::parallel::Node::getInstance().triggerDeadlockTimeOut(
"peanoclaw::records::RepositoryStatePacked",
"send(int)", destination,tag,1
);
}
tarch::parallel::Node::getInstance().receiveDanglingMessages();
}
delete sendRequestHandle;
#ifdef Debug
_log.debug("send(int,int)", "sent " + toString() );
#endif
}
}
Developer: zergnick, Project: peanoclaw, Lines of code: 97, Source file: RepositoryState.cpp
Example 11: hm
void connection_handler::handle_messages()
{
detail::handling_messages hm(handling_messages_); // reset on exit
bool bootstrapping = hpx::is_starting();
bool has_work = true;
std::size_t k = 0;
hpx::util::high_resolution_timer t;
std::list<std::pair<int, MPI_Request> > close_requests;
// We let the message handling loop spin for another 2 seconds to avoid the
// costs involved with posting it to asio
while(bootstrapping || has_work || (!has_work && t.elapsed() < 2.0))
{
if(stopped_) break;
// break the loop if someone requested to pause the parcelport
if(!enable_parcel_handling_) break;
// handle all send requests
{
hpx::lcos::local::spinlock::scoped_lock l(senders_mtx_);
for(
senders_type::iterator it = senders_.begin();
!stopped_ && enable_parcel_handling_ && it != senders_.end();
/**/)
{
if((*it)->done())
{
it = senders_.erase(it);
}
else
{
++it;
}
}
has_work = !senders_.empty();
}
// Send the pending close requests
{
hpx::lcos::local::spinlock::scoped_lock l(close_mtx_);
typedef std::pair<int, int> pair_type;
BOOST_FOREACH(pair_type p, pending_close_requests_)
{
header close_request = header::close(p.first, p.second);
close_requests.push_back(std::make_pair(p.first, MPI_Request()));
MPI_Isend(
close_request.data(), // Data pointer
close_request.data_size_, // Size
close_request.type(), // MPI Datatype
close_request.rank(), // Destination
0, // Tag
communicator_, // Communicator
&close_requests.back().second
);
}
pending_close_requests_.clear();
}
// add new receive requests
std::pair<bool, header> next(acceptor_.next_header());
if(next.first)
{
boost::shared_ptr<receiver> rcv;
header h = next.second;
receivers_tag_map_type & tag_map = receivers_map_[h.rank()];
receivers_tag_map_type::iterator jt = tag_map.find(h.tag());
if(jt != tag_map.end())
{
rcv = jt->second;
}
else
{
rcv = boost::make_shared<receiver>(
communicator_
, get_next_tag()
, h.tag()
, h.rank()
, *this);
tag_map.insert(std::make_pair(h.tag(), rcv));
}
if(h.close_request())
{
rcv->close();
}
else
{
h.assert_valid();
if (static_cast<std::size_t>(h.size()) > this->get_max_message_size())
{
// report this problem ...
HPX_THROW_EXCEPTION(boost::asio::error::operation_not_supported,
"mpi::connection_handler::handle_messages",
//......... part of the code is omitted here .........
Developer: amitkr, Project: hpx, Lines of code: 101, Source file: connection_handler_mpi.cpp
Example 12: main
int main(int argc, char **argv)
{
int myRank;
int pNum;
double start_time, end_time;
double *matrix;
MPI_Status stat;
MPI_Request req1[300], req2[300];
MPI_Init(&argc, &argv);
start_time = MPI_Wtime();
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
MPI_Comm_size(MPI_COMM_WORLD, &pNum);
if (myRank == 0)
{
double buf[N+5];
while(1)
{
double diff;
int flag = 0;
for(int i = 1;i < pNum;i++)
{
MPI_Recv(&diff, 1, MPI_DOUBLE, i, MPI_ANY_TAG, MPI_COMM_WORLD, &stat);
if (diff > ext)
flag = 1;
}
MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
if (flag == 0)
break;
}
}
else
{
// initialize the local computation model
int local_size = N / (pNum - 1) + 2;
if (myRank == pNum - 1)
local_size = N - (local_size - 2) * (pNum - 2) + 2;
//printf("local size: %d\n", local_size);
double temp[local_size][N + 2], temp2[local_size][N + 2];
for(int i = 1;i < local_size - 1;i++)
{
for(int j = 1;j < N + 1;j++)
temp[i][j] = (int)(random())% 1000;
temp[i][0] = temp[i][N + 1] = 0;
}
for(int j = 0;j < N + 2;j++)
temp[0][j] = temp[local_size - 1][j] = 0;
double maxDiff = ext + 1;
while(1)
{
maxDiff = ext;
// exchange boundary rows with neighboring ranks
int sendNum = 0, recNum = 0;
if (myRank != 1)
MPI_Isend(temp[1], N + 2, MPI_DOUBLE, myRank - 1, 0, MPI_COMM_WORLD, &req1[sendNum++]);
if (myRank != pNum - 1)
MPI_Isend(temp[local_size - 2], N + 2, MPI_DOUBLE, myRank + 1, 0, MPI_COMM_WORLD, &req1[sendNum++]);
double preBuf[N], nextBuf[N];
if (myRank != 1)
{
MPI_Irecv(temp[0], N + 2, MPI_DOUBLE, myRank - 1, MPI_ANY_TAG, MPI_COMM_WORLD, &req2[recNum++]);
//memcpy(temp[0], preBuf, N + 2);
}
if (myRank != pNum - 1)
{
MPI_Irecv(temp[local_size - 1], N + 2, MPI_DOUBLE, myRank + 1, MPI_ANY_TAG, MPI_COMM_WORLD, &req2[recNum++]);
//memcpy(temp[local_size - 1], nextBuf, N + 2);
}
//calculate
for(int i = 1;i < local_size - 1;i++)
for(int j = 1;j <= N;j++)
{
temp2[i][j] = (temp[i - 1][j] + temp[i + 1][j] + temp[i][j - 1] + temp[i][j + 1] + temp[i][j]) / 5;
if (fabs(temp2[i][j] - temp[i][j]) > maxDiff)
maxDiff = fabs(temp2[i][j] - temp[i][j]);
}
for(int i = 0;i < recNum;i++)
MPI_Wait(&req2[i], &stat);
// printf("id:%d diff %lf localSize %d\n", myRank, maxDiff, local_size);
MPI_Send(&maxDiff, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
int flag;
MPI_Bcast(&flag, 1, MPI_INT, 0, MPI_COMM_WORLD);
for(int i = 1;i < local_size - 1;i++)
for(int j = 1;j <= N;j++)
temp[i][j] = temp2[i][j];
if (flag == 0)
break;
// printf("rank:%d ok diff %lf\n", myRank, maxDiff);
}
//for(int j = 1;j < local_size - 1;j++)
// MPI_Send(&temp[i][1], N, MPI_DOUBLE, 0, myRank, MPI_COMM_WORLD, &stat);
}
end_time = MPI_Wtime();
printf("rank: %d, runtime is %fs\n", myRank, end_time - start_time);
//......... part of the code is omitted here .........
Developer: Harvey-Ai, Project: SystemRep, Lines of code: 101, Source file: unblock_MPI_laplace.cpp
Example 13: QCDDopr_Mult
void QCDDopr_Mult(QCDSpinor* pV,QCDMatrix* pU,QCDSpinor* pW,double k)
{
MPI_Request reqSend[8];
MPI_Request reqRecv[8];
MPI_Status st;
QCDMatrix* pUx;
QCDMatrix* pUy;
QCDMatrix* pUz;
QCDMatrix* pUt;
int i;
qcdtKappa[0] = k;
qcdtKappa[1] = k;
qcdtKappa[2] = k;
qcdtKappa[3] = k;
pUx = pU;
pUy = pU + qcdNsite;
pUz = pU + qcdNsite*2;
pUt = pU + qcdNsite*3;
/* #pragma omp parallel num_threads(8) */
#pragma omp parallel
{
int tid = 0,nid = 1;
tid = omp_get_thread_num();
nid = omp_get_num_threads();
/* //debug */
/* printf("nthreads: %d\n", nid); */
/* printf("max_threads: %d\n", omp_get_max_threads()); */
if(tid == 0){
MPI_Irecv(qcdRecvBuf[QCD_TP],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TP],QCD_TP,MPI_COMM_WORLD,&reqRecv[QCD_TP]);
MPI_Irecv(qcdRecvBuf[QCD_TM],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TM],QCD_TM,MPI_COMM_WORLD,&reqRecv[QCD_TM]);
MPI_Irecv(qcdRecvBuf[QCD_XP],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XP],QCD_XP,MPI_COMM_WORLD,&reqRecv[QCD_XP]);
MPI_Irecv(qcdRecvBuf[QCD_XM],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XM],QCD_XM,MPI_COMM_WORLD,&reqRecv[QCD_XM]);
MPI_Irecv(qcdRecvBuf[QCD_YP],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YP],QCD_YP,MPI_COMM_WORLD,&reqRecv[QCD_YP]);
MPI_Irecv(qcdRecvBuf[QCD_YM],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YM],QCD_YM,MPI_COMM_WORLD,&reqRecv[QCD_YM]);
MPI_Irecv(qcdRecvBuf[QCD_ZP],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZP],QCD_ZP,MPI_COMM_WORLD,&reqRecv[QCD_ZP]);
MPI_Irecv(qcdRecvBuf[QCD_ZM],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZM],QCD_ZM,MPI_COMM_WORLD,&reqRecv[QCD_ZM]);
}
//Send T
QCDDopr_MakeTPB_dirac(qcdSendBuf[QCD_TP],pW,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_TP],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TM],QCD_TP,MPI_COMM_WORLD,&reqSend[QCD_TP]);
}
QCDDopr_MakeTMB_dirac(qcdSendBuf[QCD_TM],pUt + qcdNsite-qcdNxyz,pW + qcdNsite-qcdNxyz,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_TM],12*qcdNxyz,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_TP],QCD_TM,MPI_COMM_WORLD,&reqSend[QCD_TM]);
}
//Send X
QCDDopr_MakeXPB(qcdSendBuf[QCD_XP],pW,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_XP],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XM],QCD_XP,MPI_COMM_WORLD,&reqSend[QCD_XP]);
}
QCDDopr_MakeXMB(qcdSendBuf[QCD_XM],pUx + qcdNx-1,pW + qcdNx-1,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_XM],12*qcdNy*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_XP],QCD_XM,MPI_COMM_WORLD,&reqSend[QCD_XM]);
}
//Send Y
QCDDopr_MakeYPB(qcdSendBuf[QCD_YP],pW,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_YP],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YM],QCD_YP,MPI_COMM_WORLD,&reqSend[QCD_YP]);
}
QCDDopr_MakeYMB(qcdSendBuf[QCD_YM],pUy + qcdNxy-qcdNx,pW + qcdNxy-qcdNx,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_YM],12*qcdNx*qcdNz*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_YP],QCD_YM,MPI_COMM_WORLD,&reqSend[QCD_YM]);
}
//Send Z
QCDDopr_MakeZPB(qcdSendBuf[QCD_ZP],pW,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_ZP],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZM],QCD_ZP,MPI_COMM_WORLD,&reqSend[QCD_ZP]);
}
QCDDopr_MakeZMB(qcdSendBuf[QCD_ZM],pUz + qcdNxyz-qcdNxy,pW + qcdNxyz-qcdNxy,tid,nid);
#pragma omp barrier
if(tid == 0){
MPI_Isend(qcdSendBuf[QCD_ZM],12*qcdNx*qcdNy*qcdNt,MPI_DOUBLE_PRECISION,qcdRankNeighbors[QCD_ZP],QCD_ZM,MPI_COMM_WORLD,&reqSend[QCD_ZM]);
}
//......... part of the code is omitted here .........
Developer: shamouda, Project: x10-applications, Lines of code: 101, Source file: dslash_base.c
Example 14: GetResRoot
double GetResRoot(double *phi, double *b, param_t p)
{
int x,y;
//true residue
double residue;
double ResRoot = 0.0;
double Bmag = 0.0;
double ResRoot_global = 0.0;
double Bmag_global = 0.0;
// A little trick to index phi normally.
double* phi_s = phi + p.L;
// Prepare for async send/recv
MPI_Request request[4];
int requests;
MPI_Status status[4];
requests = 0;
// Send the higher-memory component to the next rank.
MPI_Isend(phi_s + p.L*(p.y-1), p.L, MPI_DOUBLE,
(p.my_rank+1)%p.world_size, 1,
MPI_COMM_WORLD, request + requests++);
MPI_Irecv(phi_s - p.L, p.L, MPI_DOUBLE,
(p.my_rank+p.world_size-1)%p.world_size, 1,
MPI_COMM_WORLD, request + requests++);
// Send the lower-memory component to the previous rank.
MPI_Isend(phi_s, p.L, MPI_DOUBLE,
(p.my_rank+p.world_size-1)%p.world_size, 0,
MPI_COMM_WORLD, request + requests++);
MPI_Irecv(phi_s + p.L*p.y, p.L, MPI_DOUBLE,
(p.my_rank+1)%p.world_size, 0,
MPI_COMM_WORLD, request + requests++);
// Do some other work while we wait!
// Update everything that doesn't depend on buffers.
for(x = 0; x < p.L; x++)
{
for(y = 1; y < p.y-1; y++)
{
residue = p.scale* b[x + y*p.L]
- phi_s[x + y*p.L]
+ p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
+ phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]);
ResRoot += residue*residue;
Bmag += b[x + y*p.L]*b[x + y*p.L];
}
}
// Wait if the halo exchange hasn't finished yet.
MPI_Waitall ( requests, request, status );
// Update the rest of the cells.
for(x = 0; x < p.L; x++)
{
y = 0;
residue = p.scale* b[x + y*p.L]
- phi_s[x + y*p.L]
+ p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
+ phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]);
ResRoot += residue*residue;
Bmag += b[x + y*p.L]*b[x + y*p.L];
y = p.y-1;
residue = p.scale* b[x + y*p.L]
- phi_s[x + y*p.L]
+ p.scale*(phi_s[(x+1)%p.L + y*p.L] + phi_s[(x-1+p.L)%p.L + y*p.L]
+ phi_s[x + (y+1)*p.L] + phi_s[x + (y-1)*p.L]);
ResRoot += residue*residue;
Bmag += b[x + y*p.L]*b[x + y*p.L];
}
MPI_Allreduce(&Bmag, &Bmag_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
MPI_Allreduce(&ResRoot, &ResRoot_global, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
// Normalized true residue
return sqrt(ResRoot_global)/sqrt(Bmag_global);
}
Developer: BU-EC-HPC-S16, Project: EC500-High-Performance-Computing, Lines of code: 88, Source file: Laplace2D_mpi.c
Example 15: main
int
main (int argc, char **argv)
{
int nprocs = -1;
int rank = -1;
char processor_name[128];
int namelen = 128;
int buf0[buf_size];
int buf1[buf_size];
MPI_Status statuses[2];
MPI_Request reqs[2];
/* init */
MPI_Init (&argc, &argv);
MPI_Comm_size (MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank (MPI_COMM_WORLD, &rank);
MPI_Get_processor_name (processor_name, &namelen);
printf ("(%d) is alive on %s\n", rank, processor_name);
fflush (stdout);
MPI_Barrier (MPI_COMM_WORLD);
/* this code is very similar to no-error-waitall-any_src.c */
/* but deadlocks since task 2's send and recv are inverted... */
if (nprocs < 3)
{
printf ("not enough tasks\n");
}
else if (rank == 0)
{
MPI_Irecv (buf0, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Irecv (buf1, buf_size, MPI_INT, 1, 0, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
MPI_Send (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD);
}
else if (rank == 1)
{
memset (buf0, 0, buf_size);
MPI_Isend (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD, &reqs[0]);
MPI_Isend (buf0, buf_size, MPI_INT, 2, 1, MPI_COMM_WORLD, &reqs[1]);
MPI_Waitall (2, reqs, statuses);
MPI_Recv (buf1, buf_size, MPI_INT, 0, 1, MPI_COMM_WORLD, statuses);
MPI_Send (buf0, buf_size, MPI_INT, 0, 0, MPI_COMM_WORLD);
}
else if (rank == 2)
{
MPI_Recv (buf1, buf_size, MPI_INT, 1, 1, MPI_COMM_WORLD, statuses);
}
MPI_Barrier (MPI_COMM_WORLD);
MPI_Finalize ();
printf ("(%d) Finished normally\n", rank);
}
Developer: Julio-Anjos, Project: simgrid, Lines of code: 62, Source file: waitall-deadlock.c
Example 16: MatGetSubMatrices_MPIDense_Local
//......... part of the code is omitted here .........
ierr = PetscMemzero(sbuf1[j]+1,2*w3[j]*sizeof(PetscInt));CHKERRQ(ierr);
ptr[j] = sbuf1[j] + 2*w3[j] + 1;
}
/* Parse the isrow and copy data into outbuf */
for (i=0; i<ismax; i++) {
ierr = PetscMemzero(ctr,size*sizeof(PetscInt));CHKERRQ(ierr);
irow_i = irow[i];
jmax = nrow[i];
for (j=0; j<jmax; j++) { /* parse the indices of each IS */
row = irow_i[j];
proc = rtable[row];
if (proc != rank) { /* copy to the outgoing buf*/
ctr[proc]++;
*ptr[proc] = row;
ptr[proc]++;
}
}
/* Update the headers for the current IS */
for (j=0; j<size; j++) { /* Can Optimise this loop too */
if ((ctr_j = ctr[j])) {
sbuf1_j = sbuf1[j];
k = ++sbuf1_j[0];
sbuf1_j[2*k] = ctr_j;
sbuf1_j[2*k-1] = i;
}
}
}
/* Now post the sends */
ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&s_waits1);CHKERRQ(ierr);
for (i=0; i<nrqs; ++i) {
j = pa[i];
ierr = MPI_Isend(sbuf1[j],w1[2*j],MPIU_INT,j,tag0,comm,s_waits1+i);CHKERRQ(ierr);
}
/* Post receives to capture the row data from other procs */
ierr = PetscMalloc((nrqs+1)*sizeof(MPI_Request),&r_waits2);CHKERRQ(ierr);
ierr = PetscMalloc((nrqs+1)*sizeof(PetscScalar*),&rbuf2);CHKERRQ(ierr);
for (i=0; i<nrqs; i++) {
j = pa[i];
count = (w1[2*j] - (2*sbuf1[j][0] + 1))*N;
ierr = PetscMalloc((count+1)*sizeof(PetscScalar),&rbuf2[i]);CHKERRQ(ierr);
ierr = MPI_Irecv(rbuf2[i],count,MPIU_SCALAR,j,tag1,comm,r_waits2+i);CHKERRQ(ierr);
}
/* Receive messages (row numbers), then pack and send off the row values
to the correct processors */
ierr = PetscMalloc((nrqr+1)*sizeof(MPI_Request),&s_waits2);CHKERRQ(ierr);