本文整理汇总了C++中shmem_barrier_all函数的典型用法代码示例。如果您正苦于以下问题:C++ shmem_barrier_all函数的具体用法?C++ shmem_barrier_all怎么用?C++ shmem_barrier_all使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了shmem_barrier_all函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: mca_memheap_modex_recv_all
/*
 * Obtain the memory key of every memheap segment of every other PE through
 * mca_memheap_base_get_cached_mkey(), aborting the job if any lookup fails.
 * Afterwards, if the heap segment carries a valid shm id, run a global
 * barrier and, for MAP_SEGMENT_ALLOC_SHM segments, issue IPC_RMID on that id.
 *
 * Does nothing when mca_memheap_base_key_exchange is not set.
 */
void mca_memheap_modex_recv_all(void)
{
    int pe;
    int seg;
    int nprocs, my_pe;
    oshmem_proc_t *proc;
    mca_spml_mkey_t *mkey;
    void *dummy_rva;

    if (!mca_memheap_base_key_exchange) {
        return;
    }

    /* init rkey cache */
    nprocs = oshmem_num_procs();
    my_pe = oshmem_my_proc_id();

    /* Note:
     * The exchange goes through rml until the problem with grpcomm.modex
     * and barrier is understood.
     */
    for (pe = 0; pe < nprocs; pe++) {
        if (pe != my_pe) {
            proc = oshmem_proc_group_find(oshmem_group_all, pe);
            for (seg = 0; seg < memheap_map->n_segments; seg++) {
                mkey = mca_memheap_base_get_cached_mkey(pe,
                                                        memheap_map->mem_segs[seg].start,
                                                        proc->transport_ids[0],
                                                        &dummy_rva);
                if (NULL == mkey) {
                    MEMHEAP_ERROR("Failed to receive mkeys");
                    oshmem_shmem_abort(-1);
                }
            }
        }
    }

    /*
     * Why orte_grpcomm.barrier is not used here:
     * ess/pmi directs to grpcomm/pmi when launched by slurm srun(), and
     * grpcomm/pmi calls PMI_Barrier(), a function of an external library that
     * never runs opal_progress(). Slow PEs then send MEMHEAP_RKEY_REQ to fast
     * PEs that are already waiting in the barrier and never receive
     * MEMHEAP_RKEY_RESP.
     *
     * Possible ways to solve this:
     * 1. count requests from remote PEs and do ORTE_PROGRESSED_WAIT until the
     *    expected value is reached;
     * 2. use shmem_barrier_all();
     * 3. rework pmi/barrier to use opal_progress();
     * 4. use orte_grpcomm.barrier carefully;
     *
     * It seems there is no need to use orte_grpcomm.barrier here.
     */
    if (memheap_map->mem_segs[HEAP_SEG_INDEX].shmid == MEMHEAP_SHM_INVALID) {
        return;
    }

    /* A barrier is unavoidable here: everyone must be attached to our segment
     * before it is cleaned up. This path is only taken on older linuxes
     * (-mca shmalloc_use_hugepages 3|4).
     */
    shmem_barrier_all();

    /* keys exchanged, segments attached, now we can safely cleanup */
    if (MAP_SEGMENT_ALLOC_SHM == memheap_map->mem_segs[HEAP_SEG_INDEX].type) {
        shmctl(memheap_map->mem_segs[HEAP_SEG_INDEX].shmid, IPC_RMID, NULL);
    }
}
开发者ID:jsquyres,项目名称:ompi-idynamic,代码行数:69,代码来源:memheap_base_mkey.c
示例2: test_item3
/*
 * Atomic-operation test item.
 *
 * Each PE applies FUNC_VALUE to the symmetric word shmem_addr on PE peer_proc
 * (always 0 in this function), passing its own PE number, and keeps the
 * result in `value`. The per-PE results are gathered into check_arr with
 * shmem_collect32(). The test fails as soon as more than one PE number in
 * [0, num_proc) is not found in check_arr (as judged by sys_fcompare()).
 *
 * Returns TC_PASS, TC_FAIL, or TC_SETUP_FAIL when a symmetric allocation
 * failed.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE* check_arr = NULL;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int flag = 0;
    int missed_values = 0;
    static long* pSync = NULL;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    check_arr = shmalloc(sizeof(*check_arr) * num_proc);
    pSync = shmalloc(sizeof(*pSync) * _SHMEM_COLLECT_SYNC_SIZE);

    if (shmem_addr && pSync && check_arr)
    {
        /* NOTE(review): presumably static so that the collect source is a
         * symmetric object - confirm against the OpenSHMEM requirements. */
        static TYPE_VALUE value = 0;

        /* Fill pSync only once the allocation is known to have succeeded;
         * doing it before the check dereferenced NULL on allocation failure. */
        for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
            pSync[i] = _SHMEM_SYNC_VALUE;
        }

        /* Store my value */
        my_value = (TYPE_VALUE)my_proc;
        *shmem_addr = DEFAULT_VALUE;

        shmem_barrier_all();

        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            missed_values = 0;
            my_value = (TYPE_VALUE)my_proc;
            value = FUNC_VALUE(shmem_addr, my_value, peer_proc);

            shmem_barrier_all();
            /* Element count is the size of `value` expressed in 32-bit words. */
            shmem_collect32(check_arr, &value, (sizeof(value) + 3 ) / 4, 0, 0, num_proc, pSync);
            shmem_barrier_all();

            /* Count the PE numbers that do not appear anywhere in check_arr. */
            for (j = 0; j < num_proc ; j++)
            {
                flag = 0;
                for (k = 0; k < num_proc; k++)
                {
                    if (sys_fcompare(check_arr[k], j))
                    {
                        flag = 1;
                        break;
                    }
                }
                if (flag == 0)
                {
                    missed_values++;
                }
                if (missed_values > 1)
                {
                    rc = TC_FAIL;
                    break;
                }
            }
        }

        shmem_barrier_all();
        log_debug(OSH_TC, "my(#%d:%lld) missed_values expected = 1 vs missed_values = %d\n",
                  my_proc, (INT64_TYPE)my_value, missed_values);
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }
    /* check_arr was previously leaked on every call. */
    if (check_arr)
    {
        shfree(check_arr);
    }
    if (pSync)
    {
        shfree(pSync);
    }

    return rc;
}
开发者ID:openshmem-org,项目名称:tests-mellanox,代码行数:89,代码来源:osh_atomic_tc5.c
示例3: main
int
main (int argc, char **argv)
{
int i;
int nextpe;
int me, npes;
int success1, success2, success3, success4, success5, success6, success7,
success8;
short src1[N];
int src2[N];
long src3[N];
long double src4[N];
long long src5[N];
double src6[N];
float src7[N];
char *src8;
short src9;
int src10;
long src11;
double src12;
float src13;
int fail_count = 0;
shmem_init ();
me = shmem_my_pe ();
npes = shmem_n_pes ();
if (npes > 1) {
success1 = 0;
success2 = 0;
success3 = 0;
success4 = 0;
success5 = 0;
success6 = 0;
success7 = 0;
success8 = 0;
src8 = (char *) malloc (N * sizeof (char));
for (i = 0; i < N; i += 1) {
src1[i] = (short) me;
src2[i] = me;
src3[i] = (long) me;
src4[i] = (long double) me;
src5[i] = (long long) me;
src6[i] = (double) me;
src7[i] = (float) me;
src8[i] = (char) me;
}
src9 = (short) me;
src10 = me;
src11 = (long) me;
src12 = (double) me;
src13 = (float) me;
for (i = 0; i < N; i += 1) {
dest1[i] = -9;
dest2[i] = -9;
dest3[i] = -9;
dest4[i] = -9;
dest5[i] = -9;
dest6[i] = -9;
dest7[i] = -9.0;
dest8[i] = -9;
}
dest9 = -9;
dest10 = -9;
dest11 = -9;
dest12 = -9;
dest13 = -9.0;
nextpe = (me + 1) % npes;
/* Testing shmem_short_put, shmem_short_put, shmem_int_put,
shmem_long_put, shmem_longdouble_put, shmem_longlong_put,
shmem_double_put, shmem_float_put, shmem_putmem */
shmem_barrier_all ();
shmem_short_put (dest1, src1, N, nextpe);
shmem_int_put (dest2, src2, N, nextpe);
shmem_long_put (dest3, src3, N, nextpe);
shmem_longdouble_put (dest4, src4, N, nextpe);
shmem_longlong_put (dest5, src5, N, nextpe);
shmem_double_put (dest6, src6, N, nextpe);
shmem_float_put (dest7, src7, N, nextpe);
shmem_putmem (dest8, src8, N * sizeof (char), nextpe);
shmem_barrier_all ();
if (me == 0) {
for (i = 0; i < N; i += 1) {
if (dest1[i] != (npes - 1)) {
success1 = 1;
}
if (dest2[i] != (npes - 1)) {
success2 = 1;
}
//.........这里部分代码省略.........
开发者ID:jdinan,项目名称:tests-uh,代码行数:101,代码来源:test_shmem_put_globals.c
示例4: main
int
main (int argc, char *argv[])
{
double t, tv[2];
int reps = 10000;
int doprint = 0;
char *progName;
int minWords = 1;
int maxWords = 1;
int incWords;
int nwords;
int nproc;
int proc;
int peer;
int c;
int r;
int i;
long *rbuf;
long *tbuf;
start_pes (0);
proc = _my_pe ();
nproc = _num_pes ();
for (progName = argv[0] + strlen (argv[0]);
progName > argv[0] && *(progName - 1) != '/'; progName--)
;
while ((c = getopt (argc, argv, "n:eh")) != -1)
switch (c)
{
case 'n':
if ((reps = getSize (optarg)) <= 0)
usage (progName);
break;
case 'e':
doprint++;
break;
case 'h':
help (progName);
default:
usage (progName);
}
if (optind == argc)
minWords = 1;
else if ((minWords = getSize (argv[optind++])) <= 0)
usage (progName);
if (optind == argc)
maxWords = minWords;
else if ((maxWords = getSize (argv[optind++])) < minWords)
usage (progName);
if (optind == argc)
incWords = 0;
else if ((incWords = getSize (argv[optind++])) < 0)
usage (progName);
if (!(rbuf = (long *) shmalloc (maxWords * sizeof (long))))
{
perror ("Failed memory allocation");
exit (1);
}
memset (rbuf, 0, maxWords * sizeof (long));
shmem_barrier_all ();
if (!(tbuf = (long *) malloc (maxWords * sizeof (long))))
{
perror ("Failed memory allocation");
exit (1);
}
if (nproc == 1)
return 0;
for (i = 0; i < maxWords; i++)
tbuf[i] = 1000 + (i & 255);
if (doprint)
printf
("%d(%d): Shmem PING reps %d minWords %d maxWords %d incWords %d\n",
proc, nproc, reps, minWords, maxWords, incWords);
shmem_barrier_all ();
peer = proc ^ 1;
if (peer >= nproc)
doprint = 0;
for (nwords = minWords;
nwords <= maxWords;
nwords = incWords ? nwords + incWords : nwords ? 2 * nwords : 1)
{
r = reps;
//.........这里部分代码省略.........
开发者ID:charlesarcher,项目名称:openshmem-tutorial,代码行数:101,代码来源:sping.c
示例5: Java_shmem_ShMem_barrierAll
/*
 * JNI native function named for shmem.ShMem.barrierAll: simply forwards to
 * shmem_barrier_all(). Neither JNI argument is used.
 */
JNIEXPORT void JNICALL Java_shmem_ShMem_barrierAll(JNIEnv *env, jclass clazz)
{
    (void) env;
    (void) clazz;

    shmem_barrier_all();
}
开发者ID:ORNL,项目名称:ompi,代码行数:4,代码来源:shmem_ShMem.c
示例6: test_item7
static int test_item7(void)
{
int rc = TC_PASS;
TYPE_VALUE* target_addr = NULL;
TYPE_VALUE* source_addr = NULL;
TYPE_VALUE source_value = 0;
TYPE_VALUE expect_value = 0;
int num_proc = 0;
int my_proc = 0;
num_proc = _num_pes();
my_proc = _my_pe();
target_addr = (TYPE_VALUE*)shmalloc(sizeof(*target_addr) * __max_buffer_size);
source_addr = (TYPE_VALUE*)shmalloc(sizeof(*source_addr) * __max_buffer_size);
if (target_addr && source_addr)
{
TYPE_VALUE value = DEFAULT_VALUE;
int i = 0;
int j = 0;
long cur_buf_size = 0;
for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
{
cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count);
pWrk = shmalloc(sizeof(*pWrk) * sys_max(cur_buf_size/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
if (pWrk)
{
/* Set initial target value */
value = DEFAULT_VALUE;
fill_buffer((void *)target_addr, cur_buf_size, (void *)&value, sizeof(value));
/* Give some time to all PE for setting their values */
shmem_barrier_all();
/* Set my value */
source_value = (TYPE_VALUE)(BASE_VALUE + my_proc);
fill_buffer((void *)source_addr, cur_buf_size, (void *)&source_value, sizeof(source_value));
/* Define expected value */
expect_value = ( my_proc % 2 ? DEFAULT_VALUE : BASE_VALUE );
/* This guarantees that PE set initial value before peer change one */
for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ )
{
pSync[j] = _SHMEM_SYNC_VALUE;
}
shmem_barrier_all();
/* Put value to peer */
FUNC_VALUE(target_addr, source_addr, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrk, pSync);
/* Get value put by peer:
* These routines start the remote transfer and may return before the data
* is delivered to the remote PE
*/
shmem_barrier_all();
{
int wait = WAIT_COUNT;
while (wait--)
{
value = *target_addr;
if (expect_value == value) break;
sleep(1);
}
}
rc = (!compare_buffer_with_const(target_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);
log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);
if (rc)
{
TYPE_VALUE* check_addr = target_addr;
int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
int show_index = (odd_index > 1 ? odd_index - 2 : 0);
int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);
log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
show_buffer(check_addr + show_index, show_size);
}
shfree(pWrk);
} else {
rc = TC_SETUP_FAIL;
}
}
}
else
{
rc = TC_SETUP_FAIL;
}
if (source_addr)
{
shfree(source_addr);
}
//.........这里部分代码省略.........
开发者ID:alex-mikheev,项目名称:tests-mellanox,代码行数:101,代码来源:osh_reduce_tc23.c
示例7: test_item8
static int test_item8(void)
{
int rc = TC_PASS;
static TYPE_VALUE target_addr[MAX_BUFFER_SIZE * 2];
static TYPE_VALUE source_addr[MAX_BUFFER_SIZE * 2];
TYPE_VALUE source_value = 0;
TYPE_VALUE expect_value = 0;
int num_proc = 0;
int my_proc = 0;
long* pSyncMult = NULL;
TYPE_VALUE* pWrkMult = NULL;
int pSyncNum = 2;
int pWrkNum = 2;
num_proc = _num_pes();
my_proc = _my_pe();
pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_REDUCE_SYNC_SIZE);
if (pSyncMult)
{
TYPE_VALUE value = DEFAULT_VALUE;
int i = 0;
int j = 0;
long cur_buf_size = 0;
for ( j = 0; j < pSyncNum * _SHMEM_REDUCE_SYNC_SIZE; j++ )
{
pSyncMult[j] = _SHMEM_SYNC_VALUE;
}
/* Give some time to all PE for setting their values */
shmem_barrier_all();
pWrkMult = shmalloc(sizeof(*pWrkMult) * pWrkNum * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE));
if (pWrkMult)
{
value = DEFAULT_VALUE;
source_value = (TYPE_VALUE)(BASE_VALUE + my_proc);
fill_buffer((void *)source_addr, MAX_BUFFER_SIZE * 2, (void *)&source_value, sizeof(source_value));
fill_buffer((void *)target_addr, MAX_BUFFER_SIZE * 2, (void *)&value, sizeof(value));
shmem_barrier_all();
for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
{
cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count);
/* Set initial target value */
value = DEFAULT_VALUE;
/* Set my value */
source_value = (TYPE_VALUE)(BASE_VALUE + my_proc);
/* Define expected value */
expect_value = ( my_proc % 2 ? DEFAULT_VALUE : BASE_VALUE );
/* Put value to peer */
FUNC_VALUE(target_addr + (i % 2) * MAX_BUFFER_SIZE, source_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrkMult + (i % pWrkNum) * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE), pSyncMult + (i % pSyncNum) * _SHMEM_REDUCE_SYNC_SIZE);
rc = (!compare_buffer_with_const(target_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL);
log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n",
my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size);
if (rc)
{
TYPE_VALUE* check_addr = target_addr + (i % 2) * MAX_BUFFER_SIZE;
int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value));
int show_index = (odd_index > 1 ? odd_index - 2 : 0);
int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1);
log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1);
log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size);
show_buffer(check_addr + show_index, show_size);
}
fill_buffer((void *)(source_addr + (i % 2) * MAX_BUFFER_SIZE), cur_buf_size, (void *)&source_value, sizeof(source_value));
fill_buffer((void *)(target_addr + (i % 2) * MAX_BUFFER_SIZE ), cur_buf_size, (void *)&value, sizeof(value));
}
shfree(pWrkMult);
} else {
rc = TC_SETUP_FAIL;
}
shfree(pSyncMult);
} else {
rc = TC_SETUP_FAIL;
}
return rc;
}
开发者ID:alex-mikheev,项目名称:tests-mellanox,代码行数:85,代码来源:osh_reduce_tc23.c
示例8: sor
void
sor (float **current_ptr, float **next_ptr)
{
int i, j, my_start, my_end, my_num_rows;
float *U_Curr_Above = (float *) shmalloc ((sizeof (float)) * ((int) floor (WIDTH / H))); /* 1d array holding values from bottom row of PE above */
float *U_Curr_Below = (float *) shmalloc ((sizeof (float)) * ((int) floor (WIDTH / H))); /* 1d array holding values from top row of PE below */
float *U_Send_Buffer = (float *) shmalloc ((sizeof (float)) * ((int) floor (WIDTH / H))); /* 1d array holding values that are currently being sent */
//float U_Curr_Above[(int)floor(WIDTH/H)]; /* 1d array holding values from bottom row of PE above */
//float U_Curr_Below[(int)floor(WIDTH/H)]; /* 1d array holding values from top row of PE below */
//float U_Send_Buffer[(int)floor(WIDTH/H)]; /* 1d array holding values that are currently being sent */
float W = 1.5;
//MPI_Request request;
//MPI_Status status;
//MPI_Comm_size(MPI_COMM_WORLD,&p);
//MPI_Comm_rank(MPI_COMM_WORLD,&my_rank);
my_start = get_start (my_rank);
my_end = get_end (my_rank);
my_num_rows = get_num_rows (my_rank);
/*
* Communicating ghost rows - only bother if p > 1
*/
if (p > 1)
{
/* send/receive bottom rows */
if (my_rank < (p - 1))
{
/* populate send buffer with bottow row */
for (i = 0; i < (int) floor (WIDTH / H); i++)
{
U_Send_Buffer[i] = current_ptr[my_num_rows - 1][i];
}
/* non blocking send */
//MPI_Isend(U_Send_Buffer,(int)floor(WIDTH/H),MPI_FLOAT,my_rank+1,0,MPI_COMM_WORLD,&request);
shmem_float_put (U_Curr_Above, U_Send_Buffer,
(int) floor (WIDTH / H), my_rank + 1);
}
//if (my_rank > ROOT) {
/* blocking receive */
//MPI_Recv(U_Curr_Above,(int)floor(WIDTH/H),MPI_FLOAT,my_rank-1,0,MPI_COMM_WORLD,&status);
//}
//MPI_Barrier(MPI_COMM_WORLD);
shmem_barrier_all ();
/* send/receive top rows */
if (my_rank > ROOT)
{
/* populate send buffer with top row */
for (i = 0; i < (int) floor (WIDTH / H); i++)
{
U_Send_Buffer[i] = current_ptr[0][i];
}
/* non blocking send */
//MPI_Isend(U_Send_Buffer,(int)floor(WIDTH/H),MPI_FLOAT,my_rank-1,0,MPI_COMM_WORLD,&request);
shmem_float_put (U_Curr_Below, U_Send_Buffer,
(int) floor (WIDTH / H), my_rank - 1);
}
//if (my_rank < (p-1)) {
/* blocking receive */
//MPI_Recv(U_Curr_Below,(int)floor(WIDTH/H),MPI_FLOAT,my_rank+1,0,MPI_COMM_WORLD,&status);
//}
//MPI_Barrier(MPI_COMM_WORLD);
shmem_barrier_all ();
}
/* solve next reds (i+j odd) */
for (j = my_start; j <= my_end; j++)
{
for (i = 0; i < (int) floor (WIDTH / H); i++)
{
if ((i + j) % 2 != 0)
{
next_ptr[j - my_start][i] =
get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank,
i, j) + (W / 4) * (get_val_par (U_Curr_Above,
current_ptr,
U_Curr_Below,
my_rank, i - 1,
j) +
get_val_par (U_Curr_Above,
current_ptr,
U_Curr_Below,
my_rank, i + 1,
j) +
get_val_par (U_Curr_Above,
current_ptr,
U_Curr_Below,
my_rank, i,
j - 1) +
get_val_par (U_Curr_Above,
current_ptr,
U_Curr_Below,
my_rank, i,
j + 1) -
4 *
(get_val_par
//.........这里部分代码省略.........
开发者ID:coti,项目名称:oshmpi,代码行数:101,代码来源:shmem_2dheat.c
示例9: main
int main()
{
int start,stride,rmlast,rstride,np_aset,inset,lpe;
int my_pe,n_pes;
int i,fail,n_err,asfail,nasfail;
char Case[40];
static int sSource_int[NREDUCE];
static int sTarget_int[NREDUCE];
static int spWrk_int[PWRKELEM];
static long spSync[_SHMEM_REDUCE_SYNC_SIZE];
shmem_init();
my_pe = shmem_my_pe();
n_pes = shmem_n_pes();
lpe=my_pe;
dpSync=shmem_malloc(_SHMEM_REDUCE_SYNC_SIZE*sizeof(long));
for(i=0;i<_SHMEM_REDUCE_SYNC_SIZE;i++) {
gpSync[i]=_SHMEM_SYNC_VALUE;
dpSync[i]=_SHMEM_SYNC_VALUE;
spSync[i]=_SHMEM_SYNC_VALUE;
}
dSource_int=shmem_malloc(NREDUCE*sizeof(int));
dTarget_int=shmem_malloc(NREDUCE*sizeof(int));
dpWrk_int=shmem_malloc((NREDUCE/2+1 > _SHMEM_REDUCE_MIN_WRKDATA_SIZE ? NREDUCE/2+1 : _SHMEM_REDUCE_MIN_WRKDATA_SIZE)*sizeof(int));
for(start=0;start<=MAXSTART;start++) {
rstride=1;
for(stride=0;stride<=MAXSTRIDE;stride++) {
for(rmlast=0;rmlast<=MAXRMLAST;rmlast++)
{
np_aset=(n_pes+rstride-1-start)/rstride-rmlast; /* number of processes in the active set */
if(np_aset > 0) /* if active set is not empty */
{
if(my_pe==0) printf("\nActive set triplet: PE_start=%d,logPE_stride=%d,PE_size=%d \n",start,stride,np_aset);
if((my_pe>=start) && ((my_pe-start)%rstride==0) && ((my_pe-start)/rstride<np_aset)) inset=1;
else inset=0;
/* Initialize Source and Target arrays */
for(i=0;i<NREDUCE;i++) {
sSource_int[i]=SINIT;
sTarget_int[i]=TINIT;
gSource_int[i]=SINIT;
gTarget_int[i]=TINIT;
dSource_int[i]=SINIT;
dTarget_int[i]=TINIT;
}
shmem_barrier_all();
/* CASE: static arrays, source is different from target */
sprintf(Case,"static, source!=target");
if(inset)
asfail=or_int(sSource_int,sTarget_int,start,stride,np_aset,rstride,0,dpWrk_int,gpSync,Case);
else { /* check that values of source and target have not been changed */
nasfail+=check_sval_notchanged(sSource_int,Case);
nasfail+=check_tval_notchanged(sTarget_int,Case);
}
/* CASE: global arrays, source is different from target */
sprintf(Case,"global, source!=target");
if(inset)
asfail=or_int(gSource_int,gTarget_int,start,stride,np_aset,rstride,0,spWrk_int,dpSync,Case);
else { /* check that values of source and target have not been changed */
nasfail+=check_sval_notchanged(gSource_int,Case);
nasfail+=check_tval_notchanged(gTarget_int,Case);
}
/* CASE: symmetric heap arrays, source is different from target */
sprintf(Case,"sym heap, source!=target");
if(inset)
asfail=or_int(dSource_int,dTarget_int,start,stride,np_aset,rstride,0,gpWrk_int,spSync,Case);
else { /* check that values of source and target have not been changed */
nasfail+=check_sval_notchanged(dSource_int,Case);
nasfail+=check_tval_notchanged(dTarget_int,Case);
}
/* Reinitialize Source arrays for new tests */
for(i=0;i<NREDUCE;i++) {
sSource_int[i]=SINIT;
gSource_int[i]=SINIT;
dSource_int[i]=SINIT;
}
shmem_barrier_all();
/* CASE: static arrays, source and target are the same array */
sprintf(Case,"static, source==target");
if(inset)
asfail=or_int(sSource_int,sSource_int,start,stride,np_aset,rstride,1,gpWrk_int,dpSync,Case);
else /* check that values of source have not been changed */
nasfail+=check_sval_notchanged(sSource_int,Case);
/* CASE: global arrays, source and target are the same array */
sprintf(Case,"global, source==target");
if(inset)
//.........这里部分代码省略.........
开发者ID:naveen-rn,项目名称:tests-cray,代码行数:101,代码来源:all_or_int_ext.c
示例10: HPCC_SHMEMRandomAccess
int
HPCC_SHMEMRandomAccess(HPCC_Params *params) {
s64Int i;
static s64Int NumErrors, GlbNumErrors;
int NumProcs, logNumProcs, MyProc;
u64Int GlobalStartMyProc;
int Remainder; /* Number of processors with (LocalTableSize + 1) entries */
u64Int Top; /* Number of table entries in top of Table */
s64Int LocalTableSize; /* Local table width */
u64Int MinLocalTableSize; /* Integer ratio TableSize/NumProcs */
u64Int logTableSize, TableSize;
double CPUTime; /* CPU time to update table */
double RealTime; /* Real time to update table */
double TotalMem;
static int sAbort, rAbort;
int PowerofTwo;
double timeBound = -1; /* OPTIONAL time bound for execution time */
u64Int NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */
u64Int NumUpdates; /* actual number of updates to table - may be smaller than
* NumUpdates_Default due to execution time bounds */
s64Int ProcNumUpdates; /* number of updates per processor */
#ifdef RA_TIME_BOUND
s64Int GlbNumUpdates; /* for reduction */
#endif
static long llpSync[_SHMEM_BCAST_SYNC_SIZE];
static long long int llpWrk[_SHMEM_REDUCE_SYNC_SIZE];
static long ipSync[_SHMEM_BCAST_SYNC_SIZE];
static int ipWrk[_SHMEM_REDUCE_SYNC_SIZE];
FILE *outFile = NULL;
double *GUPs;
double *temp_GUPs;
int numthreads;
for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1){
ipSync[i] = _SHMEM_SYNC_VALUE;
llpSync[i] = _SHMEM_SYNC_VALUE;
}
params->SHMEMGUPs = -1;
GUPs = &params->SHMEMGUPs;
NumProcs = shmem_n_pes();
MyProc = shmem_my_pe();
if (0 == MyProc) {
outFile = stdout;
setbuf(outFile, NULL);
}
params->HPLMaxProcMem = 200000;
TotalMem = params->HPLMaxProcMem; /* max single node memory */
TotalMem *= NumProcs; /* max memory in NumProcs nodes */
TotalMem /= sizeof(u64Int);
/* calculate TableSize --- the size of update array (must be a power of 2) */
for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1;
TotalMem >= 1.0;
TotalMem *= 0.5, logTableSize++, TableSize <<= 1)
; /* EMPTY */
/* determine whether the number of processors is a power of 2 */
if ( (NumProcs & (NumProcs -1)) == 0) {
PowerofTwo = HPCC_TRUE;
Remainder = 0;
Top = 0;
MinLocalTableSize = (TableSize / NumProcs);
LocalTableSize = MinLocalTableSize;
GlobalStartMyProc = (MinLocalTableSize * MyProc);
}
else {
if(MyProc == 0) {
printf("Number of processes must be power of 2\n");
}
return 0;
}
sAbort = 0;
HPCC_Table = HPCC_XMALLOC( s64Int, LocalTableSize );
if (! HPCC_Table) sAbort = 1;
shmem_barrier_all();
shmem_int_sum_to_all(&rAbort, &sAbort, 1, 0, 0, NumProcs, ipWrk, ipSync);
//.........这里部分代码省略.........
开发者ID:hppritcha,项目名称:gups-shmem,代码行数:101,代码来源:RandomAccess.c
示例11: main
int
main (int argc, char **argv)
{
/* arrays used to contain each PE's rows - specify cols, no need to spec rows */
float **U_Curr;
float **U_Next;
/* helper variables */
/* available iterator */
int i, j, k, m, n;
int per_proc, remainder, my_start_row, my_end_row, my_num_rows;
int verbose = 0;
int show_time = 0;
double time;
double t, tv[2];
/*OpenSHMEM initilization*/
start_pes (0);
p = _num_pes ();
my_rank = _my_pe ();
if (p > 8) {
fprintf(stderr, "Ignoring test when run with more than 8 pes\n");
return 77;
}
/* argument processing done by everyone */
int c, errflg;
extern char *optarg;
extern int optind, optopt;
while ((c = getopt (argc, argv, "e:h:m:tw:v")) != -1)
{
switch (c)
{
case 'e':
EPSILON = atof (optarg);
break;
case 'h':
HEIGHT = atoi (optarg);
break;
case 'm':
/* selects the numerical methods */
switch (atoi (optarg))
{
case 1: /* jacobi */
meth = 1;
break;
case 2: /* gauss-seidel */
meth = 2;
break;
case 3: /* sor */
meth = 3;
break;
}
break;
case 't':
show_time++; /* overridden by -v (verbose) */
break;
case 'w':
WIDTH = atoi (optarg);
break;
case 'v':
verbose++;
break;
/* handle bad arguments */
case ':': /* -h or -w without operand */
if (ROOT == my_rank)
fprintf (stderr, "Option -%c requires an operand\n", optopt);
errflg++;
break;
case '?':
if (ROOT == my_rank)
fprintf (stderr, "Unrecognized option: -%c\n", optopt);
errflg++;
break;
}
}
if (ROOT == my_rank && argc < 2)
{
printf ("Using defaults: -h 20 -w 20 -m 2\n");
}
// if (0 < errflg)
// exit(EXIT_FAILURE);
/* wait for user to input runtime params */
for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1)
pSync[i] = _SHMEM_SYNC_VALUE;
shmem_barrier_all ();
/* broadcast method to use */
shmem_broadcast32 (&meth, &meth, 1, 0, 0, 0, p, pSync);
switch (meth)
{
//.........这里部分代码省略.........
开发者ID:coti,项目名称:oshmpi,代码行数:101,代码来源:shmem_2dheat.c
示例12: main
/*
 * Lock test: for ITER rounds, every PE except 0 takes `lock`, reads `count`
 * from PE 0, writes the incremented value back, releases the lock and then
 * atomically increments x[oldj % (n_pes-1)] on PE 0. After a barrier, PE 0
 * reports every x[] slot whose value differs from ITER.
 *
 * NOTE(review): `count` and `lock` are not declared in this function; they
 * are presumably file-scope symmetric variables - confirm against the rest
 * of the file.
 *
 * Exits with status 1 when run on fewer than two PEs or when the symmetric
 * allocation fails; otherwise returns 0 (mismatches are only reported on
 * stderr).
 */
int main(int argc, char **argv)
{
    int i,j;
    long modj,oldj,newcount;
    int my_pe,n_pes;
    size_t max_elements_bytes;
    static long *x;

    shmem_init();
    my_pe = shmem_my_pe();
    n_pes = shmem_n_pes();
#ifdef HAVE_SET_CACHE_INV
    shmem_set_cache_inv();
#endif

    /* fail if trying to use only one processor */
    if ( n_pes <= 1 ){
        fprintf(stderr, "FAIL - test requires at least two PEs\n");
        exit(1);
    }

    if(my_pe == 0)
        fprintf(stderr, "shmem_lock_set_clear(%s) n_pes=%d\n", argv[0],n_pes);

    /* allocate x on all pes (only the copy on PE 0 is used) */
    max_elements_bytes = (size_t) (sizeof(long) * n_pes);
    x = shmem_malloc( max_elements_bytes );
    if (x == NULL) {
        /* Previously unchecked: the loop below would write through NULL. */
        fprintf(stderr, "FAIL - PE %d could not allocate %zu bytes\n",
                my_pe, max_elements_bytes);
        exit(1);
    }
    for(i=0; i<n_pes; i++)
        x[i] = 0;
    count = 0;
    shmem_barrier_all();

    for(i=0; i<ITER; i++) {
        if (my_pe != 0) {
            /* emulate oldj = shmem_long_finc(&count, 0); */
            shmem_set_lock(&lock);
            shmem_long_get(&oldj,&count,1,0);  /* get oldj from PE 0's count */
            newcount = oldj+1;
            shmem_long_put(&count,&newcount,1,0);  /* update count on PE 0 */
            /* Must be a call: the bare name `shmem_quiet;` is an expression
             * statement that does nothing. */
            shmem_quiet();  /* ensure that the write completes */
            shmem_clear_lock(&lock);
            /* end of emulation */

            modj = (oldj % (n_pes-1));  /* PE 0 is just the counter/checker */
            /* increment value in x[modj]; the fetched old value is not needed */
            (void) shmem_long_finc(&x[modj], 0);
        }
    }
    shmem_barrier_all();

    if (my_pe == 0) {  /* check x[j] array on PE 0 */
        for(j=1 ; j<n_pes; j++) {
            if (x[j-1] != (long) ITER)
                fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n",
                        my_pe, n_pes, j-1, x[j-1], (long) ITER);
        }
    }

    shmem_barrier_all();

    /* Release the symmetric allocation; every PE reaches this call. */
    shmem_free(x);

#ifdef NEEDS_FINALIZE
    shmem_finalize();
#endif
    return 0;
}
开发者ID:openshmem-org,项目名称:tests-cray,代码行数:65,代码来源:shmem_lock_set_clear.c
示例13: main
int
main(int argc, char **argv)
{
int i,ps,ps_cnt=2;
int *target;
int *source;
int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
char *pgm;
double start_time, time_taken;
shmem_init();
me = shmem_my_pe();
npes = shmem_n_pes();
if ((pgm=strrchr(argv[0],'/')))
pgm++;
else
pgm = argv[0];
while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) {
switch (i)
{
case 'v':
Verbose++;
break;
case 'e':
if ((elements = atoi_scaled(optarg)) <= 0) {
fprintf(stderr,"ERR: Bad elements count %d\n",elements);
shmem_finalize();
return 1;
}
break;
case 'l':
if ((loops = atoi_scaled(optarg)) <= 0) {
fprintf(stderr,"ERR: Bad loop count %d\n",loops);
shmem_finalize();
return 1;
}
break;
case 'p':
if ((ps_cnt = atoi_scaled(optarg)) <= 0) {
fprintf(stderr,"ERR: Bad pSync[] elements %d\n",loops);
shmem_finalize();
return 1;
}
break;
case 's':
Serialize++;
break;
case 'h':
if (me == 0)
usage(pgm);
return 0;
default:
if (me == 0) {
fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
usage(pgm);
}
shmem_finalize();
return 1;
}
}
ps_cnt *= _SHMEM_BCAST_SYNC_SIZE;
pSync = shmem_malloc( ps_cnt * sizeof(long) );
for (i = 0; i < ps_cnt; i++)
pSync[i] = _SHMEM_SYNC_VALUE;
source = (int *) shmem_malloc( elements * sizeof(*source) );
target = (int *) shmem_malloc( elements * sizeof(*target) );
for (i = 0; i < elements; i += 1) {
source[i] = i + 1;
target[i] = -90;
}
if (me==0 && Verbose)
fprintf(stderr,"ps_cnt %d loops %d nElems %d\n",
ps_cnt,loops,elements);
shmem_barrier_all();
for(time_taken = 0.0, ps = i = 0; i < loops; i++) {
start_time = shmemx_wtime();
shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]);
if (Serialize) shmem_barrier_all();
time_taken += (shmemx_wtime() - start_time);
if (ps_cnt > 1 ) {
ps += _SHMEM_BCAST_SYNC_SIZE;
if ( ps >= ps_cnt ) ps = 0;
}
}
if(me == 0 && Verbose) {
//.........这里部分代码省略.........
开发者ID:stjohnt,项目名称:sandia-shmem,代码行数:101,代码来源:bcast_flood.c
示例14: main
//.........这里部分代码省略.........
printf
(" The *best* time for each kernel (excluding the first iteration)\n");
printf (" will be used to compute the reported bandwidth.\n");
printf ("Number of SHMEM PEs requested = %i\n", _world_size);
}
int blocksize = 10000;
assert (STREAM_ARRAY_SIZE % blocksize == 0);
// do something really minor
/* Get initial value for system clock. */
for (j = 0; j < STREAM_ARRAY_SIZE; j++)
{
a[j] = 1.0;
b[j] = 2.0;
c[j] = 0.0;
}
printf (HLINE);
if (_world_rank == 0)
{
if ((quantum = checktick ()) >= 1)
printf ("Your clock granularity/precision appears to be "
"%d microseconds.\n", quantum);
else
{
printf ("Your clock granularity appears to be "
"less than one microsecond.\n");
quantum = 1;
}
}
shmem_barrier_all ();
// assign fixed iterations per PE
// since we know default STREAM array size
// we are hardcoding this, but if the value
// changes, then this blocking factor must
// also change
// basically, each PE works on this block
// size at a time
time_start = mysecond ();
/* Initialize */
next_p = shmem_int_fadd (&gcounter, 1, ROOT);
for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize)
{
if (next_p == count_p)
{
for (i = j; i < (j + blocksize); i++)
{
a[i] = 2.0E0 * a[i];
}
next_p = shmem_int_fadd (&gcounter, 1, ROOT);
}
count_p++;
}
time_end = mysecond ();
clock_time_PE = time_end - time_start;
shmem_double_sum_to_all (&total_clock_time, &clock_time_PE, 1,
0, 0, _world_size, pWrk0, pSync0);
if (_world_rank == 0)
{
printf ("Each test below will take on the order"
开发者ID:jeffhammond,项目名称:oshmpi,代码行数:67,代码来源:stream_shmptr.c
示例15: main
int main(int argc, char *argv[]){
int i,n,next_pivot, pivot;
long pSync[_SHMEM_BCAST_SYNC_SIZE];
for (i=0; i < SHMEM_BCAST_SYNC_SIZE; i++) {
pSync[i] = _SHMEM_SYNC_VALUE;
}
start_pes(0);
me = shmem_my_pe();
npes = shmem_n_pes();
shmem_barrier_all();
srand (me+time(NULL));
N = atoi(argv[1]);
//int *nelems = (int*) shmalloc(sizeof(int));
//int *nelems_import= (int*) shmalloc(sizeof(int));;
printf("%d: Size = %d with np=%d\n",me,N,npes);
A = (int *)shmalloc((N/npes)*sizeof(int));
temp_arr = (int *)shmalloc((N/npes)*sizeof(int));
if(A==NULL){
printf("\nOut of memory");
return 1;
}
n= N/npes;
i=0;
while(i<N/npes){
A[i] = rand()%(10000-0);
i++;
}
printf("\nprocess %d elements:",me);
for(i=0;i<(N/npes);i++){
printf("%d, ", A[i]);
}
next_pivot = A[0];
//the step two of algo.....broadcast the new pivot
shmem_broadcast32(&next_pivot,A,1,0,0,0,npes,pSync);
shmem_barrier_all();
pivot = quicksort(A, 0, n-1);
printf("Process %d the pivot:%d",me, pivot);
shmem_barrier_all(); //just for the sake of clear display...can be removed in the end
printf("\nThe sorted list is of process %d: ",me);
for(i=0;i<n;i++){
printf("%d, ",A[i]);
}
printf("\n");
printf("the new pivot of process %d: %d\n",me,next_pivot); // to check the broadcast of new pivots
int check,j; //to check the division of the sorted arrays according to the new pivot.
shmem_barrier_all();
check = uplowPartition(next_pivot);
shmem_barrier_all();
printf("(%d)",me);
for(int j=0;j<N/npes;j++){
printf("%d, ",A[j]);
}
printf("new partition: %d",check);
shmem_barrier_all();
if(me < npes/2)
{
i=0;
// printf("Hello from %d", me);
printf("\n");
for(j=check;j<N/npes;j++){
temp_arr[i] = A[j];
i++;
}
i=0;
printf("(%d)",me);
for(j=check;j<N/npes;j++){
printf("%d, ",temp_arr[i]) ;
i++;
}
// printf("\n");
}
shmem_barrier_all();
if(me >= npes/2)
{
// printf("Hello from %d", me);
printf("\n");
for(j=0;j<check;j++){
temp_arr[j] = A[j];
}
printf("(%d)",me);
for(j=0;j<check;j++){
printf("%d, ",temp_arr[j]) ;
}
// printf("\n");
}
//.........这里部分代码省略.........
开发者ID:rutayanp,项目名称:sorting,代码行数:101,代码来源:quick.c
示例16: main
/* Performance test for shmem_XX_put (latency and bandwidth) */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <shmem.h>
#include <sys/time.h>
/* Put time accumulated in main(); also passed as both target and source of
 * the shmem_longdouble_sum_to_all() call there. */
long double time_taken;
/* Synchronization work array handed to shmem_longdouble_sum_to_all() in main(). */
long pSync[_SHMEM_REDUCE_SYNC_SIZE];
/* Data work array handed to shmem_longdouble_sum_to_all() in main(). */
long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE];
//#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/
int
main(void)
{
int i,j,k;
int *target;
int *source;
int me, npes;
int nxtpe;
struct timeval start, end;
long double start_time,end_time;
int N_ELEMENTS = (4194304*2)/sizeof(int);
start_pes(0);
me = _my_pe();
npes = _num_pes();
for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1)
{
pSync[i] = _SHMEM_SYNC_VALUE;
}
nxtpe = (me+1)%npes;
source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) );
target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) );
if(me == 0)
printf("Put performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n");
for (i = 0; i < N_ELEMENTS; i += 1) {
source[i] = i + 1;
target[i] = -90;
}
shmem_barrier_all();
/*For int put we take average of all the times realized by a pair of PEs, thus
* reducing effects of physical location of PEs*/
for (i=1;i<=N_ELEMENTS;i=i*2)
{
time_taken = 0;
for(j=0;j<10000;j++){
gettimeofday(&start, NULL);
start_time = (start.tv_sec * 1000000.0) + start.tv_usec;
shmem_int_put(target, source, i,nxtpe);
gettimeofday(&end, NULL);
end_time = (end.tv_sec * 1000000.0) + end.tv_usec;
time_taken = time_taken + (end_time - start_time);
}
shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync);
if(me == 0){
time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/
if (i*sizeof(i) < 1048576)
printf("%ld \t\t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i),
(double)time_taken,(double)((i*sizeof(i))/(time_taken)));
else
printf("%ld \t\t\t %lf\t\t\t\t %lf\n",i*sizeof(i),
(double)time_taken,(double)((i*sizeof(i))/(time_taken)));
}
}
shmem_barrier_all();
shfree(target);
shfree(source);
return 0;
}
开发者ID:coti,项目名称:oshmpi,代码行数:89,代码来源:put_performance.c
|
请发表评论