本文整理汇总了 C/C++ 中 ORTE_FLAG_TEST(Open MPI ORTE 运行时里用于测试标志位的宏)的典型用法代码示例。如果您正苦于以下问题:ORTE_FLAG_TEST 的具体用法是什么?ORTE_FLAG_TEST 怎么用?有哪些 ORTE_FLAG_TEST 的使用例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了 ORTE_FLAG_TEST 的 20 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的 C/C++ 代码示例。
示例1: plm_alps_launch_job
/*
 * Launch entry point of the ALPS PLM module.
 *
 * For every app context of the job, PMI_NO_FORK=1 and PMI_NO_PREINITIALIZE=1
 * are appended to the app's environment.  The job is then pushed into the
 * MAP state when it carries ORTE_JOB_FLAG_RESTART, or into the INIT state
 * otherwise.  The function always returns ORTE_SUCCESS.
 *
 * NOTE(review): an earlier comment on this function told maintainers to
 * "ALWAYS jump to cleanup" on error; no such label exists in the function
 * as shown here - confirm whether that guidance still applies.
 */
static int plm_alps_launch_job(orte_job_t *jdata)
{
    orte_app_context_t *app;

    for (int idx = 0; idx < jdata->apps->size; ++idx) {
        int nenv = 0;

        app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, idx);
        if (NULL == app) {
            /* empty slot in the pointer array */
            continue;
        }

        /* count the entries already present in the app's environment */
        if (app->env) {
            while (app->env[nenv]) {
                ++nenv;
            }
        }

        /* disable PMI for the application. this will prevent the pmi
         * library from printing useless warnings */
        opal_argv_append(&nenv, &app->env, "PMI_NO_FORK=1");
        opal_argv_append(&nenv, &app->env, "PMI_NO_PREINITIALIZE=1");
    }

    if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        /* brand new job - start with its setup */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
    } else {
        /* restart - go straight to the mapping stage */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
    }

    return ORTE_SUCCESS;
}
开发者ID:Greatrandom,项目名称:ompi,代码行数:30,代码来源:plm_alps_module.c
示例2: allocation_complete
/*
 * State callback invoked once the allocation for a job is complete.
 *
 * After we allocate, we need to map the processes so we know what nodes
 * will be used.  This callback:
 *   - records ORTE_JOB_STATE_ALLOCATION_COMPLETE on the job,
 *   - sets the ORTE_JOB_NO_VM attribute on the daemon job,
 *   - points the nodes of the node pool at topology entry 0,
 *   - for unmanaged allocations with a slot policy other than "none",
 *     calls orte_plm_base_set_slots() on every node that does not carry
 *     ORTE_NODE_FLAG_SLOTS_GIVEN,
 *   - activates ORTE_JOB_STATE_MAP.
 *
 * fd and args are not used.  cbdata is the orte_state_caddy_t carrying the
 * job; it is released before returning on every path.
 */
static void allocation_complete(int fd, short args, void *cbdata)
{
    orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
    orte_job_t *jdata;
    orte_job_t *daemons;
    orte_topology_t *t;
    orte_node_t *node;
    int i;

    /* acquire the caddy we were handed; it is named "state" in this
     * function, so that is the identifier the macro must be given */
    ORTE_ACQUIRE_OBJECT(state);

    jdata = state->jdata;
    jdata->state = ORTE_JOB_STATE_ALLOCATION_COMPLETE;

    /* get the daemon job object */
    if (NULL == (daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid))) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        goto done;
    }

    /* mark that we are not using a VM */
    orte_set_attribute(&daemons->attributes, ORTE_JOB_NO_VM, ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);

    /* ensure that all nodes point to our topology - we
     * cannot support hetero nodes with this state machine
     */
    t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
    /* NOTE(review): the scan starts at index 1, so pool entry 0 is never
     * reassigned here - presumably it already refers to this topology;
     * confirm against the code that populates the node pool */
    for (i=1; i < orte_node_pool->size; i++) {
        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            continue;
        }
        node->topology = t;
    }

    if (!orte_managed_allocation) {
        /* the comparison is limited to strlen(orte_set_slots) characters,
         * so any leading fragment of "none" also disables slot setting */
        if (NULL != orte_set_slots &&
            0 != strncmp(orte_set_slots, "none", strlen(orte_set_slots))) {
            for (i=0; i < orte_node_pool->size; i++) {
                if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
                    continue;
                }
                /* leave nodes alone whose slot count was explicitly given */
                if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
                    OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
                                         "%s plm:base:setting slots for node %s by %s",
                                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
                    orte_plm_base_set_slots(node);
                }
            }
        }
    }

    /* move to the map stage */
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);

done:
    /* cleanup */
    OBJ_RELEASE(state);
}
开发者ID:anandhis,项目名称:ompi,代码行数:60,代码来源:state_novm.c
示例3: orte_util_encode_nodemap
/*
 * Encode the daemon/node map into a byte object.
 *
 * The payload consists of the daemon job's num_procs followed, for every
 * daemon in the daemon job's proc array, by the daemon's vpid and its node
 * object.
 *
 * @param boptr   byte object that receives the packed payload.  When
 *                "update" is true and the daemon job does not carry
 *                ORTE_JOB_FLAG_UPDATED, it is set to (NULL, 0) instead.
 *                It is left untouched when an error is returned.
 * @param update  true to send only if the daemon job has been updated
 *
 * @return ORTE_SUCCESS on success; ORTE_ERR_NOT_FOUND when the daemon job
 *         cannot be found or a daemon has no node; otherwise the error
 *         code returned by opal_dss.pack().
 *
 * The temporary buffer is destructed on every exit path once it has been
 * constructed, so failed packs no longer leak it.
 */
int orte_util_encode_nodemap(opal_byte_object_t *boptr, bool update)
{
    orte_node_t *node;
    int32_t i;
    int rc;
    opal_buffer_t buf;
    orte_job_t *daemons;
    orte_proc_t *dmn;

    /* look up the daemon job; without it there is nothing we can encode */
    daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
    if (NULL == daemons) {
        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
        return ORTE_ERR_NOT_FOUND;
    }

    /* if the daemon job has not been updated, then there is
     * nothing to send
     */
    if (update && !ORTE_FLAG_TEST(daemons, ORTE_JOB_FLAG_UPDATED)) {
        boptr->bytes = NULL;
        boptr->size = 0;
        return ORTE_SUCCESS;
    }

    /* setup a buffer for tmp use - from here on, every exit goes through
     * "cleanup" so the buffer is always destructed */
    OBJ_CONSTRUCT(&buf, opal_buffer_t);

    /* send the number of nodes */
    if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &daemons->num_procs, 1, ORTE_VPID))) {
        ORTE_ERROR_LOG(rc);
        goto cleanup;
    }

    for (i=0; i < daemons->procs->size; i++) {
        if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, i))) {
            continue;
        }
        /* if the daemon doesn't have a node, that's an error */
        if (NULL == (node = dmn->node)) {
            opal_output(0, "DAEMON %s HAS NO NODE", ORTE_NAME_PRINT(&dmn->name));
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto cleanup;
        }
        /* pack the daemon's vpid */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &dmn->name.vpid, 1, ORTE_VPID))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
        /* pack the node */
        if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &node, 1, ORTE_NODE))) {
            ORTE_ERROR_LOG(rc);
            goto cleanup;
        }
    }

    /* transfer the payload to the byte object */
    opal_dss.unload(&buf, (void**)&boptr->bytes, &boptr->size);
    rc = ORTE_SUCCESS;

cleanup:
    OBJ_DESTRUCT(&buf);
    return rc;
}
开发者ID:situspanesse,项目名称:ompi,代码行数:55,代码来源:nidmap.c
示例4: plm_slurm_launch_job
/*
 * Launch entry point of the SLURM PLM module.
 *
 * Pushes the job into the MAP state when it carries ORTE_JOB_FLAG_RESTART,
 * or into the INIT state otherwise.  Always returns ORTE_SUCCESS.
 *
 * NOTE(review): an earlier comment on this function told maintainers to
 * "ALWAYS jump to cleanup" on error; no such label exists in the function
 * as shown here - confirm whether that guidance still applies.
 */
static int plm_slurm_launch_job(orte_job_t *jdata)
{
    if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        /* brand new job - start with its setup */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_INIT);
    } else {
        /* restart - go straight to the mapping stage */
        ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_MAP);
    }

    return ORTE_SUCCESS;
}
开发者ID:Greatrandom,项目名称:ompi,代码行数:15,代码来源:plm_slurm_module.c
示例5: orte_plm_base_create_jobid
/*
 * Assign a jobid to a job.
 *
 * A job carrying ORTE_JOB_FLAG_RESTART keeps whatever jobid it has and the
 * function returns ORTE_SUCCESS without touching it.  Otherwise the jobid is
 * built from this process's jobid and orte_plm_globals.next_jobid, and the
 * counter is advanced.  When the counter equals UINT16_MAX the job's jobid
 * is set to ORTE_JOBID_INVALID and ORTE_ERR_OUT_OF_RESOURCE is returned.
 */
int orte_plm_base_create_jobid(orte_job_t *jdata)
{
#if 0
    int32_t j;

    /* RHC: WHILE ORTE CAN NOW HANDLE RECYCLING OF JOBID'S,
     * THE MPI LAYER CANNOT SINCE THERE IS NO WAY TO
     * UPDATE THE OMPI_PROC_T LIST AND/OR THE BTL'S
     */

    /* see if there is a prior
     * jobid that has completed and can be re-used. It can
     * never be 0 as that belongs to the HNP and its daemons
     */
    for (j=1; j < orte_job_data->size; j++) {
        if (NULL == opal_pointer_array_get_item(orte_job_data, j)) {
            /* this local jobid is available - reuse it */
            jdata->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid, j);
            return ORTE_SUCCESS;
        }
    }
#endif

    /* only a job that is not being restarted consumes a new jobid */
    if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
        if (UINT16_MAX == orte_plm_globals.next_jobid) {
            /* the counter has hit its ceiling - nothing left to hand out */
            ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
            jdata->jobid = ORTE_JOBID_INVALID;
            return ORTE_ERR_OUT_OF_RESOURCE;
        }

        /* hand out the current value, then advance the counter */
        jdata->jobid = ORTE_CONSTRUCT_LOCAL_JOBID(ORTE_PROC_MY_NAME->jobid, orte_plm_globals.next_jobid);
        orte_plm_globals.next_jobid += 1;
    }

    return ORTE_SUCCESS;
}
开发者ID:Greatrandom,项目名称:ompi,代码行数:45,代码来源:plm_base_jobid.c
示例6: any_live_children
/*****************
* Local Functions
*****************/
/*
 * Report whether any local child of the given job is still alive.
 *
 * Scans orte_local_children and returns true as soon as it finds a child
 * that carries ORTE_PROC_FLAG_ALIVE and whose jobid equals "job"; passing
 * ORTE_JOBID_WILDCARD matches children of every job.  Returns false when
 * no such child exists.
 */
static bool any_live_children(orte_jobid_t job)
{
    orte_proc_t *proc;
    int idx;

    for (idx = 0; idx < orte_local_children->size; ++idx) {
        proc = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, idx);
        if (NULL == proc) {
            /* unused slot */
            continue;
        }

        /* skip children that belong to some other job */
        if (job != proc->name.jobid && ORTE_JOBID_WILDCARD != job) {
            continue;
        }

        if (ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_ALIVE)) {
            return true;
        }
    }

    /* nobody from that job is left alive */
    return false;
}
开发者ID:XuanWang1982,项目名称:ompi,代码行数:23,代码来源:errmgr_default_orted.c
示例7: launch_daemons
static void launch_daemons(int fd, short args, void *cbdata)
{
orte_job_map_t *map;
size_t num_nodes;
char *param;
char **argv = NULL;
int argc;
int rc;
char** env = NULL;
char **nodelist_argv;
char *nodelist;
int nodelist_argc;
char *vpid_string;
int i;
char *cur_prefix;
int proc_vpid_index = 0;
bool failed_launch = true;
orte_app_context_t *app;
orte_node_t *node;
orte_std_cntr_t nnode;
orte_job_t *daemons;
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
orte_job_t *jdata = state->jdata;
/* start by setting up the virtual machine */
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(jdata))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if we don't want to launch, then don't attempt to
* launch the daemons - the user really wants to just
* look at the proposed process map
*/
if (orte_do_not_launch) {
/* set the state to indicate the daemons reported - this
* will trigger the daemons_reported event and cause the
* job to move to the following step
*/
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
return;
}
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:lsf: launching vm",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* Get the map for this job */
if (NULL == (map = daemons->map)) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto cleanup;
}
num_nodes = map->num_new_daemons;
if (0 == num_nodes) {
/* set the state to indicate the daemons reported - this
* will trigger the daemons_reported event and cause the
* job to move to the following step
*/
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:lsf: no new daemons to launch",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(state->jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
return;
}
/* create nodelist */
nodelist_argv = NULL;
nodelist_argc = 0;
for (nnode=0; nnode < map->nodes->size; nnode++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, nnode))) {
continue;
}
/* if the daemon already exists on this node, then
* don't include it
*/
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED)) {
continue;
}
/* otherwise, add it to the list of nodes upon which
* we need to launch a daemon
*/
opal_argv_append(&nodelist_argc, &nodelist_argv, node->name);
}
nodelist = opal_argv_join(nodelist_argv, ',');
/*
* start building argv array
*/
argv = NULL;
argc = 0;
//.........这里部分代码省略.........
开发者ID:artpol84,项目名称:ompi-timings,代码行数:101,代码来源:plm_lsf_module.c
示例8: orte_rmaps_seq_map
/*
* Sequentially map the ranks according to the placement in the
* specified hostfile
*/
static int orte_rmaps_seq_map(orte_job_t *jdata)
{
orte_job_map_t *map;
orte_app_context_t *app;
int i, n;
orte_std_cntr_t j;
opal_list_item_t *item;
orte_node_t *node, *nd;
seq_node_t *sq, *save=NULL, *seq;;
orte_vpid_t vpid;
orte_std_cntr_t num_nodes;
int rc;
opal_list_t default_seq_list;
opal_list_t node_list, *seq_list, sq_list;
orte_proc_t *proc;
mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
char *hosts = NULL, *sep, *eptr;
FILE *fp;
opal_hwloc_resource_type_t rtype;
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base_framework.framework_output,
"%s rmaps:seq called on job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)));
/* this mapper can only handle initial launch
* when seq mapping is desired - allow
* restarting of failed apps
*/
if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: job %s is being restarted - seq cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (NULL != jdata->map->req_mapper) {
if (0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: job %s not using sequential mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
/* we need to process it */
goto process;
}
if (ORTE_MAPPING_SEQ != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
/* I don't know how to do these - defer */
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: job %s not using seq mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
process:
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:seq: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* flag that I did the mapping */
if (NULL != jdata->map->last_mapper) {
free(jdata->map->last_mapper);
}
jdata->map->last_mapper = strdup(c->mca_component_name);
/* convenience def */
map = jdata->map;
/* if there is a default hostfile, go and get its ordered list of nodes */
OBJ_CONSTRUCT(&default_seq_list, opal_list_t);
if (NULL != orte_default_hostfile) {
char *hstname = NULL;
/* open the file */
fp = fopen(orte_default_hostfile, "r");
if (NULL == fp) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto error;
}
while (NULL != (hstname = orte_getline(fp))) {
if (0 == strlen(hstname)) {
free(hstname);
/* blank line - ignore */
continue;
}
if( '#' == hstname[0] ) {
free(hstname);
/* Comment line - ignore */
continue;
}
sq = OBJ_NEW(seq_node_t);
if (NULL != (sep = strchr(hstname, ' '))) {
*sep = '\0';
sep++;
/* remove any trailing space */
eptr = sep + strlen(sep) - 1;
//.........这里部分代码省略.........
开发者ID:jjhursey,项目名称:ompi,代码行数:101,代码来源:rmaps_seq.c
示例9: launch_daemons
/* When working in this function, ALWAYS jump to "cleanup" if
* you encounter an error so that orterun will be woken up and
* the job can cleanly terminate
*/
static void launch_daemons(int fd, short args, void *cbdata)
{
orte_job_map_t *map = NULL;
orte_app_context_t *app;
orte_node_t *node;
int proc_vpid_index;
char *param;
char **env = NULL;
char *var;
char **argv = NULL;
char **nodeargv;
int argc = 0;
int rc;
orte_std_cntr_t i;
char *bin_base = NULL, *lib_base = NULL;
tm_event_t *tm_events = NULL;
tm_task_id *tm_task_ids = NULL;
bool failed_launch = true;
mode_t current_umask;
char *nodelist;
char* vpid_string;
orte_job_t *daemons, *jdata;
orte_state_caddy_t *state = (orte_state_caddy_t*)cbdata;
int32_t launchid, *ldptr;
char *prefix_dir = NULL;
jdata = state->jdata;
/* if we are launching debugger daemons, then just go
* do it - no new daemons will be launched
*/
if (ORTE_FLAG_TEST(state->jdata, ORTE_JOB_FLAG_DEBUGGER_DAEMON)) {
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
return;
}
/* setup the virtual machine */
daemons = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
if (ORTE_SUCCESS != (rc = orte_plm_base_setup_virtual_machine(jdata))) {
ORTE_ERROR_LOG(rc);
goto cleanup;
}
/* if we don't want to launch, then don't attempt to
* launch the daemons - the user really wants to just
* look at the proposed process map
*/
if (orte_do_not_launch) {
/* set the state to indicate the daemons reported - this
* will trigger the daemons_reported event and cause the
* job to move to the following step
*/
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
return;
}
/* Get the map for this job */
if (NULL == (map = daemons->map)) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
rc = ORTE_ERR_NOT_FOUND;
goto cleanup;
}
if (0 == map->num_new_daemons) {
/* set the state to indicate the daemons reported - this
* will trigger the daemons_reported event and cause the
* job to move to the following step
*/
jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_REPORTED);
OBJ_RELEASE(state);
return;
}
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
"%s plm:tm: launching vm",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* Allocate a bunch of TM events to use for tm_spawn()ing */
tm_events = malloc(sizeof(tm_event_t) * map->num_new_daemons);
if (NULL == tm_events) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
tm_task_ids = malloc(sizeof(tm_task_id) * map->num_new_daemons);
if (NULL == tm_task_ids) {
rc = ORTE_ERR_OUT_OF_RESOURCE;
ORTE_ERROR_LOG(rc);
goto cleanup;
}
//.........这里部分代码省略.........
开发者ID:Greatrandom,项目名称:ompi,代码行数:101,代码来源:plm_tm_module.c
示例10: sample
static void sample(orcm_sensor_sampler_t *sampler)
{
opal_pstats_t *stats;
opal_node_stats_t *nstats;
int rc, i;
orte_proc_t *child;
opal_buffer_t buf, *bptr;
char *comp;
OPAL_OUTPUT_VERBOSE((1, orcm_sensor_base_framework.framework_output,
"sample:resusage sampling resource usage"));
/* setup a buffer for our stats */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
/* pack our name */
comp = strdup("resusage");
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &comp, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return;
}
free(comp);
/* update stats on ourself and the node */
stats = OBJ_NEW(opal_pstats_t);
nstats = OBJ_NEW(opal_node_stats_t);
if (ORCM_SUCCESS != (rc = opal_pstat.query(orte_process_info.pid, stats, nstats))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(stats);
OBJ_RELEASE(nstats);
OBJ_DESTRUCT(&buf);
return;
}
/* the stats framework can't know nodename or rank */
strncpy(stats->node, orte_process_info.nodename, (OPAL_PSTAT_MAX_STRING_LEN - 1));
stats->rank = ORTE_PROC_MY_NAME->vpid;
#if 0
/* locally save the stats */
if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&my_proc->stats, stats))) {
OBJ_RELEASE(st);
}
if (NULL != (nst = (opal_node_stats_t*)opal_ring_buffer_push(&my_node->stats, nstats))) {
/* release the popped value */
OBJ_RELEASE(nst);
}
#endif
/* pack them */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &orte_process_info.nodename, 1, OPAL_STRING))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &nstats, 1, OPAL_NODE_STAT))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return;
}
/* loop through our children and update their stats */
if (NULL != orte_local_children) {
for (i=0; i < orte_local_children->size; i++) {
if (NULL == (child = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i))) {
continue;
}
if (!ORTE_FLAG_TEST(child, ORTE_PROC_FLAG_ALIVE)) {
continue;
}
if (0 == child->pid) {
/* race condition */
continue;
}
stats = OBJ_NEW(opal_pstats_t);
if (ORCM_SUCCESS != opal_pstat.query(child->pid, stats, NULL)) {
/* may hit a race condition where the process has
* terminated, so just ignore any error
*/
OBJ_RELEASE(stats);
continue;
}
/* the stats framework can't know nodename or rank */
strncpy(stats->node, orte_process_info.nodename, (OPAL_PSTAT_MAX_STRING_LEN - 1));
stats->rank = child->name.vpid;
#if 0
/* store it */
if (NULL != (st = (opal_pstats_t*)opal_ring_buffer_push(&child->stats, stats))) {
OBJ_RELEASE(st);
}
#endif
/* pack them */
if (OPAL_SUCCESS != (rc = opal_dss.pack(&buf, &stats, 1, OPAL_PSTAT))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&buf);
return;
//.........这里部分代码省略.........
开发者ID:forzaclaudio,项目名称:orcm,代码行数:101,代码来源:sensor_resusage.c
示例11: orte_state_base_check_all_complete
void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
{
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_job_t *jdata = caddy->jdata;
orte_proc_t *proc;
int i;
orte_std_cntr_t j;
orte_job_t *job;
orte_node_t *node;
orte_job_map_t *map;
orte_std_cntr_t index;
bool one_still_alive;
orte_vpid_t lowest=0;
int32_t i32, *i32ptr;
opal_output_verbose(2, orte_state_base_framework.framework_output,
"%s state:base:check_job_complete on job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));
if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
/* just check to see if the daemons are complete */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:base:check_job_complete - received NULL job, checking daemons",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
goto CHECK_DAEMONS;
} else {
/* mark the job as terminated, but don't override any
* abnormal termination flags
*/
if (jdata->state < ORTE_JOB_STATE_UNTERMINATED) {
jdata->state = ORTE_JOB_STATE_TERMINATED;
}
}
/* tell the IOF that the job is complete */
if (NULL != orte_iof.complete) {
orte_iof.complete(jdata);
}
i32ptr = &i32;
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32) && !orte_abort_non_zero_exit) {
if (!orte_report_child_jobs_separately || 1 == ORTE_LOCAL_JOBID(jdata->jobid)) {
/* update the exit code */
ORTE_UPDATE_EXIT_STATUS(lowest);
}
/* warn user */
opal_output(orte_clean_output,
"-------------------------------------------------------\n"
"While %s job %s terminated normally, %d %s. Further examination may be required.\n"
"-------------------------------------------------------",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "the primary" : "child",
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
i32, (1 == i32) ? "process returned\na non-zero exit code." :
"processes returned\nnon-zero exit codes.");
}
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:base:check_job_completed declared job %s terminated with state %s - checking all jobs",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(jdata->state)));
/* if this job is a continuously operating one, then don't do
* anything further - just return here
*/
if (NULL != jdata &&
(orte_get_attribute(&jdata->attributes, ORTE_JOB_CONTINUOUS_OP, NULL, OPAL_BOOL) ||
ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RECOVERABLE))) {
goto CHECK_ALIVE;
}
/* if the job that is being checked is the HNP, then we are
* trying to terminate the orteds. In that situation, we
* do -not- check all jobs - we simply notify the HNP
* that the orteds are complete. Also check special case
* if jdata is NULL - we want
* to definitely declare the job done if the orteds
* have completed, no matter what else may be happening.
* This can happen if a ctrl-c hits in the "wrong" place
* while launching
*/
CHECK_DAEMONS:
if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
if (0 == orte_routed.num_routes()) {
/* orteds are done! */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s orteds complete - exiting",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (NULL == jdata) {
jdata = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
}
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_DAEMONS_TERMINATED);
OBJ_RELEASE(caddy);
return;
}
OBJ_RELEASE(caddy);
return;
//.........这里部分代码省略.........
开发者ID:XuanWang1982,项目名称:ompi,代码行数:101,代码来源:state_base_fns.c
示例12: setup_child
//.........这里部分代码省略.........
}
asprintf(&value, "%lu", (unsigned long) child->node_rank);
opal_setenv("OMPI_COMM_WORLD_NODE_RANK", value, true, &app->env);
/* set an mca param for it too */
opal_setenv("OMPI_MCA_orte_ess_node_rank", value, true, &app->env);
free(value);
/* provide the identifier for the PMIx connection - the
* PMIx connection is made prior to setting the process
* name itself. Although in most cases the ID and the
* process name are the same, it isn't necessarily
* required */
orte_util_convert_process_name_to_string(&value, &child->name);
opal_setenv("PMIX_ID", value, true, &app->env);
free(value);
nrptr = &nrestarts;
if (orte_get_attribute(&child->attributes, ORTE_PROC_NRESTARTS, (void**)&nrptr, OPAL_INT32)) {
/* pass the number of restarts for this proc - will be zero for
* an initial start, but procs would like to know if they are being
* restarted so they can take appropriate action
*/
asprintf(&value, "%d", nrestarts);
opal_setenv("OMPI_MCA_orte_num_restarts", value, true, &app->env);
free(value);
}
/* if the proc should not barrier in orte_init, tell it */
if (orte_get_attribute(&child->attributes, ORTE_PROC_NOBARRIER, NULL, OPAL_BOOL)
|| 0 < nrestarts) {
opal_setenv("OMPI_MCA_orte_do_not_barrier", "1", true, &app->env);
}
/* if we are using staged execution, tell it */
if (orte_staged_execution) {
opal_setenv("OMPI_MCA_orte_staged_execution", "1", true, &app->env);
}
/* if the proc isn't going to forward IO, then we need to flag that
* it has "completed" iof termination as otherwise it will never fire
*/
if (!ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_IOF_COMPLETE);
}
/* construct the proc's session dir name */
if (NULL != orte_process_info.tmpdir_base) {
value = strdup(orte_process_info.tmpdir_base);
} else {
value = NULL;
}
param = NULL;
if (ORTE_SUCCESS != (rc = orte_session_dir_get_name(¶m, &value, NULL,
orte_process_info.nodename,
NULL, &child->name))) {
ORTE_ERROR_LOG(rc);
if (NULL != value) {
free(value);
}
return rc;
}
free(value);
/* pass an envar so the proc can find any files it had prepositioned */
opal_setenv("OMPI_FILE_LOCATION", param, true, &app->env);
/* if the user wanted the cwd to be the proc's session dir, then
* switch to that location now
*/
if (orte_get_attribute(&app->attributes, ORTE_APP_SSNDIR_CWD, NULL, OPAL_BOOL)) {
/* create the session dir - may not exist */
if (OPAL_SUCCESS != (rc = opal_os_dirpath_create(param, S_IRWXU))) {
ORTE_ERROR_LOG(rc);
/* doesn't exist with correct permissions, and/or we can't
* create it - either way, we are done
*/
free(param);
return rc;
}
/* change to it */
if (0 != chdir(param)) {
free(param);
return ORTE_ERROR;
}
/* It seems that chdir doesn't
* adjust the $PWD enviro variable when it changes the directory. This
* can cause a user to get a different response when doing getcwd vs
* looking at the enviro variable. To keep this consistent, we explicitly
* ensure that the PWD enviro variable matches the CWD we moved to.
*
* NOTE: if a user's program does a chdir(), then $PWD will once
* again not match getcwd! This is beyond our control - we are only
* ensuring they start out matching.
*/
opal_setenv("PWD", param, true, &app->env);
/* update the initial wdir value too */
opal_setenv("OMPI_MCA_initial_wdir", param, true, &app->env);
}
free(param);
return ORTE_SUCCESS;
}
开发者ID:AT95,项目名称:ompi,代码行数:101,代码来源:schizo_ompi.c
示例13: do_parent
static int do_parent(orte_app_context_t* context,
orte_proc_t *child,
char **environ_copy,
orte_job_t *jobdat, int read_fd,
orte_iof_base_io_conf_t opts)
{
int rc;
orte_odls_pipe_err_msg_t msg;
char file[ORTE_ODLS_MAX_FILE_LEN + 1], topic[ORTE_ODLS_MAX_TOPIC_LEN + 1], *str = NULL;
if (NULL != child && ORTE_FLAG_TEST(jobdat, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
/* connect endpoints IOF */
rc = orte_iof_base_setup_parent(&child->name, &opts);
if (ORTE_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
close(read_fd);
if (NULL != child) {
child->state = ORTE_PROC_STATE_UNDEF;
}
return rc;
}
}
/* Block reading a message from the pipe */
while (1) {
rc = opal_fd_read(read_fd, sizeof(msg), &msg);
/* If the pipe closed, then the child successfully launched */
if (OPAL_ERR_TIMEOUT == rc) {
break;
}
/* If Something Bad happened in the read, error out */
if (OPAL_SUCCESS != rc) {
ORTE_ERROR_LOG(rc);
close(read_fd);
if (NULL != child) {
child->state = ORTE_PROC_STATE_UNDEF;
}
return rc;
}
/* Otherwise, we got a warning or error message from the child */
if (NULL != child) {
if (msg.fatal) {
ORTE_FLAG_UNSET(child, ORTE_PROC_FLAG_ALIVE);
} else {
ORTE_FLAG_SET(child, ORTE_PROC_FLAG_ALIVE);
}
}
/* Read in the strings; ensure to terminate them with \0 */
if (msg.file_str_len > 0) {
rc = opal_fd_read(read_fd, msg.file_str_len, file);
if (OPAL_SUCCESS != rc) {
orte_show_help("help-orte-odls-default.txt", "syscall fail",
true,
orte_process_info.nodename, context->app,
"opal_fd_read", __FILE__, __LINE__);
if (NULL != child) {
child->state = ORTE_PROC_STATE_UNDEF;
}
return rc;
}
file[msg.file_str_len] = '\0';
}
if (msg.topic_str_len > 0) {
rc = opal_fd_read(read_fd, msg.topic_str_len, topic);
if (OPAL_SUCCESS != rc) {
orte_show_help("help-orte-odls-default.txt", "syscall fail",
true,
orte_process_info.nodename, context->app,
"opal_fd_read", __FILE__, __LINE__);
if (NULL != child) {
child->state = ORTE_PROC_STATE_UNDEF;
}
return rc;
}
topic[msg.topic_str_len] = '\0';
}
if (msg.msg_str_len > 0) {
str = calloc(1, msg.msg_str_len + 1);
if (NULL == str) {
orte_show_help("help-orte-odls-default.txt", "syscall fail",
true,
orte_process_info.nodename, context->app,
"opal_fd_read", __FILE__, __LINE__);
if (NULL != child) {
child->state = ORTE_PROC_STATE_UNDEF;
}
return rc;
}
rc = opal_fd_read(read_fd, msg.msg_str_len, str);
}
/* Print out what we got. We already have a rendered string,
so use orte_show_help_norender(). */
if (msg.msg_str_len > 0) {
//.........这里部分代码省略.........
开发者ID:ORNL,项目名称:ompi,代码行数:101,代码来源:odls_default_module.c
示例14: hostfile_parse_line
//.........这里部分代码省略.........
} else if (2 == cnt) {
username = argv[0];
node_name = strdup(argv[1]);
} else {
opal_output(0, "WARNING: Unhandled [email protected]\n"); /* XXX */
}
opal_argv_free (argv);
// Strip off the FQDN if present, ignore IP addresses
if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(node_name) ) {
char *ptr;
if (NULL != (ptr = strchr(node_name, '.'))) {
*ptr = '\0';
}
}
/* Do we need to make a new node object? */
if (NULL == (node = hostfile_lookup(updates, node_name))) {
node = OBJ_NEW(orte_node_t);
node->name = node_name;
node->slots = 1;
if (NULL != username) {
orte_set_attribute(&node->attributes, ORTE_NODE_USERNAME, ORTE_ATTR_LOCAL, username, OPAL_STRING);
}
opal_list_append(updates, &node->super);
} else {
/* add a slot */
node->slots++;
free(node_name);
}
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s hostfile: node %s slots %d nodes-given %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots,
ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN) ? "TRUE" : "FALSE"));
/* mark the slots as "given" since we take them as being the
* number specified via the rankfile
*/
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
/* skip to end of line */
while (!orte_util_hostfile_done &&
ORTE_HOSTFILE_NEWLINE != token) {
token = orte_util_hostfile_lex();
}
return ORTE_SUCCESS;
} else {
hostfile_parse_error(token);
return ORTE_ERROR;
}
free(username);
while (!orte_util_hostfile_done) {
token = orte_util_hostfile_lex();
switch (token) {
case ORTE_HOSTFILE_DONE:
goto done;
case ORTE_HOSTFILE_NEWLINE:
goto done;
case ORTE_HOSTFILE_USERNAME:
username = hostfile_parse_string();
if (NULL != username) {
orte_set_attribute(&node->attributes, ORTE_NODE_USERNAME, ORTE_ATTR_LOCAL, username, OPAL_STRING);
free(username);
}
开发者ID:bharatpotnuri,项目名称:ompi,代码行数:67,代码来源:hostfile.c
示例15: do_child
static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
{
int i;
sigset_t sigs;
/* Setup the pipe to be close-on-exec */
opal_fd_set_cloexec(write_fd);
if (NULL != cd->child) {
/* setup stdout/stderr so that any error messages that we
may print out will get displayed back at orterun.
NOTE: Definitely do this AFTER we check contexts so
that any error message from those two functions doesn't
come out to the user. IF we didn't do it in this order,
THEN a user who gives us a bad executable name or
working directory would get N error messages, where
N=num_procs. This would be very annoying for large
jobs, so instead we set things up so that orterun
always outputs a nice, single message indicating what
happened
*/
if (ORTE_SUCCESS != (i = orte_iof_base_setup_child(&cd->opts, &cd->env))) {
ORTE_ERROR_LOG(i);
send_error_show_help(write_fd, 1,
"help-orte-odls-alps.txt",
"iof setup failed",
orte_process_info.nodename, cd->app->app);
/* Does not return */
}
/* now set any child-level controls such as binding */
orte_rtc.set(cd->jdata, cd->child, &cd->env, write_fd);
} else if (!ORTE_FLAG_TEST(cd->jdata, ORTE_JOB_FLAG_FORWARD_OUTPUT)) {
/* tie stdin/out/err/internal to /dev/null */
int fdnull;
for (i=0; i < 3; i++) {
fdnull = open("/dev/null", O_RDONLY, 0);
if (fdnull > i && i != write_fd) {
dup2(fdnull, i);
}
close(fdnull);
}
fdnull = open("/dev/null", O_RDONLY, 0);
if (fdnull > cd->opts.p_internal[1]) {
dup2(fdnull, cd->opts.p_internal[1]);
}
close(fdnull);
}
if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) {
send_error_show_help(write_fd, 1, "help-orte-odls-alps.txt",
"close fds",
orte_process_info.nodename, cd->app->app,
__FILE__, __LINE__);
}
if (cd->argv == NULL) {
cd->argv = malloc(sizeof(char*)*2);
cd->argv[0] = strdup(cd->app->app);
cd->argv[1] = NULL;
}
/* Set signal handlers back to the default. Do this close to
the exev() because the event library may (and likely will)
reset them. If we don't do this, the event library may
have left some set that, at least on some OS's, don't get
reset via fork() or exec(). Hence, the launched process
could be unkillable (for example). */
set_handler_alps(SIGTERM);
set_handler_alps(SIGINT);
set_handler_alps(SIGHUP);
set_handler_alps(SIGPIPE);
set_handler_alps(SIGCHLD);
/* Unblock all signals, for many of the same reasons that we
set the default handlers, above. This is noticable on
Linux where the event library blocks SIGTERM, but we don't
want that blocked by the launched process. */
sigprocmask(0, 0, &sigs);
sigprocmask(SIG_UNBLOCK, &sigs, 0);
/* Exec the new executable */
if (10 < opal_output_get_verbosity(orte_odls_base_framework.framework_output)) {
int jout;
opal_output(0, "%s STARTING %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), cd->app->app);
for (jout=0; NULL != cd->argv[jout]; jout++) {
opal_output(0, "%s\tARGV[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, cd->argv[jout]);
}
for (jout=0; NULL != cd->env[jout]; jout++) {
opal_output(0, "%s\tENVIRON[%d]: %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), jout, cd->env[jout]);
}
}
execve(cd->app->app, cd->argv, cd->env);
send_error_show_help(write_fd, 1,
//.........这里部分代码省略.........
开发者ID:sjeaugey,项目名称:ompi,代码行数:101,代码来源:odls_alps_module.c
示例16: check_complete
//.........这里部分代码省略.........
/* just check to see if the daemons are complete */
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
"%s state:dvm:check_job_complet
|
请发表评论