本文整理汇总了C++中opal_output_verbose函数的典型用法代码示例。如果您正苦于以下问题:C++ opal_output_verbose函数的具体用法?C++ opal_output_verbose怎么用?C++ opal_output_verbose使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了opal_output_verbose函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: component_shutdown
/*
 * Shutdown entry point of this component.
 *
 * The only work done here is to emit a level-2 verbose trace on the OOB
 * base framework's output stream, prefixed with this process's printable
 * name.  No state is released or modified.
 */
static void component_shutdown(void)
{
    const int stream = orte_oob_base_framework.framework_output;

    opal_output_verbose(2, stream, "%s ALPS SHUTDOWN",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
}
开发者ID:davideberius,项目名称:ompi,代码行数:6,代码来源:oob_alps_component.c
示例2: setup_fork
static int setup_fork(orte_job_t *jdata,
orte_app_context_t *app)
{
int i;
char *param;
bool oversubscribed;
orte_node_t *node;
char **envcpy, **nps, **firstranks;
char *npstring, *firstrankstring;
char *num_app_ctx;
bool takeus = false;
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
"%s schizo:ompi: setup_fork",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
if (NULL != orte_schizo_base.personalities) {
/* see if we are included */
for (i=0; NULL != jdata->personality[i]; i++) {
if (0 == strcmp(jdata->personality[i], "ompi")) {
takeus = true;
break;
}
}
if (!takeus) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
}
/* see if the mapper thinks we are oversubscribed */
oversubscribed = false;
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, ORTE_PROC_MY_NAME->vpid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
return ORTE_ERR_NOT_FOUND;
}
if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_OVERSUBSCRIBED)) {
oversubscribed = true;
}
/* setup base environment: copy the current environ and merge
in the app context environ */
if (NULL != app->env) {
/* manually free original context->env to avoid a memory leak */
char **tmp = app->env;
envcpy = opal_environ_merge(orte_launch_environ, app->env);
if (NULL != tmp) {
opal_argv_free(tmp);
}
} else {
envcpy = opal_argv_copy(orte_launch_environ);
}
app->env = envcpy;
/* special case handling for --prefix: this is somewhat icky,
but at least some users do this. :-\ It is possible that
when using --prefix, the user will also "-x PATH" and/or
"-x LD_LIBRARY_PATH", which would therefore clobber the
work that was done in the prior pls to ensure that we have
the prefix at the beginning of the PATH and
LD_LIBRARY_PATH. So examine the context->env and see if we
find PATH or LD_LIBRARY_PATH. If found, that means the
prior work was clobbered, and we need to re-prefix those
variables. */
param = NULL;
orte_get_attribute(&app->attributes, ORTE_APP_PREFIX_DIR, (void**)¶m, OPAL_STRING);
for (i = 0; NULL != param && NULL != app->env && NULL != app->env[i]; ++i) {
char *newenv;
/* Reset PATH */
if (0 == strncmp("PATH=", app->env[i], 5)) {
asprintf(&newenv, "%s/bin:%s", param, app->env[i] + 5);
opal_setenv("PATH", newenv, true, &app->env);
free(newenv);
}
/* Reset LD_LIBRARY_PATH */
else if (0 == strncmp("LD_LIBRARY_PATH=", app->env[i], 16)) {
asprintf(&newenv, "%s/lib:%s", param, app->env[i] + 16);
opal_setenv("LD_LIBRARY_PATH", newenv, true, &app->env);
free(newenv);
}
}
if (NULL != param) {
free(param);
}
/* pass my contact info to the local proc so we can talk */
opal_setenv("OMPI_MCA_orte_local_daemon_uri", orte_process_info.my_daemon_uri, true, &app->env);
/* pass the hnp's contact info to the local proc in case it
* needs it
*/
if (NULL != orte_process_info.my_hnp_uri) {
opal_setenv("OMPI_MCA_orte_hnp_uri", orte_process_info.my_hnp_uri, true, &app->env);
}
/* setup yield schedule - do not override any user-supplied directive! */
if (oversubscribed) {
opal_setenv("OMPI_MCA_mpi_yield_when_idle", "1", false, &app->env);
} else {
//.........这里部分代码省略.........
开发者ID:mwatt,项目名称:ompi,代码行数:101,代码来源:schizo_ompi.c
示例3: ompi_mtl_portals4_recv_progress
/* called when a receive should be progressed */
static int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
ompi_mtl_portals4_base_request_t* ptl_base_request)
{
int ret;
ompi_mtl_portals4_recv_request_t* ptl_request =
(ompi_mtl_portals4_recv_request_t*) ptl_base_request;
size_t msg_length = 0;
ptl_match_bits_t read_match_bits;
/* as soon as we've seen any event associated with a request, it's
started */
ptl_request->req_started = true;
switch (ev->type) {
case PTL_EVENT_PUT:
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) got put event",
ptl_request->opcount, ev->hdr_data));
if (ev->ni_fail_type != PTL_NI_OK) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
__FILE__, __LINE__, ev->ni_fail_type);
goto callback_error;
}
ptl_request->me_h = PTL_INVALID_HANDLE;
msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
MTL_PORTALS4_GET_SOURCE(ev->match_bits);
ptl_request->super.super.ompi_req->req_status.MPI_TAG =
MTL_PORTALS4_GET_TAG(ev->match_bits);
if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"truncate expected: %ld %ld",
msg_length, ptl_request->delivery_len);
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
}
#if OPAL_ENABLE_DEBUG
ptl_request->hdr_data = ev->hdr_data;
#endif
if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
/* If it's not a short message and we're doing rndv, we
only have the first part of the message. Issue the get
to pull the second part of the message. */
if (ptl_request->is_triggered) {
ptl_request->super.super.ompi_req->req_status._ucount = 0;
}
else {
ptl_request->super.super.ompi_req->req_status._ucount = ompi_mtl_portals4.eager_limit;
MTL_PORTALS4_SET_READ_BITS(read_match_bits,
MTL_PORTALS4_GET_CONTEXT(ev->match_bits),
MTL_PORTALS4_GET_TAG(ev->match_bits));
ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
((msg_length > ptl_request->delivery_len) ?
ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
ev->initiator,
read_match_bits,
ompi_mtl_portals4.eager_limit,
ptl_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
goto callback_error;
}
}
} else {
/* If we're either using the eager protocol or were a
short message, all data has been received, so complete
the message. */
ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
ev->start,
ev->mlength);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: ompi_mtl_datatype_unpack failed: %d",
__FILE__, __LINE__, ret);
ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
}
ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) completed, expected",
ptl_request->opcount, ptl_request->hdr_data));
ptl_request->super.super.completion_callback(&ptl_request->super.super);
}
break;
case PTL_EVENT_REPLY:
OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
"Recv %lu (0x%lx) got reply event",
ptl_request->opcount, ptl_request->hdr_data));
//.........这里部分代码省略.........
开发者ID:AT95,项目名称:ompi,代码行数:101,代码来源:mtl_portals4_recv.c
示例4: initialize
static int initialize(int argc, char *argv[])
{
int ret, exit_status = OPAL_SUCCESS;
char * tmp_env_var = NULL;
/*
* Make sure to init util before parse_args
* to ensure installdirs is setup properly
* before calling mca_base_open();
*/
if( OPAL_SUCCESS != (ret = opal_init_util(&argc, &argv)) ) {
return ret;
}
/*
* Parse Command line arguments
*/
if (OPAL_SUCCESS != (ret = parse_args(argc, argv))) {
exit_status = ret;
goto cleanup;
}
/*
* Setup OPAL Output handle from the verbose argument
*/
if( opal_restart_globals.verbose ) {
opal_restart_globals.output = opal_output_open(NULL);
opal_output_set_verbosity(opal_restart_globals.output, 10);
} else {
opal_restart_globals.output = 0; /* Default=STDOUT */
}
/*
* Turn off the selection of the CRS component,
* we need to do that later
*/
(void) mca_base_var_env_name("crs_base_do_not_select", &tmp_env_var);
opal_setenv(tmp_env_var,
"1", /* turn off the selection */
true, &environ);
free(tmp_env_var);
tmp_env_var = NULL;
/*
* Make sure we select the proper compress component.
*/
if( NULL != opal_restart_globals.snapshot_compress ) {
(void) mca_base_var_env_name("compress", &tmp_env_var);
opal_setenv(tmp_env_var,
opal_restart_globals.snapshot_compress,
true, &environ);
free(tmp_env_var);
tmp_env_var = NULL;
}
/*
* Initialize the OPAL layer
*/
if (OPAL_SUCCESS != (ret = opal_init(&argc, &argv))) {
exit_status = ret;
goto cleanup;
}
/*
* If the checkpoint was compressed, then decompress it before continuing
*/
if( NULL != opal_restart_globals.snapshot_compress ) {
char * zip_dir = NULL;
char * tmp_str = NULL;
/* Make sure to clear the selection for the restart,
* this way the user can swich compression mechanism
* across restart
*/
(void) mca_base_var_env_name("compress", &tmp_env_var);
opal_unsetenv(tmp_env_var, &environ);
free(tmp_env_var);
tmp_env_var = NULL;
opal_asprintf(&zip_dir, "%s/%s%s",
opal_restart_globals.snapshot_loc,
opal_restart_globals.snapshot_ref,
opal_restart_globals.snapshot_compress_postfix);
if (0 > (ret = access(zip_dir, F_OK)) ) {
opal_output(opal_restart_globals.output,
"Error: Unable to access the file [%s]!",
zip_dir);
exit_status = OPAL_ERROR;
goto cleanup;
}
opal_output_verbose(10, opal_restart_globals.output,
"Decompressing (%s)",
zip_dir);
opal_compress.decompress(zip_dir, &tmp_str);
if( NULL != zip_dir ) {
free(zip_dir);
//.........这里部分代码省略.........
开发者ID:bosilca,项目名称:ompi,代码行数:101,代码来源:opal-restart.c
示例5: parse_cli
static int parse_cli(int argc, int start, char **argv)
{
int i, j, k;
bool ignore;
char *no_dups[] = {
"grpcomm",
"odls",
"rml",
"routed",
NULL
};
bool takeus = false;
opal_output_verbose(1, orte_schizo_base_framework.framework_output,
"%s schizo:ompi: parse_cli",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* if they gave us a list of personalities,
* see if we are included */
if (NULL != orte_schizo_base.personalities) {
for (i=0; NULL != orte_schizo_base.personalities[i]; i++) {
if (0 == strcmp(orte_schizo_base.personalities[i], "ompi")) {
takeus = true;
break;
}
}
if (!takeus) {
return ORTE_ERR_TAKE_NEXT_OPTION;
}
} else {
/* attempt to auto-detect CLI options that
* we recognize */
}
for (i = 0; i < (argc-start); ++i) {
if (0 == strcmp("-mca", argv[i]) ||
0 == strcmp("--mca", argv[i]) ) {
/* ignore this one */
if (0 == strcmp(argv[i+1], "mca_base_env_list")) {
i += 2;
continue;
}
/* It would be nice to avoid increasing the length
* of the orted cmd line by removing any non-ORTE
* params. However, this raises a problem since
* there could be OPAL directives that we really
* -do- want the orted to see - it's only the OMPI
* related directives we could ignore. This becomes
* a very complicated procedure, however, since
* the OMPI mca params are not cleanly separated - so
* filtering them out is nearly impossible.
*
* see if this is already present so we at least can
* avoid growing the cmd line with duplicates
*/
ignore = false;
if (NULL != orted_cmd_line) {
for (j=0; NULL != orted_cmd_line[j]; j++) {
if (0 == strcmp(argv[i+1], orted_cmd_line[j])) {
/* already here - if the value is the same,
* we can quitely ignore the fact that they
* provide it more than once. However, some
* frameworks are known to have problems if the
* value is different. We don't have a good way
* to know this, but we at least make a crude
* attempt here to protect ourselves.
*/
if (0 == strcmp(argv[i+2], orted_cmd_line[j+1])) {
/* values are the same */
ignore = true;
break;
} else {
/* values are different - see if this is a problem */
for (k=0; NULL != no_dups[k]; k++) {
if (0 == strcmp(no_dups[k], argv[i+1])) {
/* print help message
* and abort as we cannot know which one is correct
*/
orte_show_help("help-orterun.txt", "orterun:conflicting-params",
true, orte_basename, argv[i+1],
argv[i+2], orted_cmd_line[j+1]);
return ORTE_ERR_BAD_PARAM;
}
}
/* this passed muster - just ignore it */
ignore = true;
break;
}
}
}
}
if (!ignore) {
opal_argv_append_nosize(&orted_cmd_line, argv[i]);
opal_argv_append_nosize(&orted_cmd_line, argv[i+1]);
opal_argv_append_nosize(&orted_cmd_line, argv[i+2]);
}
i += 2;
}
}
return ORTE_SUCCESS;
//.........这里部分代码省略.........
开发者ID:mwatt,项目名称:ompi,代码行数:101,代码来源:schizo_ompi.c
示例6: read_bytes
/*
 * Drain the pending portion of peer->recv_msg from the peer's socket.
 *
 * Keeps calling read() on peer->sd until recv_msg->rdbytes reaches zero,
 * advancing recv_msg->rdptr by the amount obtained each time.
 *
 * Returns:
 *   ORTE_SUCCESS            - the whole block was read
 *   ORTE_ERR_RESOURCE_BUSY  - read() failed with EAGAIN
 *   ORTE_ERR_WOULD_BLOCK    - read() failed with EWOULDBLOCK, or the peer
 *                             closed the connection (read() returned 0)
 *   ORTE_ERR_COMM_FAILURE   - any other read() error
 *
 * On peer close, all active events on the peer are deleted, the in-flight
 * receive message is released and the peer is closed before returning.
 * No exception callback is invoked from this function.
 */
static int read_bytes(mca_oob_usock_peer_t* peer)
{
    int nread;

    while (peer->recv_msg->rdbytes > 0) {
        nread = read(peer->sd, peer->recv_msg->rdptr, peer->recv_msg->rdbytes);

        if (nread > 0) {
            /* made progress: account for what arrived and keep going */
            peer->recv_msg->rdbytes -= nread;
            peer->recv_msg->rdptr += nread;
            continue;
        }

        if (0 == nread) {
            /* end-of-stream: the remote side closed the connection */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: peer closed connection",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)));

            /* tear down every event that is still armed for this peer */
            if (peer->recv_ev_active) {
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            if (peer->send_ev_active) {
                opal_event_del(&peer->send_event);
                peer->send_ev_active = false;
            }

            /* drop the partially received message */
            if (NULL != peer->recv_msg) {
                OBJ_RELEASE(peer->recv_msg);
                peer->recv_msg = NULL;
            }

            mca_oob_usock_peer_close(peer);
            return ORTE_ERR_WOULD_BLOCK;
        }

        /* nread < 0: classify the failure */
        if (EINTR == opal_socket_errno) {
            /* interrupted by a signal - simply retry */
            continue;
        }
        if (EAGAIN == opal_socket_errno) {
            /* socket is busy: the caller keeps the message active and
             * lets the event library cycle so other messages progress */
            return ORTE_ERR_RESOURCE_BUSY;
        }
        if (EWOULDBLOCK == opal_socket_errno) {
            /* same handling as above, reported with a distinct code */
            return ORTE_ERR_WOULD_BLOCK;
        }

        /* unrecoverable error: log it and tell the caller to abort
         * this message */
        opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                            "%s-%s mca_oob_usock_msg_recv: readv failed: %s (%d)",
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                            ORTE_NAME_PRINT(&(peer->name)),
                            strerror(opal_socket_errno),
                            opal_socket_errno);
        return ORTE_ERR_COMM_FAILURE;
    }

    /* nothing left to read: the full data block has been received */
    return ORTE_SUCCESS;
}
开发者ID:00datman,项目名称:ompi,代码行数:77,代码来源:oob_usock_sendrecv.c
示例7: mca_base_component_find
/*
 * Function to find as many components of a given type as possible.  This
 * includes statically-linked in components as well as opening up a
 * directory and looking for shared-library MCA components of the
 * appropriate type (load them if available).
 *
 * Parameters:
 *   directory                 - passed through to find_dyn_components()
 *   type                      - framework type name (used for lookup and
 *                               in diagnostics)
 *   static_components         - NULL-terminated array of statically linked
 *                               components
 *   requested_component_names - NULL-terminated list of names used for
 *                               filtering (may be NULL)
 *   include_mode              - forwarded to use_component(); when true,
 *                               every requested name must be found
 *   found_components          - list that is constructed here and filled
 *                               with mca_base_component_list_item_t items
 *   open_dso_components       - whether dynamic components may be searched
 *
 * Returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE if a list item cannot be
 * allocated, or OPAL_ERR_NOT_FOUND if include_mode is set and a requested
 * component is missing (a help message is shown in that case).
 */
int mca_base_component_find(const char *directory, const char *type,
                            const mca_base_component_t *static_components[],
                            char **requested_component_names,
                            bool include_mode,
                            opal_list_t *found_components,
                            bool open_dso_components)
{
    int i;
    opal_list_item_t *item;
    mca_base_component_list_item_t *cli;

    /* Find all the components that were statically linked in */
    OBJ_CONSTRUCT(found_components, opal_list_t);
    for (i = 0; NULL != static_components[i]; ++i) {
        if ( use_component(include_mode,
                           (const char**)requested_component_names,
                           static_components[i]->mca_component_name) ) {
            cli = OBJ_NEW(mca_base_component_list_item_t);
            if (NULL == cli) {
                return OPAL_ERR_OUT_OF_RESOURCE;
            }
            cli->cli_component = static_components[i];
            opal_list_append(found_components, (opal_list_item_t *) cli);
        }
    }

#if OMPI_WANT_LIBLTDL
    /* Find any available dynamic components in the specified directory */
    if (open_dso_components) {
        int param;
        /* Start from 0 so that a failed lookup leaves a defined value
           (dlopen not disabled) instead of reading an indeterminate int. */
        int param_disable_dlopen = 0;

        param = mca_base_param_find("mca", NULL, "component_disable_dlopen");
        mca_base_param_lookup_int(param, &param_disable_dlopen);

        if (0 == param_disable_dlopen) {
            find_dyn_components(directory, type,
                                (const char**)requested_component_names,
                                include_mode, found_components);
        }
    } else {
        opal_output_verbose(40, 0,
                            "mca: base: component_find: dso loading for %s MCA components disabled",
                            type);
    }
#endif

    /* Ensure that *all* requested components exist.  Print a warning
       and abort if they do not. */
    for (i = 0; include_mode && NULL != requested_component_names &&
             NULL != requested_component_names[i]; ++i) {
        for (item = opal_list_get_first(found_components);
             opal_list_get_end(found_components) != item;
             item = opal_list_get_next(item)) {
            cli = (mca_base_component_list_item_t*) item;
            if (0 == strcmp(requested_component_names[i],
                            cli->cli_component->mca_component_name)) {
                break;
            }
        }

        /* Reaching the end sentinel means the inner search found nothing */
        if (opal_list_get_end(found_components) == item) {
            char h[MAXHOSTNAMELEN];

            /* gethostname() may fail, and on truncation it is not
               required to NUL-terminate; make sure h is always a valid
               string before handing it to the help system. */
            if (0 != gethostname(h, sizeof(h))) {
                h[0] = '\0';
            }
            h[sizeof(h) - 1] = '\0';

            opal_show_help("help-mca-base.txt",
                           "find-available:not-valid", true,
                           h, type, requested_component_names[i]);
            return OPAL_ERR_NOT_FOUND;
        }
    }

    /* All done */
    return OPAL_SUCCESS;
}
开发者ID:hpc,项目名称:cce-mpi-openmpi-1.4.3,代码行数:82,代码来源:mca_base_component_find.c
示例8: compute_weight
/*
 * Score the pairing of a local interface (module) with a remote interface
 * (proc_modex_addr).  Larger return values denote more desirable
 * connections.
 *
 * When UDP is not in use, only same-subnet pairs are usable: they get a
 * weight of 1 and everything else gets WEIGHT_UNREACHABLE.  With UDP, a
 * route lookup is performed; a failed lookup yields 0, otherwise the
 * weight is packed as described in the body.
 */
static uint64_t compute_weight(
    ompi_btl_usnic_module_t *module,
    ompi_btl_usnic_addr_t *proc_modex_addr)
{
    char local_ip[INET_ADDRSTRLEN];
    char remote_ip[INET_ADDRSTRLEN];
    uint32_t local_net, remote_net;
    uint32_t speed_gbps;
    uint64_t weight;
    int same_subnet;
    int rc, metric;

    /* printable forms of both addresses, used only for the trace below */
    inet_ntop(AF_INET, &module->if_ipv4_addr,
              local_ip, sizeof(local_ip));
    inet_ntop(AF_INET, &proc_modex_addr->ipv4_addr,
              remote_ip, sizeof(remote_ip));

    /* Two interfaces are on the same network iff their CIDR-masked
       addresses are equal. */
    local_net = ompi_btl_usnic_get_ipv4_subnet(module->if_ipv4_addr,
                                               module->if_cidrmask);
    remote_net = ompi_btl_usnic_get_ipv4_subnet(proc_modex_addr->ipv4_addr,
                                                proc_modex_addr->cidrmask);
    same_subnet = (local_net == remote_net);

    opal_output_verbose(5, USNIC_OUT,
                        "btl:usnic:%s: checking my IP address/subnet (%s/%d) vs. peer (%s/%d): %s",
                        __func__, local_ip, module->if_cidrmask,
                        remote_ip, proc_modex_addr->cidrmask,
                        (same_subnet ? "match" : "DO NOT match"));

    if (!mca_btl_usnic_component.use_udp) {
        if (!same_subnet) {
            return WEIGHT_UNREACHABLE;
        }
        return 1; /* any positive weight is fine */
    }

    speed_gbps = MIN(module->super.btl_bandwidth,
                     proc_modex_addr->link_speed_mbps) / 1000;

    metric = 0;
    rc = ompi_btl_usnic_nl_ip_rt_lookup(mca_btl_usnic_component.unlsk,
                                        module->if_ipv4_addr,
                                        proc_modex_addr->ipv4_addr,
                                        &metric);
    if (0 != rc) {
        return 0; /* no connectivity */
    }

    /* Layout of the returned 64-bit weight (MSB first):
     *
     *   upper 32 bits: 00000000 0000000A BBBBBBBB 00000001
     *   lower 32 bits: CCCCCCCC CCCCCCCC CCCCCCCC CCCCCCCC
     *
     *   A = 1 iff both interfaces share a subnet
     *   B = minimum link speed (Gbps) of the interface pair
     *   C = routing-table metric
     *
     * Same-subnet pairs therefore sort first, then faster links.  The
     * literal 1 keeps these edges ahead of any zero-cost links that could
     * otherwise enter the graph; it is not strictly required and its byte
     * could be reclaimed if needed.
     *
     * TODO: add an MCA parameter to optionally swap the positions of A
     * and B, prioritizing link speed over same-subnet reachability.
     */
    /* FIXME: how can we check that the metric is the same before we have
     * communication with this host?  Mismatched metrics could cause the
     * remote peer to make a different pairing decision... */
    if (speed_gbps > 0xff) {
        opal_output_verbose(20, USNIC_OUT, "clamping min_link_speed_gbps=%u to 255",
                            speed_gbps);
        speed_gbps = 0xff;
    }

    weight  = (uint64_t)same_subnet << 48;
    weight |= (uint64_t)(speed_gbps & 0xff) << 40;
    weight |= (uint64_t)0x1 << 32;
    /* the C (metric) bits are deliberately left as zero for now */
    return weight;
}
开发者ID:h4ck3rm1k3,项目名称:ompi-svn-mirror,代码行数:80,代码来源:btl_usnic_proc.c
示例9: create_proc_module_graph
/**
* Constructs an interface graph from all local modules and the given proc's
* remote interfaces. The resulting vertices will always have the module
* vertices appear before the proc vertices.
*/
static int create_proc_module_graph(
ompi_btl_usnic_proc_t *proc,
bool proc_is_left,
ompi_btl_usnic_graph_t **g_out)
{
int err;
int i, j;
int u, v;
int num_modules;
ompi_btl_usnic_graph_t *g = NULL;
if (NULL == g_out) {
return OMPI_ERR_BAD_PARAM;
}
*g_out = NULL;
num_modules = (int)mca_btl_usnic_component.num_modules;
/* Construct a bipartite graph with remote interfaces on the one side and
* local interfaces (modules) on the other. */
err = ompi_btl_usnic_gr_create(NULL, NULL, &g);
if (OMPI_SUCCESS != err) {
OMPI_ERROR_LOG(err);
goto out;
}
/* create vertices for each interface (local and remote) */
for (i = 0; i < num_modules; ++i) {
int idx = -1;
err = ompi_btl_usnic_gr_add_vertex(g,
mca_btl_usnic_component.usnic_active_modules[i],
&idx);
if (OMPI_SUCCESS != err) {
OMPI_ERROR_LOG(err);
goto out_free_graph;
}
assert(idx == MODULE_VERTEX(i));
}
for (i = 0; i < (int)proc->proc_modex_count; ++i) {
int idx = -1;
err = ompi_btl_usnic_gr_add_vertex(g, &proc->proc_modex[i], &idx);
if (OMPI_SUCCESS != err) {
OMPI_ERROR_LOG(err);
goto out_free_graph;
}
assert(idx == (int)PROC_VERTEX(i));
}
/* now add edges between interfaces that can communicate */
for (i = 0; i < num_modules; ++i) {
for (j = 0; j < (int)proc->proc_modex_count; ++j) {
int64_t weight, cost;
/* assumption: compute_weight returns the same weight on the
* remote process with these arguments (effectively) transposed */
weight = compute_weight(mca_btl_usnic_component.usnic_active_modules[i],
&proc->proc_modex[j]);
opal_output_verbose(20, USNIC_OUT,
"btl:usnic:%s: weight=0x%016" PRIx64 " for edge module[%d] (%p) <--> endpoint[%d] on proc %p",
__func__,
weight, i,
(void *)mca_btl_usnic_component.usnic_active_modules[i],
j, (void *)proc);
if (WEIGHT_UNREACHABLE == weight) {
continue;
} else {
/* the graph code optimizes for minimum *cost*, but we have
* been computing weights (negative costs) */
cost = -weight;
}
assert(INT64_MAX != cost);
assert(INT64_MIN != cost);
if (proc_is_left) {
u = PROC_VERTEX(j);
v = MODULE_VERTEX(i);
} else {
u = MODULE_VERTEX(i);
v = PROC_VERTEX(j);
}
opal_output_verbose(20, USNIC_OUT,
"btl:usnic:%s: adding edge (%d,%d) with cost=%" PRIi64 " for edge module[%d] <--> endpoint[%d]",
__func__, u, v, cost, i, j);
err = ompi_btl_usnic_gr_add_edge(g, u, v, cost,
/*capacity=*/1,
/*e_data=*/NULL);
if (OMPI_SUCCESS != err) {
OMPI_ERROR_LOG(err);
goto out_free_graph;
}
}
}
//.........这里部分代码省略.........
开发者ID:h4ck3rm1k3,项目名称:ompi-svn-mirror,代码行数:101,代码来源:btl_usnic_proc.c
示例10: dyn_allocate
//.........这里部分代码省略.........
* have a field in the jdata structure for "mandatory" vs "optional"
* allocations, so we'll have to add that someday. Likewise, you may
* want to provide a param to adjust the timeout value
*/
/* construct the cmd string */
opal_argv_append_nosize(&cmd, "allocate");
/* add the jobid */
orte_util_convert_jobid_to_string(&jstring, jdata->jobid);
opal_asprintf(&tmp, "jobid=%s", jstring);
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
free(jstring);
/* if we want the allocation for all apps in one shot,
* then tell slurm
*
* RHC: we don't currently have the ability to handle
* rolling allocations in the rest of the code base
*/
#if 0
if (!mca_ras_slurm_component.rolling_alloc) {
opal_argv_append_nosize(&cmd, "return=all");
}
#else
opal_argv_append_nosize(&cmd, "return=all");
#endif
/* pass the timeout */
opal_asprintf(&tmp, "timeout=%d", mca_ras_slurm_component.timeout);
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
/* for each app, add its allocation request info */
i64ptr = &i64;
for (i=0; i < jdata->apps->size; i++) {
if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
continue;
}
/* add the app id, preceded by a colon separator */
opal_asprintf(&tmp, ": app=%d", (int)app->idx);
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
/* add the number of process "slots" we need */
opal_asprintf(&tmp, "np=%d", app->num_procs);
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
/* if we were given a minimum number of nodes, pass it along */
if (orte_get_attribute(&app->attributes, ORTE_APP_MIN_NODES, (void**)&i64ptr, OPAL_INT64)) {
opal_asprintf(&tmp, "N=%ld", (long int)i64);
opal_argv_append_nosize(&cmd, tmp);
free(tmp);
}
/* add the list of nodes, if one was given, ensuring
* that each node only appears once
*/
node_list = get_node_list(app);
if (NULL != node_list) {
opal_asprintf(&tmp, "node_list=%s", node_list);
opal_argv_append_nosize(&cmd, tmp);
free(node_list);
free(tmp);
}
/* add the mandatory/optional flag */
if (orte_get_attribute(&app->attributes, ORTE_APP_MANDATORY, NULL, OPAL_BOOL)) {
opal_argv_append_nosize(&cmd, "flag=mandatory");
} else {
opal_argv_append_nosize(&cmd, "flag=optional");
}
}
/* assemble it into the final cmd to be sent */
cmd_str = opal_argv_join(cmd, ' ');
opal_argv_free(cmd);
/* start a timer - if the response to our request doesn't appear
* in the defined time, then we will error out as Slurm isn't
* responding to us
*/
opal_event_evtimer_set(orte_event_base, &jtrk->timeout_ev, timeout, jtrk);
tv.tv_sec = mca_ras_slurm_component.timeout * 2;
tv.tv_usec = 0;
opal_event_evtimer_add(&jtrk->timeout_ev, &tv);
opal_output_verbose(2, orte_ras_base_framework.framework_output,
"%s slurm:dynalloc cmd_str = %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
cmd_str);
if (send(socket_fd, cmd_str, strlen(cmd_str)+1, 0) < 0) {
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
}
free(cmd_str);
/* we cannot wait here for a response as we
* are already in an event. So return a value
* that indicates we are waiting for an
* allocation so the base functions know
* that they shouldn't progress the job
*/
return ORTE_ERR_ALLOCATION_PENDING;
}
开发者ID:davideberius,项目名称:ompi,代码行数:101,代码来源:ras_slurm_module.c
示例11: recv_data
static void recv_data(int fd, short args, void *cbdata)
{
bool found;
int i, rc;
orte_node_t *nd, *nd2;
opal_list_t nds, ndtmp;
opal_list_item_t *item, *itm;
char recv_msg[8192];
int nbytes, idx, sjob;
char **alloc, *nodelist, *tpn;
local_jobtracker_t *ptr, *jtrk;
local_apptracker_t *aptrk;
orte_app_context_t *app;
orte_jobid_t jobid;
orte_job_t *jdata;
char **dash_host = NULL;
opal_output_verbose(2, orte_ras_base_framework.framework_output,
"%s ras:slurm: dynamic allocation - data recvd",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* read the data from the socket and put it in the
* nodes field of op
*/
memset(recv_msg, 0, sizeof(recv_msg));
nbytes = read(fd, recv_msg, sizeof(recv_msg) - 1);
opal_output_verbose(2, orte_ras_base_framework.framework_output,
"%s ras:slurm: dynamic allocation msg: %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), recv_msg);
/* check if we got something */
if (0 == nbytes || 0 == strlen(recv_msg) || strstr(recv_msg, "failure") != NULL) {
/* show an error here - basically, a "nothing was available"
* message
*/
orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true,
(0 == strlen(recv_msg)) ? "NO MSG" : recv_msg);
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALLOC_FAILED);
return;
}
/* break the message into its component parts, separated by colons */
alloc = opal_argv_split(recv_msg, ':');
/* the first section contains the ORTE jobid for this allocation */
tpn = strchr(alloc[0], '=');
orte_util_convert_string_to_jobid(&jobid, tpn+1);
/* get the corresponding job object */
jdata = orte_get_job_data_object(jobid);
jtrk = NULL;
/* find the associated tracking object */
for (item = opal_list_get_first(&jobs);
item != opal_list_get_end(&jobs);
item = opal_list_get_next(item)) {
ptr = (local_jobtracker_t*)item;
if (ptr->jobid == jobid) {
jtrk = ptr;
break;
}
}
if (NULL == jtrk) {
orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, "NO JOB TRACKER");
ORTE_ACTIVATE_JOB_STATE(NULL, ORTE_JOB_STATE_ALLOC_FAILED);
opal_argv_free(alloc);
return;
}
/* stop the timeout event */
opal_event_del(&jtrk->timeout_ev);
/* cycle across all the remaining parts - each is the allocation for
* an app in this job
*/
OBJ_CONSTRUCT(&nds, opal_list_t);
OBJ_CONSTRUCT(&ndtmp, opal_list_t);
idx = -1;
sjob = -1;
nodelist = NULL;
tpn = NULL;
for (i=1; NULL != alloc[i]; i++) {
if (ORTE_SUCCESS != parse_alloc_msg(alloc[i], &idx, &sjob, &nodelist, &tpn)) {
orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, jtrk->cmd);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOC_FAILED);
opal_argv_free(alloc);
if (NULL != nodelist) {
free(nodelist);
}
if (NULL != tpn) {
free(tpn);
}
return;
}
if (idx < 0) {
orte_show_help("help-ras-slurm.txt", "slurm-dyn-alloc-failed", true, jtrk->cmd);
ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOC_FAILED);
opal_argv_free(alloc);
free(nodelist);
free(tpn);
return;
//.........这里部分代码省略.........
开发者ID:davideberius,项目名称:ompi,代码行数:101,代码来源:ras_slurm_module.c
示例12: orte_ras_slurm_allocate
/**
* Discover available (pre-allocated) nodes. Allocate the
* requested number of nodes/process slots to the job.
*
*/
static int orte_ras_slurm_allocate(orte_job_t *jdata, opal_list_t *nodes)
{
int ret, cpus_per_task;
char *slurm_node_str, *regexp;
char *tasks_per_node, *node_tasks;
char *tmp;
char *slurm_jobid;
if (NULL == (slurm_jobid = getenv("SLURM_JOBID"))) {
/* we are not in a slurm allocation - see if dyn alloc
* is enabled
*/
if (!mca_ras_slurm_component.dyn_alloc_enabled) {
/* nope - nothing we can do */
opal_output_verbose(2, orte_ras_base_framework.framework_output,
"%s ras:slurm: no prior allocation and dynamic alloc disabled",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
} else {
/* save this value in the global job ident string for
* later use in any error reporting
*/
orte_job_ident = strdup(slurm_jobid);
}
slurm_node_str = getenv("SLURM_NODELIST");
if (NULL == slurm_node_str) {
/* see if dynamic allocation is enabled */
if (mca_ras_slurm_component.dyn_alloc_enabled) {
/* attempt to get the allocation - the function
* dyn_allocate will return as ORTE_ERR_ALLOCATION_PENDING
* if it succeeds in sending the allocation request
*/
ret = dyn_allocate(jdata);
/* return to the above layer in ras/base/ras_base_allocate.c
* to wait for event (libevent) happening
*/
return ret;
}
orte_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
"SLURM_NODELIST");
return ORTE_ERR_NOT_FOUND;
}
regexp = strdup(slurm_node_str);
if(NULL == regexp) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
return ORTE_ERR_OUT_OF_RESOURCE;
}
if (mca_ras_slurm_component.use_all) {
/* this is an oddball case required for debug situations where
* a tool is started that will then call mpirun. In this case,
* Slurm will assign only 1 tasks/per node to the tool, but
* we want mpirun to use the entire allocation. They don't give
* us a specific variable for this purpose, so we have to fudge
* a bit - but this is a special edge case, and we'll live with it */
tasks_per_node = getenv("SLURM_JOB_CPUS_PER_NODE");
if (NULL == tasks_per_node) {
/* couldn't find any version - abort */
orte_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
"SLURM_JOB_CPUS_PER_NODE");
free(regexp);
return ORTE_ERR_NOT_FOUND;
}
node_tasks = strdup(tasks_per_node);
if (NULL == node_tasks) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
free(regexp);
return ORTE_ERR_OUT_OF_RESOURCE;
}
cpus_per_task = 1;
} else {
/* get the number of process slots we were assigned on each node */
tasks_per_node = getenv("SLURM_TASKS_PER_NODE");
if (NULL == tasks_per_node) {
/* couldn't find any version - abort */
orte_show_help("help-ras-slurm.txt", "slurm-env-var-not-found", 1,
"SLURM_TASKS_PER_NODE");
free(regexp);
return ORTE_ERR_NOT_FOUND;
}
node_tasks = strdup(tasks_per_node);
if (NULL == node_tasks) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
free(regexp);
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* get the number of CPUs per task that the user provided to slurm */
tmp = getenv("SLURM_CPUS_PER_TASK");
if(NULL != tmp) {
cpus_per_task = atoi(tmp);
if(0 >= cpus_per_task) {
opal_output(0, "ras:slurm:allocate: Got bad value from SLURM_CPUS_PER_TASK. "
//.........这里部分代码省略.........
开发者ID:davideberius,项目名称:ompi,代码行数:101,代码来源:ras_slurm_module.c
示例13: ompi_osc_pt2pt_sendreq_recv_accum
int
ompi_osc_pt2pt_sendreq_recv_accum(ompi_osc_pt2pt_module_t *module,
ompi_osc_pt2pt_send_header_t *header,
void *payload)
{
int ret = OMPI_SUCCESS;
struct ompi_op_t *op = ompi_osc_pt2pt_op_create(header->hdr_target_op);
ompi_proc_t *proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
struct ompi_datatype_t *datatype =
ompi_osc_pt2pt_datatype_create(proc, &payload);
if (header->hdr_msg_length > 0) {
/* lock the window for accumulates */
OPAL_THREAD_LOCK(&module->p2p_acc_lock);
/* copy the data from the temporary buffer into the user window */
ret = ompi_osc_pt2pt_process_op(module, header, datatype, op, payload,
header->hdr_msg_length);
/* unlock the window for accumulates */
OPAL_THREAD_UNLOCK(&module->p2p_acc_lock);
/* Release datatype & op */
OBJ_RELEASE(datatype);
OBJ_RELEASE(op);
OPAL_THREAD_ADD32(&(module->p2p_num_pending_in), -1);
opal_output_verbose(50, ompi_osc_base_output,
"%d received accum message from %d",
module->p2p_comm->c_my_rank,
header->hdr_origin);
} else {
ompi_osc_pt2pt_longreq_t *longreq;
ptrdiff_t lb, extent, true_lb, true_extent;
size_t buflen;
/* figure out how big a buffer we need */
ompi_ddt_get_extent(datatype, &lb, &extent);
ompi_ddt_get_true_extent(datatype, &true_lb, &true_extent);
buflen = true_extent + (header->hdr_target_count - 1) * extent;
/* get a longreq and fill it in */
ompi_osc_pt2pt_longreq_alloc(&longreq);
longreq->req_comp_cb = ompi_osc_pt2pt_sendreq_recv_accum_long_cb;
longreq->req_datatype = datatype;
longreq->req_op = op;
longreq->req_module = module;
/* allocate a buffer to receive into ... */
longreq->req_comp_cbdata = malloc(buflen + sizeof(ompi_osc_pt2pt_send_header_t));
if (NULL == longreq->req_comp_cbdata) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
/* fill in tmp header */
memcpy(longreq->req_comp_cbdata, header,
sizeof(ompi_osc_pt2pt_send_header_t));
((ompi_osc_pt2pt_send_header_t*) longreq->req_comp_cbdata)->hdr_msg_length = buflen;
ret = mca_pml.pml_irecv(((char*) longre
|
请发表评论