This article collects typical usage examples of the Python function mrjob.compat.uses_yarn. If you are unsure what exactly uses_yarn does, how to call it, or what real-world usage looks like, the hand-picked code examples below should help.
Below are 12 code examples of uses_yarn, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
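Before diving into the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the basic call pattern: uses_yarn takes a Hadoop version string and returns True when that version runs on YARN, which is how the examples decide between YARN-era and pre-YARN command-line flags. The version strings here are purely illustrative.

from mrjob.compat import uses_yarn

# pick command-line flags based on whether this Hadoop version runs on YARN
for version in ('1.0.3', '2.7.3'):
    if uses_yarn(version):
        print('%s: YARN-era flags (e.g. fs -rm -R)' % version)
    else:
        print('%s: pre-YARN flags (e.g. fs -rmr)' % version)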
Example 1: _hadoop_log_dirs
def _hadoop_log_dirs(self, output_dir=None):
    """Yield all possible places to look for hadoop logs."""
    # hadoop_log_dirs opt overrides all this
    if self._opts['hadoop_log_dirs']:
        for path in self._opts['hadoop_log_dirs']:
            yield path
        return

    hadoop_log_dir = os.environ.get('HADOOP_LOG_DIR')
    if hadoop_log_dir:
        yield hadoop_log_dir

    if uses_yarn(self.get_hadoop_version()):
        yarn_log_dir = os.environ.get('YARN_LOG_DIR')
        if yarn_log_dir:
            yield yarn_log_dir

    if output_dir:
        # Cloudera style of logging
        yield posixpath.join(output_dir, '_logs')

    for hadoop_dir in self._hadoop_dirs():
        yield posixpath.join(hadoop_dir, 'logs')

    # hard-coded log paths for EMR, so this can work out-of-the-box
    for path in _EMR_HADOOP_LOG_DIRS:
        yield path
Author: sebratt | Project: mrjob | Lines: 27 | Source: hadoop.py
Example 2: _hadoop_log_dirs
def _hadoop_log_dirs(self, output_dir=None):
    """Yield all possible places to look for hadoop logs."""
    # hadoop_log_dirs opt overrides all this
    if self._opts['hadoop_log_dirs']:
        for path in self._opts['hadoop_log_dirs']:
            yield path
        return

    hadoop_log_dir = os.environ.get('HADOOP_LOG_DIR')
    if hadoop_log_dir:
        yield hadoop_log_dir

    yarn = uses_yarn(self.get_hadoop_version())

    if yarn:
        yarn_log_dir = os.environ.get('YARN_LOG_DIR')
        if yarn_log_dir:
            yield yarn_log_dir

        yield _DEFAULT_YARN_HDFS_LOG_DIR

    if output_dir:
        # Cloudera style of logging
        yield posixpath.join(output_dir, '_logs')

    for hadoop_dir in self._hadoop_dirs():
        yield posixpath.join(hadoop_dir, 'logs')

    # hard-coded fallback paths
    if yarn:
        for path in _FALLBACK_HADOOP_YARN_LOG_DIRS:
            yield path

    for path in _FALLBACK_HADOOP_LOG_DIRS:
        yield path
Author: okomestudio | Project: mrjob | Lines: 35 | Source: hadoop.py
Example 3: _interpret_task_logs
def _interpret_task_logs(self, log_interpretation, partial=True):
    """Fetch task syslogs and stderr, and add 'task' to interpretation."""
    if 'task' in log_interpretation and (
            partial or not log_interpretation['task'].get('partial')):
        return  # already interpreted

    step_interpretation = log_interpretation.get('step') or {}

    application_id = step_interpretation.get('application_id')
    job_id = step_interpretation.get('job_id')
    output_dir = step_interpretation.get('output_dir')

    yarn = uses_yarn(self.get_hadoop_version())

    if yarn:
        if not application_id:
            log.warning("Can't fetch task logs; missing application ID")
            return
    else:
        if not job_id:
            log.warning("Can't fetch task logs; missing job ID")
            return

    log_interpretation['task'] = _interpret_task_logs(
        self.fs,
        self._ls_task_syslogs(
            application_id=application_id,
            job_id=job_id,
            output_dir=output_dir),
        partial=partial,
        stderr_callback=_log_parsing_task_stderr)
Author: Dean838 | Project: mrjob | Lines: 31 | Source: mixin.py
Example 4: _interpret_task_logs
def _interpret_task_logs(self, log_interpretation):
    """Find and interpret the task logs, storing the
    interpretation in ``log_interpretation['task']``."""
    if 'task' not in log_interpretation:
        # get job/application ID from output of hadoop command
        step_interpretation = log_interpretation.get('step') or {}

        application_id = step_interpretation.get('application_id')
        job_id = step_interpretation.get('job_id')

        yarn = uses_yarn(self.get_hadoop_version())

        if yarn and application_id is None:
            log.warning("Can't fetch task logs without application ID")
            return {}
        elif not yarn and job_id is None:
            log.warning("Can't fetch task logs without job ID")
            return {}

        # Note: this is unlikely to be super-helpful on "real" (multi-node)
        # pre-YARN Hadoop because task logs aren't generally shipped to a
        # local directory. It's a start, anyways. See #1201.
        def stream_task_log_dirs():
            for log_dir in unique(
                    self._hadoop_log_dirs(
                        output_dir=step_interpretation.get('output_dir'))):
                if yarn:
                    path = self.fs.join(
                        log_dir, 'userlogs', application_id)
                else:
                    # sometimes pre-YARN attempt logs are organized by
                    # job_id, sometimes not. Play it safe
                    path = self.fs.join(log_dir, 'userlogs')

                if self.fs.exists(path):
                    log.info('Scanning task syslogs in %s' % path)
                    yield [path]

        # wrap _ls_task_syslogs() to add logging
        def ls_task_syslogs():
            # there should be at most one history log
            for match in _ls_task_syslogs(
                    self.fs, stream_task_log_dirs(),
                    application_id=application_id, job_id=job_id):
                # TODO: this isn't really correct because
                # _interpret_task_logs() sorts the log paths and
                # scans starting at the most recent one. Probably
                # should have _ls_task_syslogs() do the sorting.
                log.info(' Scanning for errors: %s' % match['path'])
                yield match

        log_interpretation['task'] = _interpret_task_logs(
            self.fs, ls_task_syslogs())

    return log_interpretation['task']
Author: imtiaz39 | Project: mrjob | Lines: 57 | Source: hadoop.py
Example 5: ls
def ls(self, path_glob):
    components = urlparse(path_glob)
    hdfs_prefix = '%s://%s' % (components.scheme, components.netloc)

    version = self.get_hadoop_version()

    # use ls -R on Hadoop 2 (see #1152)
    if uses_yarn(version):
        args = ['fs', '-ls', '-R', path_glob]
    else:
        args = ['fs', '-lsr', path_glob]

    try:
        stdout = self.invoke_hadoop(args, return_stdout=True,
                                    ok_stderr=[_HADOOP_LS_NO_SUCH_FILE])
    except CalledProcessError:
        raise IOError("Could not ls %s" % path_glob)

    for line in BytesIO(stdout):
        line = line.rstrip(b'\r\n')

        # ignore total item count
        if line.startswith(b'Found '):
            continue

        fields = line.split(b' ')

        # Throw out directories
        if fields[0].startswith(b'd'):
            continue

        # Try to figure out which part of the line is the path
        # Expected lines:
        #
        # HDFS:
        # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
        #
        # S3:
        # -rwxrwxrwx   1       3276 2010-01-13 14:00 /foo/bar
        path_index = None
        for index, field in enumerate(fields):
            # look for time field, and pick one after that
            # (can't use field[2] because that's an int in Python 3)
            if len(field) == 5 and field[2:3] == b':':
                path_index = (index + 1)

        if not path_index:
            raise IOError("Could not locate path in string %r" % line)

        path = to_unicode(line.split(b' ', path_index)[-1])

        # handle fully qualified URIs from newer versions of Hadoop ls
        # (see Pull Request #577)
        if is_uri(path):
            yield path
        else:
            yield hdfs_prefix + path
Author: Yelp | Project: mrjob | Lines: 55 | Source: hadoop.py
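To make the path-detection heuristic in Example 5 more concrete, here is a tiny standalone sketch (not part of mrjob) that applies the same rule, find the HH:MM time field and take everything after it, to a sample hadoop fs -ls output line. The sample line is invented for illustration.

sample = b'-rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar'

fields = sample.split(b' ')

path_index = None
for index, field in enumerate(fields):
    # the time field looks like b'14:00': five bytes with b':' in the middle
    if len(field) == 5 and field[2:3] == b':':
        path_index = index + 1

# split at most path_index times so a path containing spaces stays intact
print(sample.split(b' ', path_index)[-1])  # b'/foo/bar'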
Example 6: mkdir
def mkdir(self, path):
    version = self.get_hadoop_version()

    # use -p on Hadoop 2 (see #991, #845)
    if uses_yarn(version):
        args = ['fs', '-mkdir', '-p', path]
    else:
        args = ['fs', '-mkdir', path]

    try:
        self.invoke_hadoop(args, ok_stderr=[_HADOOP_FILE_EXISTS_RE])
    except CalledProcessError:
        raise IOError("Could not mkdir %s" % path)
Author: Yelp | Project: mrjob | Lines: 13 | Source: hadoop.py
Example 7: _interpret_task_logs
def _interpret_task_logs(
        self, log_interpretation, step_type, error_attempt_ids=(),
        partial=True):
    """Fetch task syslogs and stderr, and add 'task' to interpretation."""
    if 'task' in log_interpretation and (
            partial or not log_interpretation['task'].get('partial')):
        return  # already interpreted

    if not self._read_logs():
        return

    step_interpretation = log_interpretation.get('step') or {}

    application_id = step_interpretation.get('application_id')
    job_id = step_interpretation.get('job_id')
    output_dir = step_interpretation.get('output_dir')

    yarn = uses_yarn(self.get_hadoop_version())

    attempt_to_container_id = log_interpretation.get('history', {}).get(
        'attempt_to_container_id', {})

    if yarn:
        if not application_id:
            if not log_interpretation.get('no_job'):
                log.warning(
                    "Can't fetch task logs; missing application ID")
            return
    else:
        if not job_id:
            if not log_interpretation.get('no_job'):
                log.warning("Can't fetch task logs; missing job ID")
            return

    if _is_spark_step_type(step_type):
        interpret_func = _interpret_spark_task_logs
    else:
        interpret_func = _interpret_task_logs

    log_interpretation['task'] = interpret_func(
        self.fs,
        self._ls_task_logs(
            step_type,
            application_id=application_id,
            job_id=job_id,
            output_dir=output_dir,
            error_attempt_ids=error_attempt_ids,
            attempt_to_container_id=attempt_to_container_id,
        ),
        partial=partial,
        log_callback=_log_parsing_task_log)
Author: Affirm | Project: mrjob | Lines: 51 | Source: mixin.py
Example 8: rm
def rm(self, path_glob):
    if not is_uri(path_glob):
        super(HadoopFilesystem, self).rm(path_glob)

    version = self.get_hadoop_version()
    if uses_yarn(version):
        args = ['fs', '-rm', '-R', '-f', '-skipTrash', path_glob]
    else:
        args = ['fs', '-rmr', '-skipTrash', path_glob]

    try:
        self.invoke_hadoop(
            args,
            return_stdout=True, ok_stderr=[_HADOOP_RM_NO_SUCH_FILE])
    except CalledProcessError:
        raise IOError("Could not rm %s" % path_glob)
Author: Yelp | Project: mrjob | Lines: 16 | Source: hadoop.py
Example 9: _find_probable_cause_of_failure
def _find_probable_cause_of_failure(self, application_id=None, job_id=None,
                                    output_dir=None, **ignored):
    """Find probable cause of failure. Currently we just scan task logs.

    On YARN, you must set application_id, and pre-YARN, you must set
    job_id.
    """
    # package up logs for _find_error_in_task_logs(),
    # and log where we're looking.
    hadoop_version = self.get_hadoop_version()

    yarn = uses_yarn(hadoop_version)

    if yarn and application_id is None:
        log.warning("No application ID!")
        return None

    if not yarn and job_id is None:
        log.warning("No job ID!")
        return None

    # Note: this is unlikely to be super-helpful on "real" (multi-node)
    # pre-YARN Hadoop because task logs aren't generally shipped to a local
    # directory. It's a start, anyways. See #1201.
    def stream_task_log_dirs():
        for log_dir in unique(
                self._hadoop_log_dirs(output_dir=output_dir)):
            if yarn:
                path = self.fs.join(log_dir, 'userlogs', application_id)
            else:
                # sometimes pre-YARN attempt logs are organized by job_id,
                # sometimes not. Play it safe
                path = self.fs.join(log_dir, 'userlogs')

            if self.fs.exists(path):
                log.info('looking for logs in %s' % path)
                yield [path]

    return _find_error_in_task_logs(
        self.fs, stream_task_log_dirs(), hadoop_version,
        application_id=application_id, job_id=job_id)
Author: BeeswaxIO | Project: mrjob | Lines: 41 | Source: hadoop.py
Example 10: mock_hadoop_uses_yarn
def mock_hadoop_uses_yarn(environ):
    return uses_yarn(environ['MOCK_HADOOP_VERSION'])
Author: kodizant | Project: mrjob | Lines: 2 | Source: mockhadoop.py
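A hypothetical call to the helper in Example 10 might look like the sketch below; the dict stands in for os.environ inside a test, and MOCK_HADOOP_VERSION is the environment variable the function reads.

# a stand-in for os.environ inside a test
fake_environ = {'MOCK_HADOOP_VERSION': '2.7.1'}

print(mock_hadoop_uses_yarn(fake_environ))  # True, since 2.x Hadoop runs on YARN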
Example 11: test_uses_yarn
def test_uses_yarn(self):
    self.assertEqual(uses_yarn('0.22'), False)
    self.assertEqual(uses_yarn('0.23'), True)
    self.assertEqual(uses_yarn('0.24'), True)
    self.assertEqual(uses_yarn('1.0.0'), False)
    self.assertEqual(uses_yarn('2.0.0'), True)
Author: kartheek6 | Project: mrjob | Lines: 6 | Source: test_compat.py
Example 12: _find_error_in_task_logs
def _find_error_in_task_logs(fs, log_dirs_stream, hadoop_version,
                             application_id=None, job_id=None):
    """Given a filesystem and a stream of lists of log dirs to search in,
    find the last error and return details about it. *hadoop_version*
    is required, as task logs have very different paths in YARN.

    In YARN, you must set *application_id*, and pre-YARN, you must set
    *job_id*, or we'll bail out and return None.

    Returns a dictionary with the following keys ("optional" means
    that something may be None):

    syslog: dict with keys:
        path: path of syslog we found error in
        error: error details; dict with keys:
            exception: Java exception (as string)
            stack_trace: array of lines with Java stack trace
        split: optional input split we were reading; dict with keys:
            path: path of input file
            start_line: first line of split (0-indexed)
            num_lines: number of lines in split
    stderr: optional dict with keys:
        path: path of stderr corresponding to syslog
        error: optional error details; dict with keys:
            exception: string (Python exception)
            traceback: array of lines with Python stack trace
    type: always set to 'task'
    """
    syslog_paths = []

    yarn = uses_yarn(hadoop_version)

    if ((yarn and application_id is None) or
            (not yarn and job_id is None)):
        return None

    # we assume that each set of log paths contains the same copies
    # of syslogs, so stop once we find any non-empty set of log dirs
    for log_dirs in log_dirs_stream:
        if yarn:
            syslog_paths = _ls_yarn_task_syslogs(fs, log_dirs,
                                                 application_id=application_id)
        else:
            syslog_paths = _ls_pre_yarn_task_syslogs(fs, log_dirs,
                                                     job_id=job_id)

        if syslog_paths:
            break

    for syslog_path in syslog_paths:
        log.debug('Looking for error in %s' % syslog_path)

        syslog_info = _parse_task_syslog(_cat_log(fs, syslog_path))
        if not syslog_info['error']:
            continue

        # found error! see if we can explain it

        # TODO: don't bother if error wasn't due to child process
        stderr_path = _stderr_for_syslog(syslog_path)

        stderr_info = _parse_python_task_stderr(_cat_log(fs, stderr_path))

        # output error info
        syslog_info['path'] = syslog_path
        stderr_info['path'] = stderr_path

        return dict(type='task', syslog=syslog_info, stderr=stderr_info)

    return None
Author: parastoo-62 | Project: mrjob | Lines: 69 | Source: interpret.py
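Based purely on the docstring in Example 12, a successful return value has roughly the shape sketched below. Every path, exception message, and number here is invented to illustrate the structure, not taken from real output.

probable_cause = {
    'type': 'task',
    'syslog': {
        'path': '/var/log/hadoop/userlogs/application_0001/container_01/syslog',
        'error': {
            'exception': 'java.lang.RuntimeException: PipeMapRed failed!',
            'stack_trace': ['    at org.apache.hadoop.streaming.PipeMapRed...'],
        },
        'split': {
            'path': 'hdfs:///user/dave/input/part-00000',
            'start_line': 0,
            'num_lines': 335,
        },
    },
    'stderr': {
        'path': '/var/log/hadoop/userlogs/application_0001/container_01/stderr',
        'error': {
            'exception': "KeyError: 'foo'",
            'traceback': ['Traceback (most recent call last):', '...'],
        },
    },
}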
Note: The mrjob.compat.uses_yarn examples in this article were compiled by 纯净天空 from source code and documentation hosted on platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and redistribution or use should follow each project's License. Do not repost without permission.