本文整理汇总了 Python 中 mrjob.parse.parse_mr_job_stderr 函数的典型用法代码示例。如果您正苦于以下问题：Python parse_mr_job_stderr 函数的具体用法？Python parse_mr_job_stderr 怎么用？Python parse_mr_job_stderr 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 parse_mr_job_stderr 函数的 20 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的 Python 代码示例。
示例1: test_update_counters
def test_update_counters(self):
    # A dict handed in through ``counters=`` must be updated in place:
    # 'Baz' starts at 1 and the stderr line increments it by 1.
    existing = {'Foo': {'Bar': 3, 'Baz': 1}}
    stderr = StringIO('reporter:counter:Foo,Baz,1\n')

    parse_mr_job_stderr(stderr, counters=existing)

    expected = {'Foo': {'Bar': 3, 'Baz': 2}}
    assert_equal(existing, expected)
开发者ID:gimlids,项目名称:LTPM,代码行数:7,代码来源:parse_test.py
示例2: test_update_counters
def test_update_counters(self):
    # A dict handed in through ``counters=`` must be updated in place:
    # 'Baz' starts at 1 and the stderr line increments it by 1.
    existing = {'Foo': {'Bar': 3, 'Baz': 1}}
    stderr = BytesIO(b'reporter:counter:Foo,Baz,1\n')

    parse_mr_job_stderr(stderr, counters=existing)

    expected = {'Foo': {'Bar': 3, 'Baz': 2}}
    self.assertEqual(existing, expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:7,代码来源:test_parse.py
示例3: _run_step
def _run_step(self, step_num, step_type, input_path, output_path,
              working_dir, env, child_stdin=None):
    """Run one mapper, reducer or combiner task inside this process.

    The task is run by instantiating ``self._mrjob_cls`` with the matching
    command-line switch, sandboxing its stdin/stdout and calling
    ``execute()`` after updating ``os.environ`` with *env* and changing
    into *working_dir* (both done inside the ``save_current_environment()``
    and ``save_cwd()`` context managers).  Whatever the task wrote to its
    sandboxed stderr is then parsed and merged into
    ``self._counters[step_num]``.

    When a mapper step also defines a combiner, the mapper's output is
    buffered in memory, sorted line by line, and fed as stdin to a
    recursive call that runs the combiner and writes to *output_path*.

    :param step_num: index of the step; also indexes ``self._counters``
    :param step_type: ``'mapper'``, ``'reducer'`` or ``'combiner'``
    :param input_path: path passed to the task as its input argument
                       (combiners are passed ``'-'`` instead)
    :param output_path: file that receives the task's output
    :param working_dir: directory to change into while the task runs
    :param env: mapping merged into ``os.environ`` while the task runs
    :param child_stdin: optional stream used as the task's stdin
    """
    step = self._get_step(step_num)

    # if no mapper, just pass the data through (see #1141)
    if step_type == 'mapper' and not step.get('mapper'):
        copyfile(input_path, output_path)
        return

    # Passing local=False ensures the job uses proper names for file
    # options (see issue #851 on github)
    common_args = (['--step-num=%d' % step_num] +
                   self._mr_job_extra_args(local=False))

    if step_type == 'mapper':
        child_args = ['--mapper', input_path] + common_args
    elif step_type == 'reducer':
        child_args = ['--reducer', input_path] + common_args
    elif step_type == 'combiner':
        child_args = ['--combiner'] + common_args + ['-']

    has_combiner = (step_type == 'mapper' and 'combiner' in step)

    # Acquire the output stream *before* entering the try block.  If
    # open() fails there is nothing to close yet, and running the
    # ``finally`` clause would otherwise raise UnboundLocalError and hide
    # the real I/O error.
    if has_combiner:
        # mapper output is kept in memory so it can be sorted for the
        # combiner
        child_stdout = BytesIO()
    else:
        child_stdout = open(output_path, 'wb')

    try:
        with save_current_environment():
            with save_cwd():
                os.environ.update(env)
                os.chdir(working_dir)

                child_instance = self._mrjob_cls(args=child_args)
                child_instance.sandbox(stdin=child_stdin,
                                       stdout=child_stdout)
                child_instance.execute()

        if has_combiner:
            # must be read before the buffer is closed below
            sorted_lines = sorted(child_stdout.getvalue().splitlines())
            combiner_stdin = BytesIO(b'\n'.join(sorted_lines))
        else:
            child_stdout.flush()
    finally:
        child_stdout.close()

    # make sure there is a counters dict for this step before merging
    while len(self._counters) <= step_num:
        self._counters.append({})
    parse_mr_job_stderr(child_instance.stderr.getvalue(),
                        counters=self._counters[step_num])

    if has_combiner:
        self._run_step(step_num, 'combiner', None, output_path,
                       working_dir, env, child_stdin=combiner_stdin)
        combiner_stdin.close()
开发者ID:Dean838,项目名称:mrjob,代码行数:60,代码来源:inline.py
示例4: _parse_task_counters
def _parse_task_counters(self, task_type, step_num):
    """Merge counters from each stderr file of a task into this step.

    The stderr files are those listed by ``self.fs.ls()`` for the glob
    returned by ``self._task_stderr_paths_glob(task_type, step_num)``;
    if nothing matches, nothing is parsed.  Results accumulate in
    ``self._counters[step_num]``.
    """
    # don't disable if read_logs=False; parsing counters is
    # internal to Hadoop, not something that happens in log files
    glob_pattern = self._task_stderr_paths_glob(task_type, step_num)

    for path in self.fs.ls(glob_pattern):
        with open(path, 'rb') as task_stderr:
            parse_mr_job_stderr(
                task_stderr, counters=self._counters[step_num])
开发者ID:Affirm,项目名称:mrjob,代码行数:10,代码来源:sim.py
示例5: _run_step
def _run_step(self, step_num, step_type, input_path, output_path,
              working_dir, env, child_stdin=None):
    """Run one mapper, reducer or combiner task inside this process.

    A ``self._mrjob_cls`` instance is created with the matching
    command-line switch, its stdin/stdout are sandboxed, and
    ``execute()`` is called after updating ``os.environ`` with *env* and
    changing into *working_dir* (both done inside the
    ``save_current_environment()`` and ``save_cwd()`` context managers).
    Whatever the task wrote to its sandboxed stderr is then parsed and
    merged into ``self._counters[step_num]``.

    When a mapper step also defines a combiner, the mapper's output is
    buffered in memory, sorted line by line, and fed as stdin to a
    recursive call that runs the combiner and writes to *output_path*.

    :param step_num: index of the step; also indexes ``self._counters``
    :param step_type: ``'mapper'``, ``'reducer'`` or ``'combiner'``
    :param input_path: path passed to the task as its input argument
                       (combiners are passed ``'-'`` instead)
    :param output_path: file that receives the task's output
    :param working_dir: directory to change into while the task runs
    :param env: mapping merged into ``os.environ`` while the task runs
    :param child_stdin: optional stream used as the task's stdin
    """
    step = self._get_step(step_num)

    common_args = (['--step-num=%d' % step_num] +
                   self._mr_job_extra_args(local=True))

    if step_type == 'mapper':
        child_args = ['--mapper', input_path] + common_args
    elif step_type == 'reducer':
        child_args = ['--reducer', input_path] + common_args
    elif step_type == 'combiner':
        child_args = ['--combiner'] + common_args + ['-']

    child_instance = self._mrjob_cls(args=child_args)

    has_combiner = (step_type == 'mapper' and 'combiner' in step)

    # Use custom stdout: an in-memory buffer when the output still has to
    # be sorted for a combiner, the real output file otherwise.
    if has_combiner:
        child_stdout = StringIO()
    else:
        child_stdout = open(output_path, 'w')

    # The stream is closed in ``finally`` so the output file handle is
    # released even when the child task raises.
    try:
        with save_current_environment():
            with save_cwd():
                os.environ.update(env)
                os.chdir(working_dir)
                child_instance.sandbox(stdin=child_stdin,
                                       stdout=child_stdout)
                child_instance.execute()

        if has_combiner:
            # must be read before the buffer is closed below
            sorted_lines = sorted(child_stdout.getvalue().splitlines())
            combiner_stdin = StringIO('\n'.join(sorted_lines))
        else:
            child_stdout.flush()
    finally:
        child_stdout.close()

    # make sure there is a counters dict for this step before merging
    while len(self._counters) <= step_num:
        self._counters.append({})
    parse_mr_job_stderr(child_instance.stderr.getvalue(),
                        counters=self._counters[step_num])

    if has_combiner:
        self._run_step(step_num, 'combiner', None, output_path,
                       working_dir, env, child_stdin=combiner_stdin)
        combiner_stdin.close()
开发者ID:Asana,项目名称:mrjob,代码行数:52,代码来源:inline.py
示例6: test_negative_counters
def test_negative_counters(self):
    # Using negative counters is kind of poor practice, but Hadoop
    # Streaming supports it (negative numbers are integers too!), so the
    # parser has to accept them.
    stderr_lines = [b'reporter:counter:Foo,Bar,-2\n']
    expected = {
        'counters': {'Foo': {'Bar': -2}},
        'statuses': [],
        'other': [],
    }

    parsed = parse_mr_job_stderr(stderr_lines)

    self.assertEqual(parsed, expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:7,代码来源:test_parse.py
示例7: test_counters_and_status
def test_counters_and_status(self):
    job = MRJob().sandbox()

    # interleave counter increments with status updates
    job.increment_counter('Foo', 'Bar')
    job.set_status('Initializing qux gradients...')
    job.increment_counter('Foo', 'Bar')
    job.increment_counter('Foo', 'Baz', 20)
    job.set_status('Sorting metasyntactic variables...')

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    # increments to the same counter are summed; statuses keep their order
    expected = {
        'counters': {'Foo': {'Bar': 2, 'Baz': 20}},
        'statuses': [
            'Initializing qux gradients...',
            'Sorting metasyntactic variables...',
        ],
        'other': [],
    }
    self.assertEqual(parsed_stderr, expected)

    # make sure parse_counters() works
    self.assertEqual(job.parse_counters(), parsed_stderr['counters'])
开发者ID:bchess,项目名称:mrjob,代码行数:28,代码来源:test_job.py
示例8: _run_step
def _run_step(self, step_num, step_type, input_path, output_path,
              working_dir, env, child_stdin=None):
    """Run one mapper, reducer or combiner task inside this process.

    A ``self._mrjob_cls`` instance is created with the matching
    command-line switch, its stdin/stdout are sandboxed, and
    ``execute()`` is called after updating ``os.environ`` with *env* and
    changing into *working_dir* (both done inside the
    ``save_current_environment()`` and ``save_cwd()`` context managers).
    Whatever the task wrote to its sandboxed stderr is then parsed and
    merged into ``self._counters[step_num]``.

    When a mapper step also defines a combiner, the mapper's output is
    buffered in memory, sorted line by line, and fed as stdin to a
    recursive call that runs the combiner and writes to *output_path*.

    :param step_num: index of the step; also indexes ``self._counters``
    :param step_type: ``"mapper"``, ``"reducer"`` or ``"combiner"``
    :param input_path: path passed to the task as its input argument
                       (combiners are passed ``"-"`` instead)
    :param output_path: file that receives the task's output
    :param working_dir: directory to change into while the task runs
    :param env: mapping merged into ``os.environ`` while the task runs
    :param child_stdin: optional stream used as the task's stdin
    """
    step = self._get_step(step_num)

    # Passing local=False ensures the job uses proper names for file
    # options (see issue #851 on github)
    common_args = (["--step-num=%d" % step_num] +
                   self._mr_job_extra_args(local=False))

    if step_type == "mapper":
        child_args = ["--mapper", input_path] + common_args
    elif step_type == "reducer":
        child_args = ["--reducer", input_path] + common_args
    elif step_type == "combiner":
        child_args = ["--combiner"] + common_args + ["-"]

    has_combiner = step_type == "mapper" and "combiner" in step

    # Use custom stdout: an in-memory buffer when the output still has to
    # be sorted for a combiner, the real output file otherwise.
    if has_combiner:
        child_stdout = BytesIO()
    else:
        child_stdout = open(output_path, "wb")

    # The stream is closed in ``finally`` so the output file handle is
    # released even when the child task raises.
    try:
        with save_current_environment():
            with save_cwd():
                os.environ.update(env)
                os.chdir(working_dir)

                child_instance = self._mrjob_cls(args=child_args)
                child_instance.sandbox(stdin=child_stdin,
                                       stdout=child_stdout)
                child_instance.execute()

        if has_combiner:
            # must be read before the buffer is closed below
            sorted_lines = sorted(child_stdout.getvalue().splitlines())
            combiner_stdin = BytesIO(b"\n".join(sorted_lines))
        else:
            child_stdout.flush()
    finally:
        child_stdout.close()

    # make sure there is a counters dict for this step before merging
    while len(self._counters) <= step_num:
        self._counters.append({})
    parse_mr_job_stderr(child_instance.stderr.getvalue(),
                        counters=self._counters[step_num])

    if has_combiner:
        self._run_step(step_num, "combiner", None, output_path,
                       working_dir, env, child_stdin=combiner_stdin)
        combiner_stdin.close()
开发者ID:senseb,项目名称:mrjob,代码行数:47,代码来源:inline.py
示例9: test_commas_in_counters
def test_commas_in_counters(self):
    # commas in group or counter names should come back as semicolons
    job = MRJob().sandbox()
    job.increment_counter("Bad items", "a, b, c")
    job.increment_counter("girl, interrupted", "movie")

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    expected_counters = {
        "Bad items": {"a; b; c": 1},
        "girl; interrupted": {"movie": 1},
    }
    self.assertEqual(parsed_stderr["counters"], expected_counters)
开发者ID:davidmarin,项目名称:mrjob,代码行数:9,代码来源:test_job.py
示例10: test_negative_and_zero_counters
def test_negative_and_zero_counters(self):
    job = MRJob().sandbox()

    # (group, counter, amount) increments; 'Baz' goes up by the default
    # amount and then back down by one, so it should end at zero
    job.increment_counter("Foo", "Bar", -1)
    job.increment_counter("Foo", "Baz")
    job.increment_counter("Foo", "Baz", -1)
    job.increment_counter("Qux", "Quux", 0)

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    expected_counters = {
        "Foo": {"Bar": -1, "Baz": 0},
        "Qux": {"Quux": 0},
    }
    self.assertEqual(parsed_stderr["counters"], expected_counters)
开发者ID:davidmarin,项目名称:mrjob,代码行数:10,代码来源:test_job.py
示例11: test_negative_and_zero_counters
def test_negative_and_zero_counters(self):
    job = MRJob().sandbox()

    # (group, counter, amount) increments; 'Baz' goes up by the default
    # amount and then back down by one, so it should end at zero
    job.increment_counter('Foo', 'Bar', -1)
    job.increment_counter('Foo', 'Baz')
    job.increment_counter('Foo', 'Baz', -1)
    job.increment_counter('Qux', 'Quux', 0)

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    expected_counters = {
        'Foo': {'Bar': -1, 'Baz': 0},
        'Qux': {'Quux': 0},
    }
    self.assertEqual(parsed_stderr['counters'], expected_counters)
开发者ID:okomestudio,项目名称:mrjob,代码行数:11,代码来源:test_job.py
示例12: test_commas_in_counters
def test_commas_in_counters(self):
    # commas in group or counter names should come back as semicolons
    job = MRJob().sandbox()
    job.increment_counter('Bad items', 'a, b, c')
    job.increment_counter('girl, interrupted', 'movie')

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    expected_counters = {
        'Bad items': {'a; b; c': 1},
        'girl; interrupted': {'movie': 1},
    }
    self.assertEqual(parsed_stderr['counters'], expected_counters)
开发者ID:okomestudio,项目名称:mrjob,代码行数:11,代码来源:test_job.py
示例13: test_garbled_counters
def test_garbled_counters(self):
    # Malformed counter/status lines must not raise or be counted; the
    # parser is expected to hand every one of them back under 'other'.
    BAD_LINES = [
        # too many items
        'reporter:counter:Foo,Bar,Baz,1\n',
        # too few items
        'reporter:counter:Foo,1\n',
        # not a number
        'reporter:counter:Foo,Bar,a million\n',
        # not an int
        'reporter:counter:Foo,Bar,1.0\n',
        # not a valid reporter
        'reporter:crounter:Foo,Bar,1\n',
        # wrong format!
        'reporter,counter:Foo,Bar,1\n',
    ]

    parsed = parse_mr_job_stderr(BAD_LINES)

    expected = {'counters': {}, 'statuses': [], 'other': BAD_LINES}
    self.assertEqual(parsed, expected)
开发者ID:Asana,项目名称:mrjob,代码行数:14,代码来源:test_parse.py
示例14: test_parsing
def test_parsing(self):
    # a mix of counter lines, repeated status lines and one ordinary
    # stderr line
    stderr_lines = [
        b'reporter:counter:Foo,Bar,2\n',
        b'reporter:status:Baz\n',
        b'reporter:status:Baz\n',
        b'reporter:counter:Foo,Bar,1\n',
        b'reporter:counter:Foo,Baz,1\n',
        b'reporter:counter:Quux Subsystem,Baz,42\n',
        b'Warning: deprecated metasyntactic variable: garply\n',
    ]
    INPUT = BytesIO(b''.join(stderr_lines))

    parsed = parse_mr_job_stderr(INPUT)

    # counters are summed per (group, counter); duplicate statuses are
    # kept; the unrecognised line is returned untouched
    expected = {
        'counters': {
            'Foo': {'Bar': 3, 'Baz': 1},
            'Quux Subsystem': {'Baz': 42},
        },
        'statuses': ['Baz', 'Baz'],
        'other': ['Warning: deprecated metasyntactic variable: garply\n'],
    }
    self.assertEqual(parsed, expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:17,代码来源:test_parse.py
示例15: test_counters_and_status
def test_counters_and_status(self):
    job = MRJob().sandbox()

    # interleave counter increments with status updates
    job.increment_counter("Foo", "Bar")
    job.set_status("Initializing qux gradients...")
    job.increment_counter("Foo", "Bar")
    job.increment_counter("Foo", "Baz", 20)
    job.set_status("Sorting metasyntactic variables...")

    parsed_stderr = parse_mr_job_stderr(job.stderr.getvalue())

    # increments to the same counter are summed; statuses keep their order
    expected = {
        "counters": {"Foo": {"Bar": 2, "Baz": 20}},
        "statuses": [
            "Initializing qux gradients...",
            "Sorting metasyntactic variables...",
        ],
        "other": [],
    }
    self.assertEqual(parsed_stderr, expected)
开发者ID:davidmarin,项目名称:mrjob,代码行数:19,代码来源:test_job.py
示例16: parse_counters
def parse_counters(self, counters=None):
    """.. deprecated:: 0.4.2

    Return the counters found in this sandboxed job's ``self.stderr``.

    Superseded by :py:func:`mrjob.parse.parse_mr_job_stderr`.

    This was only useful for testing individual mappers/reducers
    without a runner; normally you'd just use
    :py:meth:`runner.counters() <mrjob.runner.MRJobRunner.counters()>`.

    :param counters: optional value passed straight through as the
                     second argument of ``parse_mr_job_stderr()``
    :return: the ``'counters'`` entry of the parsed result
    :raises AssertionError: if ``self.stderr`` is still ``sys.stderr``
    """
    if self.stderr == sys.stderr:
        message = ('You must call sandbox() first;'
                   ' parse_counters() is for testing only.')
        raise AssertionError(message)

    log.warning(
        'parse_counters() is deprecated and will be removed in v0.5.0')

    parsed = parse_mr_job_stderr(self.stderr.getvalue(), counters)
    return parsed['counters']
开发者ID:PythonCharmers,项目名称:mrjob,代码行数:19,代码来源:job.py
示例17: _process_stderr_from_script
def _process_stderr_from_script(self, stderr):
    """Consume *stderr* one line at a time, handling each line as read.

    - counter lines are accumulated into ``self._counters``
    - status lines are logged at INFO level
    - every other line is logged as an error and yielded to the caller
    """
    for raw_line in stderr:
        # Hand parse_mr_job_stderr() a single line per call so status
        # and error messages can be reported in realtime.
        result = parse_mr_job_stderr([raw_line], counters=self._counters)

        # In practice at most one of these lists holds a single entry,
        # but looping over both keeps the code free of special cases.
        for status in result['statuses']:
            log.info('status: %s' % status)

        for other_line in result['other']:
            log.error('STDERR: %s' % other_line.rstrip('\n'))
            yield other_line
开发者ID:boursier,项目名称:mrjob,代码行数:20,代码来源:local.py
示例18: test_empty
def test_empty(self):
    # an empty stream yields an empty result of the usual shape
    parsed = parse_mr_job_stderr(StringIO())

    expected = {'counters': {}, 'statuses': [], 'other': []}
    assert_equal(parsed, expected)
开发者ID:gimlids,项目名称:LTPM,代码行数:3,代码来源:parse_test.py
示例19: test_empty
def test_empty(self):
    # an empty stream yields an empty result of the usual shape
    parsed = parse_mr_job_stderr(BytesIO())

    expected = {'counters': {}, 'statuses': [], 'other': []}
    self.assertEqual(parsed, expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:3,代码来源:test_parse.py
示例20: test_read_multiple_lines_from_buffer
def test_read_multiple_lines_from_buffer(self):
    # a raw bytes buffer (not a file object or a list of lines) holding
    # two lines: one counter line and one ordinary line
    raw_stderr = b'reporter:counter:Foo,Bar,2\nwoot\n'

    parsed = parse_mr_job_stderr(raw_stderr)

    expected = {
        'counters': {'Foo': {'Bar': 2}},
        'statuses': [],
        'other': ['woot\n'],
    }
    self.assertEqual(parsed, expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:5,代码来源:test_parse.py
注：本文中的 mrjob.parse.parse_mr_job_stderr 函数示例由纯净天空整理自 GitHub/MSDocs 等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。
请发表评论