This article collects typical usage examples of the Python class mrjob.local.LocalMRJobRunner. If you have been wondering what LocalMRJobRunner is for and how to use it, the curated examples below should help.
Twenty code examples of the LocalMRJobRunner class are shown, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the site recommend better Python code examples.
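Before the test excerpts, here is a minimal, self-contained sketch (not one of the 20 examples below) of how LocalMRJobRunner is normally exercised. The tests construct the runner directly for convenience; in ordinary use you run an MRJob with -r local and mrjob instantiates the local runner for you.

from mrjob.job import MRJob

class MRWordCount(MRJob):
    """A minimal word-count job to drive the local runner."""

    def mapper(self, _, line):
        for word in line.split():
            yield word, 1

    def reducer(self, word, counts):
        yield word, sum(counts)

if __name__ == '__main__':
    # Invoking the script as
    #     python word_count.py -r local input.txt
    # makes mrjob create a LocalMRJobRunner behind the scenes.
    MRWordCount.run()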
Example 1: test_hadoop_output_format
def test_hadoop_output_format(self):
    format = "org.apache.hadoop.mapred.SequenceFileOutputFormat"
    runner = LocalMRJobRunner(conf_paths=[], hadoop_output_format=format)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ["-outputformat", format])
    # test multi-step job: -outputformat is only passed for the final step
    self.assertEqual(runner._hadoop_conf_args({}, 0, 2), [])
    self.assertEqual(runner._hadoop_conf_args({}, 1, 2),
                     ["-outputformat", format])
Author: pyzen | Project: mrjob | Lines: 7 | Source: test_runner.py
Example 2: test_empty_jobconf_values
def test_empty_jobconf_values(self):
    # a value of None means to omit that jobconf entirely; an empty
    # string is still passed through
    jobconf = {'foo': '', 'bar': None}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ['-D', 'foo='])
Author: bryankim220 | Project: mrjob | Lines: 7 | Source: test_runner.py
Example 3: test_get_file_splits_test
def test_get_file_splits_test(self):
    # set up input paths
    input_path = os.path.join(self.tmp_dir, "input")
    with open(input_path, "w") as input_file:
        input_file.write("bar\nqux\nfoo\nbar\nqux\nfoo\n")

    input_path2 = os.path.join(self.tmp_dir, "input2")
    with open(input_path2, "wb") as input_file:
        input_file.write(b"foo\nbar\nbar\n")

    runner = LocalMRJobRunner(conf_paths=[])

    # split into 3 files
    file_splits = runner._get_file_splits([input_path, input_path2], 3)

    # make sure we get 3 files
    self.assertEqual(len(file_splits), 3)

    # make sure all the data is preserved
    content = []
    for file_name in file_splits:
        with open(file_name, "rb") as f:
            content.extend(f.readlines())

    self.assertEqual(
        sorted(content),
        [b"bar\n", b"bar\n", b"bar\n", b"bar\n", b"foo\n",
         b"foo\n", b"foo\n", b"qux\n", b"qux\n"]
    )
Author: alanhdu | Project: mrjob | Lines: 27 | Source: test_local.py
Example 4: test_jobconf_from_step
def test_jobconf_from_step(self):
    jobconf = {"FOO": "bar", "BAZ": "qux"}
    # Hack in steps rather than creating a new MRJob subclass
    runner = LocalMRJobRunner(jobconf=jobconf)
    runner._steps = [{"jobconf": {"BAZ": "quux", "BAX": "Arnold"}}]
    # step-level jobconf overrides the runner-level value for BAZ
    self.assertEqual(runner._hadoop_args_for_step(0),
                     ["-D", "BAX=Arnold", "-D", "BAZ=quux", "-D", "FOO=bar"])
Author: irskep | Project: mrjob | Lines: 7 | Source: test_runner.py
Example 5: test_owner_and_label_kwargs
def test_owner_and_label_kwargs(self):
    runner = LocalMRJobRunner(conf_path=False,
                              owner='ads', label='ads_chain')
    match = JOB_NAME_RE.match(runner.get_job_name())
    assert_equal(match.group(1), 'ads_chain')
    assert_equal(match.group(2), 'ads')
Author: chomp | Project: mrjob | Lines: 7 | Source: runner_test.py
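JOB_NAME_RE, used in Examples 5, 7, and 8, is defined in the test module and is not part of these excerpts. A hedged sketch of what it plausibly matches (the exact pattern here is an assumption, inferred from the groups the tests check):

import re

# Assumption: mrjob builds job names as
#   <label>.<owner>.<YYYYMMDD>.<HHMMSS>.<microseconds>
# so group(1) is the label and group(2) the owner.
JOB_NAME_RE = re.compile(r'^(.*)\.(.*)\.(\d+)\.(\d+)\.(\d+)$')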
Example 6: test_get_file_splits_sorted_test
def test_get_file_splits_sorted_test(self):
    # set up input paths
    input_path = os.path.join(self.tmp_dir, "input")
    with open(input_path, "wb") as input_file:
        input_file.write(
            b"1\tbar\n1\tbar\n1\tbar\n2\tfoo\n2\tfoo\n2\tfoo\n3\tqux\n"
            b"3\tqux\n3\tqux\n")

    runner = LocalMRJobRunner(conf_paths=[])
    file_splits = runner._get_file_splits([input_path], 3, keep_sorted=True)

    # make sure we get 3 files
    self.assertEqual(len(file_splits), 3)

    # make sure all the data is preserved in sorted order
    content = []
    for file_name in sorted(file_splits.keys()):
        with open(file_name, "rb") as f:
            content.extend(f.readlines())

    self.assertEqual(
        content,
        [
            b"1\tbar\n",
            b"1\tbar\n",
            b"1\tbar\n",
            b"2\tfoo\n",
            b"2\tfoo\n",
            b"2\tfoo\n",
            b"3\tqux\n",
            b"3\tqux\n",
            b"3\tqux\n",
        ],
    )
Author: alanhdu | Project: mrjob | Lines: 33 | Source: test_local.py
Example 7: test_empty_no_user
def test_empty_no_user(self):
    self.getuser_should_fail = True
    runner = LocalMRJobRunner(conf_path=False)
    match = JOB_NAME_RE.match(runner.get_job_name())
    assert_equal(match.group(1), 'no_script')
    assert_equal(match.group(2), 'no_user')
Author: chomp | Project: mrjob | Lines: 7 | Source: runner_test.py
Example 8: test_auto_owner
def test_auto_owner(self):
    os.environ['USER'] = 'mcp'
    runner = LocalMRJobRunner(conf_path=False)
    match = JOB_NAME_RE.match(runner.get_job_name())
    assert_equal(match.group(1), 'no_script')
    assert_equal(match.group(2), 'mcp')
Author: chomp | Project: mrjob | Lines: 7 | Source: runner_test.py
Example 9: test_get_file_splits_test
def test_get_file_splits_test(self):
    # set up input paths
    input_path = os.path.join(self.tmp_dir, 'input')
    with open(input_path, 'w') as input_file:
        input_file.write('bar\nqux\nfoo\nbar\nqux\nfoo\n')

    input_path2 = os.path.join(self.tmp_dir, 'input2')
    with open(input_path2, 'w') as input_file:
        input_file.write('foo\nbar\nbar\n')

    runner = LocalMRJobRunner(conf_paths=[])

    # split into 3 files
    file_splits = runner._get_file_splits([input_path, input_path2], 3)

    # make sure we get 3 files
    self.assertEqual(len(file_splits), 3)

    # make sure all the data is preserved
    content = []
    for file_name in file_splits:
        with open(file_name) as f:
            content.extend(f.readlines())

    self.assertEqual(sorted(content),
                     ['bar\n', 'bar\n', 'bar\n', 'bar\n', 'foo\n',
                      'foo\n', 'foo\n', 'qux\n', 'qux\n'])
Author: eklitzke | Project: mrjob | Lines: 27 | Source: test_local.py
Example 10: test_get_file_splits_sorted_test
def test_get_file_splits_sorted_test(self):
    # set up input paths
    input_path = os.path.join(self.tmp_dir, 'input')
    with open(input_path, 'w') as input_file:
        input_file.write(
            '1\tbar\n1\tbar\n1\tbar\n2\tfoo\n2\tfoo\n2\tfoo\n3\tqux\n'
            '3\tqux\n3\tqux\n')

    runner = LocalMRJobRunner(conf_paths=[])
    file_splits = runner._get_file_splits([input_path], 3,
                                          keep_sorted=True)

    # make sure we get 3 files
    self.assertEqual(len(file_splits), 3)

    # make sure all the data is preserved in sorted order
    content = []
    for file_name in sorted(file_splits.keys()):
        with open(file_name, 'r') as f:
            content.extend(f.readlines())

    self.assertEqual(content,
                     ['1\tbar\n', '1\tbar\n', '1\tbar\n',
                      '2\tfoo\n', '2\tfoo\n', '2\tfoo\n',
                      '3\tqux\n', '3\tqux\n', '3\tqux\n'])
Author: eklitzke | Project: mrjob | Lines: 26 | Source: test_local.py
Example 11: test_stream_output
def test_stream_output(self):
    a_dir_path = os.path.join(self.tmp_dir, 'a')
    b_dir_path = os.path.join(self.tmp_dir, 'b')
    l_dir_path = os.path.join(self.tmp_dir, '_logs')
    os.mkdir(a_dir_path)
    os.mkdir(b_dir_path)
    os.mkdir(l_dir_path)

    a_file_path = os.path.join(a_dir_path, 'part-00000')
    b_file_path = os.path.join(b_dir_path, 'part-00001')
    c_file_path = os.path.join(self.tmp_dir, 'part-00002')
    x_file_path = os.path.join(l_dir_path, 'log.xml')
    y_file_path = os.path.join(self.tmp_dir, '_SUCCESS')

    with open(a_file_path, 'w') as f:
        f.write('A')
    with open(b_file_path, 'w') as f:
        f.write('B')
    with open(c_file_path, 'w') as f:
        f.write('C')
    with open(x_file_path, 'w') as f:
        f.write('<XML XML XML/>')
    with open(y_file_path, 'w') as f:
        f.write('I win')

    runner = LocalMRJobRunner()
    runner._output_dir = self.tmp_dir

    # output should come from the part-* files only; the _logs dir and
    # the _SUCCESS marker are skipped
    assert_equal(sorted(runner.stream_output()),
                 ['A', 'B', 'C'])
Author: gimlids | Project: LTPM | Lines: 33 | Source: runner_test.py
Example 12: _test_spark_executor_memory
def _test_spark_executor_memory(self, conf_value, megs):
    runner = LocalMRJobRunner(
        jobconf={'spark.executor.memory': conf_value})
    self.assertEqual(runner._spark_master(),
                     'local-cluster[%d,1,%d]' % (
                         cpu_count(), megs))
Author: Affirm | Project: mrjob | Lines: 7 | Source: test_local.py
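The helper above takes a raw spark.executor.memory value and the megabyte count expected in the local-cluster master string. The tests that call it are not part of this excerpt; a hedged sketch of how they might look (the specific suffix-to-megabyte conversions are assumptions):

def test_spark_executor_memory_megs(self):
    # assumes mrjob parses JVM-style suffixes, so '512m' -> 512 megs
    self._test_spark_executor_memory('512m', 512)

def test_spark_executor_memory_gigs(self):
    # assumes '2g' is converted to 2048 megs
    self._test_spark_executor_memory('2g', 2048)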
Example 13: test_partitioner
def test_partitioner(self):
    partitioner = 'org.apache.hadoop.mapreduce.Partitioner'
    runner = LocalMRJobRunner(conf_paths=[], partitioner=partitioner)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ['-D', 'mapred.job.name=None > None',
                      '-partitioner', partitioner,
                      ])
Author: duedil-ltd | Project: mrjob | Lines: 8 | Source: test_runner.py
Example 14: test_jobconf
def test_jobconf(self):
    jobconf = {"FOO": "bar", "BAZ": "qux", "BAX": "Arnold"}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ["-D", "BAX=Arnold", "-D", "BAZ=qux", "-D", "FOO=bar"])

    # with hadoop_version 0.18, mrjob emits -jobconf instead of -D
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                              hadoop_version="0.18")
    self.assertEqual(
        runner._hadoop_conf_args({}, 0, 1),
        ["-jobconf", "BAX=Arnold", "-jobconf", "BAZ=qux",
         "-jobconf", "FOO=bar"]
    )
Author: pyzen | Project: mrjob | Lines: 8 | Source: test_runner.py
Example 15: test_cmdenv
def test_cmdenv(self):
    cmdenv = {'FOO': 'bar', 'BAZ': 'qux', 'BAX': 'Arnold'}
    runner = LocalMRJobRunner(conf_paths=[], cmdenv=cmdenv)
    self.assertEqual(runner._hadoop_conf_args(0, 1),
                     ['-cmdenv', 'BAX=Arnold',
                      '-cmdenv', 'BAZ=qux',
                      '-cmdenv', 'FOO=bar',
                      ])
Author: eklitzke | Project: mrjob | Lines: 8 | Source: test_local.py
Example 16: test_command_streaming_step_without_mr_job_script
def test_command_streaming_step_without_mr_job_script(self):
    # you don't need a script to run commands
    steps = MRCmdJob(['--mapper-cmd', 'cat'])._steps_desc()

    runner = LocalMRJobRunner(steps=steps, stdin=BytesIO(b'dog\n'))
    runner.run()
    runner.cleanup()
Author: Affirm | Project: mrjob | Lines: 8 | Source: test_runner.py
Example 17: test_jobconf_job_name_custom
def test_jobconf_job_name_custom(self):
    jobconf = {'BAX': 'Arnold', 'mapred.job.name': 'Foo'}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                              hadoop_version='0.18')
    self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                     ['-jobconf', 'BAX=Arnold',
                      '-jobconf', 'mapred.job.name=Foo'
                      ])
Author: duedil-ltd | Project: mrjob | Lines: 8 | Source: test_runner.py
Example 18: test_environment_variables_018
def test_environment_variables_018(self):
    runner = LocalMRJobRunner(hadoop_version='0.18', conf_paths=[])
    # clean up after we're done. On Windows, job names are only unique
    # to the millisecond, so these two tests end up trying to create
    # the same temp dir
    with runner as runner:
        runner._setup_working_dir()
        self.assertIn('mapred_cache_localArchives',
                      runner._subprocess_env('mapper', 0, 0).keys())
Author: eklitzke | Project: mrjob | Lines: 9 | Source: test_local.py
Example 19: test_configuration_translation
def test_configuration_translation(self):
    jobconf = {'mapred.jobtracker.maxtasks.per.job': 1}
    with no_handlers_for_logger('mrjob.compat'):
        runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf,
                                  hadoop_version='0.21')
        # the pre-0.21 key is translated to its 0.21 name; both the
        # original and translated options are emitted
        self.assertEqual(runner._hadoop_conf_args({}, 0, 1),
                         ['-D', 'mapred.jobtracker.maxtasks.per.job=1',
                          '-D', 'mapreduce.jobtracker.maxtasks.perjob=1'
                          ])
Author: Anihc | Project: mrjob | Lines: 9 | Source: test_runner.py
Example 20: test_jobconf_from_step
def test_jobconf_from_step(self):
    jobconf = {'FOO': 'bar', 'BAZ': 'qux'}
    runner = LocalMRJobRunner(conf_paths=[], jobconf=jobconf)
    step = {'jobconf': {'BAZ': 'quux', 'BAX': 'Arnold'}}
    self.assertEqual(runner._hadoop_conf_args(step, 0, 1),
                     ['-D', 'BAX=Arnold',
                      '-D', 'BAZ=quux',
                      '-D', 'FOO=bar',
                      ])
Author: Anihc | Project: mrjob | Lines: 9 | Source: test_runner.py
Note: The mrjob.local.LocalMRJobRunner class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by many developers, and copyright remains with the original authors; consult each project's license before distributing or reusing the code. Do not reproduce this compilation without permission.