本文整理汇总了Python中mrjob.conf.combine_dicts函数的典型用法代码示例。如果您正苦于以下问题:Python combine_dicts函数的具体用法?Python combine_dicts怎么用?Python combine_dicts使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了combine_dicts函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: default_options
def default_options(self):
    """Return this option store's default option values.

    Layers Hadoop-specific defaults over the parent store's defaults.
    """
    parent_defaults = super(HadoopRunnerOptionStore, self).default_options()
    hadoop_defaults = {
        'hadoop_home': os.environ.get('HADOOP_HOME'),
        'hdfs_scratch_dir': 'tmp/mrjob',
        'check_input_paths': True,
    }
    return combine_dicts(parent_defaults, hadoop_defaults)
开发者ID:civitaslearning,项目名称:mrjob,代码行数:7,代码来源:hadoop.py
示例2: _default_opts
def _default_opts(self):
    """Default options: enable log reading on top of the parent defaults."""
    parent_defaults = super(MRJobBinRunner, self)._default_opts()
    return combine_dicts(parent_defaults, {'read_logs': True})
开发者ID:Affirm,项目名称:mrjob,代码行数:7,代码来源:bin.py
示例3: _default_opts
def _default_opts(self):
    """Default options: set the shell binary on top of the parent defaults."""
    parent_defaults = super(MRJobBinRunner, self)._default_opts()
    return combine_dicts(parent_defaults, {'sh_bin': ['sh', '-ex']})
开发者ID:okomestudio,项目名称:mrjob,代码行数:7,代码来源:bin.py
示例4: _default_opts
def _default_opts(self):
    """Default options: add the Hadoop temp dir to the parent defaults."""
    parent_defaults = super(HadoopJobRunner, self)._default_opts()
    return combine_dicts(parent_defaults, {'hadoop_tmp_dir': 'tmp/mrjob'})
开发者ID:Yelp,项目名称:mrjob,代码行数:7,代码来源:hadoop.py
示例5: jobconf
def jobconf(self):
    """Layer shard-count and split-file settings over the parent jobconf."""
    overrides = {
        'mapreduce.job.reduces': self.options.shards,
        'mapreduce.totalorderpartitioner.path': self.options.splitfile,
    }
    return combine_dicts(super(ZipNumClusterJob, self).jobconf(), overrides)
开发者ID:mrt,项目名称:webarchive-indexing,代码行数:7,代码来源:zipnumclusterjob.py
示例6: _add_runner_args_for_opt
def _add_runner_args_for_opt(parser, opt_name, include_deprecated=True):
    """Add switches for a single option (*opt_name*) to the given parser."""
    conf = _RUNNER_OPTS[opt_name]

    if conf.get('deprecated') and not include_deprecated:
        return

    for args, kwargs in (conf.get('switches') or []):
        kwargs = dict(kwargs)
        aliases = kwargs.pop('deprecated_aliases', None)

        kwargs['dest'] = opt_name
        # append-type switches accumulate into a list; everything else
        # defaults to None so unset switches are detectable
        kwargs['default'] = [] if kwargs.get('action') == 'append' else None

        parser.add_argument(*args, **kwargs)

        # also register switches for any deprecated aliases
        if aliases and include_deprecated:
            alias_help = 'Deprecated alias%s for %s' % (
                'es' if len(aliases) > 1 else '', args[-1])
            parser.add_argument(
                *aliases,
                **combine_dicts(kwargs, dict(help=alias_help)))
开发者ID:okomestudio,项目名称:mrjob,代码行数:31,代码来源:options.py
示例7: _default_opts
def _default_opts(self):
    """Default options: add the cloud part size to the parent defaults."""
    spark_defaults = {'cloud_part_size_mb': _DEFAULT_CLOUD_PART_SIZE_MB}
    return combine_dicts(
        super(SparkMRJobRunner, self)._default_opts(), spark_defaults)
开发者ID:Yelp,项目名称:mrjob,代码行数:7,代码来源:runner.py
示例8: _opts_combiners
def _opts_combiners(cls):
    """Map from option name to a combine_*() function used to combine
    values for that option. This allows us to specify that some options
    are lists, or contain environment variables, or whatever."""
    # these three options hold filesystem paths, so merge them as paths
    path_combiners = {
        "hadoop_bin": combine_paths,
        "hadoop_home": combine_paths,
        "hdfs_scratch_dir": combine_paths,
    }
    return combine_dicts(
        super(HadoopJobRunner, cls)._opts_combiners(), path_combiners)
开发者ID:sspencer,项目名称:mrjob,代码行数:8,代码来源:hadoop.py
示例9: test_hadoop_1
def test_hadoop_1(self):
    """Translating JOBCONF for Hadoop 1.0 should add the legacy key
    and warn about the mismatched names."""
    updated, warnings = self.updated_and_warnings(self.JOBCONF, '1.0')

    expected = combine_dicts(self.JOBCONF, {'user.name': 'dave'})
    self.assertEqual(updated, expected)

    self.assertIn('do not match hadoop version', warnings)
    self.assertIn('mapreduce.job.user.name: user.name', warnings)
开发者ID:Milkigit,项目名称:mrjob,代码行数:8,代码来源:test_runner.py
示例10: paginate
def paginate(self, **kwargs):
    """Call the wrapped method once and yield its result page by page.

    Each yielded dict is a copy of the full result with *result_key*
    replaced by one page-sized slice of the original values.
    """
    full_result = self.method(**kwargs)
    all_values = full_result[self.result_key]

    for start in range(0, len(all_values), self.page_size):
        chunk = all_values[start:start + self.page_size]
        yield combine_dicts(full_result, {self.result_key: chunk})
开发者ID:Affirm,项目名称:mrjob,代码行数:8,代码来源:util.py
示例11: _default_opts
def _default_opts(self):
    """Default options: Hadoop temp dir and Spark settings layered over
    the parent runner's defaults."""
    hadoop_defaults = {
        'hadoop_tmp_dir': 'tmp/mrjob',
        'spark_deploy_mode': 'client',
        'spark_master': 'yarn',
    }
    return combine_dicts(
        super(HadoopJobRunner, self)._default_opts(), hadoop_defaults)
开发者ID:Affirm,项目名称:mrjob,代码行数:9,代码来源:hadoop.py
示例12: test_hadoop_2
def test_hadoop_2(self):
    """Translating JOBCONF for Hadoop 2.0 should add the new-style key
    and warn about the mismatched names."""
    updated, warnings = self.updated_and_warnings(self.JOBCONF, '2.0')

    expected = combine_dicts(self.JOBCONF, {'mapreduce.job.jar': 'a.jar'})
    self.assertEqual(updated, expected)

    self.assertIn('do not match hadoop version', warnings)
    self.assertIn('mapred.jar: mapreduce.job.jar', warnings)
开发者ID:Milkigit,项目名称:mrjob,代码行数:9,代码来源:test_runner.py
示例13: _job_runner_kwargs_for_runner
def _job_runner_kwargs_for_runner(self, runner_alias):
    """Helper method that powers the *_job_runner_kwargs()
    methods."""
    # user can no longer silently ignore switches by overriding
    # job_runner_kwargs()
    switch_kwargs = self._kwargs_from_switches(_allowed_keys(runner_alias))
    return combine_dicts(switch_kwargs, self.job_runner_kwargs())
开发者ID:davidmarin,项目名称:mrjob,代码行数:9,代码来源:launch.py
示例14: _runner_kwargs
def _runner_kwargs(self):
    """Assemble the keyword args used to construct the runner."""
    # just use combine_dicts() and not combine_confs(); leave the
    # magic to the runner
    base_kwargs = self._non_option_kwargs()
    # don't screen out irrelevant opts (see #1898)
    switch_kwargs = self._kwargs_from_switches(set(_RUNNER_OPTS))
    return combine_dicts(base_kwargs, switch_kwargs, self._job_kwargs())
开发者ID:Yelp,项目名称:mrjob,代码行数:9,代码来源:launch.py
示例15: test_can_override_sort_values_from_job
def test_can_override_sort_values_from_job(self):
    """A job's own JOBCONF should layer over the SORT_VALUES defaults."""
    job = MRSortValuesAndMore()

    self.assertEqual(
        job.partitioner(),
        'org.apache.hadoop.mapred.lib.HashPartitioner')

    expected = combine_dicts(
        _SORT_VALUES_JOBCONF, MRSortValuesAndMore.JOBCONF)
    self.assertEqual(job.jobconf(), expected)
开发者ID:Dean838,项目名称:mrjob,代码行数:10,代码来源:test_job.py
示例16: _jobconf_for_step
def _jobconf_for_step(self, step_num):
    """Get the jobconf dictionary, optionally including step-specific
    jobconf info.

    Also translate jobconfs to the current Hadoop version, if necessary.
    """
    step_jobconf = self._get_step(step_num).get("jobconf")
    merged = combine_dicts(self._opts["jobconf"], step_jobconf)
    return add_translated_jobconf_for_hadoop_version(
        merged, self.get_hadoop_version())
开发者ID:swiftserve,项目名称:mrjob,代码行数:10,代码来源:runner.py
示例17: hadoop_job_runner_kwargs
def hadoop_job_runner_kwargs(self):
    """Keyword arguments used to create runners when
    :py:meth:`make_runner` is called with ``-r hadoop``.

    :return: map from arg name to value

    Re-define this if you want finer control when running jobs on hadoop.
    """
    hadoop_kwargs = self._get_kwargs_from_opt_group(self.hadoop_opt_group)
    return combine_dicts(self.job_runner_kwargs(), hadoop_kwargs)
开发者ID:senseb,项目名称:mrjob,代码行数:10,代码来源:launch.py
示例18: _hadoop_conf_args
def _hadoop_conf_args(self, step, step_num, num_steps):
    """Build a list of extra arguments to the hadoop binary.

    This handles *cmdenv*, *hadoop_extra_args*, *hadoop_input_format*,
    *hadoop_output_format*, *jobconf*, and *partitioner*.

    This doesn't handle input, output, mappers, reducers, or uploading
    files.

    :param step: the step description dict (may carry its own 'jobconf')
    :param step_num: 0-based index of this step
    :param num_steps: total number of steps in the job
    :return: list of argument strings for the hadoop binary
    """
    assert 0 <= step_num < num_steps

    args = []

    # step-level jobconf layers over the runner-level jobconf
    jobconf = combine_dicts(self._opts['jobconf'], step.get('jobconf'))

    # hadoop_extra_args
    args.extend(self._opts['hadoop_extra_args'])

    # translate the jobconf configuration names to match
    # the hadoop version
    version = self.get_hadoop_version()
    jobconf = add_translated_jobconf_for_hadoop_version(jobconf, version)

    # BUGFIX: use .items() rather than Python-2-only .iteritems(), which
    # raises AttributeError on Python 3; sorted() keeps the output
    # deterministic either way
    jobconf_switch = '-D' if uses_generic_jobconf(version) else '-jobconf'
    for key, value in sorted(jobconf.items()):
        if value is not None:
            args.extend([jobconf_switch, '%s=%s' % (key, value)])

    # partitioner
    if self._partitioner:
        args.extend(['-partitioner', self._partitioner])

    # cmdenv
    for key, value in sorted(self._opts['cmdenv'].items()):
        args.append('-cmdenv')
        args.append('%s=%s' % (key, value))

    # hadoop_input_format (only relevant for the first step)
    if (step_num == 0 and self._hadoop_input_format):
        args.extend(['-inputformat', self._hadoop_input_format])

    # hadoop_output_format (only relevant for the last step)
    if (step_num == num_steps - 1 and self._hadoop_output_format):
        args.extend(['-outputformat', self._hadoop_output_format])

    return args
开发者ID:bryankim220,项目名称:mrjob,代码行数:53,代码来源:runner.py
示例19: test_can_override_sort_values_from_cmd_line
def test_can_override_sort_values_from_cmd_line(self):
    """Command-line switches should layer over the SORT_VALUES defaults."""
    job = MRSortValues(
        ['--partitioner', 'org.pants.FancyPantsPartitioner',
         '--jobconf', 'stream.num.map.output.key.fields=lots'])

    self.assertEqual(
        job.partitioner(),
        'org.pants.FancyPantsPartitioner')

    expected = combine_dicts(
        _SORT_VALUES_JOBCONF,
        {'stream.num.map.output.key.fields': 'lots'})
    self.assertEqual(job.jobconf(), expected)
开发者ID:Dean838,项目名称:mrjob,代码行数:13,代码来源:test_job.py
示例20: _jobconf_for_step
def _jobconf_for_step(self, step_num):
    """Get the jobconf dictionary, optionally including step-specific
    jobconf info.

    Also translate jobconfs to the current Hadoop version, if necessary.
    """
    step = self._get_step(step_num)
    merged = combine_dicts(self._opts["jobconf"], step.get("jobconf"))

    # if user is using the wrong jobconfs, add in the correct ones
    self._update_jobconf_for_hadoop_version(
        merged, self.get_hadoop_version())

    return merged
开发者ID:irskep,项目名称:mrjob,代码行数:13,代码来源:runner.py
注:本文中的mrjob.conf.combine_dicts函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论