本文整理汇总了Python中mrjob.conf.load_opts_from_mrjob_conf函数的典型用法代码示例。如果您正苦于以下问题:Python load_opts_from_mrjob_conf函数的具体用法?Python load_opts_from_mrjob_conf怎么用?Python load_opts_from_mrjob_conf使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了load_opts_from_mrjob_conf函数的18个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_include_relative_to_real_path
def test_include_relative_to_real_path(self):
    """A relative 'include' path resolves against the real (symlink-free)
    directory of the including config file."""
    conf_dir = os.path.join(self.tmp_dir, 'conf')
    os.mkdir(conf_dir)

    base_conf_path = os.path.join(conf_dir, 'mrjob.base.conf')
    real_base_conf_path = os.path.realpath(base_conf_path)
    conf_path = os.path.join(conf_dir, 'mrjob.conf')
    conf_symlink_path = os.path.join(self.tmp_dir, 'mrjob.conf')

    with open(base_conf_path, 'w') as base_f:
        dump_mrjob_conf({}, base_f)
    with open(conf_path, 'w') as conf_f:
        dump_mrjob_conf({'include': 'mrjob.base.conf'}, conf_f)
    os.symlink(os.path.join('conf', 'mrjob.conf'), conf_symlink_path)

    self.assertEqual(
        load_opts_from_mrjob_conf('foo', conf_path),
        [(real_base_conf_path, {}), (conf_path, {})])
    # relative include should work from the symlink even though
    # it's not in the same directory as mrjob.base.conf
    self.assertEqual(
        load_opts_from_mrjob_conf('foo', conf_symlink_path),
        [(real_base_conf_path, {}), (conf_symlink_path, {})])
开发者ID:Affirm,项目名称:mrjob,代码行数:26,代码来源:test_conf.py
示例2: test_load_mrjob_conf_and_load_opts
def test_load_mrjob_conf_and_load_opts(self):
    """load_mrjob_conf() returns the whole config dict; load_opts_from_mrjob_conf()
    yields just the named runner's section, empty for unknown runners."""
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf.2")
    with open(conf_path, "w") as f:
        f.write('{"runners": {"foo": {"qux": "quux"}}}')

    whole_conf = {"runners": {"foo": {"qux": "quux"}}}
    self.assertEqual(load_mrjob_conf(conf_path=conf_path), whole_conf)
    self.assertEqual(
        load_opts_from_mrjob_conf("foo", conf_path=conf_path)[0][1],
        whole_conf["runners"]["foo"])
    # test missing options (warning about the missing section is suppressed)
    with logger_disabled("mrjob.conf"):
        self.assertEqual(
            load_opts_from_mrjob_conf("bar", conf_path=conf_path)[0][1],
            {})
开发者ID:JeffersonK,项目名称:mrjob,代码行数:10,代码来源:test_conf.py
示例3: test_load_mrjob_conf_and_load_opts
def test_load_mrjob_conf_and_load_opts(self):
    """Older-API variant: load_opts_from_mrjob_conf() returns the runner
    opts dict directly (not a list of (path, opts) pairs)."""
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf.2')
    with open(conf_path, 'w') as conf_file:
        conf_file.write('{"runners": {"foo": {"qux": "quux"}}}')

    foo_opts = {'qux': 'quux'}
    assert_equal(load_mrjob_conf(conf_path=conf_path),
                 {'runners': {'foo': foo_opts}})
    assert_equal(load_opts_from_mrjob_conf('foo', conf_path=conf_path),
                 foo_opts)
    # test missing options
    with logger_disabled('mrjob.conf'):
        assert_equal(
            load_opts_from_mrjob_conf('bar', conf_path=conf_path), {})
开发者ID:gimlids,项目名称:LTPM,代码行数:13,代码来源:conf_test.py
示例4: test_nested_include
def test_nested_include(self):
    """Nested includes load depth-first: included files come before
    the files that include them."""
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    conf_path_1 = os.path.join(self.tmp_dir, 'mrjob.1.conf')
    conf_path_2 = os.path.join(self.tmp_dir, 'mrjob.2.conf')
    conf_path_3 = os.path.join(self.tmp_dir, 'mrjob.3.conf')
    # accidentally reversed the order of nested includes when
    # trying to make precedence work; this test would catch that
    path_to_conf = (
        (conf_path, {'include': conf_path_1}),
        (conf_path_1, {'include': [conf_path_2, conf_path_3]}),
        (conf_path_2, {}),
        (conf_path_3, {}),
    )
    for path, conf in path_to_conf:
        with open(path, 'w') as f:
            dump_mrjob_conf(conf, f)
    self.assertEqual(
        load_opts_from_mrjob_conf('foo', conf_path),
        [(conf_path_2, {}),
         (conf_path_3, {}),
         (conf_path_1, {}),
         (conf_path, {})])
开发者ID:Affirm,项目名称:mrjob,代码行数:27,代码来源:test_conf.py
示例5: test_recursive_include
def test_recursive_include(self):
    """A config that includes itself is loaded exactly once (no infinite loop)."""
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    with open(conf_path, 'w') as conf_file:
        dump_mrjob_conf({'include': conf_path}, conf_file)
    loaded = load_opts_from_mrjob_conf('foo', conf_path)
    self.assertEqual(loaded, [(conf_path, {})])
开发者ID:Affirm,项目名称:mrjob,代码行数:8,代码来源:test_conf.py
示例6: test_load_and_load_opts_use_find_mrjob_conf
def test_load_and_load_opts_use_find_mrjob_conf(self):
    """With no explicit conf_path, both loaders fall back to ~/.mrjob.conf."""
    os.environ["HOME"] = self.tmp_dir
    dot_mrjob_path = os.path.join(self.tmp_dir, ".mrjob.conf")
    with open(dot_mrjob_path, "w") as conf_file:
        conf_file.write('{"runners": {"foo": {"bar": "baz"}}}')

    foo_opts = {"bar": "baz"}
    self.assertEqual(load_mrjob_conf(), {"runners": {"foo": foo_opts}})
    self.assertEqual(load_opts_from_mrjob_conf("foo")[0][1], foo_opts)
开发者ID:JeffersonK,项目名称:mrjob,代码行数:9,代码来源:test_conf.py
示例7: test_load_and_load_opts_use_find_mrjob_conf
def test_load_and_load_opts_use_find_mrjob_conf(self):
    """Older-API variant: omitting conf_path makes both loaders discover
    ~/.mrjob.conf via $HOME."""
    os.environ['HOME'] = self.tmp_dir
    dot_mrjob_path = os.path.join(self.tmp_dir, '.mrjob.conf')
    with open(dot_mrjob_path, 'w') as conf_file:
        conf_file.write('{"runners": {"foo": {"bar": "baz"}}}')

    expected_runner_opts = {'bar': 'baz'}
    assert_equal(load_mrjob_conf(),
                 {'runners': {'foo': expected_runner_opts}})
    assert_equal(load_opts_from_mrjob_conf('foo'), expected_runner_opts)
开发者ID:gimlids,项目名称:LTPM,代码行数:10,代码来源:conf_test.py
示例8: test_doubly_recursive_include
def test_doubly_recursive_include(self):
    """Two configs that include each other each load once, included file first."""
    conf_path_1 = os.path.join(self.tmp_dir, "mrjob.1.conf")
    conf_path_2 = os.path.join(self.tmp_dir, "mrjob.2.conf")
    for path, other in ((conf_path_1, conf_path_2),
                        (conf_path_2, conf_path_1)):
        with open(path, "w") as f:
            dump_mrjob_conf({"include": other}, f)
    self.assertEqual(
        load_opts_from_mrjob_conf("foo", conf_path_1),
        [(conf_path_2, {}), (conf_path_1, {})])
开发者ID:kartheek6,项目名称:mrjob,代码行数:11,代码来源:test_conf.py
示例9: test_doubly_recursive_include
def test_doubly_recursive_include(self):
    """Mutually-including configs don't loop; each path appears exactly once."""
    conf_path_1 = os.path.join(self.tmp_dir, 'mrjob.1.conf')
    conf_path_2 = os.path.join(self.tmp_dir, 'mrjob.2.conf')
    with open(conf_path_1, 'w') as first:
        dump_mrjob_conf({'include': conf_path_2}, first)
    with open(conf_path_2, 'w') as second:
        dump_mrjob_conf({'include': conf_path_1}, second)
    expected = [(conf_path_2, {}), (conf_path_1, {})]
    self.assertEqual(load_opts_from_mrjob_conf('foo', conf_path_1), expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:13,代码来源:test_conf.py
示例10: test_relative_include
def test_relative_include(self):
    """A relative 'include' path resolves against the including file's
    (real) directory."""
    base_conf_path = os.path.join(self.tmp_dir, "mrjob.base.conf")
    real_base_conf_path = os.path.realpath(base_conf_path)
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf")
    with open(base_conf_path, "w") as base_f:
        dump_mrjob_conf({}, base_f)
    with open(conf_path, "w") as conf_f:
        dump_mrjob_conf({"include": "mrjob.base.conf"}, conf_f)
    self.assertEqual(
        load_opts_from_mrjob_conf("foo", conf_path),
        [(real_base_conf_path, {}), (conf_path, {})])
开发者ID:kartheek6,项目名称:mrjob,代码行数:13,代码来源:test_conf.py
示例11: test_relative_include
def test_relative_include(self):
    """Relative include names are looked up in the includer's directory."""
    base_conf_path = os.path.join(self.tmp_dir, 'mrjob.base.conf')
    real_base_conf_path = os.path.realpath(base_conf_path)
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    for path, conf in ((base_conf_path, {}),
                       (conf_path, {'include': 'mrjob.base.conf'})):
        with open(path, 'w') as f:
            dump_mrjob_conf(conf, f)
    expected = [(real_base_conf_path, {}), (conf_path, {})]
    self.assertEqual(load_opts_from_mrjob_conf('foo', conf_path), expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:15,代码来源:test_conf.py
示例12: test_include_relative_to_real_path
def test_include_relative_to_real_path(self):
    """Loading through a symlink still resolves a relative include against
    the real config file's directory."""
    conf_dir = os.path.join(self.tmp_dir, "conf")
    os.mkdir(conf_dir)
    base_conf_path = os.path.join(conf_dir, "mrjob.base.conf")
    real_base_conf_path = os.path.realpath(base_conf_path)
    conf_path = os.path.join(conf_dir, "mrjob.conf")
    conf_symlink_path = os.path.join(self.tmp_dir, "mrjob.conf")
    with open(base_conf_path, "w") as base_f:
        dump_mrjob_conf({}, base_f)
    with open(conf_path, "w") as conf_f:
        dump_mrjob_conf({"include": "mrjob.base.conf"}, conf_f)
    os.symlink(os.path.join("conf", "mrjob.conf"), conf_symlink_path)
    # relative include should work from the symlink even though
    # it's not in the same directory as mrjob.base.conf
    for load_path in (conf_path, conf_symlink_path):
        self.assertEqual(
            load_opts_from_mrjob_conf("foo", load_path),
            [(real_base_conf_path, {}), (load_path, {})])
开发者ID:kartheek6,项目名称:mrjob,代码行数:24,代码来源:test_conf.py
示例13: test_tilde_in_include
def test_tilde_in_include(self):
    """'~' in an include path expands via $HOME.

    Regression test for #1308.
    """
    os.environ['HOME'] = self.tmp_dir
    base_conf_path = os.path.join(self.tmp_dir, 'mrjob.base.conf')
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    with open(base_conf_path, 'w') as base_f:
        dump_mrjob_conf({}, base_f)
    with open(conf_path, 'w') as conf_f:
        dump_mrjob_conf({'include': '~/mrjob.base.conf'}, conf_f)
    loaded = load_opts_from_mrjob_conf('foo', conf_path)
    self.assertEqual(loaded, [(base_conf_path, {}), (conf_path, {})])
开发者ID:Affirm,项目名称:mrjob,代码行数:16,代码来源:test_conf.py
示例14: test_include_order_beats_include
def test_include_order_beats_include(self):
    """Position in an 'include' list determines precedence, even when the
    listed configs also include one another."""
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf")
    conf_path_1 = os.path.join(self.tmp_dir, "mrjob.1.conf")
    conf_path_2 = os.path.join(self.tmp_dir, "mrjob.2.conf")
    path_to_conf = (
        (conf_path, {"include": [conf_path_1, conf_path_2]}),
        (conf_path_1, {"include": [conf_path_2]}),
        (conf_path_2, {}),
    )
    for path, conf in path_to_conf:
        with open(path, "w") as f:
            dump_mrjob_conf(conf, f)
    # shouldn't matter that conf_path_1 includes conf_path_2
    self.assertEqual(
        load_opts_from_mrjob_conf("foo", conf_path),
        [(conf_path_1, {}), (conf_path_2, {}), (conf_path, {})])
开发者ID:kartheek6,项目名称:mrjob,代码行数:18,代码来源:test_conf.py
示例15: test_include_order_beats_include
def test_include_order_beats_include(self):
    """List order in 'include' wins over nested include relationships."""
    conf_path = os.path.join(self.tmp_dir, 'mrjob.conf')
    conf_path_1 = os.path.join(self.tmp_dir, 'mrjob.1.conf')
    conf_path_2 = os.path.join(self.tmp_dir, 'mrjob.2.conf')
    with open(conf_path, 'w') as top:
        dump_mrjob_conf({'include': [conf_path_1, conf_path_2]}, top)
    with open(conf_path_1, 'w') as first:
        dump_mrjob_conf({'include': [conf_path_2]}, first)
    with open(conf_path_2, 'w') as second:
        dump_mrjob_conf({}, second)
    # shouldn't matter that conf_path_1 includes conf_path_2
    expected = [(conf_path_1, {}), (conf_path_2, {}), (conf_path, {})]
    self.assertEqual(load_opts_from_mrjob_conf('foo', conf_path), expected)
开发者ID:Affirm,项目名称:mrjob,代码行数:18,代码来源:test_conf.py
示例16: __init__
def __init__(self, alias, opts, conf_path):
    """Build the option store for runner *alias*: validate the caller's
    opts, layer in (validated) opts from every mrjob.conf on the include
    chain, then resolve final values from the cascade."""
    super(RunnerOptionStore, self).__init__()
    # sanitize incoming options and issue warnings for bad keys
    validated_kwargs = self.validated_options(
        opts, 'Got unexpected keyword arguments: %s')
    # config files come first in the cascade (lowest precedence first)
    for path, mrjob_conf_opts in load_opts_from_mrjob_conf(
            alias, conf_path=conf_path):
        self.cascading_dicts.append(self.validated_options(
            mrjob_conf_opts, 'Got unexpected opts from %s: %%s' % path))
    # caller-supplied keyword opts override everything from config files
    self.cascading_dicts.append(validated_kwargs)
    self.populate_values_from_cascading_dicts()
    self._validate_cleanup()
开发者ID:icio,项目名称:mrjob,代码行数:19,代码来源:runner.py
示例17: __init__
def __init__(self, mr_job_script=None, conf_path=None,
             extra_args=None, file_upload_args=None,
             input_paths=None, output_dir=None, stdin=None,
             **opts):
    """Set up a job runner.

    All runners take the following keyword arguments:

    :type mr_job_script: str
    :param mr_job_script: path of the ``.py`` file containing the
        :py:class:`~mrjob.job.MRJob`. If ``None``, you can't :py:meth:`run`
        the job, but other utilities (e.g. :py:meth:`ls`) still work.
    :type conf_path: str
    :param conf_path: alternate path to read configs from, or ``False`` to
        ignore all config files.
    :type extra_args: list of str
    :param extra_args: extra command-line arguments to pass to the mr_job
        script (e.g. ``['--protocol', 'repr']``).
    :param file_upload_args: list of ``('--ARGNAME', path)`` tuples. Each
        file is uploaded to the script's working dir and passed in via
        ``--ARGNAME`` (useful for SQLite DBs and config files).
    :type input_paths: list of str
    :param input_paths: input files; supports globs and recursive directory
        walks. If blank, we read from stdin.
    :type output_dir: str
    :param output_dir: empty/non-existent directory for final job output;
        defaults to a subdirectory of the job's temp directory.
    :param stdin: an iterable (e.g. ``StringIO`` or list) to use as stdin;
        a testing hook. Missing newlines are added automatically.

    All runners also accept these options as keyword arguments, which can
    be defaulted in :mod:`mrjob.conf`:

    :type base_tmp_dir: str
    :param base_tmp_dir: where to put local temp dirs (default:
        :py:func:`tempfile.gettempdir`).
    :type bootstrap_mrjob: bool
    :param bootstrap_mrjob: tar up and install the mrjob library
        automatically; set ``False`` if mrjob is already on the cluster.
    :type cleanup: str
    :param cleanup: whether :py:meth:`cleanup` may remove logs and scratch
        files; see :py:data:`CLEANUP_CHOICES`.
    :type cmdenv: dict
    :param cmdenv: environment variables for the job inside Hadoop streaming.
    :type hadoop_extra_args: list of str
    :param hadoop_extra_args: extra args to pass to hadoop streaming.
    :type hadoop_input_format: str
    :param hadoop_input_format: optional Hadoop ``InputFormat`` class,
        passed with ``-inputformat`` (custom classes need a custom
        streaming jar; see *hadoop_streaming_jar*).
    :type hadoop_output_format: str
    :param hadoop_output_format: optional Hadoop ``OutputFormat`` class,
        passed with ``-outputformat`` (same jar caveat as above).
    :type hadoop_streaming_jar: str
    :param hadoop_streaming_jar: path to a custom hadoop streaming jar.
    :type jobconf: dict
    :param jobconf: map of property name to value, passed to hadoop
        streaming as repeated ``-jobconf KEY=VALUE`` args.
    :type label: str
    :param label: description used in the job name (default: the script's
        module name, or ``no_script``). Formerly *job_name_prefix*
        (deprecated but still accepted).
    :type owner: str
    :param owner: who is running this job; used only in the job name
        (default: :py:func:`getpass.getuser`, or ``no_user``).
    :type python_archives: list of str
    :param python_archives: like *upload_archives*, but also added to
        :envvar:`PYTHONPATH`.
    :type python_bin: str
    :param python_bin: alternate python binary for mappers/reducers
        (e.g. for :py:mod:`virtualenv`); defaults to ``'python'``.
    :type setup_cmds: list
    :param setup_cmds: commands run before each mapper/reducer step, as
        shell strings or argv lists; file locking keeps concurrent
        mappers/reducers on one node from running them simultaneously.
    :type setup_scripts: list of str
    :param setup_scripts: files copied into the working dir and run after
        *setup_cmds* (same locking behavior).
    :type steps_python_bin: str
    :param steps_python_bin: python binary used to query the job for its
        steps; defaults to ``sys.executable``.
    :type upload_archives: list of str
    :param upload_archives: archives unpacked in the script's working dir;
        append ``#localname`` to control the unpacked name.
    :type upload_files: list of str
    :param upload_files: files copied to the script's working dir; append
        ``#localname`` to control the local name.
    """
    # enforce correct arguments
    self._fix_deprecated_opts(opts)
    allowed_opts = set(self._allowed_opts())
    unrecognized_opts = set(opts) - allowed_opts
    if unrecognized_opts:
        # warn, then drop the unknown keys rather than crashing
        log.warn('got unexpected keyword arguments: ' +
                 ', '.join(sorted(unrecognized_opts)))
        opts = dict((k, v) for k, v in opts.iteritems()
                    if k in allowed_opts)
    # issue a warning for unknown opts from mrjob.conf and filter them out
    mrjob_conf_opts = load_opts_from_mrjob_conf(
        self.alias, conf_path=conf_path)
    self._fix_deprecated_opts(mrjob_conf_opts)
    unrecognized_opts = set(mrjob_conf_opts) - set(self._allowed_opts())
    if unrecognized_opts:
        log.warn('got unexpected opts from mrjob.conf: ' +
                 ', '.join(sorted(unrecognized_opts)))
        mrjob_conf_opts = dict((k, v)
                               for k, v in mrjob_conf_opts.iteritems()
                               if k in allowed_opts)
    # make sure all opts are at least set to None
    blank_opts = dict((key, None) for key in allowed_opts)
    # combine all of these options, lowest precedence first
    # only __init__() methods should modify self._opts!
    self._opts = self.combine_opts(blank_opts, self._default_opts(),
                                   mrjob_conf_opts, opts)
    # we potentially have a lot of files to copy, so we keep track
    # of them as a list of dictionaries, with the following keys:
    #
    # 'path': the path to the file on the local system
    # 'name': a unique name for the file when we copy it into HDFS etc.
    #         if this is blank, we'll pick one
    # 'cache': if 'file', copy into mr_job_script's working directory
    #          on the Hadoop nodes. If 'archive', uncompress the file
    self._files = []
    # validate cleanup
    # ......... rest of this method omitted in the source excerpt .........
开发者ID:Jyrsa,项目名称:mrjob,代码行数:101,代码来源:runner.py
示例18: test_recursive_include
def test_recursive_include(self):
    """A self-including config file is loaded exactly once."""
    conf_path = os.path.join(self.tmp_dir, "mrjob.conf")
    with open(conf_path, "w") as conf_file:
        dump_mrjob_conf({"include": conf_path}, conf_file)
    self.assertEqual(
        load_opts_from_mrjob_conf("foo", conf_path),
        [(conf_path, {})])
开发者ID:kartheek6,项目名称:mrjob,代码行数:6,代码来源:test_conf.py
注:本文中的mrjob.conf.load_opts_from_mrjob_conf函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论