本文整理汇总了Python中mrjob.parse.JOB_NAME_RE的典型用法代码示例。JOB_NAME_RE并不是类,而是一个正则表达式对象,用于匹配mrjob生成的作业名称(例如mr_word_freq_count.dave.20101103.121249.638552)。如果您正苦于以下问题:Python JOB_NAME_RE的具体用法?Python JOB_NAME_RE怎么用?Python JOB_NAME_RE使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。
在下文中一共展示了JOB_NAME_RE的17个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_owner_and_label_kwargs
def test_owner_and_label_kwargs(self):
    """Expect ``owner``/``label`` keyword arguments to show up in the job name."""
    job_runner = InlineMRJobRunner(
        conf_paths=[], owner='ads', label='ads_chain')
    job_name = job_runner.get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    # group 1 is checked against the label, group 2 against the owner
    self.assertEqual(name_match.group(1), 'ads_chain')
    self.assertEqual(name_match.group(2), 'ads')
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:7,代码来源:test_runner.py
示例2: test_auto_owner
def test_auto_owner(self):
    """Expect the owner in the job name to be the value of ``$USER``."""
    os.environ['USER'] = 'mcp'
    job_name = InlineMRJobRunner(conf_paths=[]).get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    # no label was given, so the test expects the 'no_script' placeholder
    self.assertEqual(name_match.group(1), 'no_script')
    self.assertEqual(name_match.group(2), 'mcp')
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:7,代码来源:test_runner.py
示例3: test_owner_and_label_switches
def test_owner_and_label_switches(self):
    """Expect ``--owner``/``--label`` switches to show up in the job name."""
    switches = ['--no-conf', '--owner=ads', '--label=ads_chain']
    job_runner = MRTwoStepJob(switches).make_runner()
    job_name = job_runner.get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    # group 1 is checked against the label, group 2 against the owner
    self.assertEqual(name_match.group(1), 'ads_chain')
    self.assertEqual(name_match.group(2), 'ads')
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:7,代码来源:test_runner.py
示例4: test_empty_no_user
def test_empty_no_user(self):
    """Expect placeholder label and owner when no user name is available."""
    # NOTE(review): this flag is presumably read by a getpass.getuser()
    # stub installed by the test fixture -- confirm against setUp
    self.getuser_should_fail = True
    job_name = InlineMRJobRunner(conf_paths=[]).get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    self.assertEqual(name_match.group(1), 'no_script')
    self.assertEqual(name_match.group(2), 'no_user')
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:7,代码来源:test_runner.py
示例5: test_auto_owner
def test_auto_owner(self):
    """Expect the owner in the job name to be the value of ``$USER``."""
    os.environ['USER'] = 'mcp'
    job_name = LocalMRJobRunner(conf_path=False).get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    # no label was given, so the test expects the 'no_script' placeholder
    assert_equal(name_match.group(1), 'no_script')
    assert_equal(name_match.group(2), 'mcp')
开发者ID:chomp,项目名称:mrjob,代码行数:7,代码来源:runner_test.py
示例6: test_owner_and_label_kwargs
def test_owner_and_label_kwargs(self):
    """Expect ``owner``/``label`` keyword arguments to show up in the job name."""
    job_runner = LocalMRJobRunner(
        conf_path=False, owner='ads', label='ads_chain')
    job_name = job_runner.get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    # group 1 is checked against the label, group 2 against the owner
    assert_equal(name_match.group(1), 'ads_chain')
    assert_equal(name_match.group(2), 'ads')
开发者ID:chomp,项目名称:mrjob,代码行数:7,代码来源:runner_test.py
示例7: test_empty_no_user
def test_empty_no_user(self):
    """Expect placeholder label and owner when no user name is available."""
    # NOTE(review): this flag is presumably read by a getpass.getuser()
    # stub installed by the test fixture -- confirm against setup
    self.getuser_should_fail = True
    job_name = LocalMRJobRunner(conf_path=False).get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    assert_equal(name_match.group(1), 'no_script')
    assert_equal(name_match.group(2), 'no_user')
开发者ID:chomp,项目名称:mrjob,代码行数:7,代码来源:runner_test.py
示例8: test_auto_everything
def test_auto_everything(self):
    """Check label, owner and timestamp of a fully auto-generated job name."""
    started_at = datetime.datetime.utcnow()
    os.environ["USER"] = "mcp"

    job_runner = MRTwoStepJob(["--no-conf"]).make_runner()
    name_match = JOB_NAME_RE.match(job_runner.get_job_name())

    self.assertEqual(name_match.group(1), "mr_two_step_job")
    self.assertEqual(name_match.group(2), "mcp")

    # groups 3 and 4 concatenate to a YYYYmmddHHMMSS stamp; group 5 is
    # parsed as the microsecond part
    stamp = name_match.group(3) + name_match.group(4)
    job_started_at = datetime.datetime.strptime(stamp, "%Y%m%d%H%M%S")
    micros = int(name_match.group(5))
    job_started_at = job_started_at.replace(microsecond=micros)

    # the job name must be stamped no earlier than the start of this test
    # and at most five seconds after it
    self.assertGreaterEqual(job_started_at, started_at)
    elapsed = job_started_at - started_at
    self.assertLessEqual(elapsed, datetime.timedelta(seconds=5))
开发者ID:pyzen,项目名称:mrjob,代码行数:14,代码来源:test_runner.py
示例9: test_auto_everything
def test_auto_everything(self):
    """Check label, owner and timestamp of a fully auto-generated job name."""
    started_at = datetime.datetime.utcnow()
    os.environ['USER'] = 'mcp'

    job_runner = MRTwoStepJob(['--no-conf']).make_runner()
    name_match = JOB_NAME_RE.match(job_runner.get_job_name())

    assert_equal(name_match.group(1), 'mr_two_step_job')
    assert_equal(name_match.group(2), 'mcp')

    # groups 3 and 4 concatenate to a YYYYmmddHHMMSS stamp; group 5 is
    # parsed as the microsecond part
    stamp = name_match.group(3) + name_match.group(4)
    job_started_at = datetime.datetime.strptime(stamp, '%Y%m%d%H%M%S')
    micros = int(name_match.group(5))
    job_started_at = job_started_at.replace(microsecond=micros)

    # the job name must be stamped no earlier than the start of this test
    # and at most five seconds after it
    assert_gte(job_started_at, started_at)
    elapsed = job_started_at - started_at
    assert_lte(elapsed, datetime.timedelta(seconds=5))
开发者ID:chomp,项目名称:mrjob,代码行数:15,代码来源:runner_test.py
示例10: test_auto_label
def test_auto_label(self):
    """Expect the label 'mr_two_step_job' and the current user as owner."""
    job_runner = MRTwoStepJob(['--no-conf']).make_runner()
    job_name = job_runner.get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    self.assertEqual(name_match.group(1), 'mr_two_step_job')
    # owner is compared against whatever getpass reports for this process
    self.assertEqual(name_match.group(2), getpass.getuser())
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:6,代码来源:test_runner.py
示例11: test_empty
def test_empty(self):
    """Expect the 'no_script' label and the current user for a bare runner."""
    job_name = InlineMRJobRunner(conf_paths=[]).get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    self.assertEqual(name_match.group(1), 'no_script')
    # owner is compared against whatever getpass reports for this process
    self.assertEqual(name_match.group(2), getpass.getuser())
开发者ID:duedil-ltd,项目名称:mrjob,代码行数:6,代码来源:test_runner.py
示例12: test_job_name_not_specified
def test_job_name_not_specified(self):
    """Expect a JOB_NAME_RE-style name when no job name option is set."""
    with MRWordCount().make_runner() as job_runner:
        # the option itself stays falsy ...
        configured_name = job_runner._opts['job_name']
        self.assertFalse(configured_name)
        # ... but the runner still reports a name the pattern matches
        generated_name = job_runner.get_job_name()
        self.assertIsNotNone(JOB_NAME_RE.match(generated_name))
开发者ID:anusha-r,项目名称:mrjob,代码行数:5,代码来源:test_runner.py
示例13: test_empty
def test_empty(self):
    """Expect the 'no_script' label and the current user for a bare runner."""
    job_name = LocalMRJobRunner(conf_path=False).get_job_name()
    name_match = JOB_NAME_RE.match(job_name)
    assert_equal(name_match.group(1), 'no_script')
    # owner is compared against whatever getpass reports for this process
    assert_equal(name_match.group(2), getpass.getuser())
开发者ID:chomp,项目名称:mrjob,代码行数:6,代码来源:runner_test.py
示例14: job_flow_to_basic_summary
def job_flow_to_basic_summary(job_flow, now=None):
    """Extract fields such as creation time, owner, etc. from the job flow,
    so we can safely reference them without using :py:func:`getattr`.

    :param job_flow: a :py:class:`boto.emr.EmrObject`
    :param now: the current UTC time, as a :py:class:`datetime.datetime`.
                Defaults to the current time.

    Returns a dictionary with the following keys. These will be ``None`` if the
    corresponding field in the job flow is unavailable.

    * *created*: UTC `datetime.datetime` that the job flow was created,
      or ``None``
    * *end*: UTC `datetime.datetime` that the job flow finished, or ``None``
    * *id*: job flow ID, or ``None`` (this should never happen)
    * *label*: The label for the job flow (usually the module name of the
      :py:class:`~mrjob.job.MRJob` script that started it), or
      ``None`` for non-:py:mod:`mrjob` job flows.
    * *name*: job flow name, or ``None`` (this should never happen)
    * *nih*: number of normalized instance hours used by the job flow.
    * *num_steps*: Number of steps in the job flow.
    * *owner*: The owner for the job flow (usually the user that started it),
      or ``None`` for non-:py:mod:`mrjob` job flows.
    * *pool*: pool name (e.g. ``'default'``) if the job flow is pooled,
      otherwise ``None``.
    * *ran*: How long the job flow ran, or has been running, as a
      :py:class:`datetime.timedelta`. This will be ``timedelta(0)`` if
      the job flow hasn't started.
    * *ready*: UTC `datetime.datetime` that the job flow finished
      bootstrapping, or ``None``
    * *start*: UTC `datetime.datetime` that the job flow became available, or
      ``None``
    * *state*: The job flow's state as a string (e.g. ``'RUNNING'``)
    """
    if now is None:
        now = datetime.utcnow()

    jf = {}  # summary to fill in

    jf['id'] = getattr(job_flow, 'jobflowid', None)
    jf['name'] = getattr(job_flow, 'name', None)

    jf['created'] = to_datetime(getattr(job_flow, 'creationdatetime', None))
    jf['start'] = to_datetime(getattr(job_flow, 'startdatetime', None))
    jf['ready'] = to_datetime(getattr(job_flow, 'readydatetime', None))
    jf['end'] = to_datetime(getattr(job_flow, 'enddatetime', None))

    if jf['start']:
        # a job flow with no end time is still running; measure up to *now*
        jf['ran'] = (jf['end'] or now) - jf['start']
    else:
        jf['ran'] = timedelta(0)

    jf['state'] = getattr(job_flow, 'state', None)

    jf['num_steps'] = len(getattr(job_flow, 'steps', None) or ())

    jf['pool'] = None
    bootstrap_actions = getattr(job_flow, 'bootstrapactions', None)
    if bootstrap_actions:
        # only the last bootstrap action is inspected: a two-argument action
        # whose first argument starts with 'pool-' carries the pool name
        # as its second argument
        args = [arg.value for arg in bootstrap_actions[-1].args]
        if len(args) == 2 and args[0].startswith('pool-'):
            jf['pool'] = args[1]

    # Match against the name extracted above, falling back to '' so that a
    # job flow whose name attribute is missing *or* set to None yields
    # label/owner of None instead of a TypeError from match(None).
    m = JOB_NAME_RE.match(jf['name'] or '')
    if m:
        jf['label'], jf['owner'] = m.group(1), m.group(2)
    else:
        jf['label'], jf['owner'] = None, None

    jf['nih'] = float(getattr(job_flow, 'normalizedinstancehours', '0'))

    return jf
开发者ID:PythonCharmers,项目名称:mrjob,代码行数:72,代码来源:audit_usage.py
示例15: test_end_to_end
def test_end_to_end(self):
    """Run a two-step job on the mock EMR runner from start to finish.

    Checks the streamed output, the job flow's state and name, which steps
    receive the input/output format switches, how mrjob.tar.gz is
    registered, and that cleanup and termination happen after the runner's
    ``with`` block exits.
    """
    # read from STDIN, a local file, and a remote file
    fake_stdin = StringIO('foo\nbar\n')

    local_input_path = os.path.join(self.tmp_dir, 'input')
    with open(local_input_path, 'w') as input_file:
        input_file.write('bar\nqux\n')

    remote_input_path = 's3://walrus/data/foo'
    self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n'}})

    # setup fake output
    self.mock_emr_output = {('j-MOCKJOBFLOW0', 1): [
        '1\t"qux"\n2\t"bar"\n', '2\t"foo"\n5\tnull\n']}

    job_args = ['-r', 'emr', '-v',
                '-c', self.mrjob_conf_path,
                '-', local_input_path, remote_input_path,
                '--hadoop-input-format', 'FooFormat',
                '--hadoop-output-format', 'BarFormat']
    mr_job = MRTwoStepJob(job_args)
    mr_job.sandbox(stdin=fake_stdin)

    local_tmp_dir = None
    results = []

    s3_before = copy.deepcopy(self.mock_s3_fs)

    with mr_job.make_runner() as runner:
        assert isinstance(runner, EMRJobRunner)

        # make sure that initializing the runner doesn't affect S3
        # (Issue #50)
        assert_equal(s3_before, self.mock_s3_fs)

        runner.run()

        for output_line in runner.stream_output():
            out_key, out_value = mr_job.parse_output_line(output_line)
            results.append((out_key, out_value))

        local_tmp_dir = runner._get_local_tmp_dir()
        # make sure cleanup hasn't happened yet
        assert os.path.exists(local_tmp_dir)
        assert any(runner.ls(runner.get_output_dir()))

        emr_conn = runner.make_emr_conn()
        job_flow = emr_conn.describe_jobflow(runner.get_emr_job_flow_id())
        assert_equal(job_flow.state, 'COMPLETED')
        name_match = JOB_NAME_RE.match(job_flow.name)
        assert_equal(name_match.group(1), 'mr_two_step_job')
        assert_equal(name_match.group(2), getpass.getuser())

        # make sure our input and output formats are attached to
        # the correct steps
        assert_in('-inputformat', job_flow.steps[0].args)
        assert_not_in('-outputformat', job_flow.steps[0].args)
        assert_not_in('-inputformat', job_flow.steps[1].args)
        assert_in('-outputformat', job_flow.steps[1].args)

        # make sure mrjob.tar.gz is created and uploaded as
        # a bootstrap file
        assert runner._mrjob_tar_gz_path
        tar_gz_entries = [
            entry for entry in runner._files
            if entry['path'] == runner._mrjob_tar_gz_path]
        assert_equal(len(tar_gz_entries), 1)

        tar_gz_entry = tar_gz_entries[0]
        assert tar_gz_entry['name']
        assert_equal(tar_gz_entry.get('bootstrap'), 'file')

        # shouldn't be in PYTHONPATH (we dump it directly in site-packages)
        pythonpath = runner._get_cmdenv().get('PYTHONPATH') or ''
        assert_not_in(tar_gz_entry['name'], pythonpath.split(':'))

    expected_results = [(1, 'qux'), (2, 'bar'), (2, 'foo'), (5, None)]
    assert_equal(sorted(results), expected_results)

    # make sure cleanup happens
    assert not os.path.exists(local_tmp_dir)
    assert not any(runner.ls(runner.get_output_dir()))

    # job should get terminated
    emr_conn = runner.make_emr_conn()
    job_flow_id = runner.get_emr_job_flow_id()
    for _ in range(10):
        emr_conn.simulate_progress(job_flow_id)

    job_flow = emr_conn.describe_jobflow(job_flow_id)
    assert_equal(job_flow.state, 'TERMINATED')
开发者ID:boursier,项目名称:mrjob,代码行数:92,代码来源:emr_test.py
示例16: print_report
def print_report(options):
emr_conn = EMRJobRunner(conf_path=options.conf_path).make_emr_conn()
log.info('getting job flow history...')
# microseconds just make our report messy
now = datetime.datetime.utcnow().replace(microsecond=0)
# if --max-days-ago is set, only look at recent jobs
created_after = None
if options.max_days_ago is not None:
created_after = now - datetime.timedelta(days=options.max_days_ago)
job_flows = describe_all_job_flows(emr_conn, created_after=created_after)
job_flow_infos = []
for jf in job_flows:
job_flow_info = {}
job_flow_info['id'] = jf.jobflowid
job_flow_info['name'] = jf.name
job_flow_info['created'] = to_datetime(jf.creationdatetime)
start_time = to_datetime(getattr(jf, 'startdatetime', None))
if start_time:
end_time = to_datetime(getattr(jf, 'enddatetime', None)) or now
job_flow_info['ran'] = end_time - start_time
else:
job_flow_info['ran'] = datetime.timedelta(0)
job_flow_info['state'] = jf.state
job_flow_info['num_steps'] = len(jf.steps or [])
# this looks to be an integer, but let's protect against
# future changes
job_flow_info['hours'] = float(jf.normalizedinstancehours)
# estimate hours billed but not used
job_flow_info['hours_bbnu'] = (
job_flow_info['hours'] *
estimate_proportion_billed_but_not_used(jf))
# split out mr job name and user
# jobs flows created by MRJob have names like:
# mr_word_freq_count.dave.20101103.121249.638552
match = JOB_NAME_RE.match(jf.name)
if match:
job_flow_info['mr_job_name'] = match.group(1)
job_flow_info['user'] = match.group(2)
else:
# not run by mrjob
job_flow_info['mr_job_name'] = None
job_flow_info['user'] = None
job_flow_infos.append(job_flow_info)
if not job_flow_infos:
print 'No job flows created in the past two months!'
return
earliest = min(info['created'] for info in job_flow_infos)
latest = max(info['created'] for info in job_flow_infos)
print 'Total # of Job Flows: %d' % len(job_flow_infos)
print
print '* All times are in UTC.'
print
print 'Min create time: %s' % earliest
print 'Max create time: %s' % latest
print ' Current time: %s' % now
print
print '* All usage is measured in Normalized Instance Hours, which are'
print ' roughly equivalent to running an m1.small instance for an hour.'
print
# total compute-unit hours used
total_hours = sum(info['hours'] for info in job_flow_infos)
print 'Total Usage: %d' % total_hours
print
print '* Time billed but not used is estimated, and may not match'
print " Amazon's billing system exactly."
print
total_hours_bbnu = sum(info['hours_bbnu'] for info in job_flow_infos)
print 'Total time billed but not used (waste): %.2f' % total_hours_bbnu
print
date_to_hours = defaultdict(float)
date_to_hours_bbnu = defaultdict(float)
for info in job_flow_infos:
date_created = info['created'].date()
date_to_hours[date_created] += info['hours']
#.........这里部分代码省略.........
开发者ID:AntonKast,项目名称:mrjob,代码行数:101,代码来源:audit_usage.py
示例17: test_end_to_end
def test_end_to_end(self):
    """Run a two-step job on the mock EMR runner from start to finish.

    Checks the streamed output, the job flow's state and name, and that
    cleanup and termination happen after the runner's ``with`` block exits.
    """
    # read from STDIN, a local file, and a remote file
    fake_stdin = StringIO('foo\nbar\n')

    local_input_path = os.path.join(self.tmp_dir, 'input')
    with open(local_input_path, 'w') as input_file:
        input_file.write('bar\nqux\n')

    remote_input_path = 's3://walrus/data/foo'
    self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n'}})

    # setup fake output
    self.mock_emr_output = {('j-MOCKJOBFLOW0', 1): [
        '1\t"qux"\n2\t"bar"\n', '2\t"foo"\n5\tnull\n']}

    job_args = ['-r', 'emr', '-v',
                '-c', self.mrjob_conf_path,
                '-', local_input_path, remote_input_path]
    mr_job = MRTwoStepJob(job_args)
    mr_job.sandbox(stdin=fake_stdin)

    local_tmp_dir = None
    results = []

    s3_before = copy.deepcopy(self.mock_s3_fs)

    with mr_job.make_runner() as runner:
        assert isinstance(runner, EMRJobRunner)

        # make sure that initializing the runner doesn't affect S3
        # (Issue #50)
        assert_equal(s3_before, self.mock_s3_fs)

        runner.run()

        for output_line in runner.stream_output():
            out_key, out_value = mr_job.parse_output_line(output_line)
            results.append((out_key, out_value))

        local_tmp_dir = runner._get_local_tmp_dir()
        # make sure cleanup hasn't happened yet
        assert os.path.exists(local_tmp_dir)
        assert any(runner.ls(runner.get_output_dir()))

        emr_conn = runner.make_emr_conn()
        job_flow = emr_conn.describe_jobflow(runner.get_emr_job_flow_id())
        assert_equal(job_flow.state, 'COMPLETED')
        name_match = JOB_NAME_RE.match(job_flow.name)
        assert_equal(name_match.group(1), 'mr_two_step_job')
        assert_equal(name_match.group(2), getpass.getuser())

    expected_results = [(1, 'qux'), (2, 'bar'), (2, 'foo'), (5, None)]
    assert_equal(sorted(results), expected_results)

    # make sure cleanup happens
    assert not os.path.exists(local_tmp_dir)
    assert not any(runner.ls(runner.get_output_dir()))

    # job should get terminated
    emr_conn = runner.make_emr_conn()
    job_flow_id = runner.get_emr_job_flow_id()
    for _ in range(10):
        emr_conn.simulate_progress(job_flow_id)

    job_flow = emr_conn.describe_jobflow(job_flow_id)
    assert_equal(job_flow.state, 'TERMINATED')
开发者ID:chomp,项目名称:mrjob,代码行数:65,代码来源:emr_test.py
注:本文中的mrjob.parse.JOB_NAME_RE示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论