This article collects typical usage examples of the Python function mrjob.parse.parse_s3_uri. If you are wondering what parse_s3_uri does, how to call it, or what real-world usage looks like, the hand-picked examples below should help.
20 code examples of parse_s3_uri are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
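Before the examples, a minimal sketch of what parse_s3_uri itself returns: it splits an s3:// (or s3n://) URI into a (bucket, key) tuple. The bucket and key names below are illustrative only.

from mrjob.parse import parse_s3_uri

# Split an S3 URI into its bucket name and key name
# ('walrus' and the path are made-up illustrative values).
bucket_name, key_name = parse_s3_uri('s3://walrus/tmp/output/part-00000')
print(bucket_name)  # 'walrus'
print(key_name)     # 'tmp/output/part-00000'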
Example 1: test_uri_parsing
def test_uri_parsing(self):
    self.assertEqual(is_uri('notauri!'), False)
    self.assertEqual(is_uri('they://did/the/monster/mash'), True)
    self.assertEqual(is_s3_uri('s3://a/uri'), True)
    self.assertEqual(is_s3_uri('s3n://a/uri'), True)
    self.assertEqual(is_s3_uri('hdfs://a/uri'), False)
    self.assertEqual(parse_s3_uri('s3://bucket/loc'), ('bucket', 'loc'))
Developer: Asana, project: mrjob, lines: 7, source: test_parse.py
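A hedged follow-up to the test above: in the mrjob versions these snippets come from, parse_s3_uri raises ValueError when handed a non-S3 URI (worth verifying against your installed version), so callers often guard with is_s3_uri first:

from mrjob.parse import is_s3_uri, parse_s3_uri

def bucket_of(uri):
    """Return the bucket name for an S3 URI, or None for anything else.
    (Assumes parse_s3_uri() rejects non-S3 URIs with ValueError.)"""
    if not is_s3_uri(uri):
        return None
    bucket_name, _ = parse_s3_uri(uri)
    return bucket_name

print(bucket_of('s3://walrus/data/foo'))  # 'walrus'
print(bucket_of('hdfs://a/uri'))          # None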
Example 2: _s3_ls
def _s3_ls(self, uri):
    """Helper for ls(); doesn't bother with globbing or directories."""
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = self.get_bucket(bucket_name)
    for key in bucket.list(key_name):
        yield s3_key_to_uri(key)
Developer: nilesh-molankar, project: mrjob, lines: 7, source: s3.py
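The s3_key_to_uri() helper used here isn't shown in the snippet. A plausible sketch, assuming a boto (v2) Key with .bucket and .name attributes, rather than mrjob's actual code:

def s3_key_to_uri(key):
    """Turn a boto Key back into an s3:// URI.
    A sketch of the helper assumed above, not mrjob's implementation."""
    return 's3://%s/%s' % (key.bucket.name, key.name)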
Example 3: _get_s3_key
def _get_s3_key(self, uri):
    """Get the boto3 s3.Object matching the given S3 URI, or
    return None if that key doesn't exist.

    uri is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)
    return self.get_bucket(bucket_name).Object(key_name)
Developer: okomestudio, project: mrjob, lines: 8, source: s3.py
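Note that boto3's Object() constructor is lazy and doesn't touch the network, so the snippet above hands back an s3.Object even for a missing key, despite its docstring. A common boto3 pattern for an actual existence check (a sketch, assuming a HEAD request is acceptable) is:

import botocore.exceptions

def s3_object_exists(s3_object):
    """Return True if the boto3 s3.Object actually exists in S3."""
    try:
        s3_object.load()  # issues a HEAD request
        return True
    except botocore.exceptions.ClientError as ex:
        if ex.response['Error']['Code'] == '404':
            return False
        raise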
Example 4: _s3_ls
def _s3_ls(self, uri):
    """Helper for ls(); doesn't bother with globbing or directories."""
    s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    bucket = s3_conn.get_bucket(bucket_name, validate=VALIDATE_BUCKET)
    for key in bucket.list(key_name):
        yield s3_key_to_uri(key)
Developer: LXiong, project: mrjob, lines: 8, source: s3.py
Example 5: make_s3_key
def make_s3_key(self, uri):
    """Create the given S3 key, and return the corresponding
    boto Key object.

    uri is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)
    return self.get_bucket(bucket_name).new_key(key_name)
Developer: gitbenedict, project: mrjob, lines: 9, source: s3.py
Example 6: get_s3_keys
def get_s3_keys(self, uri):
    """Get a stream of boto Key objects for each key inside
    the given dir on S3.

    uri is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_prefix = parse_s3_uri(uri)
    bucket = self.get_bucket(bucket_name)
    for key in bucket.list(key_prefix):
        yield key
Developer: gitbenedict, project: mrjob, lines: 10, source: s3.py
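A hypothetical caller (assuming fs is an object exposing get_s3_keys(), and relying on boto Key's size attribute) might total up the bytes under a prefix like this:

# 'fs' and the URI are illustrative assumptions, not fixed mrjob API.
total_bytes = sum(key.size for key in fs.get_s3_keys('s3://walrus/logs/'))
print('%d bytes under s3://walrus/logs/' % total_bytes)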
Example 7: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    This doesn't list "directories" unless there's actually a
    corresponding key ending with a '/' (which is weird and confusing;
    don't make S3 keys ending in '/').

    To list a directory, path_glob must end with a trailing
    slash (foo and foo/ are different on S3).
    """
    log.debug("ls %s", path_glob)

    # clean up the base URI to ensure we pass boto an s3:// URI,
    # just in case we get passed s3n://
    scheme = urlparse(path_glob).scheme

    # support globs
    glob_match = GLOB_RE.match(path_glob)

    # we're going to search for all keys starting with base_uri
    if glob_match:
        # cut it off at first wildcard
        base_uri = glob_match.group(1)
    else:
        base_uri = path_glob

    # check if we're only going to get results by using a / on the end
    uris = self._s3_ls(base_uri)
    try:
        first = next(uris)
        uris = chain([first], uris)
    except (boto.exception.S3ResponseError, StopIteration):
        try:
            uris = self._s3_ls(base_uri.rstrip("/") + "/")
        except (boto.exception.S3ResponseError, StopIteration):
            return

    prev_uri = None
    for uri in uris:
        uri = "%s://%s/%s" % ((scheme,) + parse_s3_uri(uri))

        # enforce globbing
        if glob_match and not fnmatch.fnmatchcase(uri, path_glob):
            continue

        # If there are keys /data and /data/my_file then we consider there
        # to be a file /data, overriding there being a directory called
        # /data containing a file my_file. We discard /data/my_file.
        if prev_uri is not None and uri.startswith(prev_uri):
            continue

        yield uri
        prev_uri = uri.rstrip("/") + "/"
Developer: duedil-ltd, project: mrjob, lines: 54, source: s3.py
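GLOB_RE is referenced throughout these snippets but never defined in them. A plausible definition, assumed here rather than copied from mrjob, captures everything before the first glob metacharacter in group(1):

import re

# Assumed stand-in for mrjob's GLOB_RE: group(1) is the literal prefix,
# group(2) starts at the first glob metacharacter (*, ?, or [).
GLOB_RE = re.compile(r'^(.*?)([\[*?].*)$')

m = GLOB_RE.match('s3://walrus/data/part-*')
print(m.group(1))  # 's3://walrus/data/part-'
print(GLOB_RE.match('s3://walrus/data/'))  # None -- no wildcard present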
Example 8: test_cleanup
def test_cleanup(self):
    runner = EMRJobRunner(conf_paths=[], s3_sync_wait_time=0.01)

    # add some mock data and change last_modified
    remote_input_path = 's3://walrus/data/'
    self.add_mock_s3_data({'walrus': {'data/foo': 'foo\n',
                                      'data/bar': 'bar\n',
                                      'data/qux': 'qux\n'}})
    s3_conn = runner.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(remote_input_path)
    bucket = s3_conn.get_bucket(bucket_name)
    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')
    key_bar.last_modified = datetime.now() - timedelta(days=45)
    key_qux.last_modified = datetime.now() - timedelta(hours=50)

    # make sure keys are there
    assert isinstance(key_foo, MockKey)
    assert isinstance(key_bar, MockKey)
    assert isinstance(key_qux, MockKey)

    s3_cleanup(remote_input_path, timedelta(days=30), dry_run=True,
               conf_paths=[])

    # dry-run shouldn't delete anything
    assert isinstance(key_foo, MockKey)
    assert isinstance(key_bar, MockKey)
    assert isinstance(key_qux, MockKey)

    s3_cleanup(remote_input_path, timedelta(days=30), conf_paths=[])

    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')

    # make sure key_bar is deleted
    assert isinstance(key_foo, MockKey)
    self.assertEqual(key_bar, None)
    assert isinstance(key_qux, MockKey)

    s3_cleanup(remote_input_path, timedelta(hours=48), conf_paths=[])

    key_foo = bucket.get_key('data/foo')
    key_bar = bucket.get_key('data/bar')
    key_qux = bucket.get_key('data/qux')

    # make sure key_qux is deleted
    assert isinstance(key_foo, MockKey)
    self.assertEqual(key_bar, None)
    self.assertEqual(key_qux, None)
Developer: Anihc, project: mrjob, lines: 53, source: test_s3_tmpwatch.py
Example 9: get_s3_key
def get_s3_key(self, uri, s3_conn=None):
    """Get the boto Key object matching the given S3 URI, or
    return None if that key doesn't exist.

    uri is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    return s3_conn.get_bucket(bucket_name).get_key(key_name)
Developer: adaptivelab, project: mrjob, lines: 14, source: s3.py
Example 10: make_s3_key
def make_s3_key(self, uri, s3_conn=None):
    """Create the given S3 key, and return the corresponding
    boto Key object.

    uri is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)
    return s3_conn.get_bucket(bucket_name).new_key(key_name)
Developer: inncapsule, project: mrjob, lines: 14, source: s3.py
Example 11: get_s3_keys
def get_s3_keys(self, uri, s3_conn=None):
    """Get a stream of boto Key objects for each key inside
    the given dir on S3.

    uri is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_prefix = parse_s3_uri(uri)
    bucket = s3_conn.get_bucket(bucket_name)
    for key in bucket.list(key_prefix):
        yield key
Developer: inncapsule, project: mrjob, lines: 15, source: s3.py
Example 12: mkdir
def mkdir(self, dest):
    """Make a directory. This doesn't actually create directories on S3
    (because there is no such thing), but it will create the corresponding
    bucket if it doesn't exist.
    """
    bucket_name, key_name = parse_s3_uri(dest)
    client = self.make_s3_client()

    try:
        client.head_bucket(Bucket=bucket_name)
    except botocore.exceptions.ClientError as ex:
        if _client_error_status(ex) != 404:
            raise

        self.create_bucket(bucket_name)
Developer: Yelp, project: mrjob, lines: 16, source: s3.py
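The _client_error_status() helper isn't included in the snippet. A sketch of what it presumably does, pulling the HTTP status out of a botocore ClientError (an assumption, not mrjob's exact code):

def _client_error_status(ex):
    """Return the HTTP status code carried by a botocore ClientError."""
    return ex.response.get('ResponseMetadata', {}).get('HTTPStatusCode')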
Example 13: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    *path_glob* can include ``?`` to match single characters or
    ``*`` to match 0 or more characters. Both ``?`` and ``*`` can match
    ``/``.

    .. versionchanged:: 0.5.0

        You no longer need a trailing slash to list "directories" on S3;
        both ``ls('s3://b/dir')`` and ``ls('s3://b/dir/')`` will list
        all keys starting with ``dir/``.
    """
    # clean up the base URI to ensure we pass boto an s3:// URI,
    # just in case we get passed s3n://
    scheme = urlparse(path_glob).scheme

    # support globs
    glob_match = GLOB_RE.match(path_glob)

    # we're going to search for all keys starting with base_uri
    if glob_match:
        # cut it off at first wildcard
        base_uri = glob_match.group(1)
    else:
        base_uri = path_glob

    bucket_name, base_name = parse_s3_uri(base_uri)

    # allow subdirectories of the path/glob
    if path_glob and not path_glob.endswith('/'):
        dir_glob = path_glob + '/*'
    else:
        dir_glob = path_glob + '*'

    bucket = self.get_bucket(bucket_name)
    for key in bucket.list(base_name):
        uri = "%s://%s/%s" % (scheme, bucket_name, key.name)

        # enforce globbing
        if not (fnmatch.fnmatchcase(uri, path_glob) or
                fnmatch.fnmatchcase(uri, dir_glob)):
            continue

        yield uri
Developer: gitbenedict, project: mrjob, lines: 46, source: s3.py
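The dir_glob trick is what makes the trailing slash optional: every key under dir/ matches path_glob + '/*' even when path_glob itself contains no wildcard. A quick self-contained check (the bucket and key names are made up):

import fnmatch

path_glob = 's3://b/dir'
dir_glob = path_glob + '/*'

print(fnmatch.fnmatchcase('s3://b/dir', path_glob))      # True (exact key)
print(fnmatch.fnmatchcase('s3://b/dir/key1', dir_glob))  # True (child key)
print(fnmatch.fnmatchcase('s3://b/dirty', path_glob))    # False
print(fnmatch.fnmatchcase('s3://b/dirty', dir_glob))     # False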
Example 14: get_s3_key
def get_s3_key(self, uri):
    """Get the boto Key object matching the given S3 URI, or
    return None if that key doesn't exist.

    uri is an S3 URI: ``s3://foo/bar``
    """
    bucket_name, key_name = parse_s3_uri(uri)

    try:
        bucket = self.get_bucket(bucket_name)
    except boto.exception.S3ResponseError as e:
        if e.status != 404:
            raise e
        key = None
    else:
        key = bucket.get_key(key_name)

    return key
Developer: gitbenedict, project: mrjob, lines: 18, source: s3.py
Example 15: get_s3_key
def get_s3_key(self, uri, s3_conn=None):
    """Get the boto Key object matching the given S3 URI, or
    return None if that key doesn't exist.

    uri is an S3 URI: ``s3://foo/bar``

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    if not s3_conn:
        s3_conn = self.make_s3_conn()
    bucket_name, key_name = parse_s3_uri(uri)

    try:
        bucket = s3_conn.get_bucket(bucket_name)
    except boto.exception.S3ResponseError as e:
        if e.status != 404:
            raise e
        key = None
    else:
        # (the scraped snippet was truncated here; the else branch is
        # restored to match the identical logic in Example 14)
        key = bucket.get_key(key_name)

    return key
Developer: inncapsule, project: mrjob, lines: 19, source: s3.py
Example 16: _ls
def _ls(self, path_glob):
    """Helper method for :py:meth:`ls`; yields tuples of
    ``(uri, key)`` where *key* is the corresponding boto3 s3.ObjectSummary.
    """
    # clean up the base URI to ensure we pass boto3 an s3:// URI
    # (not s3n://)
    scheme = urlparse(path_glob).scheme

    # support globs
    glob_match = GLOB_RE.match(path_glob)

    # we're going to search for all keys starting with base_uri
    if glob_match:
        # cut it off at first wildcard
        base_uri = glob_match.group(1)
    else:
        base_uri = path_glob

    bucket_name, base_name = parse_s3_uri(base_uri)

    # allow subdirectories of the path/glob
    if path_glob and not path_glob.endswith('/'):
        dir_glob = path_glob + '/*'
    else:
        dir_glob = path_glob + '*'

    try:
        bucket = self.get_bucket(bucket_name)
    except botocore.exceptions.ClientError as ex:
        if _client_error_status(ex) == 404:  # treat nonexistent as empty
            return
        raise

    for key in bucket.objects.filter(Prefix=base_name):
        uri = "%s://%s/%s" % (scheme, bucket_name, key.key)

        # enforce globbing
        if not (fnmatch.fnmatchcase(uri, path_glob) or
                fnmatch.fnmatchcase(uri, dir_glob)):
            continue

        yield uri, key
Developer: okomestudio, project: mrjob, lines: 42, source: s3.py
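A hypothetical caller of _ls() (again assuming fs is an object exposing it) receives (uri, summary) pairs, where the boto3 ObjectSummary carries metadata such as size and last_modified:

# Illustrative only; 'fs' and the glob are assumptions.
for uri, summary in fs._ls('s3://walrus/logs/2018-*'):
    print(uri, summary.size, summary.last_modified)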
Example 17: get_s3_folder_keys
def get_s3_folder_keys(self, uri, s3_conn=None):
    """.. deprecated:: 0.4.0

    Background: EMR used to fake directories on S3 by creating special
    ``*_$folder$`` keys in S3. That is no longer true, so this method is
    deprecated.

    For example, if your job outputs ``s3://walrus/tmp/output/part-00000``,
    EMR will also create these keys:

    - ``s3://walrus/tmp_$folder$``
    - ``s3://walrus/tmp/output_$folder$``

    If you want to grant another Amazon user access to your files so they
    can use them in S3, you must grant read access on the actual keys,
    plus any ``*_$folder$`` keys that "contain" your keys; otherwise
    EMR will error out with a permissions error.

    This gets all the ``*_$folder$`` keys associated with the given URI,
    as boto Key objects.

    This does not support globbing.

    You may optionally pass in an existing S3 connection through
    ``s3_conn``.
    """
    log.warning(
        'get_s3_folder_keys() is deprecated and will be removed in v0.5.0')

    if not s3_conn:
        s3_conn = self.make_s3_conn()

    bucket_name, key_name = parse_s3_uri(uri)
    bucket = _get_bucket(s3_conn, bucket_name)

    dirs = key_name.split('/')
    for i in range(len(dirs)):
        folder_name = '/'.join(dirs[:i]) + '_$folder$'
        key = bucket.get_key(folder_name)
        if key:
            yield key
Developer: DanisHack, project: mrjob, lines: 41, source: s3.py
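To see which candidate ``*_$folder$`` keys the loop above probes for, you can run the string manipulation on its own (no S3 access needed; the key name is made up):

key_name = 'tmp/output/part-00000'
dirs = key_name.split('/')
for i in range(len(dirs)):
    print('/'.join(dirs[:i]) + '_$folder$')
# _$folder$            (i=0: bucket root; get_key() will normally return None)
# tmp_$folder$
# tmp/output_$folder$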
Example 18: ls
def ls(self, path_glob):
    """Recursively list files on S3.

    This doesn't list "directories" unless there's actually a
    corresponding key ending with a '/' (which is weird and confusing;
    don't make S3 keys ending in '/').

    To list a directory, path_glob must end with a trailing
    slash (foo and foo/ are different on S3).
    """
    # clean up the base URI to ensure we pass boto an s3:// URI,
    # just in case we get passed s3n://
    scheme = urlparse(path_glob).scheme

    # support globs
    glob_match = GLOB_RE.match(path_glob)

    # if it's a "file" (doesn't end with /), just check if it exists
    if not glob_match and not path_glob.endswith('/'):
        uri = path_glob
        if self.get_s3_key(uri):
            yield uri
        return

    # we're going to search for all keys starting with base_uri
    if glob_match:
        # cut it off at first wildcard
        base_uri = glob_match.group(1)
    else:
        base_uri = path_glob

    for uri in self._s3_ls(base_uri):
        uri = "%s://%s/%s" % ((scheme,) + parse_s3_uri(uri))

        # enforce globbing
        if glob_match and not fnmatch.fnmatchcase(uri, path_glob):
            continue

        yield uri
Developer: inncapsule, project: mrjob, lines: 40, source: s3.py
Example 19: s3_cleanup
def s3_cleanup(glob_path, time_old, dry_run=False, conf_path=None):
    """Delete all files older than *time_old* in *glob_path*.

    If *dry_run* is ``True``, just log the files that would be
    deleted without actually deleting them.
    """
    runner = EMRJobRunner(conf_path=conf_path)
    s3_conn = runner.make_s3_conn()

    log.info("Deleting all files in %s that are older than %s" %
             (glob_path, time_old))

    for path in runner.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(path)
        bucket = s3_conn.get_bucket(bucket_name)
        for key in bucket.list(key_name):
            last_modified = iso8601_to_datetime(key.last_modified)
            age = datetime.utcnow() - last_modified
            if age > time_old:
                # delete it
                log.info("Deleting %s; is %s old" % (key.name, age))
                if not dry_run:
                    key.delete()
Developer: ealliaume, project: mrjob, lines: 22, source: s3_tmpwatch.py
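The age test at the heart of both cleanup functions is plain datetime arithmetic on boto's ISO-8601 last_modified strings; in the mrjob versions these snippets target, mrjob.parse provides iso8601_to_datetime for the conversion (the timestamp below is made up):

from datetime import datetime, timedelta
from mrjob.parse import iso8601_to_datetime

last_modified = iso8601_to_datetime('2009-05-27T05:52:19Z')
age = datetime.utcnow() - last_modified
print(age > timedelta(days=30))  # True for this old timestamp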
Example 20: _s3_cleanup
def _s3_cleanup(glob_path, time_old, dry_run=False, **runner_kwargs):
    """Delete all files older than *time_old* in *glob_path*.

    If *dry_run* is true, just log the files that would be
    deleted without actually deleting them.
    """
    runner = EMRJobRunner(**runner_kwargs)

    log.info('Deleting all files in %s that are older than %s' %
             (glob_path, time_old))

    for path in runner.fs.ls(glob_path):
        bucket_name, key_name = parse_s3_uri(path)
        bucket = runner.fs.get_bucket(bucket_name)
        for key in bucket.list(key_name):
            last_modified = iso8601_to_datetime(key.last_modified)
            age = datetime.utcnow() - last_modified
            if age > time_old:
                # delete it
                log.info('Deleting %s; is %s old' % (key.name, age))
                if not dry_run:
                    key.delete()
Developer: Dean838, project: mrjob, lines: 23, source: s3_tmpwatch.py
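A hedged usage sketch: the leading underscore suggests _s3_cleanup is normally driven by a CLI wrapper, but it can be called directly with runner kwargs (the path and options here are illustrative assumptions):

from datetime import timedelta

# Dry run first: logs what would be deleted without touching S3.
_s3_cleanup('s3://walrus/tmp/*', timedelta(days=30),
            dry_run=True, conf_paths=[])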
Note: the mrjob.parse.parse_s3_uri examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and any use or redistribution should follow the corresponding project's license. Do not repost without permission.