• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Python parse.is_uri函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中mrjob.parse.is_uri函数的典型用法代码示例。如果您正苦于以下问题:Python is_uri函数的具体用法?Python is_uri怎么用?Python is_uri使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了is_uri函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: test_uri_parsing

 def test_uri_parsing(self):
     """Check URI detection, S3 URI detection, and S3 URI splitting."""
     # (function under test, argument, expected result), checked in order
     cases = (
         (is_uri, 'notauri!', False),
         (is_uri, 'they://did/the/monster/mash', True),
         (is_s3_uri, 's3://a/uri', True),
         (is_s3_uri, 's3n://a/uri', True),
         (is_s3_uri, 'hdfs://a/uri', False),
         (parse_s3_uri, 's3://bucket/loc', ('bucket', 'loc')),
     )
     for func, arg, expected in cases:
         self.assertEqual(func(arg), expected)
开发者ID:Asana,项目名称:mrjob,代码行数:7,代码来源:test_parse.py


示例2: join

 def join(self, dirname, filename):
     """Return *filename* joined onto *dirname*; either may be a URI.

     A *filename* that is itself a URI is returned untouched. Otherwise
     a URI *dirname* is joined with ``posixpath.join`` and any other
     *dirname* with ``os.path.join``.
     """
     if is_uri(filename):
         return filename

     # URIs always use forward slashes, whatever the local platform does
     joiner = posixpath.join if is_uri(dirname) else os.path.join
     return joiner(dirname, filename)
开发者ID:kartheek6,项目名称:mrjob,代码行数:8,代码来源:base.py


示例3: test_spark_master_mesos

    def test_spark_master_mesos(self):
        """With a Mesos master, the Spark tmp dir is an ``hdfs:///`` URI
        and an upload manager is present."""
        expected_prefix = 'hdfs:///'

        runner = SparkMRJobRunner(spark_master='mesos://host:12345')
        tmp_dir = runner._spark_tmp_dir

        self.assertTrue(is_uri(tmp_dir))
        self.assertEqual(tmp_dir[:len(expected_prefix)], expected_prefix)

        self.assertIsNotNone(runner._upload_mgr)
开发者ID:Affirm,项目名称:mrjob,代码行数:7,代码来源:test_runner.py


示例4: test_default

    def test_default(self):
        """With default options, the Spark tmp dir is not a URI, ends in
        ``-spark``, and there is no upload manager."""
        expected_suffix = '-spark'

        runner = SparkMRJobRunner()

        self.assertFalse(is_uri(runner._spark_tmp_dir))
        self.assertIsNone(runner._upload_mgr)

        tmp_dir_tail = runner._spark_tmp_dir[-len(expected_suffix):]
        self.assertEqual(tmp_dir_tail, expected_suffix)
开发者ID:Affirm,项目名称:mrjob,代码行数:7,代码来源:test_runner.py


示例5: ls

    def ls(self, path_glob):
        """Yield the path of every non-directory entry under *path_glob*.

        Non-URI globs are delegated to the parent class. For URIs, the
        output of ``hadoop fs -lsr`` is parsed and each path is yielded
        with the scheme and network location of *path_glob* prepended.

        :raises Exception: if a listing line has fewer than 8
                           whitespace-separated fields
        """
        if not is_uri(path_glob):
            for local_path in super(HadoopJobRunner, self).ls(path_glob):
                yield local_path
            return

        parsed = urlparse(path_glob)
        uri_root = '%s://%s' % (parsed.scheme, parsed.netloc)

        listing = self._invoke_hadoop(
            ['fs', '-lsr', path_glob],
            return_stdout=True,
            ok_stderr=[HADOOP_LSR_NO_SUCH_FILE])

        for raw_line in StringIO(listing):
            # a listing line is expected to look like:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
            columns = raw_line.rstrip('\r\n').split()
            if len(columns) < 8:
                raise Exception(
                    'unexpected ls line from hadoop: %r' % raw_line)

            # permission strings starting with "d" denote directories
            if columns[0].startswith('d'):
                continue

            # everything from the 8th column on is the path; rejoin with
            # single spaces in case the filename itself contained spaces
            yield uri_root + ' '.join(columns[7:])
开发者ID:BrandonHaynes,项目名称:mrjob,代码行数:26,代码来源:hadoop.py


示例6: _cat_file

    def _cat_file(self, filename):
        """Return the contents of *filename* as produced by ``read_file``.

        Non-URI filenames are handled by the parent class. URIs are
        streamed from a ``hadoop fs -cat`` subprocess.

        :raises CalledProcessError: (while the stream is being consumed)
                                    if the subprocess exits non-zero
        """
        if not is_uri(filename):
            # read from local filesystem
            return super(HadoopJobRunner, self)._cat_file(filename)

        # stream from HDFS
        command = self._opts['hadoop_bin'] + ['fs', '-cat', filename]
        log.debug('> %s' % cmd_line(command))

        proc = Popen(command, stdout=PIPE, stderr=PIPE)

        def hdfs_lines():
            for out_line in proc.stdout:
                yield out_line

            # stderr is only drained once stdout is exhausted; anything
            # found there is unexpected, so log it as an error
            for err_line in proc.stderr:
                log.error('STDERR: ' + err_line)

            exit_status = proc.wait()
            if exit_status != 0:
                raise CalledProcessError(exit_status, command)

        return read_file(filename, hdfs_lines())
开发者ID:BrandonHaynes,项目名称:mrjob,代码行数:25,代码来源:hadoop.py


示例7: _setup_input

    def _setup_input(self):
        """Copy local input files (if any) to a special directory on HDFS.

        Populates ``self._hdfs_input_files`` with every URI found in
        ``self._input_paths`` plus, when there are local inputs, the HDFS
        directory those inputs were uploaded to.

        :raises AssertionError: if a URI input path matches no files
        """
        # winnow out HDFS files from local ones
        self._hdfs_input_files = []
        local_input_files = []

        for path in self._input_paths:
            if is_uri(path):
                # Don't even bother running the job if the input isn't there.
                # ls() may be a generator, and a generator object is truthy
                # even when it yields nothing, so pull at most one item from
                # it rather than testing the return value directly. This
                # also works unchanged if ls() returns a list.
                if not any(True for _ in self.ls(path)):
                    raise AssertionError(
                        'Input path %s does not exist!' % (path,))
                self._hdfs_input_files.append(path)
            else:
                local_input_files.append(path)

        # copy local files into an input directory, with names like
        # 00000-actual_name.ext
        if local_input_files:
            hdfs_input_dir = posixpath.join(self._hdfs_tmp_dir, 'input')
            log.info('Uploading input to %s' % hdfs_input_dir)
            self._mkdir_on_hdfs(hdfs_input_dir)

            for i, path in enumerate(local_input_files):
                # "-" means stdin; dump it to a local file so that it can
                # be uploaded like any other input
                if path == '-':
                    path = self._dump_stdin_to_local_file()

                target = '%s/%05i-%s' % (
                    hdfs_input_dir, i, os.path.basename(path))
                self._upload_to_hdfs(path, target)

            self._hdfs_input_files.append(hdfs_input_dir)
开发者ID:AnthonyNystrom,项目名称:mrjob,代码行数:35,代码来源:hadoop.py


示例8: test_spark_master_yarn

    def test_spark_master_yarn(self):
        """With a YARN master, the Spark tmp dir is an ``hdfs:///`` URI
        and an upload manager is present."""
        expected_prefix = 'hdfs:///'

        runner = SparkMRJobRunner(spark_master='yarn')
        tmp_dir = runner._spark_tmp_dir

        self.assertTrue(is_uri(tmp_dir))
        self.assertEqual(tmp_dir[:len(expected_prefix)], expected_prefix)

        self.assertIsNotNone(runner._upload_mgr)
开发者ID:Affirm,项目名称:mrjob,代码行数:7,代码来源:test_runner.py


示例9: _create_input_manifest_if_needed

    def _create_input_manifest_if_needed(self):
        """Write a file listing the URIs of all input files.

        Does nothing if a manifest path is already set or if
        ``self._uses_input_manifest()`` is false. Otherwise the manifest
        is written to ``input-manifest.txt`` in the local tmp dir, its
        path is stored in ``self._input_manifest_path``, and it is
        registered with the upload manager when there is one.
        """
        if self._input_manifest_path or not self._uses_input_manifest():
            return

        manifest_uris = []

        log.info('finding input files to add to manifest...')

        for input_path in self._get_input_paths():
            log.debug('  in %s' % input_path)
            if is_uri(input_path):
                # URIs might be globs, so expand them
                manifest_uris.extend(self.fs.ls(input_path))
            elif self._upload_mgr:
                # local paths are expected to be single files
                # (shell would resolve globs)
                manifest_uris.append(self._upload_mgr.uri(input_path))
            else:
                # just make sure job can find files from its working dir
                manifest_uris.append(os.path.abspath(input_path))

        log.info('found %d input files' % len(manifest_uris))

        manifest_path = os.path.join(
            self._get_local_tmp_dir(), 'input-manifest.txt')
        self._write_script(manifest_uris, manifest_path, 'input manifest')

        self._input_manifest_path = manifest_path
        if self._upload_mgr:
            self._upload_mgr.add(self._input_manifest_path)
开发者ID:Affirm,项目名称:mrjob,代码行数:32,代码来源:runner.py


示例10: fully_qualify_hdfs_path

def fully_qualify_hdfs_path(path):
    """Return *path* as a URI, qualifying it with ``hdfs://`` if needed.

    * anything that is already a URI is returned unchanged
    * an absolute path gets ``hdfs://`` prepended
    * a relative path is placed under ``hdfs:///user/<current user>/``
    """
    if is_uri(path):
        return path

    if path.startswith('/'):
        return 'hdfs://' + path

    current_user = getpass.getuser()
    return 'hdfs:///user/%s/%s' % (current_user, path)
开发者ID:Infolaber,项目名称:mrjob,代码行数:8,代码来源:hadoop.py


示例11: uri

    def uri(self, path):
        """Return the URI for *path*.

        A *path* that does not exist locally and is a URI is returned
        as-is. A path present in ``self._path_to_name`` maps to its
        assigned name joined onto ``self.prefix``.

        :raises ValueError: if *path* is neither of the above
        """
        # the local existence check comes first, so a local file whose
        # name merely looks like a URI is still treated as local
        is_remote = (not os.path.exists(path)) and is_uri(path)
        if is_remote:
            return path

        if path not in self._path_to_name:
            raise ValueError('%r is not a URI or a known local file' % (path,))

        return posixpath.join(self.prefix, self._path_to_name[path])
开发者ID:eyecat,项目名称:mrjob,代码行数:10,代码来源:setup.py


示例12: ls

    def ls(self, path_glob):
        """Yield the URI of every non-directory entry under *path_glob*.

        Runs a recursive ``hadoop fs`` listing (``-ls -R`` when
        ``uses_yarn()`` is true for the Hadoop version, ``-lsr``
        otherwise) and parses its output as bytes, line by line. Paths
        that are not already URIs get the scheme and network location of
        *path_glob* prepended.

        :raises IOError: if the hadoop command raises
                         ``CalledProcessError``, or if no time field
                         (and hence no path) can be found in a line
        """
        components = urlparse(path_glob)
        hdfs_prefix = '%s://%s' % (components.scheme, components.netloc)

        version = self.get_hadoop_version()

        # use ls -R on Hadoop 2 (see #1152)
        if uses_yarn(version):
            args = ['fs', '-ls', '-R', path_glob]
        else:
            args = ['fs', '-lsr', path_glob]

        try:
            stdout = self.invoke_hadoop(args, return_stdout=True,
                                        ok_stderr=[_HADOOP_LS_NO_SUCH_FILE])
        except CalledProcessError:
            raise IOError("Could not ls %s" % path_glob)

        for line in BytesIO(stdout):
            line = line.rstrip(b'\r\n')

            # ignore total item count
            if line.startswith(b'Found '):
                continue

            # split on single spaces: runs of spaces produce empty fields,
            # which keeps the indexes usable as a maxsplit count below
            fields = line.split(b' ')

            # Throw out directories
            if fields[0].startswith(b'd'):
                continue

            # Try to figure out which part of the line is the path
            # Expected lines:
            #
            # HDFS:
            # -rw-r--r--   3 dave users       3276 2010-01-13 14:00 /foo/bar
            #
            # S3:
            # -rwxrwxrwx   1          3276 010-01-13 14:00 /foo/bar
            path_index = None
            for index, field in enumerate(fields):
                # look for time field, and pick one after that
                # (can't use field[2] because that's an int in Python 3)
                # there is no break, so the last matching field wins
                if len(field) == 5 and field[2:3] == b':':
                    path_index = (index + 1)
            if not path_index:
                raise IOError("Could not locate path in string %r" % line)

            # re-split with maxsplit=path_index so that everything after
            # the time field (spaces included) stays together as the path
            path = to_unicode(line.split(b' ', path_index)[-1])
            # handle fully qualified URIs from newer versions of Hadoop ls
            # (see Pull Request #577)
            if is_uri(path):
                yield path
            else:
                yield hdfs_prefix + path
开发者ID:Yelp,项目名称:mrjob,代码行数:55,代码来源:hadoop.py


示例13: _endpoint_url

def _endpoint_url(host_or_uri):
    """Return *host_or_uri* as a URL, defaulting the scheme to HTTPS.

    A non-empty value that isn't already a URI gets ``'https://'``
    prepended. Empty values and URIs are passed through as-is.
    """
    if host_or_uri and not is_uri(host_or_uri):
        return 'https://' + host_or_uri

    return host_or_uri
开发者ID:okomestudio,项目名称:mrjob,代码行数:11,代码来源:s3.py


示例14: path_exists

    def path_exists(self, path_glob):
        """Report whether *path_glob* exists.

        Non-URI paths are delegated to the parent class. For URIs, the
        result of running ``hadoop fs -test -e`` (with return codes 0
        and 1 both accepted) is converted to a bool.
        """
        if is_uri(path_glob):
            # NOTE(review): if _invoke_hadoop() returns the process exit
            # code, then 0 ("path exists") converts to False here and the
            # result is inverted -- confirm what it actually returns.
            outcome = self._invoke_hadoop(
                ['fs', '-test', '-e', path_glob],
                ok_returncodes=(0, 1))
            return bool(outcome)

        return super(HadoopJobRunner, self).path_exists(path_glob)
开发者ID:BrandonHaynes,项目名称:mrjob,代码行数:11,代码来源:hadoop.py


示例15: test_copy_files_with_rename_to_remote_wd_mirror

    def test_copy_files_with_rename_to_remote_wd_mirror(self):
        """Files passed with ``--file`` end up in (or are referenced
        from) the remote working dir mirror, and ``--files`` is passed to
        spark-submit accordingly."""
        s3_contents = {'walrus': {'fish': b'salmon',
                                  'fowl': b'goose'}}
        self.add_mock_s3_data(s3_contents)

        local_foe = self.makefile('foe', b'giant')

        mock_submit = self.start(patch(
            'mrjob.bin.MRJobBinRunner._run_spark_submit',
            return_value=0))

        job_args = [
            '-r', 'spark',
            '--spark-master', 'mesos://host:9999',
            '--spark-tmp-dir', 's3://walrus/tmp',
            '--file', 's3://walrus/fish#ghoti',
            '--file', 's3://walrus/fowl',
            '--file', local_foe,
        ]
        job = MRSparkOSWalk(job_args)
        job.sandbox()

        with job.make_runner() as runner:
            runner.run()

            # check working dir mirror
            mirror = runner._wd_mirror()
            fs = runner.fs

            self.assertIsNotNone(mirror)
            self.assertTrue(is_uri(mirror))

            self.assertTrue(fs.exists(mirror))
            # uploaded for rename
            self.assertTrue(fs.exists(fs.join(mirror, 'ghoti')))
            # wrong name
            self.assertFalse(fs.exists(fs.join(mirror, 'fish')))
            # no need to upload, already visible
            self.assertFalse(fs.exists(fs.join(mirror, 'fowl')))
            # need to upload from local to remote
            self.assertTrue(fs.exists(fs.join(mirror, 'foe')))

            mock_submit.assert_called_once_with(
                ANY, ANY, record_callback=ANY)

            # first positional argument of the single recorded call
            submit_args = mock_submit.call_args[0][0]
            self.assertIn('--files', submit_args)
            files_value = submit_args[submit_args.index('--files') + 1]

            expected_files = [
                fs.join(mirror, 'foe'),
                's3://walrus/fowl',
                fs.join(mirror, 'ghoti'),
                fs.join(mirror, 'mr_spark_os_walk.py'),
            ]
            self.assertEqual(files_value, ','.join(expected_files))
开发者ID:Yelp,项目名称:mrjob,代码行数:53,代码来源:test_runner.py


示例16: join

    def join(self, path, *paths):
        """Join *paths* onto *path*; any of them may be a URI.

        If one or more components are URIs, the result is built from the
        last such URI and the components after it, joined onto the URI's
        path (or ``/`` when it has none) with ``posixpath.join``.
        Otherwise every component is joined with ``os.path.join``.
        """
        all_paths = (path,) + paths

        # scan from the right: only the last URI and what follows matter
        for pos in reversed(range(len(all_paths))):
            candidate = all_paths[pos]
            if not is_uri(candidate):
                continue

            scheme, netloc, uri_path = urlparse(candidate)[:3]
            joined_path = posixpath.join(
                uri_path or '/', *all_paths[pos + 1:])
            return '%s://%s%s' % (scheme, netloc, joined_path)

        return os.path.join(*all_paths)
开发者ID:Dean838,项目名称:mrjob,代码行数:13,代码来源:base.py


示例17: fully_qualify_hadoop_path

def fully_qualify_hadoop_path(path):
    """Return *path* as a fully qualified Hadoop filesystem URI.

    If we're on MapR, we should get an alternative to ``hdfs://``; the
    scheme is discovered by running the ``HADOOP_FETCH_URI_SCHEME`` shell
    command. When that command exits non-zero (CDH4 will fail),
    ``hdfs://`` is used instead.

    * anything that is already a URI is returned unchanged
    * an absolute path gets the scheme prepended
    * a relative path is placed under ``<scheme>/user/<current user>/``
    """
    # Already qualified: return before paying for a subprocess whose
    # output would be ignored anyway.
    if is_uri(path):
        return path

    process = Popen(HADOOP_FETCH_URI_SCHEME, shell=True,
                    stdout=PIPE, stderr=STDOUT)
    output = process.communicate()[0]

    if process.returncode != 0:
        uri_scheme = 'hdfs://'
    else:
        uri_scheme = HADOOP_FETCH_URI_CLEANUP.sub('//', output)

    if path.startswith('/'):
        return uri_scheme + path

    return '%s/user/%s/%s' % (uri_scheme, getpass.getuser(), path)
开发者ID:pythian,项目名称:mrjob,代码行数:14,代码来源:hadoop.py


示例18: du

    def du(self, path_glob):
        """Return the size reported by ``hadoop fs -du`` for *path_glob*.

        Non-URI paths are delegated to the parent class. For URIs, the
        second whitespace-separated token of the command's output is
        returned as an int.

        :raises Exception: if that token is missing or is not an integer
        """
        if is_uri(path_glob):
            stdout = self._invoke_hadoop(['fs', '-du', path_glob],
                                         return_stdout=True)
            try:
                return int(stdout.split()[1])
            except (ValueError, TypeError, IndexError):
                raise Exception(
                    'Unexpected output from hadoop fs -du: %r' % stdout)

        # NOTE(review): this delegates to the parent's dus(), not du() --
        # confirm that the differing method name is intentional.
        return super(HadoopJobRunner, self).dus(path_glob)
开发者ID:GabbleEngineer,项目名称:mrjob,代码行数:14,代码来源:hadoop.py


示例19: add

    def add(self, path):
        """Register *path* and return its URI.

        A *path* that does not exist locally and is a URI is not
        registered; it is simply returned. A path that hasn't been added
        before is assigned a unique name first.

        :return: the URI assigned to the path
        """
        is_remote = (not os.path.exists(path)) and is_uri(path)
        if is_remote:
            return path

        already_named = path in self._path_to_name
        if not already_named:
            # reserve the name before recording it, so that later calls
            # to name_uniquely() see it as taken
            unique_name = name_uniquely(path, names_taken=self._names_taken)
            self._names_taken.add(unique_name)
            self._path_to_name[path] = unique_name

        return self.uri(path)
开发者ID:eyecat,项目名称:mrjob,代码行数:14,代码来源:setup.py


示例20: setUp

    def setUp(self):
        """Patch the composite filesystem's logger and build three mock
        filesystems (hadoop, local, S3) with different path handling."""
        super(CompositeFilesystemTestCase, self).setUp()

        self.log = self.start(patch('mrjob.fs.composite.log'))

        def handles_non_uri(p):
            return not is_uri(p)

        # hadoop: claims every URI
        hadoop_fs = Mock(spec=Filesystem)
        hadoop_fs.get_hadoop_version = Mock()
        hadoop_fs.can_handle_path.side_effect = is_uri
        self.hadoop_fs = hadoop_fs

        # local: claims everything that is not a URI
        local_fs = Mock(spec=Filesystem)
        local_fs.can_handle_path.side_effect = handles_non_uri
        self.local_fs = local_fs

        # S3: claims S3 URIs only
        s3_fs = Mock(spec=Filesystem)
        s3_fs.create_bucket = Mock()
        s3_fs.can_handle_path.side_effect = is_s3_uri
        self.s3_fs = s3_fs
开发者ID:Yelp,项目名称:mrjob,代码行数:15,代码来源:test_composite.py



注:本文中的mrjob.parse.is_uri函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python parse.parse_hadoop_counters_from_line函数代码示例发布时间:2022-05-27
下一篇:
Python parse.is_s3_uri函数代码示例发布时间:2022-05-27
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap