This article collects typical usage examples of the HadoopFilesystem class from the Python module mrjob.fs.hadoop. If you have been wondering what HadoopFilesystem does, how to use it, or where to find working examples, the curated class examples below should help.
Below are 11 code examples of the HadoopFilesystem class, sorted by popularity by default. You can upvote the examples you find useful; your votes help the system recommend better Python code examples.
Example 1: test_logging_stderr_in_cleanup

def test_logging_stderr_in_cleanup(self):
    def mock_Popen(*args, **kwargs):
        mock_proc = MagicMock()

        mock_proc.stdout = MagicMock()
        mock_proc.stdout.__iter__.return_value = [
            b'line1\n', b'line2\n']
        mock_proc.stderr = MagicMock()
        mock_proc.stderr.__iter__.return_value = [
            b'Emergency, everybody to get from street\n']

        mock_proc.wait.return_value = 0

        return mock_proc

    self.start(patch('mrjob.fs.hadoop.Popen', mock_Popen))
    mock_log = self.start(patch('mrjob.fs.hadoop.log'))

    fs = HadoopFilesystem()

    data = b''.join(fs._cat_file('/some/path'))
    self.assertEqual(data, b'line1\nline2\n')

    mock_log.error.assert_called_once_with(
        'STDERR: Emergency, everybody to get from street')

Developer ID: Yelp, project: mrjob, lines of code: 28, source file: test_hadoop.py
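For context, this test patches mrjob.fs.hadoop.Popen to check that _cat_file streams the subprocess's stdout while logging every stderr line at ERROR level. Outside a test, the same surface looks roughly like the minimal sketch below; the hdfs:/// path is a placeholder and a working hadoop binary is assumed.

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(['hadoop'])                # argv used to shell out to Hadoop
for chunk in fs._cat_file('hdfs:///some/path'):  # same private helper the test exercises
    print(chunk)                                 # raw bytes from `hadoop fs -cat`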
Example 2: test_deprecated_hadoop_home_option

def test_deprecated_hadoop_home_option(self):
    hadoop_home = join(self.tmp_dir, 'hadoop_home_option')
    hadoop_bin = self.makefile(join(hadoop_home, 'bin', 'hadoop'),
                               executable=True)

    # deprecation warning is in HadoopJobRunner
    self.fs = HadoopFilesystem(hadoop_home=hadoop_home)

    with no_handlers_for_logger('mrjob.fs.hadoop'):
        self.assertEqual(self.fs.get_hadoop_bin(), [hadoop_bin])

Developer ID: Dean838, project: mrjob, lines of code: 10, source file: test_hadoop.py
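In other words, the deprecated hadoop_home option pins the binary lookup to <hadoop_home>/bin/hadoop. A hedged sketch of that behavior, with /opt/hadoop standing in as a hypothetical install prefix:

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(hadoop_home='/opt/hadoop')  # deprecated option; path is made up
print(fs.get_hadoop_bin())  # ['/opt/hadoop/bin/hadoop'] if that binary exists and is executable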
Example 3: setUp

def setUp(self):
    super(FindHadoopBinTestCase, self).setUp()

    # track calls to which()
    self.which = self.start(patch('mrjob.fs.hadoop.which', wraps=which))

    # keep which() from searching in /bin, etc.
    os.environ['PATH'] = self.tmp_dir

    # create basic HadoopFilesystem (okay to overwrite)
    self.fs = HadoopFilesystem()

Developer ID: Yelp, project: mrjob, lines of code: 11, source file: test_hadoop.py
Example 4: setUp

def setUp(self):
    super(HadoopFSTestCase, self).setUp()
    # wrap HadoopFilesystem so it gets cat()
    self.fs = HadoopFilesystem(["hadoop"])
    self.set_up_mock_hadoop()
    self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

Developer ID: DepengLuan, project: mrjob, lines of code: 6, source file: test_hadoop.py
Example 5: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(["hadoop"])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        # setup fake hadoop home
        self.env = {}
        self.env["HADOOP_HOME"] = self.makedirs("mock_hadoop_home")
        self.makefile(
            os.path.join("mock_hadoop_home", "contrib", "streaming", "hadoop-0.X.Y-streaming.jar"),
            "i are java bytecode",
        )
        self.env["MOCK_HDFS_ROOT"] = self.makedirs("mock_hdfs_root")
        self.env["MOCK_HADOOP_OUTPUT"] = self.makedirs("mock_hadoop_output")
        self.env["USER"] = "mrjob_tests"
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents="contents"):
        return self.makefile(os.path.join("mock_hdfs_root", name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls("hdfs:///")), [])

    def test_ls_basic(self):
        self.make_mock_file("f")
        self.assertEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f"])

    def test_ls_basic_2(self):
        self.make_mock_file("f")
        self.make_mock_file("f2")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f", "hdfs:///f2"])

    def test_ls_recurse(self):
        self.make_mock_file("f")
        self.make_mock_file("d/f2")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///f", "hdfs:///d/f2"])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file("f", "foo")
        self.make_mock_file("f3 win", "foo" * 10)
        self.assertItemsEqual(list(self.fs.ls("s3n://bucket/")), ["s3n://bucket/f", "s3n://bucket/f3 win"])

    def test_single_space(self):
        self.make_mock_file("foo bar")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///foo bar"])

    def test_double_space(self):
        self.make_mock_file("foo  bar")
        self.assertItemsEqual(list(self.fs.ls("hdfs:///")), ["hdfs:///foo  bar"])

    def test_cat_uncompressed(self):
        self.make_mock_file("data/foo", "foo\nfoo\n")
        remote_path = self.fs.path_join("hdfs:///data", "foo")
        self.assertEqual(list(self.fs._cat_file(remote_path)), ["foo\n", "foo\n"])

    def test_cat_bz2(self):
        self.make_mock_file("data/foo.bz2", bz2.compress("foo\n" * 1000))
        remote_path = self.fs.path_join("hdfs:///data", "foo.bz2")
        self.assertEqual(list(self.fs._cat_file(remote_path)), ["foo\n"] * 1000)

    def test_cat_gz(self):
        self.make_mock_file("data/foo.gz", gzip_compress("foo\n" * 10000))
        remote_path = self.fs.path_join("hdfs:///data", "foo.gz")
        self.assertEqual(list(self.fs._cat_file(remote_path)), ["foo\n"] * 10000)

    def test_du(self):
        self.make_mock_file("data1", "abcd")
        self.make_mock_file("more/data2", "defg")
        self.make_mock_file("more/data3", "hijk")
        self.assertEqual(self.fs.du("hdfs:///"), 12)
        self.assertEqual(self.fs.du("hdfs:///data1"), 4)
        self.assertEqual(self.fs.du("hdfs:///more"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/*"), 8)
        self.assertEqual(self.fs.du("hdfs:///more/data2"), 4)
        self.assertEqual(self.fs.du("hdfs:///more/data3"), 4)

    def test_mkdir(self):
        for hadoop_version in ["0.20.0", "0.23.0", "1.2.0", "2.0.0"]:
            self.env["MOCK_HADOOP_VERSION"] = hadoop_version
            self.fs.mkdir("hdfs:///d")
            local_path = os.path.join(self.tmp_dir, "mock_hdfs_root", "d")
            self.assertEqual(os.path.isdir(local_path), True)

    def test_path_exists_no(self):
        path = "hdfs:///f"
        self.assertEqual(self.fs.path_exists(path), False)

#......... some code omitted here .........

Developer ID: DepengLuan, project: mrjob, lines of code: 101, source file: test_hadoop.py
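The test_du assertions pin down the semantics worth remembering: a directory or glob reports the summed size of every file beneath it, while a single file reports its own size. A hedged usage sketch against a hypothetical cluster:

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(['hadoop'])
fs.du('hdfs:///more')        # total bytes of all files under the directory
fs.du('hdfs:///more/*')      # glob form; same total in the test above
fs.du('hdfs:///more/data2')  # a single file reports just its own size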
Example 6: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )
        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_hdfs_file(self, name, contents):
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_hdfs_file('f', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('f2', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f',
                                                        'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_hdfs_file('f', 'contents')
        self.make_hdfs_file('d/f2', 'contents')
        self.assertEqual(list(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///d/f2'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for it.
        # this is only a sanity check anyway.
        self.makefile(os.path.join('mock_hdfs_root', 'data', 'foo'), 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(list(self.fs._cat_file(remote_path)), ['foo\n', 'foo\n'])

    def test_du(self):
        self.makefile(os.path.join('mock_hdfs_root', 'data1'), 'abcd')
        self.makedirs('mock_hdfs_root/more')
        self.makefile(os.path.join('mock_hdfs_root', 'more', 'data2'), 'defg')
        self.makefile(os.path.join('mock_hdfs_root', 'more', 'data3'), 'hijk')
        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/data2'), 4)
        self.assertEqual(self.fs.du('hdfs:///more/data3'), 4)

    def test_mkdir(self):
        self.fs.mkdir('hdfs:///d')
        local_path = os.path.join(self.tmp_dir, 'mock_hdfs_root', 'd')
        self.assertEqual(os.path.isdir(local_path), True)

    def test_rm(self):
        local_path = self.make_hdfs_file('f', 'contents')
        self.assertEqual(os.path.exists(local_path), True)

        self.fs.rm('hdfs:///f')
        self.assertEqual(os.path.exists(local_path), False)

    def test_touchz(self):
        # mockhadoop doesn't implement this.
        pass

Developer ID: adaptivelab, project: mrjob, lines of code: 85, source file: test_hadoop.py
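test_rm shows the round trip to copy when clearing old job output: create, confirm existence, remove, confirm removal. A minimal hedged sketch (the path is a placeholder):

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(['hadoop'])
fs.rm('hdfs:///tmp/old-output')  # delegates deletion to the hadoop binary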
Example 7: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )
        self.env['MOCK_HADOOP_TMP'] = self.makedirs('mock_hadoop')
        self.env['MOCK_HADOOP_VERSION'] = '2.7.1'
        self.env['USER'] = 'mrjob_tests'

    def make_mock_file(self, name, contents='contents'):
        return self.makefile(
            os.path.join(get_mock_hdfs_root(self.env), name), contents)

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.join('hdfs:///data', 'foo')
        self.assertEqual(
            b''.join(self.fs._cat_file(remote_path)),
            b'foo\nfoo\n')

    def test_cat_bz2(self):
        self.make_mock_file('data/foo.bz2', bz2.compress(b'foo\n' * 1000))
        remote_path = self.fs.join('hdfs:///data', 'foo.bz2')
        self.assertEqual(
            b''.join(self.fs._cat_file(remote_path)),
            b'foo\n' * 1000)

    def test_cat_gz(self):
        self.make_mock_file('data/foo.gz', gzip_compress(b'foo\n' * 10000))
        remote_path = self.fs.join('hdfs:///data', 'foo.gz')
        self.assertEqual(
            b''.join(self.fs._cat_file(remote_path)),
            b'foo\n' * 10000)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_mock_file('f')
        self.make_mock_file('f2')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_mock_file('f')
        self.make_mock_file('d/f2')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(sorted(self.fs.ls('s3n://bucket/')),
                         ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_ls_s3a(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(sorted(self.fs.ls('s3a://bucket/')),
                         ['s3a://bucket/f', 's3a://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///foo bar'])

    def test_double_space(self):
        self.make_mock_file('foo  bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
#......... some code omitted here .........

Developer ID: Yelp, project: mrjob, lines of code: 101, source file: test_hadoop.py
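Note that test_ls_s3n and test_ls_s3a differ only in the URI scheme: HadoopFilesystem hands whatever URI it receives to `hadoop fs`, so S3 buckets list exactly like HDFS paths, provided the Hadoop install ships the matching S3 connector. A hedged sketch ('bucket' is a placeholder name):

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(['hadoop'])
for uri in fs.ls('s3a://bucket/'):  # same call shape as fs.ls('hdfs:///')
    print(uri)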
Example 8: test_predefined_hadoop_bin

def test_predefined_hadoop_bin(self):
    self.fs = HadoopFilesystem(hadoop_bin=['hadoop', '-v'])

    self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop', '-v'])
    self.assertFalse(self.which.called)

Developer ID: Yelp, project: mrjob, lines of code: 6, source file: test_hadoop.py
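Because hadoop_bin takes a full argv list rather than a bare path, extra arguments ride along on every invocation, and, as the assertion on self.which shows, no PATH or environment-variable search happens at all. A minimal sketch:

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(hadoop_bin=['hadoop', '-v'])  # any argv prefix works
assert fs.get_hadoop_bin() == ['hadoop', '-v']      # returned verbatim, no lookup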
Example 9: FindHadoopBinTestCase

class FindHadoopBinTestCase(SandboxedTestCase):

    def setUp(self):
        super(FindHadoopBinTestCase, self).setUp()

        # track calls to which()
        self.which = self.start(patch('mrjob.fs.hadoop.which', wraps=which))

        # keep which() from searching in /bin, etc.
        os.environ['PATH'] = self.tmp_dir

        # create basic HadoopFilesystem (okay to overwrite)
        self.fs = HadoopFilesystem()

    def _add_hadoop_bin_for_envvar(self, envvar, *dirnames):
        """Add a fake "Hadoop" binary to its own subdirectory of
        ``self.tmp_dir``, and set *os.environ[envvar]* to point at it. You can
        use *dirnames* to put the binary in a subdirectory of
        *os.environ[envvar]* (e.g. ``'bin'``).

        return the path to the fake Hadoop binary.
        """
        os.environ[envvar] = join(self.tmp_dir, envvar.lower())

        hadoop_bin_path = join(join(os.environ[envvar], *dirnames), 'hadoop')
        self.makefile(hadoop_bin_path, executable=True)

        return hadoop_bin_path

    # tests without environment variables

    def test_do_nothing_on_init(self):
        self.assertFalse(self.which.called)

    def test_fallback(self):
        self.assertFalse(self.which.called)

        self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop'])
        self.which.assert_called_once_with('hadoop', path=None)

    def test_predefined_hadoop_bin(self):
        self.fs = HadoopFilesystem(hadoop_bin=['hadoop', '-v'])

        self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop', '-v'])
        self.assertFalse(self.which.called)

    # environment variable tests

    def _test_environment_variable(self, envvar, *dirnames):
        """Check if we can find the hadoop binary from *envvar*"""
        # okay to add after HadoopFilesystem() created; it hasn't looked yet
        hadoop_bin = self._add_hadoop_bin_for_envvar(envvar, *dirnames)
        self.assertEqual(self.fs.get_hadoop_bin(), [hadoop_bin])

    def test_hadoop_prefix(self):
        self._test_environment_variable('HADOOP_PREFIX', 'bin')

    def test_hadoop_home_envvar(self):
        self._test_environment_variable('HADOOP_HOME', 'bin')

    def test_hadoop_install(self):
        self._test_environment_variable('HADOOP_INSTALL', 'bin')

    def test_hadoop_install_hadoop_subdir(self):
        self._test_environment_variable('HADOOP_INSTALL', 'hadoop', 'bin')

    def test_path(self):
        self._test_environment_variable('PATH')

    def test_two_part_path(self):
        hadoop_path1 = join(self.tmp_dir, 'path1')
        hadoop_path1_bin = self.makefile(join(hadoop_path1, 'hadoop'),
                                         executable=True)
        hadoop_path2 = join(self.tmp_dir, 'path2')
        hadoop_path2_bin = self.makefile(join(hadoop_path2, 'hadoop'),
                                         executable=True)

        os.environ['PATH'] = ':'.join([hadoop_path1, hadoop_path2])

        self.assertEqual(self.fs.get_hadoop_bin(), [hadoop_path1_bin])
        self.assertNotEqual(self.fs.get_hadoop_bin(), [hadoop_path2_bin])

    def test_hadoop_mapred_home(self):
        self._test_environment_variable('HADOOP_MAPRED_HOME', 'bin')

    def test_hadoop_anything_home(self):
        self._test_environment_variable('HADOOP_ANYTHING_HOME', 'bin')

    def test_other_environment_variable(self):
        self._add_hadoop_bin_for_envvar('HADOOP_YARN_MRJOB_DIR', 'bin')

        self.assertEqual(self.fs.get_hadoop_bin(), ['hadoop'])

    # precedence tests

    def test_hadoop_prefix_beats_hadoop_home_envvar(self):
#......... some code omitted here .........

Developer ID: Yelp, project: mrjob, lines of code: 101, source file: test_hadoop.py
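Taken together, these tests document the lookup rules: an explicit hadoop_bin wins outright; otherwise get_hadoop_bin() consults $PATH and Hadoop-style environment variables (HADOOP_PREFIX, HADOOP_HOME, HADOOP_INSTALL, any HADOOP_*_HOME) looking for a bin/hadoop beneath each, and it does so lazily, on the first call rather than at construction (test_do_nothing_on_init). A hedged sketch; the /opt/hadoop location is an assumption:

import os
from mrjob.fs.hadoop import HadoopFilesystem

os.environ['HADOOP_PREFIX'] = '/opt/hadoop'  # pretend install; must contain bin/hadoop
fs = HadoopFilesystem()                      # no lookup happens yet
print(fs.get_hadoop_bin())                   # the search runs here, on first use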
Example 10: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )
        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_hdfs_file(self, name, contents='contents'):
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def make_hdfs_dir(self, name):
        return self.makedirs(os.path.join('mock_hdfs_root', name))

    def make_hdfs_tree(self, path, files=None):
        if files is None:
            files = ('f', 'g/a/b', 'g/a/a/b')
        test_files = []
        for f in sorted(files):
            f = os.path.join(path, f)
            self.make_hdfs_file(f, f)
            test_files.append("hdfs:///" + f)
        self.assertEqual(
            sorted(self.fs.ls("hdfs:///" + path.rstrip('/') + '/*')),
            test_files
        )
        return path

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_hdfs_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_hdfs_file('f')
        self.make_hdfs_file('f2')
        self.assertItemsEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f',
                                                             'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_hdfs_file('f')
        self.make_hdfs_file('d/f2')
        self.assertItemsEqual(list(self.fs.ls('hdfs:///')),
                              ['hdfs:///f', 'hdfs:///d/f2'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_hdfs_file('f', 'foo')
        self.make_hdfs_file('f3 win', 'foo' * 10)
        self.assertItemsEqual(list(self.fs.ls('s3n://bucket/')),
                              ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_hdfs_file('foo bar')
        self.assertItemsEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///foo bar'])

    def test_double_space(self):
        self.make_hdfs_file('foo  bar')
        self.assertItemsEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///foo  bar'])

    def test_cat_uncompressed(self):
        # mockhadoop doesn't support compressed files, so we won't test for it.
        # this is only a sanity check anyway.
        self.make_hdfs_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         ['foo\n', 'foo\n'])

    def test_write_str(self):
        path = 'hdfs:///write-test-str'
        content = 'some content!'
        self.fs.write(path, content)
        self.assertEqual("".join(self.fs.cat(path)), content)

    def test_write_file(self):
        path = 'hdfs:///write-test-fileobj'
        content = StringIO('some content!')
#......... some code omitted here .........

Developer ID: duedil-ltd, project: mrjob, lines of code: 101, source file: test_hadoop.py
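test_write_str and test_write_file show the two input shapes this fork's fs.write() accepted: a plain string or a file-like object. A hedged round-trip sketch in the same Python 2 idiom as the example (path and content are placeholders):

from StringIO import StringIO  # Python 2, matching the example's vintage
from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(['hadoop'])
fs.write('hdfs:///tmp/example', StringIO('some content!'))
print(''.join(fs.cat('hdfs:///tmp/example')))  # cat() yields the content back in chunks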
Example 11: HadoopFSTestCase

class HadoopFSTestCase(MockSubprocessTestCase):

    def setUp(self):
        super(HadoopFSTestCase, self).setUp()
        # wrap HadoopFilesystem so it gets cat()
        self.fs = HadoopFilesystem(['hadoop'])
        self.set_up_mock_hadoop()
        self.mock_popen(fs_hadoop, mock_hadoop_main, self.env)

    def set_up_mock_hadoop(self):
        # setup fake hadoop home
        self.env = {}
        self.env['HADOOP_HOME'] = self.makedirs('mock_hadoop_home')
        self.makefile(
            os.path.join(
                'mock_hadoop_home',
                'contrib',
                'streaming',
                'hadoop-0.X.Y-streaming.jar'),
            'i are java bytecode',
        )
        self.env['MOCK_HDFS_ROOT'] = self.makedirs('mock_hdfs_root')
        self.env['MOCK_HADOOP_OUTPUT'] = self.makedirs('mock_hadoop_output')
        self.env['USER'] = 'mrjob_tests'
        # don't set MOCK_HADOOP_LOG, we get command history other ways

    def make_mock_file(self, name, contents='contents'):
        return self.makefile(os.path.join('mock_hdfs_root', name), contents)

    def test_ls_empty(self):
        self.assertEqual(list(self.fs.ls('hdfs:///')), [])

    def test_ls_basic(self):
        self.make_mock_file('f')
        self.assertEqual(list(self.fs.ls('hdfs:///')), ['hdfs:///f'])

    def test_ls_basic_2(self):
        self.make_mock_file('f')
        self.make_mock_file('f2')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///f', 'hdfs:///f2'])

    def test_ls_recurse(self):
        self.make_mock_file('f')
        self.make_mock_file('d/f2')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///d/f2', 'hdfs:///f'])

    def test_ls_s3n(self):
        # hadoop fs -lsr doesn't have user and group info when reading from s3
        self.make_mock_file('f', 'foo')
        self.make_mock_file('f3 win', 'foo' * 10)
        self.assertEqual(sorted(self.fs.ls('s3n://bucket/')),
                         ['s3n://bucket/f', 's3n://bucket/f3 win'])

    def test_single_space(self):
        self.make_mock_file('foo bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///foo bar'])

    def test_double_space(self):
        self.make_mock_file('foo  bar')
        self.assertEqual(sorted(self.fs.ls('hdfs:///')),
                         ['hdfs:///foo  bar'])

    def test_cat_uncompressed(self):
        self.make_mock_file('data/foo', 'foo\nfoo\n')
        remote_path = self.fs.path_join('hdfs:///data', 'foo')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         [b'foo\n', b'foo\n'])

    def test_cat_bz2(self):
        self.make_mock_file('data/foo.bz2', bz2.compress(b'foo\n' * 1000))
        remote_path = self.fs.path_join('hdfs:///data', 'foo.bz2')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         [b'foo\n'] * 1000)

    def test_cat_gz(self):
        self.make_mock_file('data/foo.gz', gzip_compress(b'foo\n' * 10000))
        remote_path = self.fs.path_join('hdfs:///data', 'foo.gz')
        self.assertEqual(list(self.fs._cat_file(remote_path)),
                         [b'foo\n'] * 10000)

    def test_du(self):
        self.make_mock_file('data1', 'abcd')
        self.make_mock_file('more/data2', 'defg')
        self.make_mock_file('more/data3', 'hijk')
        self.assertEqual(self.fs.du('hdfs:///'), 12)
        self.assertEqual(self.fs.du('hdfs:///data1'), 4)
        self.assertEqual(self.fs.du('hdfs:///more'), 8)
        self.assertEqual(self.fs.du('hdfs:///more/*'), 8)
#......... some code omitted here .........

Developer ID: DanisHack, project: mrjob, lines of code: 101, source file: test_hadoop.py
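A detail worth noticing in test_cat_bz2 and test_cat_gz: the files are written compressed, but _cat_file yields plain b'foo\n' lines back, i.e. mrjob selects a decompressor from the .bz2/.gz extension while streaming. A hedged sketch (the path is a placeholder):

from mrjob.fs.hadoop import HadoopFilesystem

fs = HadoopFilesystem(['hadoop'])
lines = list(fs._cat_file('hdfs:///data/foo.gz'))  # decompressed lines, not gzip bytes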
Note: the mrjob.fs.hadoop.HadoopFilesystem class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by various authors; copyright remains with the original authors. Consult the corresponding project's license before redistributing or using the code. Do not republish without permission.