This article collects typical usage examples of the pydoop.hdfs.dump function in Python. If you have been wondering exactly what dump does and how to use it, the curated code samples below should help.
Twenty code examples of the dump function are shown below, sorted by popularity by default. You can upvote examples you like or find useful; your ratings help the system recommend better Python code samples.
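For quick orientation before the examples: hdfs.dump(data, path) writes data to an HDFS (or local) path in a single call, and hdfs.load(path) is its read counterpart. A minimal round-trip sketch, where the path is an arbitrary placeholder:

import pydoop.hdfs as hdfs

path = "/user/demo/hello.txt"     # placeholder path, adjust to your cluster
hdfs.dump("hello, hdfs\n", path)  # create (or overwrite) the file with the given data
data = hdfs.load(path)            # read it back (bytes or str, depending on pydoop version/mode)
hdfs.rmr(path)                    # recursive delete; also works on a single file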
Example 1: dump
def dump(self):
    for test_path in self.hdfs_paths[0], self.local_paths[0]:
        hdfs.dump(self.data, test_path)
        with hdfs.open(test_path) as fi:
            rdata = fi.read()
        fi.fs.close()
        self.assertEqual(rdata, self.data)
Author: ZEMUSHKA, Project: pydoop, Lines: 7, Source: test_hdfs.py
Example 2: _write
def _write(self, data):
    "Internal Write API"
    schema = self.schema
    wmaid = self.wmaid(data)
    year, month, _ = today()
    hdir = '%s/%s/%s' % (self.hdir, year, month)
    if not hdfs.path.isdir(hdir):
        hdfs.mkdir(hdir)
    fname = file_name(hdir, wmaid, self.compress)
    # create Avro writer and binary encoder
    writer = avro.io.DatumWriter(schema)
    bytes_writer = io.BytesIO()
    if self.compress:
        # use gzip'ed writer with BytesIO file object
        gzip_writer = gzip.GzipFile(fileobj=bytes_writer, mode='wb')
        encoder = avro.io.BinaryEncoder(gzip_writer)
    else:
        # plain binary encoder
        encoder = avro.io.BinaryEncoder(bytes_writer)
    # write records from the given data stream to the binary encoder
    writer.write(data, encoder)
    # close gzip stream if necessary
    if self.compress:
        gzip_writer.flush()
        gzip_writer.close()
    # store raw data to hadoop via HDFS
    hdfs.dump(bytes_writer.getvalue(), fname)
    # close bytes stream
    bytes_writer.close()
Author: yuyiguo, Project: WMArchive, Lines: 35, Source: HdfsIO.py
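The example above covers only the write path. For completeness, here is a minimal sketch of the matching read path; it is not part of the original HdfsIO.py, assumes the same schema object, a single record per file, and the helper name _read is hypothetical:

import io
import gzip
import avro.io
import pydoop.hdfs as hdfs

def _read(schema, fname, compress=True):
    # hypothetical counterpart to _write(): load the raw bytes back from HDFS
    raw = hdfs.load(fname)  # raw bytes (binary is the default in recent pydoop)
    stream = io.BytesIO(raw)
    if compress:
        # the data was written through a gzip stream, so unwrap it first
        stream = gzip.GzipFile(fileobj=stream, mode='rb')
    decoder = avro.io.BinaryDecoder(stream)
    reader = avro.io.DatumReader(schema)
    # _write() encoded a single datum, so a single read() recovers it
    return reader.read(decoder)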
Example 3: __setup_remote_paths
def __setup_remote_paths(self):
    """
    Actually create the working directory and copy the module into it.
    Note: the script has to be readable by Hadoop; though this may not
    generally be a problem on HDFS, where the Hadoop user is usually
    the superuser, things may be different if our working directory is
    on a shared POSIX filesystem. Therefore, we make the directory
    and the script accessible by all.
    """
    self.logger.debug("remote_wd: %s", self.remote_wd)
    self.logger.debug("remote_exe: %s", self.remote_exe)
    self.logger.debug("remotes: %s", self.files_to_upload)
    if self.args.module:
        self.logger.debug(
            'Generated pipes_code:\n\n %s', self._generate_pipes_code()
        )
    if not self.args.pretend:
        hdfs.mkdir(self.remote_wd)
        hdfs.chmod(self.remote_wd, "a+rx")
        self.logger.debug("created and chmod-ed: %s", self.remote_wd)
        pipes_code = self._generate_pipes_code()
        hdfs.dump(pipes_code, self.remote_exe)
        self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
        hdfs.chmod(self.remote_exe, "a+rx")
        self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
        for (l, h, _) in self.files_to_upload:
            self.logger.debug("uploading: %s to %s", l, h)
            hdfs.cp(l, h)
    self.logger.debug("Created%sremote paths:" %
                      (' [simulation] ' if self.args.pretend else ' '))
Author: kikkomep, Project: pydoop, Lines: 31, Source: submit.py
Example 4: good
def good(self):
    base_path = make_random_str()
    for path in base_path, base_path + UNI_CHR:
        hdfs.dump("foo\n", path)
        self.assertTrue(hdfs.path.exists(path))
        hdfs.rmr(path)
        self.assertFalse(hdfs.path.exists(path))
Author: kikkomep, Project: pydoop, Lines: 7, Source: test_path.py
Example 5: write
def write(writeFlag):
    if writeFlag:
        # instantiate hadoop
        hdfs.hdfs()
        targetPath = config.targetPath
        targetDirectory = config.targetDirectory
        sourceFile = config.sourceFile
        print("Target Path: " + targetPath)
        print("Target Directory: " + targetDirectory)
        print("Source Path: " + sourceFile)
        with open(sourceFile, "r") as dumpFile:
            fullText = dumpFile.read()
        # write to hadoop
        #hdfs.mkdir(targetDirectory)
        hdfs.dump(fullText, targetPath)
        #hdfs.cp(sourceFile, targetPath)
        #print(hdfs.ls("test4"))
        #files = hdfs.ls("test4")
        # read from hadoop
        #hdfs.get("test4/hello.txt", "/tmp/hello.txt")
        #with open("/tmp/hello.txt") as f:
        #    print(f.read())
        #print(hdfs.ls("test", "hduser1"))
        #text = hdfs.load("test/hello.txt")
        #print(text)
Author: davedwards, Project: beautiful-data, Lines: 33, Source: hadoopWriter.py
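The commented-out lines above hint at the corresponding read path. A minimal working version of that round trip could look like the following; the test4/hello.txt path mirrors the placeholders in the comments and is purely illustrative:

import pydoop.hdfs as hdfs

hdfs.get("test4/hello.txt", "/tmp/hello.txt")  # copy HDFS -> local
with open("/tmp/hello.txt") as f:
    print(f.read())
print(hdfs.load("test4/hello.txt"))            # or read the HDFS file directly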
Example 6: get
def get(self):
    src = self.hdfs_paths[0]
    dest = hdfs.path.split(self.local_paths[0])[-1]
    hdfs.dump(self.data, src)
    hdfs.get(src, dest)
    with open(dest) as fi:
        rdata = fi.read()
    self.assertEqual(rdata, self.data)
Author: ZEMUSHKA, Project: pydoop, Lines: 8, Source: test_hdfs.py
Example 7: renames
def renames(self):
    test_path = self.hdfs_paths[0]
    hdfs.dump(self.data, test_path)
    new_d = hdfs.path.join(self.hdfs_wd, "new_dir")
    new_path = hdfs.path.join(new_d, "new_p")
    hdfs.renames(test_path, new_path)
    self.assertFalse(hdfs.path.exists(test_path))
    self.assertTrue(hdfs.path.exists(new_path))
Author: kikkomep, Project: pydoop, Lines: 8, Source: test_hdfs.py
Example 8: set_exe
def set_exe(self, pipes_code):
    """
    Dump launcher code to the distributed file system.
    """
    if not self.output:
        raise RuntimeError("no output directory, can't create launcher")
    parent = hdfs.path.dirname(hdfs.path.abspath(self.output.rstrip("/")))
    self.exe = hdfs.path.join(parent, utils.make_random_str())
    hdfs.dump(pipes_code, self.exe)
Author: crs4, Project: pydoop, Lines: 9, Source: hadut.py
Example 9: __ls
def __ls(self, ls_func, path_transform):
    for wd, paths in izip(
        (self.local_wd, self.hdfs_wd), (self.local_paths, self.hdfs_paths)
    ):
        for p in paths:
            hdfs.dump(self.data, p)
            self.assertEqual(path_transform(ls_func(p)[0]), p)
        dir_list = [path_transform(p) for p in ls_func(wd)]
        self.assertEqual(set(dir_list), set(paths))
Author: ilveroluca, Project: pydoop, Lines: 9, Source: test_hdfs.py
Example 10: rename
def rename(self):
    test_path = self.hdfs_paths[0]
    new_path = "%s.new" % test_path
    hdfs.dump(self.data, test_path)
    hdfs.rename(test_path, new_path)
    self.assertFalse(hdfs.path.exists(test_path))
    self.assertTrue(hdfs.path.exists(new_path))
    self.assertRaises(
        RuntimeError, hdfs.rename, test_path, self.local_paths[0]
    )
Author: kikkomep, Project: pydoop, Lines: 10, Source: test_hdfs.py
Example 11: __make_tree
def __make_tree(self, wd):
    d1 = "%s/d1" % wd
    t1 = FSTree(d1)
    d2 = "%s/d2" % d1
    t2 = t1.add(d2)
    hdfs.mkdir(d2)
    for t, d, bn in ((t1, d1, "f1"), (t2, d2, "f2")):
        f = "%s/%s" % (d, bn)
        hdfs.dump(self.data, f)
        t.add(f, 0)
    return t1
Author: ZEMUSHKA, Project: pydoop, Lines: 11, Source: test_hdfs.py
Example 12: runTest
def runTest(self):
    path = make_random_str() + UNI_CHR
    hdfs.dump("foo\n", path)
    st = hdfs.path.stat(path)
    atime, mtime = [getattr(st, 'st_%stime' % _) for _ in 'am']
    new_atime, new_mtime = atime + 100, mtime + 200
    hdfs.path.utime(path, (new_atime, new_mtime))
    st = hdfs.path.stat(path)
    self.assertEqual(st.st_atime, new_atime)
    self.assertEqual(st.st_mtime, new_mtime)
    hdfs.rmr(path)
Author: kikkomep, Project: pydoop, Lines: 11, Source: test_path.py
Example 13: get_hosts
def get_hosts(self):
    if hdfs.default_is_local():
        # only run on HDFS
        return
    hdfs.dump(self.data, self.hdfs_paths[0])
    fs = hdfs.hdfs("default", 0)
    hs = fs.get_hosts(self.hdfs_paths[0], 0, 10)
    self.assertTrue(len(hs) > 0)
    self.assertRaises(
        ValueError, fs.get_hosts, self.hdfs_paths[0], -10, 10
    )
    self.assertRaises(ValueError, fs.get_hosts, self.hdfs_paths[0], 0, -10)
Author: kikkomep, Project: pydoop, Lines: 12, Source: test_hdfs.py
Example 14: __cp_file
def __cp_file(self, wd):
    fn = "%s/fn" % wd
    hdfs.dump(self.data, fn)
    dest_dir = "%s/dest_dir" % wd
    hdfs.mkdir(dest_dir)
    fn_copy_on_wd = "%s/fn_copy" % wd
    hdfs.cp(fn, fn_copy_on_wd)
    self.assertEqual(hdfs.load(fn_copy_on_wd), self.data)
    self.assertRaises(IOError, hdfs.cp, fn, fn_copy_on_wd)
    fn_copy_on_dest_dir = "%s/fn" % dest_dir
    hdfs.cp(fn, dest_dir)
    self.assertEqual(hdfs.load(fn_copy_on_dest_dir), self.data)
    self.assertRaises(IOError, hdfs.cp, fn, dest_dir)
Author: ZEMUSHKA, Project: pydoop, Lines: 13, Source: test_hdfs.py
Example 15: test_kind
def test_kind(self):
    path = utils.make_random_str()
    self.assertTrue(hdfs.path.kind(path) is None)
    try:
        hdfs.dump("foo\n", path)
        self.assertEqual('file', hdfs.path.kind(path))
        hdfs.rmr(path)
        hdfs.mkdir(path)
        self.assertEqual('directory', hdfs.path.kind(path))
    finally:
        try:
            hdfs.rmr(path)
        except IOError:
            pass
Author: ilveroluca, Project: pydoop, Lines: 14, Source: test_path.py
Example 16: chown
def chown(self):
    new_user = 'nobody'
    test_path = self.hdfs_paths[0]
    hdfs.dump(self.data, test_path)
    hdfs.chown(test_path, user=new_user)
    path_info = hdfs.lsl(test_path)[0]
    self.assertEqual(path_info['owner'], new_user)
    prev_owner = path_info['owner']
    prev_grp = path_info['group']
    # owner and group should remain unchanged
    hdfs.chown(test_path, user='', group='')
    path_info = hdfs.lsl(test_path)[0]
    self.assertEqual(path_info['owner'], prev_owner)
    self.assertEqual(path_info['group'], prev_grp)
Author: kikkomep, Project: pydoop, Lines: 14, Source: test_hdfs.py
Example 17: test_kind
def test_kind(self):
    for path in self.path, self.u_path:
        self.assertTrue(hdfs.path.kind(path) is None)
        try:
            hdfs.dump("foo\n", path)
            self.assertEqual('file', hdfs.path.kind(path))
            hdfs.rmr(path)
            hdfs.mkdir(path)
            self.assertEqual('directory', hdfs.path.kind(path))
        finally:
            try:
                hdfs.rmr(path)
            except IOError:
                pass
Author: kikkomep, Project: pydoop, Lines: 14, Source: test_path.py
Example 18: test_isdir
def test_isdir(self):
    for path in self.path, self.u_path:
        self.assertFalse(hdfs.path.isdir(path))
        try:
            hdfs.dump("foo\n", path)
            self.assertFalse(hdfs.path.isdir(path))
            hdfs.rmr(path)
            hdfs.mkdir(path)
            self.assertTrue(hdfs.path.isdir(path))
        finally:
            try:
                hdfs.rmr(path)
            except IOError:
                pass
Author: kikkomep, Project: pydoop, Lines: 14, Source: test_path.py
Example 19: test_isdir
def test_isdir(self):
    path = utils.make_random_str()
    self.assertFalse(hdfs.path.isdir(path))
    try:
        hdfs.dump("foo\n", path)
        self.assertFalse(hdfs.path.isdir(path))
        hdfs.rmr(path)
        hdfs.mkdir(path)
        self.assertTrue(hdfs.path.isdir(path))
    finally:
        try:
            hdfs.rmr(path)
        except IOError:
            pass
Author: ilveroluca, Project: pydoop, Lines: 14, Source: test_path.py
Example 20: __ls
def __ls(self, ls_func, path_transform):
    for wd, paths in izip(
        (self.local_wd, self.hdfs_wd), (self.local_paths, self.hdfs_paths)
    ):
        for p in paths:
            hdfs.dump(self.data, p)
        test_dir = "%s/%s" % (wd, "test_dir")
        test_path = "%s/%s" % (test_dir, "test_path")
        hdfs.dump(self.data, test_path)
        paths.append(test_dir)
        for recursive in False, True:
            if recursive:
                paths.append(test_path)
            dir_list = [
                path_transform(p) for p in ls_func(wd, recursive=recursive)
            ]
            self.assertEqual(sorted(dir_list), sorted(paths))
Author: ZEMUSHKA, Project: pydoop, Lines: 15, Source: test_hdfs.py
Note: the pydoop.hdfs.dump examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors. For distribution and use, please refer to each project's license; do not reproduce without permission.