This article collects typical usage examples of the pyarrow.compat.guid function in Python. If you are wondering what guid does, how to call it, or where it is useful, the curated examples below should help.
The article presents 20 code examples of the guid function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
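Before the examples, a minimal sketch of the pattern they all share. In the pyarrow versions these examples target, guid() returns a unique 32-character hex string (essentially uuid.uuid4().hex), which the tests use to build collision-free file and directory names; in newer pyarrow releases the helper lives in pyarrow.util rather than pyarrow.compat. The make_temp_name helper below is a hypothetical illustration, not part of pyarrow:

import os
from pyarrow.compat import guid  # moved to pyarrow.util in newer releases

def make_temp_name(base_dir, suffix=''):
    # Hypothetical helper: build a unique path the way the tests below do.
    return os.path.join(base_dir, guid() + suffix)

print(make_temp_name('/tmp', '.parquet'))  # e.g. /tmp/0c1f...d9ab.parquet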
Example 1: test_native_file_TextIOWrapper
def test_native_file_TextIOWrapper(tmpdir):
    data = (u'foooo\n'
            u'barrr\n'
            u'bazzz\n')

    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(data.encode('utf-8'))

    with TextIOWrapper(pa.OSFile(path, mode='rb')) as fil:
        assert fil.readable()
        res = fil.read()
        assert res == data
    assert fil.closed

    with TextIOWrapper(pa.OSFile(path, mode='rb')) as fil:
        # Iteration works
        lines = list(fil)
        assert ''.join(lines) == data

    # Writing
    path2 = os.path.join(str(tmpdir), guid())
    with TextIOWrapper(pa.OSFile(path2, mode='wb')) as fil:
        assert fil.writable()
        fil.write(data)

    with TextIOWrapper(pa.OSFile(path2, mode='rb')) as fil:
        res = fil.read()
        assert res == data
Author: sunchao, Project: arrow, Lines: 29, Source: test_io.py
Example 2: test_native_file_modes
def test_native_file_modes(tmpdir):
    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(b'foooo')

    with pa.OSFile(path, mode='r') as f:
        assert f.mode == 'rb'

    with pa.OSFile(path, mode='rb') as f:
        assert f.mode == 'rb'

    with pa.OSFile(path, mode='w') as f:
        assert f.mode == 'wb'

    with pa.OSFile(path, mode='wb') as f:
        assert f.mode == 'wb'

    with open(path, 'wb') as f:
        f.write(b'foooo')

    with pa.memory_map(path, 'r') as f:
        assert f.mode == 'rb'

    with pa.memory_map(path, 'r+') as f:
        assert f.mode == 'rb+'

    with pa.memory_map(path, 'r+b') as f:
        assert f.mode == 'rb+'
Author: NonVolatileComputing, Project: arrow, Lines: 28, Source: test_io.py
Example 3: test_dataset_read_pandas
def test_dataset_read_pandas(tmpdir):
    import pyarrow.parquet as pq

    nfiles = 5
    size = 5

    dirpath = tmpdir.join(guid()).strpath
    os.mkdir(dirpath)

    test_data = []
    frames = []
    paths = []
    for i in range(nfiles):
        df = _test_dataframe(size, seed=i)
        df.index = np.arange(i * size, (i + 1) * size)
        df.index.name = 'index'

        path = pjoin(dirpath, '{0}.parquet'.format(i))

        table = pa.Table.from_pandas(df)
        _write_table(table, path)
        test_data.append(table)
        frames.append(df)
        paths.append(path)

    dataset = pq.ParquetDataset(dirpath)
    columns = ['uint8', 'strings']
    result = dataset.read_pandas(columns=columns).to_pandas()
    expected = pd.concat([x[columns] for x in frames])

    tm.assert_frame_equal(result, expected)
Author: NonVolatileComputing, Project: arrow, Lines: 31, Source: test_parquet.py
Example 4: test_read_multiple_parquet_files
def test_read_multiple_parquet_files(self):
    import pyarrow.parquet as pq

    nfiles = 10
    size = 5

    tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())

    self.hdfs.mkdir(tmpdir)

    test_data = []
    paths = []
    for i in range(nfiles):
        df = test_parquet._test_dataframe(size, seed=i)
        df['index'] = np.arange(i * size, (i + 1) * size)

        # Hack so that we don't have a dtype cast in v1 files
        df['uint32'] = df['uint32'].astype(np.int64)

        path = pjoin(tmpdir, '{0}.parquet'.format(i))

        table = pa.Table.from_pandas(df, preserve_index=False)
        with self.hdfs.open(path, 'wb') as f:
            pq.write_table(table, f)

        test_data.append(table)
        paths.append(path)

    result = self.hdfs.read_parquet(tmpdir)
    expected = pa.concat_tables(test_data)

    pdt.assert_frame_equal(result.to_pandas()
                           .sort_values(by='index').reset_index(drop=True),
                           expected.to_pandas())
Author: NonVolatileComputing, Project: arrow, Lines: 35, Source: test_hdfs.py
Example 5: test_native_file_raises_ValueError_after_close
def test_native_file_raises_ValueError_after_close(tmpdir):
    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(b'foooo')

    with pa.OSFile(path, mode='rb') as os_file:
        assert not os_file.closed
    assert os_file.closed

    with pa.memory_map(path, mode='rb') as mmap_file:
        assert not mmap_file.closed
    assert mmap_file.closed

    files = [os_file,
             mmap_file]

    methods = [('tell', ()),
               ('seek', (0,)),
               ('size', ()),
               ('flush', ()),
               ('readable', ()),
               ('writable', ()),
               ('seekable', ())]

    for f in files:
        for method, args in methods:
            with pytest.raises(ValueError):
                getattr(f, method)(*args)
Author: sunchao, Project: arrow, Lines: 28, Source: test_io.py
Example 6: test_read_multiple_parquet_files
def test_read_multiple_parquet_files(self):
    tmpdir = pjoin(self.tmp_path, 'multi-parquet-' + guid())

    self.hdfs.mkdir(tmpdir)

    expected = self._write_multiple_hdfs_pq_files(tmpdir)
    result = self.hdfs.read_parquet(tmpdir)

    pdt.assert_frame_equal(result.to_pandas()
                           .sort_values(by='index').reset_index(drop=True),
                           expected.to_pandas())
Author: CodingCat, Project: arrow, Lines: 12, Source: test_hdfs.py
Example 7: sample_disk_data
def sample_disk_data(request):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = guid()
    with open(path, 'wb') as f:
        f.write(data)

    def teardown():
        _try_delete(path)
    request.addfinalizer(teardown)
    return path, data
Author: kiril-me, Project: arrow, Lines: 13, Source: test_io.py
Example 8: test_read_multiple_parquet_files_with_uri
def test_read_multiple_parquet_files_with_uri(self):
    import pyarrow.parquet as pq

    tmpdir = pjoin(self.tmp_path, 'multi-parquet-uri-' + guid())

    self.hdfs.mkdir(tmpdir)

    expected = self._write_multiple_hdfs_pq_files(tmpdir)
    path = _get_hdfs_uri(tmpdir)
    result = pq.read_table(path)

    pdt.assert_frame_equal(result.to_pandas()
                           .sort_values(by='index').reset_index(drop=True),
                           expected.to_pandas())
Author: CodingCat, Project: arrow, Lines: 14, Source: test_hdfs.py
Example 9: s3_example
def s3_example():
    access_key = os.environ['PYARROW_TEST_S3_ACCESS_KEY']
    secret_key = os.environ['PYARROW_TEST_S3_SECRET_KEY']
    bucket_name = os.environ['PYARROW_TEST_S3_BUCKET']

    import s3fs
    fs = s3fs.S3FileSystem(key=access_key, secret=secret_key)

    test_dir = guid()

    bucket_uri = 's3://{0}/{1}'.format(bucket_name, test_dir)
    fs.mkdir(bucket_uri)
    yield fs, bucket_uri

    fs.rm(bucket_uri, recursive=True)
Author: NonVolatileComputing, Project: arrow, Lines: 14, Source: test_parquet.py
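Example 9 uses the generator-fixture pattern: everything before the yield is setup, everything after is teardown, and guid() guarantees a unique test directory per run. Below is a minimal local-filesystem sketch of the same pattern; the scratch_dir fixture is hypothetical, not taken from the arrow test suite:

import os
import shutil
import tempfile
import pytest
from pyarrow.compat import guid

@pytest.fixture
def scratch_dir():
    # Setup: create a uniquely named directory, as s3_example does on S3.
    path = os.path.join(tempfile.gettempdir(), guid())
    os.mkdir(path)
    yield path           # the test body runs here
    shutil.rmtree(path)  # teardown: remove the directory and its contents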
Example 10: test_dataset_read_pandas_common_metadata
def test_dataset_read_pandas_common_metadata(tmpdir):
    # ARROW-1103
    import pyarrow.parquet as pq

    nfiles = 5
    size = 5

    dirpath = tmpdir.join(guid()).strpath
    os.mkdir(dirpath)

    test_data = []
    frames = []
    paths = []
    for i in range(nfiles):
        df = _test_dataframe(size, seed=i)
        df.index = pd.Index(np.arange(i * size, (i + 1) * size))
        df.index.name = 'index'

        path = pjoin(dirpath, '{0}.parquet'.format(i))

        df_ex_index = df.reset_index(drop=True)
        df_ex_index['index'] = df.index
        table = pa.Table.from_pandas(df_ex_index,
                                     preserve_index=False)

        # Obliterate metadata
        table = table.replace_schema_metadata(None)
        assert table.schema.metadata is None

        _write_table(table, path)
        test_data.append(table)
        frames.append(df)
        paths.append(path)

    # Write _metadata common file
    table_for_metadata = pa.Table.from_pandas(df)
    pq.write_metadata(table_for_metadata.schema,
                      pjoin(dirpath, '_metadata'))

    dataset = pq.ParquetDataset(dirpath)
    columns = ['uint8', 'strings']
    result = dataset.read_pandas(columns=columns).to_pandas()
    expected = pd.concat([x[columns] for x in frames])

    tm.assert_frame_equal(result, expected)
Author: NonVolatileComputing, Project: arrow, Lines: 45, Source: test_parquet.py
Example 11: test_os_file_writer
def test_os_file_writer(tmpdir):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(data)

    # Truncates file
    f2 = pa.OSFile(path, mode='w')
    f2.write('foo')

    with pa.OSFile(path) as f3:
        assert f3.size() == 3

    with pytest.raises(IOError):
        f2.read(5)
Author: sunchao, Project: arrow, Lines: 18, Source: test_io.py
Example 12: test_read_write_parquet_files_with_uri
def test_read_write_parquet_files_with_uri(self):
    import pyarrow.parquet as pq

    tmpdir = pjoin(self.tmp_path, 'uri-parquet-' + guid())
    self.hdfs.mkdir(tmpdir)
    path = _get_hdfs_uri(pjoin(tmpdir, 'test.parquet'))

    size = 5
    df = test_parquet._test_dataframe(size, seed=0)
    # Hack so that we don't have a dtype cast in v1 files
    df['uint32'] = df['uint32'].astype(np.int64)
    table = pa.Table.from_pandas(df, preserve_index=False)

    pq.write_table(table, path)

    result = pq.read_table(path).to_pandas()

    pdt.assert_frame_equal(result, df)
Author: CodingCat, Project: arrow, Lines: 18, Source: test_hdfs.py
Example 13: test_memory_map_resize
def test_memory_map_resize(tmpdir):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype(np.uint8)
    data1 = arr.tobytes()[:(SIZE // 2)]
    data2 = arr.tobytes()[(SIZE // 2):]

    path = os.path.join(str(tmpdir), guid())

    # Use integer division so the map size stays an int on Python 3
    mmap = pa.create_memory_map(path, SIZE // 2)
    mmap.write(data1)

    mmap.resize(SIZE)
    mmap.write(data2)

    mmap.close()

    with open(path, 'rb') as f:
        assert f.read() == arr.tobytes()
Author: rok, Project: arrow, Lines: 18, Source: test_io.py
Example 14: test_memory_map_writer
def test_memory_map_writer():
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = guid()
    try:
        with open(path, 'wb') as f:
            f.write(data)

        f = io.MemoryMappedFile(path, mode='r+w')

        f.seek(10)
        f.write('peekaboo')
        assert f.tell() == 18

        f.seek(10)
        assert f.read(8) == b'peekaboo'

        f2 = io.MemoryMappedFile(path, mode='r+w')

        f2.seek(10)
        f2.write(b'booapeak')
        f2.seek(10)

        f.seek(10)
        assert f.read(8) == b'booapeak'

        # Does not truncate file
        f3 = io.MemoryMappedFile(path, mode='w')
        f3.write('foo')

        with io.MemoryMappedFile(path) as f4:
            assert f4.size() == SIZE

        with pytest.raises(IOError):
            f3.read(5)

        f.seek(0)
        assert f.read(3) == b'foo'
    finally:
        _try_delete(path)
Author: kiril-me, Project: arrow, Lines: 42, Source: test_io.py
Example 15: test_os_file_writer
def test_os_file_writer():
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = guid()
    try:
        with open(path, 'wb') as f:
            f.write(data)

        # Truncates file
        f2 = io.OSFile(path, mode='w')
        f2.write('foo')

        with io.OSFile(path) as f3:
            assert f3.size() == 3

        with pytest.raises(IOError):
            f2.read(5)
    finally:
        _try_delete(path)
Author: kiril-me, Project: arrow, Lines: 21, Source: test_io.py
Example 16: test_memory_map_writer
def test_memory_map_writer(tmpdir):
    SIZE = 4096
    arr = np.random.randint(0, 256, size=SIZE).astype('u1')
    data = arr.tobytes()[:SIZE]

    path = os.path.join(str(tmpdir), guid())
    with open(path, 'wb') as f:
        f.write(data)

    f = pa.memory_map(path, mode='r+b')

    f.seek(10)
    f.write('peekaboo')
    assert f.tell() == 18

    f.seek(10)
    assert f.read(8) == b'peekaboo'

    f2 = pa.memory_map(path, mode='r+b')

    f2.seek(10)
    f2.write(b'booapeak')
    f2.seek(10)

    f.seek(10)
    assert f.read(8) == b'booapeak'

    # Does not truncate file
    f3 = pa.memory_map(path, mode='w')
    f3.write('foo')

    with pa.memory_map(path) as f4:
        assert f4.size() == SIZE

    with pytest.raises(IOError):
        f3.read(5)

    f.seek(0)
    assert f.read(3) == b'foo'
Author: sunchao, Project: arrow, Lines: 39, Source: test_io.py
Example 17: _visit_level
def _visit_level(base_dir, level, part_keys):
    name, values = partition_spec[level]
    for value in values:
        this_part_keys = part_keys + [(name, value)]

        level_dir = pjoin(base_dir, '{0}={1}'.format(name, value))
        fs.mkdir(level_dir)

        if level == DEPTH - 1:
            # Generate example data
            file_path = pjoin(level_dir, guid())

            filtered_df = _filter_partition(df, this_part_keys)
            part_table = pa.Table.from_pandas(filtered_df)
            with fs.open(file_path, 'wb') as f:
                _write_table(part_table, f)
            assert fs.exists(file_path)

            _touch(pjoin(level_dir, '_SUCCESS'))
        else:
            _visit_level(level_dir, level + 1, this_part_keys)

            _touch(pjoin(level_dir, '_SUCCESS'))
Author: giantwhale, Project: arrow, Lines: 22, Source: test_parquet.py
Example 18: test_ignore_private_directories
def test_ignore_private_directories(tmpdir):
    import pyarrow.parquet as pq

    nfiles = 10
    size = 5

    dirpath = tmpdir.join(guid()).strpath
    os.mkdir(dirpath)

    test_data = []
    paths = []
    for i in range(nfiles):
        df = _test_dataframe(size, seed=i)
        path = pjoin(dirpath, '{0}.parquet'.format(i))

        test_data.append(_write_table(df, path))
        paths.append(path)

    # private directory
    os.mkdir(pjoin(dirpath, '_impala_staging'))

    dataset = pq.ParquetDataset(dirpath)
    assert set(paths) == set(x.path for x in dataset.pieces)
Author: NonVolatileComputing, Project: arrow, Lines: 23, Source: test_parquet.py
Example 19: random_path
def random_path():
    return 'feather_{}'.format(guid())
Author: giantwhale, Project: arrow, Lines: 2, Source: test_feather.py
Example 20: write_to_dataset
def write_to_dataset(table, root_path, partition_cols=None,
                     filesystem=None, preserve_index=True, **kwargs):
    """
    Wrapper around parquet.write_table for writing a Table to
    Parquet format by partitions.
    For each combination of partition columns and values,
    a subdirectory is created in the following
    manner:

    root_dir/
      group1=value1
        group2=value1
          <uuid>.parquet
        group2=value2
          <uuid>.parquet
      group1=valueN
        group2=value1
          <uuid>.parquet
        group2=valueN
          <uuid>.parquet

    Parameters
    ----------
    table : pyarrow.Table
    root_path : string,
        The root directory of the dataset
    filesystem : FileSystem, default None
        If nothing passed, paths assumed to be found in the local on-disk
        filesystem
    partition_cols : list,
        Column names by which to partition the dataset
        Columns are partitioned in the order they are given
    preserve_index : bool,
        Parameter for instantiating Table; preserve pandas index or not.
    **kwargs : dict, kwargs for write_table function.
    """
    from pyarrow import (
        Table,
        compat
    )

    if filesystem is None:
        fs = _get_fs_from_path(root_path)
    else:
        fs = _ensure_filesystem(filesystem)

    _mkdir_if_not_exists(fs, root_path)

    if partition_cols is not None and len(partition_cols) > 0:
        df = table.to_pandas()
        partition_keys = [df[col] for col in partition_cols]
        data_df = df.drop(partition_cols, axis='columns')
        data_cols = df.columns.drop(partition_cols)
        if len(data_cols) == 0:
            raise ValueError("No data left to save outside partition columns")
        for keys, subgroup in data_df.groupby(partition_keys):
            if not isinstance(keys, tuple):
                keys = (keys,)
            subdir = "/".join(
                ["{colname}={value}".format(colname=name, value=val)
                 for name, val in zip(partition_cols, keys)])
            subtable = Table.from_pandas(subgroup,
                                         preserve_index=preserve_index)
            prefix = "/".join([root_path, subdir])
            _mkdir_if_not_exists(fs, prefix)
            outfile = compat.guid() + ".parquet"
            full_path = "/".join([prefix, outfile])
            with fs.open(full_path, 'wb') as f:
                write_table(subtable, f, **kwargs)
    else:
        outfile = compat.guid() + ".parquet"
        full_path = "/".join([root_path, outfile])
        with fs.open(full_path, 'wb') as f:
            write_table(table, f, **kwargs)
Author: sunchao, Project: arrow, Lines: 74, Source: parquet.py
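To round out Example 20, here is a hedged usage sketch. It calls pyarrow's public pq.write_to_dataset wrapper (the function shown above) and assumes a local filesystem; the output directory name dataset_root and the sample columns are illustrative only:

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({'group1': ['a', 'a', 'b'],
                   'group2': [1, 2, 1],
                   'value': [0.1, 0.2, 0.3]})
table = pa.Table.from_pandas(df, preserve_index=False)

# Each (group1, group2) combination gets its own subdirectory, and each
# written file is named <guid>.parquet, so repeated writes never collide.
pq.write_to_dataset(table, root_path='dataset_root',
                    partition_cols=['group1', 'group2'])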
Note: The pyarrow.compat.guid examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation platforms. The snippets are taken from open-source projects contributed by their respective authors; copyright belongs to the original authors, and any use or redistribution must follow the corresponding project's license. Do not repost without permission.