本文整理汇总了Python中pyarrow.binary函数的典型用法代码示例。如果您正苦于以下问题：Python binary函数的具体用法？Python binary怎么用？Python binary使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了binary函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_type_to_pandas_dtype
def test_type_to_pandas_dtype():
    """Check ``to_pandas_dtype()`` against the expected numpy dtype per type."""
    datetime_ns = np.dtype('datetime64[ns]')

    # (arrow type, numpy dtype reported by to_pandas_dtype())
    expected_dtypes = [
        (pa.null(), np.float64),
        (pa.bool_(), np.bool_),
        (pa.int8(), np.int8),
        (pa.int16(), np.int16),
        (pa.int32(), np.int32),
        (pa.int64(), np.int64),
        (pa.uint8(), np.uint8),
        (pa.uint16(), np.uint16),
        (pa.uint32(), np.uint32),
        (pa.uint64(), np.uint64),
        (pa.float16(), np.float16),
        (pa.float32(), np.float32),
        (pa.float64(), np.float64),
        (pa.date32(), datetime_ns),
        (pa.date64(), datetime_ns),
        (pa.timestamp('ms'), datetime_ns),
        (pa.binary(), np.object_),
        (pa.binary(12), np.object_),
        (pa.string(), np.object_),
        (pa.list_(pa.int8()), np.object_),
    ]

    for arrow_type, numpy_type in expected_dtypes:
        assert arrow_type.to_pandas_dtype() == numpy_type
开发者ID:giantwhale,项目名称:arrow,代码行数:26,代码来源:test_schema.py
示例2: test_type_schema_pickling
def test_type_schema_pickling():
    """Round-trip data types, a field and a schema through pickle."""
    picklables = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal(12, 2),
        pa.field('a', 'string', metadata={b'foo': b'bar'})
    ]

    # Every entry must compare equal to its own pickle round trip.
    for item in picklables:
        restored = pickle.loads(pickle.dumps(item))
        assert item == restored

    # Entries that are already fields are used as-is; bare types are wrapped
    # in a field named after their position in the list.
    fields = [
        item if isinstance(item, pa.Field)
        else pa.field('_f{}'.format(position), item)
        for position, item in enumerate(picklables)
    ]

    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    restored = pickle.loads(pickle.dumps(schema))
    assert schema == restored
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:35,代码来源:test_schema.py
示例3: test_fixed_size_bytes
def test_fixed_size_bytes(self):
    """Convert 4-byte values (with one None) to a ``binary(4)`` array."""
    fixed_width_type = pa.binary(4)
    data = [b'foof', None, b'barb', b'2346']

    arr = pa.from_pylist(data, type=fixed_width_type)

    assert len(arr) == 4
    assert arr.null_count == 1
    assert arr.type == pa.binary(4)
    assert arr.to_pylist() == data
开发者ID:StevenMPhillips,项目名称:arrow,代码行数:7,代码来源:test_convert_builtin.py
示例4: test_empty_cast
def test_empty_cast():
    """Cast empty arrays between every ordered pair of the listed types."""
    common_types = [
        pa.null(),
        pa.bool_(),
        pa.int8(),
        pa.int16(),
        pa.int32(),
        pa.int64(),
        pa.uint8(),
        pa.uint16(),
        pa.uint32(),
        pa.uint64(),
        pa.float16(),
        pa.float32(),
        pa.float64(),
        pa.date32(),
        pa.date64(),
        pa.binary(),
        pa.binary(length=4),
        pa.string(),
    ]

    for source_type, target_type in itertools.product(common_types, repeat=2):
        try:
            # ARROW-4766: Ensure that supported types conversion don't segfault
            # on empty arrays of common types
            pa.array([], type=source_type).cast(target_type)
        except pa.lib.ArrowNotImplementedError:
            # Unsupported conversions are acceptable; only crashes are not.
            pass
开发者ID:emkornfield,项目名称:arrow,代码行数:29,代码来源:test_array.py
示例5: test_cast_from_null
def test_cast_from_null():
    """Cast an all-None null array to a range of target types.

    The first group of targets is run through ``_check_cast_case``; the
    second group (dictionary and union types) must raise
    ``NotImplementedError``.
    """
    in_data = [None] * 3
    in_type = pa.null()

    castable_types = [
        pa.null(),
        pa.uint8(),
        pa.float16(),
        pa.utf8(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.int16()),
        pa.decimal128(19, 4),
        pa.timestamp('us'),
        pa.timestamp('us', tz='UTC'),
        pa.timestamp('us', tz='Europe/Paris'),
        pa.struct([pa.field('a', pa.int32()),
                   pa.field('b', pa.list_(pa.int8())),
                   pa.field('c', pa.string())]),
    ]
    for target in castable_types:
        # Same data is passed as both the input and the expected output.
        _check_cast_case((in_data, in_type, in_data, target))

    unsupported_types = [
        pa.dictionary(pa.int32(), pa.string()),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_DENSE),
        pa.union([pa.field('a', pa.binary(10)),
                  pa.field('b', pa.string())], mode=pa.lib.UnionMode_SPARSE),
    ]
    null_array = pa.array(in_data, type=pa.null())
    for target in unsupported_types:
        with pytest.raises(NotImplementedError):
            null_array.cast(target)
开发者ID:rok,项目名称:arrow,代码行数:33,代码来源:test_array.py
示例6: test_sequence_fixed_size_bytes
def test_sequence_fixed_size_bytes():
    """Build a ``binary(4)`` array from bytes, bytearray and None values."""
    fixed_width_type = pa.binary(4)
    data = [b'foof', None, bytearray(b'barb'), b'2346']
    # The bytearray entry is expected back as plain bytes.
    expected = [b'foof', None, b'barb', b'2346']

    arr = pa.array(data, type=fixed_width_type)

    assert len(arr) == 4
    assert arr.null_count == 1
    assert arr.type == pa.binary(4)
    assert arr.to_pylist() == expected
开发者ID:dremio,项目名称:arrow,代码行数:7,代码来源:test_convert_builtin.py
示例7: field
def field(jvm_field):
    """
    Build a pyarrow Field from an org.apache.arrow.vector.types.pojo.Field
    instance.

    Parameters
    ----------
    jvm_field: org.apache.arrow.vector.types.pojo.Field

    Returns
    -------
    pyarrow.Field

    Raises
    ------
    NotImplementedError
        If the JVM type is complex, or its type ID has no conversion here.
    """
    name = jvm_field.getName()
    jvm_type = jvm_field.getType()

    if jvm_type.isComplex():
        # TODO: The following JVM types are not implemented:
        #       Struct, List, FixedSizeList, Union, Dictionary
        raise NotImplementedError(
            "JVM field conversion only implemented for primitive types.")

    # Primitive types are selected by the string form of their type ID.
    type_id = jvm_type.getTypeID().toString()
    if type_id == 'Null':
        arrow_type = pa.null()
    elif type_id == 'Int':
        arrow_type = _from_jvm_int_type(jvm_type)
    elif type_id == 'FloatingPoint':
        arrow_type = _from_jvm_float_type(jvm_type)
    elif type_id == 'Utf8':
        arrow_type = pa.string()
    elif type_id == 'Binary':
        arrow_type = pa.binary()
    elif type_id == 'FixedSizeBinary':
        arrow_type = pa.binary(jvm_type.getByteWidth())
    elif type_id == 'Bool':
        arrow_type = pa.bool_()
    elif type_id == 'Time':
        arrow_type = _from_jvm_time_type(jvm_type)
    elif type_id == 'Timestamp':
        arrow_type = _from_jvm_timestamp_type(jvm_type)
    elif type_id == 'Date':
        arrow_type = _from_jvm_date_type(jvm_type)
    elif type_id == 'Decimal':
        arrow_type = pa.decimal128(jvm_type.getPrecision(),
                                   jvm_type.getScale())
    else:
        raise NotImplementedError(
            "Unsupported JVM type: {}".format(type_id))

    nullable = jvm_field.isNullable()
    # Empty JVM metadata is passed on as None rather than an empty dict.
    metadata = (None if jvm_field.getMetadata().isEmpty()
                else dict(jvm_field.getMetadata()))
    return pa.field(name, arrow_type, nullable, metadata)
开发者ID:rok,项目名称:arrow,代码行数:56,代码来源:jvm.py
示例8: test_is_binary_string
def test_is_binary_string():
    """Exercise the binary, string and fixed-size-binary type predicates."""
    variable_binary = pa.binary()
    utf8 = pa.string()

    # Variable-length binary vs. string.
    assert types.is_binary(variable_binary)
    assert not types.is_binary(utf8)

    assert types.is_string(utf8)
    assert types.is_unicode(utf8)
    assert not types.is_string(variable_binary)

    # Only binary types created with a length are fixed-size.
    assert types.is_fixed_size_binary(pa.binary(5))
    assert not types.is_fixed_size_binary(variable_binary)
开发者ID:giantwhale,项目名称:arrow,代码行数:10,代码来源:test_types.py
示例9: test_bit_width
def test_bit_width():
    """Check ``bit_width`` values, and the error for the remaining types."""
    fixed_width_cases = [
        (pa.bool_(), 1),
        (pa.int8(), 8),
        (pa.uint32(), 32),
        (pa.float16(), 16),
        (pa.decimal128(19, 4), 128),
        (pa.binary(42), 42 * 8),
    ]
    for data_type, expected_bits in fixed_width_cases:
        assert data_type.bit_width == expected_bits

    variable_width_types = [pa.binary(), pa.string(), pa.list_(pa.int16())]
    for data_type in variable_width_types:
        # Merely reading the attribute is expected to raise.
        with pytest.raises(ValueError, match="fixed width"):
            data_type.bit_width
开发者ID:CodingCat,项目名称:arrow,代码行数:11,代码来源:test_types.py
示例10: test_convert_options
def test_convert_options():
    """Exercise the ConvertOptions attributes and constructor keywords."""
    options = ConvertOptions()

    # Boolean flags: check the initial value, flip it, check again.
    assert options.check_utf8 is True
    options.check_utf8 = False
    assert options.check_utf8 is False

    assert options.strings_can_be_null is False
    options.strings_can_be_null = True
    assert options.strings_can_be_null is True

    assert options.column_types == {}
    # Pass column_types as mapping
    options.column_types = {'b': pa.int16(), 'c': pa.float32()}
    assert options.column_types == {'b': pa.int16(), 'c': pa.float32()}
    options.column_types = {'v': 'int16', 'w': 'null'}
    assert options.column_types == {'v': pa.int16(), 'w': pa.null()}
    # Pass column_types as schema
    options.column_types = pa.schema([('a', pa.int32()),
                                      ('b', pa.string())])
    assert options.column_types == {'a': pa.int32(), 'b': pa.string()}
    # Pass column_types as sequence
    options.column_types = [('x', pa.binary())]
    assert options.column_types == {'x': pa.binary()}

    # Invalid column_types values are rejected.
    with pytest.raises(TypeError, match='DataType expected'):
        options.column_types = {'a': None}
    with pytest.raises(TypeError):
        options.column_types = 0

    assert isinstance(options.null_values, list)
    assert '' in options.null_values
    assert 'N/A' in options.null_values
    options.null_values = ['xxx', 'yyy']
    assert options.null_values == ['xxx', 'yyy']

    # true_values and false_values get the same list check and assignment.
    for attr_name in ('true_values', 'false_values'):
        assert isinstance(getattr(options, attr_name), list)
        setattr(options, attr_name, ['xxx', 'yyy'])
        assert getattr(options, attr_name) == ['xxx', 'yyy']

    # Every option can also be supplied as a constructor keyword.
    options = ConvertOptions(
        check_utf8=False, column_types={'a': pa.null()},
        null_values=['N', 'nn'], true_values=['T', 'tt'],
        false_values=['F', 'ff'], strings_can_be_null=True)
    assert options.check_utf8 is False
    assert options.column_types == {'a': pa.null()}
    assert options.null_values == ['N', 'nn']
    assert options.false_values == ['F', 'ff']
    assert options.true_values == ['T', 'tt']
    assert options.strings_can_be_null is True
开发者ID:wesm,项目名称:arrow,代码行数:54,代码来源:test_csv.py
示例11: test_sequence_bytes
def test_sequence_bytes():
    """Convert mixed bytes/unicode/bytearray input to a binary array.

    Runs once with no explicit type and once with ``pa.binary()``.
    """
    encoded = b'ma\xc3\xb1ana'
    data = [
        b'foo',
        encoded.decode('utf-8'),  # unicode gets encoded,
        bytearray(b'bar'),
        None,
    ]
    expected = [b'foo', encoded, b'bar', None]

    for requested_type in (None, pa.binary()):
        arr = pa.array(data, type=requested_type)
        assert len(arr) == 4
        assert arr.null_count == 1
        assert arr.type == pa.binary()
        assert arr.to_pylist() == expected
开发者ID:dremio,项目名称:arrow,代码行数:12,代码来源:test_convert_builtin.py
示例12: test_array_mixed_unicode_bytes
def test_array_mixed_unicode_bytes():
    """Convert a mixed unicode/bytes/bytearray list with and without a type."""
    mixed = [u'qux', b'foo', bytearray(b'barz')]
    as_bytes = [b'qux', b'foo', b'barz']
    as_text = [u'qux', u'foo', u'barz']

    # No explicit type: the result is expected to be binary.
    inferred = pa.array(mixed)
    expected_binary = pa.array(as_bytes, type=pa.binary())
    assert inferred.type == pa.binary()
    assert inferred.equals(expected_binary)

    # Explicit string type: every value is expected back as text.
    as_string = pa.array(mixed, type=pa.string())
    expected_string = pa.array(as_text, type=pa.string())
    assert as_string.type == pa.string()
    assert as_string.equals(expected_string)
开发者ID:dremio,项目名称:arrow,代码行数:14,代码来源:test_convert_builtin.py
示例13: numpy_array_from_arrow_array
def numpy_array_from_arrow_array(arrow_array):
    """Expose the buffers of an Arrow array as a numpy array.

    The array must have exactly two buffers, unpacked here as
    ``(bitmap_buffer, data_buffer)``. The data buffer is read with
    ``np.frombuffer``; when the bitmap buffer is not None the result is
    wrapped in a ``np.ma.MaskedArray``.

    Raises
    ------
    ValueError
        For fixed-size binary input whose data buffer is shorter than
        ``byte_width * len(arrow_array)``.
    """
    arrow_type = arrow_array.type
    buffers = arrow_array.buffers()
    assert len(buffers) == 2
    bitmap_buffer, data_buffer = buffers
    length = len(arrow_array)

    # todo, is there a better way to typecheck?
    fixed_size_binary_cls = type(pyarrow.binary(1))
    if isinstance(arrow_type, fixed_size_binary_cls):
        # mimics python/pyarrow/array.pxi::Array::to_numpy
        dtype = "S" + str(arrow_type.byte_width)
        # arrow seems to do padding, check if it is all ok
        expected_length = arrow_type.byte_width * length
        actual_length = len(data_buffer)
        if actual_length < expected_length:
            raise ValueError('buffer is smaller (%d) than expected (%d)' % (actual_length, expected_length))
        # TODO: deal with offset ? [arrow_array.offset:arrow_array.offset + len(arrow_array)]
        array = np.frombuffer(data_buffer, dtype, length)
    else:
        dtype = arrow_type.to_pandas_dtype()
        if np.bool_ == dtype:
            # TODO: this will also be a copy, we probably want to support bitmasks as well
            packed = np.frombuffer(data_buffer, np.uint8, len(data_buffer))
            array = numpy_mask_from_arrow_mask(packed, length)
        else:
            array = np.frombuffer(data_buffer, dtype, length)

    if bitmap_buffer is not None:
        # NOTE(review): presumably the helper turns the bitmap into a numpy
        # mask - confirm against numpy_mask_from_arrow_mask.
        packed = np.frombuffer(bitmap_buffer, np.uint8, len(bitmap_buffer))
        mask = numpy_mask_from_arrow_mask(packed, length)
        array = np.ma.MaskedArray(array, mask=mask)
    return array
开发者ID:maartenbreddels,项目名称:vaex,代码行数:29,代码来源:convert.py
示例14: test_orcfile_empty
def test_orcfile_empty():
    """Read the empty ORC example file and check its row count and schema."""
    from pyarrow import orc

    orc_file = orc.ORCFile(path_for_orc_example('TestOrcFile.emptyFile'))
    table = orc_file.read()
    assert table.num_rows == 0

    # The same (int1, string1) struct appears in three places in the schema.
    int_string_struct = pa.struct([
        ('int1', pa.int32()),
        ('string1', pa.string()),
    ])
    expected_schema = pa.schema([
        ('boolean1', pa.bool_()),
        ('byte1', pa.int8()),
        ('short1', pa.int16()),
        ('int1', pa.int32()),
        ('long1', pa.int64()),
        ('float1', pa.float32()),
        ('double1', pa.float64()),
        ('bytes1', pa.binary()),
        ('string1', pa.string()),
        ('middle', pa.struct([
            ('list', pa.list_(int_string_struct)),
        ])),
        ('list', pa.list_(int_string_struct)),
        ('map', pa.list_(pa.struct([
            ('key', pa.string()),
            ('value', int_string_struct),
        ]))),
    ])
    assert table.schema == expected_schema
开发者ID:dremio,项目名称:arrow,代码行数:35,代码来源:test_orc.py
示例15: test_bytes
def test_bytes(self):
    """Convert a bytes/unicode/None list and expect a binary array."""
    encoded = b"ma\xc3\xb1ana"
    # unicode gets encoded,
    data = [b"foo", encoded.decode("utf-8"), None]
    expected = [b"foo", encoded, None]

    arr = pyarrow.from_pylist(data)

    assert len(arr) == 3
    assert arr.null_count == 1
    assert arr.type == pyarrow.binary()
    assert arr.to_pylist() == expected
开发者ID:apache,项目名称:arrow,代码行数:8,代码来源:test_convert_builtin.py
示例16: test_fixed_size_binary
def test_fixed_size_binary():
    """Pass a one-column ``binary(10)`` table to ``_check_roundtrip``."""
    values = [b'fooooooooo', None, b'barooooooo', b'quxooooooo']
    column = pa.array(values, type=pa.binary(10))
    table = pa.Table.from_arrays([column], ['binary[10]'])
    _check_roundtrip(table)
开发者ID:marklavrynenko-original,项目名称:arrow,代码行数:8,代码来源:test_parquet.py
示例17: test_cast_binary_to_utf8
def test_cast_binary_to_utf8():
    """Cast binary arrays to utf8, with valid, invalid and masked input."""
    # ASCII bytes cast to the equivalent strings.
    ascii_binary = pa.array([b'foo', b'bar', b'baz'], type=pa.binary())
    expected = pa.array(['foo', 'bar', 'baz'], type=pa.utf8())
    assert ascii_binary.cast(pa.utf8()).equals(expected)

    # UTF-16 encoded bytes are expected to fail the cast.
    non_utf8_values = [(u'mañana').encode('utf-16-le')]
    non_utf8_binary = pa.array(non_utf8_values)
    assert non_utf8_binary.type == pa.binary()
    with pytest.raises(ValueError):
        non_utf8_binary.cast(pa.string())

    # With the only value masked out, the same bytes cast without error.
    masked_out = np.array([True])
    non_utf8_all_null = pa.array(non_utf8_values, mask=masked_out,
                                 type=pa.binary())
    # No error
    casted = non_utf8_all_null.cast(pa.string())
    assert casted.null_count == 1
开发者ID:emkornfield,项目名称:arrow,代码行数:18,代码来源:test_array.py
示例18: test_fixed_size_bytes
def test_fixed_size_bytes(self):
    """Round-trip a 3-byte string column through a ``binary(3)`` schema."""
    values = [b'foo', None, b'bar', None, None, b'hey']
    df = pd.DataFrame({'strings': values})
    schema = pa.schema([pa.field('strings', pa.binary(3))])

    table = pa.Table.from_pandas(df, schema=schema)

    # The table keeps the type and name given by the explicit schema.
    expected_field = schema[0]
    assert table.schema[0].type == expected_field.type
    assert table.schema[0].name == expected_field.name

    tm.assert_frame_equal(table.to_pandas(), df)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:9,代码来源:test_convert_pandas.py
示例19: test_bytes
def test_bytes(self):
    """Convert a bytes/unicode/None list and expect a binary array."""
    encoded = b'ma\xc3\xb1ana'
    data = [
        b'foo',
        encoded.decode('utf-8'),  # unicode gets encoded,
        None,
    ]
    expected = [b'foo', encoded, None]

    arr = pyarrow.from_pylist(data)

    assert len(arr) == 3
    assert arr.null_count == 1
    assert arr.type == pyarrow.binary()
    assert arr.to_pylist() == expected
开发者ID:kiril-me,项目名称:arrow,代码行数:10,代码来源:test_convert_builtin.py
示例20: test_bytes_to_binary
def test_bytes_to_binary(self):
    """Convert a mixed text/bytes pandas column and expect a binary column."""
    mixed_values = [u('qux'), b'foo', None, 'bar', 'qux', np.nan]
    df = pd.DataFrame({'strings': mixed_values})

    table = pa.Table.from_pandas(df)
    assert table[0].type == pa.binary()

    # The expected frame holds the same values with the text as bytes.
    bytes_values = [b'qux', b'foo', None, b'bar', b'qux', np.nan]
    expected = pd.DataFrame({'strings': bytes_values})
    self._check_pandas_roundtrip(df, expected)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:10,代码来源:test_convert_pandas.py
注：本文中的pyarrow.binary函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论