本文整理汇总了Python中pyarrow.array函数的典型用法代码示例。如果您正苦于以下问题：Python array函数的具体用法？Python array怎么用？Python array使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了array函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_array_slice
def test_array_slice():
    """Exercise ``Array.slice`` and the equivalent ``[]`` slice notation."""
    arr = pa.array(range(10))
    size = len(arr)

    # slice(offset): everything from the offset onwards
    assert arr.slice(2).equals(pa.array(range(2, 10)))

    # slice(offset, length): `length` items starting at the offset
    assert arr.slice(2, 4).equals(pa.array(range(2, 6)))

    # A zero offset gives back an equal array
    assert arr.slice(0).equals(arr)

    # Slicing exactly at the end yields an empty array
    assert len(arr.slice(size)) == 0

    # Negative offsets are rejected by slice()
    with pytest.raises(IndexError):
        arr.slice(-1)

    # Bracket notation must agree with slice()
    assert arr[2:].equals(arr.slice(2))
    assert arr[2:5].equals(arr.slice(2, 3))
    assert arr[-5:].equals(arr.slice(size - 5))

    # Steps other than 1 are expected to fail
    for make_slice in (slice(None, None, -1), slice(None, None, 2)):
        with pytest.raises(IndexError):
            arr[make_slice]

    # Every start/stop combination must match plain Python list slicing
    as_list = arr.to_pylist()
    bounds = range(-size * 2, size * 2)
    for lo in bounds:
        for hi in bounds:
            assert arr[lo:hi].to_pylist() == as_list[lo:hi]
开发者ID:CodingCat,项目名称:arrow,代码行数:33,代码来源:test_array.py
示例2: test_list_from_arrays
def test_list_from_arrays():
    """Build list arrays from offsets + values, including null offsets."""
    pyvalues = [b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h']

    # Offsets and values given as Arrow arrays
    offsets = pa.array(np.array([0, 2, 5, 8], dtype='i4'), type='int32')
    values = pa.array(pyvalues, type='binary')
    built = pa.ListArray.from_arrays(offsets, values)
    wanted = pa.array([pyvalues[:2], pyvalues[2:5], pyvalues[5:8]])
    assert built.equals(wanted)

    # With nulls: a None offset marks the corresponding list as null
    values = ['a', 'b', 'c', 'd', 'e', 'f']
    built = pa.ListArray.from_arrays([0, None, 2, 6], values)
    wanted = pa.array([values[:2], None, values[2:]])
    assert built.equals(wanted)

    # Another edge case: the null offset sits just before the final one
    built = pa.ListArray.from_arrays([0, 2, None, 6], values)
    wanted = pa.array([values[:2], values[2:], None])
    assert built.equals(wanted)
开发者ID:CodingCat,项目名称:arrow,代码行数:26,代码来源:test_array.py
示例3: test_asarray
def test_asarray():
    """Check how ``np.asarray`` converts an Arrow array in several modes."""
    arr = pa.array(range(4))

    # Iterating yields Int64Value scalars, so numpy builds an object array
    from_iter = np.asarray(list(arr))
    assert from_iter.tolist() == [0, 1, 2, 3]
    assert from_iter.dtype == np.dtype('O')
    assert type(from_iter[0]) == pa.lib.Int64Value

    # Passing the Arrow array itself produces a native int64 ndarray
    direct = np.asarray(arr)
    assert direct.tolist() == [0, 1, 2, 3]
    assert direct.dtype == np.dtype('int64')

    # An explicit dtype may be requested through np.asarray
    as_text = np.asarray(arr, dtype='str')
    assert as_text.tolist() == ['0', '1', '2', '3']

    # With nulls present the numpy dtype is widened so the null can be
    # represented (here: int64 -> float64 with NaN).
    arr = pa.array([0, 1, 2, None])
    assert arr.type == pa.int64()
    with_null = np.asarray(arr)
    items = with_null.tolist()
    assert items[:3] == [0., 1., 2.]
    assert np.isnan(items[3])
    assert with_null.dtype == np.dtype('float64')
开发者ID:emkornfield,项目名称:arrow,代码行数:27,代码来源:test_array.py
示例4: dataframe_to_types
def dataframe_to_types(df, preserve_index, columns=None):
    """Determine an Arrow type for each column of ``df`` to be converted.

    Returns the tuple ``(all_names, types, metadata)`` where ``types`` holds
    one entry per column reported by ``_get_columns_to_convert`` and
    ``metadata`` is whatever ``construct_metadata`` builds from them.
    """
    converted = _get_columns_to_convert(df, None, preserve_index, columns)
    (all_names, column_names, index_descriptors, index_columns,
     columns_to_convert, _) = converted

    def _arrow_type_of(col):
        # Resolve the Arrow type of a single column.
        raw = col.values
        if _pandas_api.is_categorical(raw):
            return pa.array(col, from_pandas=True).type
        # If pandas knows the type, skip conversion
        raw, hint = get_datetimetz_type(raw, col.dtype, None)
        resolved = pa.lib._ndarray_to_arrow_type(raw, hint)
        if resolved is None:
            # No type could be derived from the ndarray: convert the column
            resolved = pa.array(col, from_pandas=True).type
        return resolved

    types = [_arrow_type_of(col) for col in columns_to_convert]

    metadata = construct_metadata(df, column_names, index_columns,
                                  index_descriptors, preserve_index, types)
    return all_names, types, metadata
开发者ID:laurentgo,项目名称:arrow,代码行数:25,代码来源:pandas_compat.py
示例5: test_buffers_nested
def test_buffers_nested():
    """Inspect the raw buffers behind a list array and a struct array."""
    # --- list<int64> ---
    list_arr = pa.array([[1, 2], None, [3, None, 4, 5]],
                        type=pa.list_(pa.int64()))
    bufs = list_arr.buffers()
    assert len(bufs) == 4

    # Parent buffers: validity bitmap, then int32 offsets
    parent_validity = bufs[0].to_pybytes()
    assert bytearray(parent_validity)[0] == 0b00000101
    parent_offsets = bufs[1].to_pybytes()
    assert struct.unpack('4i', parent_offsets) == (0, 2, 2, 6)

    # Child buffers: validity bitmap, then int64 values (one slot is padding)
    child_validity = bufs[2].to_pybytes()
    assert bytearray(child_validity)[0] == 0b00110111
    child_values = bufs[3].to_pybytes()
    assert struct.unpack('qqq8xqq', child_values) == (1, 2, 3, 4, 5)

    # --- struct<a: int8, b: int16> ---
    struct_type = pa.struct([pa.field('a', pa.int8()),
                             pa.field('b', pa.int16())])
    struct_arr = pa.array([(42, None), None, (None, 43)], type=struct_type)
    bufs = struct_arr.buffers()
    assert len(bufs) == 5

    # Parent buffer: validity bitmap only
    assert bytearray(bufs[0].to_pybytes())[0] == 0b00000101

    # Child 'a': validity bitmap + int8 values
    assert bytearray(bufs[1].to_pybytes())[0] == 0b00000001
    assert struct.unpack('bxx', bufs[2].to_pybytes()) == (42,)

    # Child 'b': validity bitmap + int16 values
    assert bytearray(bufs[3].to_pybytes())[0] == 0b00000100
    assert struct.unpack('4xh', bufs[4].to_pybytes()) == (43,)
开发者ID:CodingCat,项目名称:arrow,代码行数:33,代码来源:test_array.py
示例6: test_recordbatch_slice
def test_recordbatch_slice():
    """``RecordBatch.slice`` must behave like slicing each column."""
    names = ['c0', 'c1']
    columns = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])]
    batch = pa.RecordBatch.from_arrays(columns, names)

    # Offset only
    tail = batch.slice(2)
    assert tail.num_rows == 3
    tail_expected = pa.RecordBatch.from_arrays(
        [col.slice(2) for col in columns], names)
    assert tail.equals(tail_expected)

    # Offset and length
    window = batch.slice(2, 2)
    window_expected = pa.RecordBatch.from_arrays(
        [col.slice(2, 2) for col in columns], names)
    assert window.equals(window_expected)

    # A zero offset gives back an equal batch
    assert batch.slice(0).equals(batch)

    # Slicing exactly at the end yields an empty batch
    assert len(batch.slice(len(batch))) == 0

    # Negative offsets are rejected
    with pytest.raises(IndexError):
        batch.slice(-1)
开发者ID:hdfeos,项目名称:arrow,代码行数:30,代码来源:test_table.py
示例7: test_chunked_array_str
def test_chunked_array_str():
    """Check the string rendering of a two-chunk ``ChunkedArray``."""
    data = [
        pa.array([1, 2, 3]),
        pa.array([4, 5, 6])
    ]
    data = pa.chunked_array(data)
    # NOTE(review): the expected literal was cut off after its first line in
    # this copy of the test. The remainder is reconstructed on the assumption
    # that chunks are rendered as nested, two-space-indented lists; confirm
    # against the pyarrow version under test.
    assert str(data) == """[
  [
    1,
    2,
    3
  ],
  [
    4,
    5,
    6
  ]
]"""
开发者ID:dremio,项目名称:arrow,代码行数:7,代码来源:test_table.py
示例8: test_cast_timestamp_to_int
def test_cast_timestamp_to_int():
    """Casting timestamp[us] to int64 must expose the raw integer values."""
    raw = np.array([0, 1, 2], dtype='int64')
    timestamps = pa.array(raw, type=pa.timestamp('us'))
    wanted = pa.array([0, 1, 2], type='i8')

    assert timestamps.cast('i8').equals(wanted)
开发者ID:CodingCat,项目名称:arrow,代码行数:7,代码来源:test_array.py
示例9: test_recordbatch_basics
def test_recordbatch_basics():
    """Cover the basic accessors of ``RecordBatch`` and explicit schemas."""
    columns = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])]

    batch = pa.RecordBatch.from_arrays(columns, ['c0', 'c1'])

    # A batch built from plain names carries no schema metadata
    assert not batch.schema.metadata

    # Shape accessors
    assert len(batch) == 5
    assert batch.num_rows == 5
    assert batch.num_columns == len(columns)

    # Conversion to Python keeps the column order
    as_dict = OrderedDict([
        ('c0', [0, 1, 2, 3, 4]),
        ('c1', [-10, -5, 0, 5, 10]),
    ])
    assert batch.to_pydict() == as_dict

    # Column index 2 is out of range for a two-column batch
    with pytest.raises(IndexError):
        batch[2]

    # Schema passed explicitly instead of a list of names
    fields = [pa.field('c0', pa.int16()), pa.field('c1', pa.int32())]
    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    batch = pa.RecordBatch.from_arrays(columns, schema)
    assert batch.schema == schema
开发者ID:dremio,项目名称:arrow,代码行数:27,代码来源:test_table.py
示例10: test_recordbatch_from_arrays_validate_lengths
def test_recordbatch_from_arrays_validate_lengths():
    """Columns of unequal length must be rejected (ARROW-2820)."""
    ids = pa.array([1])
    tags = pa.array(["tokyo", "like", "happy"])
    people = pa.array(["derek"])

    with pytest.raises(ValueError):
        pa.RecordBatch.from_arrays([ids, tags, people],
                                   ['id', 'tags', 'name'])
开发者ID:dremio,项目名称:arrow,代码行数:7,代码来源:test_table.py
示例11: test_chunked_array_asarray
def test_chunked_array_asarray():
    """Check how ``np.asarray`` converts a ``ChunkedArray``."""
    chunked = pa.chunked_array([pa.array([0]), pa.array([1, 2, 3])])

    # Default conversion concatenates the chunks into an int64 ndarray
    plain = np.asarray(chunked)
    assert plain.tolist() == [0, 1, 2, 3]
    assert plain.dtype == np.dtype('int64')

    # An explicit dtype may be requested through np.asarray
    as_text = np.asarray(chunked, dtype='str')
    assert as_text.tolist() == ['0', '1', '2', '3']

    # With a null in any chunk the result is float64 and the null is NaN
    chunked = pa.chunked_array([pa.array([1, None]), pa.array([1, 2, 3])])
    with_null = np.asarray(chunked)
    items = with_null.tolist()
    assert items[0] == 1.
    assert np.isnan(items[1])
    assert items[2:] == [1., 2., 3.]
    assert with_null.dtype == np.dtype('float64')
开发者ID:dremio,项目名称:arrow,代码行数:28,代码来源:test_table.py
示例12: test_invalid_table_construct
def test_invalid_table_construct():
    """Building a table from columns of different lengths must fail."""
    source = np.array([0, 1], dtype=np.uint8)
    col_type = pa.uint8()

    # Second column is one element shorter than the first
    full_col = pa.array(source, type=col_type)
    short_col = pa.array(source[1:], type=col_type)

    with pytest.raises(pa.lib.ArrowInvalid):
        pa.Table.from_arrays([full_col, short_col], names=["a1", "a2"])
开发者ID:emkornfield,项目名称:arrow,代码行数:7,代码来源:test_table.py
示例13: test_sequence_timestamp_from_int_with_unit
def test_sequence_timestamp_from_int_with_unit():
    """Integers converted to timestamps must honour the requested unit.

    Also checks that a sequence containing an arbitrary object is rejected
    for timestamp and date types.
    """
    data = [1]

    s = pa.timestamp('s')
    ms = pa.timestamp('ms')
    us = pa.timestamp('us')
    ns = pa.timestamp('ns')

    arr_s = pa.array(data, type=s)
    assert len(arr_s) == 1
    assert arr_s.type == s
    assert str(arr_s[0]) == "Timestamp('1970-01-01 00:00:01')"

    arr_ms = pa.array(data, type=ms)
    assert len(arr_ms) == 1
    assert arr_ms.type == ms
    assert str(arr_ms[0]) == "Timestamp('1970-01-01 00:00:00.001000')"

    arr_us = pa.array(data, type=us)
    assert len(arr_us) == 1
    assert arr_us.type == us
    assert str(arr_us[0]) == "Timestamp('1970-01-01 00:00:00.000001')"

    arr_ns = pa.array(data, type=ns)
    assert len(arr_ns) == 1
    assert arr_ns.type == ns
    assert str(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')"

    class CustomClass():
        pass

    # Each conversion gets its own pytest.raises block: with a single shared
    # block the first failure ends the block and the date32/date64 cases
    # would never be executed.
    for ty in [ns, pa.date32(), pa.date64()]:
        with pytest.raises(pa.ArrowException):
            pa.array([1, CustomClass()], type=ty)
开发者ID:CodingCat,项目名称:arrow,代码行数:34,代码来源:test_convert_builtin.py
示例14: test_file_reader_writer
def test_file_reader_writer():
    """Deprecated IPC reader/writer aliases must warn and still work."""
    columns = [
        pa.array([1, 2, 3, 4]),
        pa.array(['foo', 'bar', 'baz', None]),
        pa.array([True, None, False, True]),
    ]
    batch = pa.RecordBatch.from_arrays(columns, ['f0', 'f1', 'f2'])

    # Deprecated stream writer alias
    stream_sink = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        stream_writer = pa.StreamWriter(stream_sink, batch.schema)
    assert isinstance(stream_writer, pa.RecordBatchStreamWriter)

    # Deprecated file writer alias
    file_sink = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        file_writer = pa.FileWriter(file_sink, batch.schema)
    assert isinstance(file_writer, pa.RecordBatchFileWriter)

    # Write one batch through each writer, then finalise both
    file_writer.write_batch(batch)
    stream_writer.write_batch(batch)
    file_writer.close()
    stream_writer.close()

    stream_buf = stream_sink.get_result()
    file_buf = file_sink.get_result()

    # Deprecated reader aliases over the buffers just written
    with pytest.warns(FutureWarning):
        stream_reader = pa.StreamReader(stream_buf)
    assert isinstance(stream_reader, pa.RecordBatchStreamReader)

    with pytest.warns(FutureWarning):
        file_reader = pa.FileReader(file_buf)
    assert isinstance(file_reader, pa.RecordBatchFileReader)
开发者ID:hdfeos,项目名称:arrow,代码行数:35,代码来源:test_deprecations.py
示例15: test_table_basics
def test_table_basics():
    """Cover the basic accessors of ``Table`` and per-column chunk access."""
    arrays = [pa.array(range(5)), pa.array([-10, -5, 0, 5, 10])]
    table = pa.Table.from_arrays(arrays, names=('a', 'b'))
    table._validate()

    # Shape accessors
    assert len(table) == 5
    assert table.num_rows == 5
    assert table.num_columns == 2
    assert table.shape == (5, 2)

    # Conversion to Python keeps the column order
    as_dict = OrderedDict([
        ('a', [0, 1, 2, 3, 4]),
        ('b', [-10, -5, 0, 5, 10]),
    ])
    assert table.to_pydict() == as_dict

    seen = []
    for column in table.itercolumns():
        seen.append(column)

        for piece in column.data.iterchunks():
            assert piece is not None

        # Chunk indices outside [0, num_chunks) must be rejected
        with pytest.raises(IndexError):
            column.data.chunk(-1)
        with pytest.raises(IndexError):
            column.data.chunk(column.data.num_chunks)

    # itercolumns() must visit exactly what .columns reports
    assert table.columns == seen
开发者ID:dremio,项目名称:arrow,代码行数:29,代码来源:test_table.py
示例16: test_cast_time32_to_int
def test_cast_time32_to_int():
    """Casting time32[s] to int32 must expose the raw integer values."""
    raw = np.array([0, 1, 2], dtype='int32')
    times = pa.array(raw, type=pa.time32('s'))
    wanted = pa.array([0, 1, 2], type='i4')

    assert times.cast('i4').equals(wanted)
开发者ID:CodingCat,项目名称:arrow,代码行数:7,代码来源:test_array.py
示例17: test_struct_array_field
def test_struct_array_field():
    """``StructArray.field`` accepts positive/negative indices and names."""
    struct_type = pa.struct([pa.field('x', pa.int16()),
                             pa.field('y', pa.float32())])
    a = pa.array([(1, 2.5), (3, 4.5), (5, 6.5)], type=struct_type)

    # Same two children fetched three different ways
    x_pos, y_pos = a.field(0), a.field(1)
    x_neg, y_neg = a.field(-2), a.field(-1)
    x_name, y_name = a.field('x'), a.field('y')

    assert isinstance(x_pos, pa.lib.Int16Array)
    assert isinstance(y_neg, pa.lib.FloatArray)
    assert x_pos.equals(pa.array([1, 3, 5], type=pa.int16()))
    assert y_pos.equals(pa.array([2.5, 4.5, 6.5], type=pa.float32()))

    # All access styles must agree
    assert x_pos.equals(x_neg)
    assert x_pos.equals(x_name)
    assert y_pos.equals(y_neg)
    assert y_pos.equals(y_name)

    # Bad lookups, grouped by the exception each is expected to raise
    failure_cases = [
        (TypeError, [None, pa.int16()]),   # unsupported key types
        (IndexError, [3, -3]),             # out-of-range positions
        (KeyError, ['z', '']),             # unknown field names
    ]
    for expected_error, bad_keys in failure_cases:
        for key in bad_keys:
            with pytest.raises(expected_error):
                a.field(key)
开发者ID:emkornfield,项目名称:arrow,代码行数:32,代码来源:test_array.py
示例18: test_buffers_primitive
def test_buffers_primitive():
    """Inspect the raw buffers behind primitive and binary arrays."""
    a = pa.array([1, 2, None, 4], type=pa.int16())
    buffers = a.buffers()
    assert len(buffers) == 2
    null_bitmap = buffers[0].to_pybytes()
    assert 1 <= len(null_bitmap) <= 64  # XXX this is varying
    assert bytearray(null_bitmap)[0] == 0b00001011

    # Slicing does not affect the buffers but the offset
    a_sliced = a[1:]
    buffers = a_sliced.buffers()
    # This comparison was previously a bare expression and so was never
    # actually checked.
    assert a_sliced.offset == 1
    assert len(buffers) == 2
    null_bitmap = buffers[0].to_pybytes()
    assert 1 <= len(null_bitmap) <= 64  # XXX this is varying
    assert bytearray(null_bitmap)[0] == 0b00001011

    # The values buffer still holds all four int16 slots (null slot skipped)
    assert struct.unpack('hhxxh', buffers[1].to_pybytes()) == (1, 2, 4)

    a = pa.array(np.int8([4, 5, 6]))
    buffers = a.buffers()
    assert len(buffers) == 2
    # No null bitmap from Numpy int array
    assert buffers[0] is None
    assert struct.unpack('3b', buffers[1].to_pybytes()) == (4, 5, 6)

    # Binary array: validity bitmap, int32 offsets, concatenated bytes
    a = pa.array([b'foo!', None, b'bar!!'])
    buffers = a.buffers()
    assert len(buffers) == 3
    null_bitmap = buffers[0].to_pybytes()
    assert bytearray(null_bitmap)[0] == 0b00000101
    offsets = buffers[1].to_pybytes()
    assert struct.unpack('4i', offsets) == (0, 4, 4, 9)
    values = buffers[2].to_pybytes()
    assert values == b'foo!bar!!'
开发者ID:CodingCat,项目名称:arrow,代码行数:35,代码来源:test_array.py
示例19: test_empty_cast
def test_empty_cast():
    """Casting empty arrays between common types must not crash."""
    types = [
        pa.null(),
        pa.bool_(),
        # signed integers
        pa.int8(), pa.int16(), pa.int32(), pa.int64(),
        # unsigned integers
        pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64(),
        # floating point
        pa.float16(), pa.float32(), pa.float64(),
        # dates
        pa.date32(), pa.date64(),
        # binary / string
        pa.binary(), pa.binary(length=4), pa.string(),
    ]

    for source_type, target_type in itertools.product(types, repeat=2):
        empty = None
        try:
            # ARROW-4766: Ensure that supported types conversion don't
            # segfault on empty arrays of common types
            empty = pa.array([], type=source_type)
            empty.cast(target_type)
        except pa.lib.ArrowNotImplementedError:
            # Unsupported combinations are fine; only crashes matter here
            pass
开发者ID:emkornfield,项目名称:arrow,代码行数:29,代码来源:test_array.py
示例20: test_to_pandas_zero_copy
def test_to_pandas_zero_copy():
    """``to_pandas`` must not leak references to the array or its base."""
    import gc

    source = pa.array(range(10))

    # Repeated conversions must each hand back a freshly-owned object
    for _ in range(10):
        converted = source.to_pandas()
        assert sys.getrefcount(converted) == 2
        converted = None  # noqa

    # ...and must not leave extra references to the Arrow array behind
    assert sys.getrefcount(source) == 2

    for _ in range(10):
        source = pa.array(range(10))
        converted = source.to_pandas()
        source = None
        gc.collect()

        # The Arrow array is gone; the converted object's base must remain
        # usable.
        # pytest's assert rewriting adds one reference when getrefcount is
        # called on the asserted line itself, so read it into a local first.
        base_refs = sys.getrefcount(converted.base)
        assert base_refs == 2
        converted.sum()
开发者ID:CodingCat,项目名称:arrow,代码行数:25,代码来源:test_array.py
注：本文中的pyarrow.array函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论