本文整理汇总了Python中pyarrow.schema函数的典型用法代码示例。如果您正苦于以下问题:Python schema函数的具体用法?Python schema怎么用?Python schema使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了schema函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_schema
def test_schema():
    """Build a schema from ``pa.field`` objects and check its accessors.

    Covers ``names``, ``types``, ``len``, integer indexing,
    ``field_by_name``, ``repr`` and the ``TypeError`` raised for a
    ``None`` entry.
    """
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ]
    sch = pa.schema(fields)

    assert sch.names == ['foo', 'bar', 'baz']
    assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]

    assert len(sch) == 3
    assert sch[0].name == 'foo'
    assert sch[0].type == fields[0].type
    assert sch.field_by_name('foo').name == 'foo'
    assert sch.field_by_name('foo').type == fields[0].type

    # NOTE(review): the "child 0" line below has no leading spaces; pyarrow's
    # repr presumably indents it, and the whitespace may have been lost in
    # extraction together with the code indentation -- confirm upstream.
    assert repr(sch) == """\
foo: int32
bar: string
baz: list<item: int8>
child 0, item: int8"""

    # A schema entry that is not a field must be rejected.
    with pytest.raises(TypeError):
        pa.schema([None])
开发者ID:rok,项目名称:arrow,代码行数:25,代码来源:test_schema.py
示例2: test_timestamps_notimezone_nulls
def test_timestamps_notimezone_nulls(self):
    """Round-trip datetime64 columns containing a null, in ms and ns units.

    Each case builds a one-column DataFrame, then delegates the comparison
    to ``self._check_pandas_roundtrip`` with the expected Arrow schema.
    """
    # Millisecond resolution.
    df = pd.DataFrame({
        'datetime64': np.array([
            '2007-07-13T01:23:34.123',
            None,
            '2010-08-13T05:46:57.437'],
            dtype='datetime64[ms]')
    })
    field = pa.field('datetime64', pa.timestamp('ms'))
    schema = pa.schema([field])
    self._check_pandas_roundtrip(
        df,
        timestamps_to_ms=True,
        expected_schema=schema,
    )

    # Nanosecond resolution.
    df = pd.DataFrame({
        'datetime64': np.array([
            '2007-07-13T01:23:34.123456789',
            None,
            '2010-08-13T05:46:57.437699912'],
            dtype='datetime64[ns]')
    })
    field = pa.field('datetime64', pa.timestamp('ns'))
    schema = pa.schema([field])
    self._check_pandas_roundtrip(
        df,
        timestamps_to_ms=False,
        expected_schema=schema,
    )
开发者ID:marklavrynenko-original,项目名称:arrow,代码行数:30,代码来源:test_convert_pandas.py
示例3: test_custom_nulls
def test_custom_nulls(self):
    """Check the table read from CSV bytes when ``null_values`` is customised.

    Two cases are read through ``self.read_bytes``: a custom list of null
    spellings, and an empty list.
    """
    # Infer nulls with custom values
    opts = ConvertOptions(null_values=['Xxx', 'Zzz'])
    rows = b"a,b,c,d\nZzz,Xxx,1,2\nXxx,#N/A,,Zzz\n"
    table = self.read_bytes(rows, convert_options=opts)
    schema = pa.schema([('a', pa.null()),
                        ('b', pa.string()),
                        ('c', pa.string()),
                        ('d', pa.int64())])
    assert table.schema == schema
    assert table.to_pydict() == {
        'a': [None, None],
        'b': [u"Xxx", u"#N/A"],
        'c': [u"1", u""],
        'd': [2, None],
    }

    # With an empty null_values list both columns are expected as strings.
    opts = ConvertOptions(null_values=[])
    rows = b"a,b\n#N/A,\n"
    table = self.read_bytes(rows, convert_options=opts)
    schema = pa.schema([('a', pa.string()),
                        ('b', pa.string())])
    assert table.schema == schema
    assert table.to_pydict() == {
        'a': [u"#N/A"],
        'b': [u""],
    }
开发者ID:laurentgo,项目名称:arrow,代码行数:27,代码来源:test_csv.py
示例4: test_schema_equals_propagates_check_metadata
def test_schema_equals_propagates_check_metadata():
    """Check that ``Schema.equals`` honours ``check_metadata`` for field metadata.

    The two schemas differ only in the metadata attached to field ``bar``.
    """
    # ARROW-4088
    schema1 = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string())
    ])
    schema2 = pa.schema([
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string(), metadata={'a': 'alpha'}),
    ])
    assert not schema1.equals(schema2)
    assert schema1.equals(schema2, check_metadata=False)
开发者ID:rok,项目名称:arrow,代码行数:12,代码来源:test_schema.py
示例5: test_schema_equals
def test_schema_equals():
    """Check ``Schema.equals`` for identical and differing field lists."""
    fields = [
        pa.field('foo', pa.int32()),
        pa.field('bar', pa.string()),
        pa.field('baz', pa.list_(pa.int8()))
    ]

    sch1 = pa.schema(fields)
    sch2 = pa.schema(fields)
    assert sch1.equals(sch2)

    # Drop the last field so the third schema has one field fewer.
    del fields[-1]
    sch3 = pa.schema(fields)
    assert not sch1.equals(sch3)
开发者ID:giantwhale,项目名称:arrow,代码行数:14,代码来源:test_schema.py
示例6: test_table_from_pydict
def test_table_from_pydict():
    """Exercise ``pa.Table.from_pydict`` with several kinds of input.

    Inputs covered: an empty dict, arrays, chunked arrays, plain lists,
    metadata with an inferred schema, an explicit schema, and the rejected
    combination of ``schema`` together with ``metadata``.
    """
    table = pa.Table.from_pydict({})
    assert table.num_columns == 0
    assert table.num_rows == 0
    assert table.schema == pa.schema([])
    assert table.to_pydict() == {}

    # With arrays as values
    data = OrderedDict([('strs', pa.array([u'', u'foo', u'bar'])),
                        ('floats', pa.array([4.5, 5, None]))])
    schema = pa.schema([('strs', pa.utf8()), ('floats', pa.float64())])
    table = pa.Table.from_pydict(data)
    assert table.num_columns == 2
    assert table.num_rows == 3
    assert table.schema == schema

    # With chunked arrays as values
    data = OrderedDict([('strs', pa.chunked_array([[u''], [u'foo', u'bar']])),
                        ('floats', pa.chunked_array([[4.5], [5, None]]))])
    table = pa.Table.from_pydict(data)
    assert table.num_columns == 2
    assert table.num_rows == 3
    assert table.schema == schema

    # With lists as values
    data = OrderedDict([('strs', [u'', u'foo', u'bar']),
                        ('floats', [4.5, 5, None])])
    table = pa.Table.from_pydict(data)
    assert table.num_columns == 2
    assert table.num_rows == 3
    assert table.schema == schema
    assert table.to_pydict() == data

    # With metadata and inferred schema
    metadata = {b'foo': b'bar'}
    schema = schema.add_metadata(metadata)
    table = pa.Table.from_pydict(data, metadata=metadata)
    assert table.schema == schema
    assert table.schema.metadata == metadata
    assert table.to_pydict() == data

    # With explicit schema
    table = pa.Table.from_pydict(data, schema=schema)
    assert table.schema == schema
    assert table.schema.metadata == metadata
    assert table.to_pydict() == data

    # Cannot pass both schema and metadata
    with pytest.raises(ValueError):
        pa.Table.from_pydict(data, schema=schema, metadata=metadata)
开发者ID:rok,项目名称:arrow,代码行数:50,代码来源:test_table.py
示例7: test_table_unsafe_casting
def test_table_unsafe_casting():
    """Check ``Table.cast`` on a float column with fractional values.

    The default cast is expected to raise ``pa.ArrowInvalid``; the cast
    with ``safe=False`` is expected to equal the truncated table.
    """
    data = [
        pa.array(range(5), type=pa.int64()),
        pa.array([-10, -5, 0, 5, 10], type=pa.int32()),
        pa.array([1.1, 2.2, 3.3, 4.4, 5.5], type=pa.float64()),
        pa.array(['ab', 'bc', 'cd', 'de', 'ef'], type=pa.string())
    ]
    table = pa.Table.from_arrays(data, names=tuple('abcd'))

    expected_data = [
        pa.array(range(5), type=pa.int32()),
        pa.array([-10, -5, 0, 5, 10], type=pa.int16()),
        pa.array([1, 2, 3, 4, 5], type=pa.int64()),
        pa.array(['ab', 'bc', 'cd', 'de', 'ef'], type=pa.string())
    ]
    expected_table = pa.Table.from_arrays(expected_data, names=tuple('abcd'))

    target_schema = pa.schema([
        pa.field('a', pa.int32()),
        pa.field('b', pa.int16()),
        pa.field('c', pa.int64()),
        pa.field('d', pa.string())
    ])

    with pytest.raises(pa.ArrowInvalid,
                       match='Floating point value truncated'):
        table.cast(target_schema)

    # Must stay outside the ``raises`` block: code after the failing cast
    # inside it would never run.
    casted_table = table.cast(target_schema, safe=False)
    assert casted_table.equals(expected_table)
开发者ID:emkornfield,项目名称:arrow,代码行数:30,代码来源:test_table.py
示例8: make_recordbatch
def make_recordbatch(length):
    """Return a two-column int16 record batch holding ``length`` random rows.

    Columns are named ``f0`` and ``f1``; values are drawn with
    ``np.random.randint(0, 255, ...)``.
    """
    schema = pa.schema([pa.field('f0', pa.int16()),
                        pa.field('f1', pa.int16())])
    a0 = pa.array(np.random.randint(0, 255, size=length, dtype=np.int16))
    a1 = pa.array(np.random.randint(0, 255, size=length, dtype=np.int16))
    batch = pa.RecordBatch.from_arrays([a0, a1], schema)
    return batch
开发者ID:emkornfield,项目名称:arrow,代码行数:7,代码来源:test_cuda.py
示例9: test_orcfile_empty
def test_orcfile_empty():
    """Read the ``TestOrcFile.emptyFile`` example and check rows and schema.

    The table is expected to have zero rows and the nested schema spelled
    out below.
    """
    from pyarrow import orc

    f = orc.ORCFile(path_for_orc_example('TestOrcFile.emptyFile'))
    table = f.read()
    assert table.num_rows == 0

    schema = table.schema
    expected_schema = pa.schema([
        ('boolean1', pa.bool_()),
        ('byte1', pa.int8()),
        ('short1', pa.int16()),
        ('int1', pa.int32()),
        ('long1', pa.int64()),
        ('float1', pa.float32()),
        ('double1', pa.float64()),
        ('bytes1', pa.binary()),
        ('string1', pa.string()),
        ('middle', pa.struct([
            ('list', pa.list_(pa.struct([
                ('int1', pa.int32()),
                ('string1', pa.string()),
            ]))),
        ])),
        ('list', pa.list_(pa.struct([
            ('int1', pa.int32()),
            ('string1', pa.string()),
        ]))),
        ('map', pa.list_(pa.struct([
            ('key', pa.string()),
            ('value', pa.struct([
                ('int1', pa.int32()),
                ('string1', pa.string()),
            ])),
        ]))),
    ])
    assert schema == expected_schema
开发者ID:dremio,项目名称:arrow,代码行数:35,代码来源:test_orc.py
示例10: test_type_schema_pickling
def test_type_schema_pickling():
    """Check that types, a field and a schema survive a pickle round trip.

    Each case is pickled on its own, then all cases are assembled into one
    schema with metadata, which is pickled as well.
    """
    # NOTE(review): ``pa.decimal`` may be a legacy spelling that newer
    # pyarrow releases no longer expose -- confirm against the pinned version.
    cases = [
        pa.int8(),
        pa.string(),
        pa.binary(),
        pa.binary(10),
        pa.list_(pa.string()),
        pa.struct([
            pa.field('a', 'int8'),
            pa.field('b', 'string')
        ]),
        pa.time32('s'),
        pa.time64('us'),
        pa.date32(),
        pa.date64(),
        pa.timestamp('ms'),
        pa.timestamp('ns'),
        pa.decimal(12, 2),
        pa.field('a', 'string', metadata={b'foo': b'bar'})
    ]

    for val in cases:
        roundtripped = pickle.loads(pickle.dumps(val))
        assert val == roundtripped

    # Entries that are already fields are reused; bare types are wrapped in
    # a field named after their position in ``cases``.
    fields = []
    for i, f in enumerate(cases):
        if isinstance(f, pa.Field):
            fields.append(f)
        else:
            fields.append(pa.field('_f{}'.format(i), f))

    schema = pa.schema(fields, metadata={b'foo': b'bar'})
    roundtripped = pickle.loads(pickle.dumps(schema))
    assert schema == roundtripped
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:35,代码来源:test_schema.py
示例11: test_recordbatch_basics
def test_recordbatch_basics():
    """Check basic ``RecordBatch`` properties and construction with a schema.

    Covers metadata, length, row and column counts, ``to_pydict``, an
    out-of-range column index, and ``from_arrays`` given an explicit schema.
    """
    data = [
        pa.array(range(5)),
        pa.array([-10, -5, 0, 5, 10])
    ]

    batch = pa.RecordBatch.from_arrays(data, ['c0', 'c1'])
    assert not batch.schema.metadata

    assert len(batch) == 5
    assert batch.num_rows == 5
    assert batch.num_columns == len(data)
    assert batch.to_pydict() == OrderedDict([
        ('c0', [0, 1, 2, 3, 4]),
        ('c1', [-10, -5, 0, 5, 10])
    ])

    with pytest.raises(IndexError):
        # bounds checking
        batch[2]

    # Schema passed explicitly
    schema = pa.schema([pa.field('c0', pa.int16()),
                        pa.field('c1', pa.int32())],
                       metadata={b'foo': b'bar'})
    batch = pa.RecordBatch.from_arrays(data, schema)
    assert batch.schema == schema
开发者ID:dremio,项目名称:arrow,代码行数:27,代码来源:test_table.py
示例12: test_table_safe_casting
def test_table_safe_casting():
    """Check that ``Table.cast`` with default arguments yields the expected table.

    The float column holds only whole numbers (1.0 .. 5.0) and is cast to
    int64; the integer columns are cast to narrower integer types.
    """
    data = [
        pa.array(range(5), type=pa.int64()),
        pa.array([-10, -5, 0, 5, 10], type=pa.int32()),
        pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64()),
        pa.array(['ab', 'bc', 'cd', 'de', 'ef'], type=pa.string())
    ]
    table = pa.Table.from_arrays(data, names=tuple('abcd'))

    expected_data = [
        pa.array(range(5), type=pa.int32()),
        pa.array([-10, -5, 0, 5, 10], type=pa.int16()),
        pa.array([1, 2, 3, 4, 5], type=pa.int64()),
        pa.array(['ab', 'bc', 'cd', 'de', 'ef'], type=pa.string())
    ]
    expected_table = pa.Table.from_arrays(expected_data, names=tuple('abcd'))

    target_schema = pa.schema([
        pa.field('a', pa.int32()),
        pa.field('b', pa.int16()),
        pa.field('c', pa.int64()),
        pa.field('d', pa.string())
    ])
    casted_table = table.cast(target_schema)

    assert casted_table.equals(expected_table)
开发者ID:emkornfield,项目名称:arrow,代码行数:26,代码来源:test_table.py
示例13: test_float_nulls
def test_float_nulls(self):
    """Check masked float32/float64 arrays convert to pandas with NaN at nulls.

    A random boolean mask marks the null slots; the expected frame holds
    the same random values with ``np.nan`` written at the masked positions.
    """
    num_values = 100

    null_mask = np.random.randint(0, 10, size=num_values) < 3
    dtypes = [('f4', pa.float32()), ('f8', pa.float64())]
    names = ['f4', 'f8']
    expected_cols = []

    arrays = []
    fields = []
    for name, arrow_dtype in dtypes:
        # ``name`` doubles as the numpy dtype string passed to ``astype``.
        values = np.random.randn(num_values).astype(name)

        arr = pa.array(values, from_pandas=True, mask=null_mask)
        arrays.append(arr)
        fields.append(pa.field(name, arrow_dtype))

        # Overwrite the masked slots only after the Arrow array was built.
        values[null_mask] = np.nan
        expected_cols.append(values)

    ex_frame = pd.DataFrame(dict(zip(names, expected_cols)),
                            columns=names)

    table = pa.Table.from_arrays(arrays, names)
    assert table.schema.equals(pa.schema(fields))
    result = table.to_pandas()
    tm.assert_frame_equal(result, ex_frame)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:27,代码来源:test_convert_pandas.py
示例14: test_int_object_nulls
def test_int_object_nulls(self):
    """Round-trip an object column of ints and ``None`` with an int64 schema.

    The expected frame is built with ``pd.to_numeric`` on the same array;
    the comparison is delegated to ``self._check_pandas_roundtrip``.
    """
    arr = np.array([None, 1, np.int64(3)] * 5, dtype=object)
    df = pd.DataFrame({'ints': arr})
    expected = pd.DataFrame({'ints': pd.to_numeric(arr)})
    field = pa.field('ints', pa.int64())
    schema = pa.schema([field])
    self._check_pandas_roundtrip(df, expected=expected,
                                 expected_schema=schema)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:8,代码来源:test_convert_pandas.py
示例15: test_empty_table
def test_empty_table():
    """Check that ``Schema.empty_table`` returns a zero-row table with that schema."""
    schema = pa.schema([
        pa.field('oneField', pa.int64())
    ])
    table = schema.empty_table()

    assert isinstance(table, pa.Table)
    assert table.num_rows == 0
    assert table.schema == schema
开发者ID:rok,项目名称:arrow,代码行数:8,代码来源:test_schema.py
示例16: test_unicode
def test_unicode(self):
    """Round-trip a column of unicode strings, ``None`` and NaN as a string field.

    The five-value pattern is repeated 1000 times; the comparison is
    delegated to ``self._check_pandas_roundtrip``.
    """
    repeats = 1000
    values = [u'foo', None, u'bar', u'mañana', np.nan]
    df = pd.DataFrame({'strings': values * repeats})
    field = pa.field('strings', pa.string())
    schema = pa.schema([field])

    self._check_pandas_roundtrip(df, expected_schema=schema)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:8,代码来源:test_convert_pandas.py
示例17: test_boolean_no_nulls
def test_boolean_no_nulls(self):
    """Round-trip a boolean column of 100 values with a ``bool_`` schema.

    The comparison is delegated to ``self._check_pandas_roundtrip``.
    """
    num_values = 100

    # Fixed seed so the generated column is the same on every run.
    np.random.seed(0)

    df = pd.DataFrame({'bools': np.random.randn(num_values) > 0})
    field = pa.field('bools', pa.bool_())
    schema = pa.schema([field])
    self._check_pandas_roundtrip(df, expected_schema=schema)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:9,代码来源:test_convert_pandas.py
示例18: test_fixed_size_bytes
def test_fixed_size_bytes(self):
    """Convert a bytes column under a ``pa.binary(3)`` schema and back.

    Checks the first field's type and name on the resulting table, then
    compares the pandas result with the input frame.
    """
    values = [b'foo', None, b'bar', None, None, b'hey']
    df = pd.DataFrame({'strings': values})
    schema = pa.schema([pa.field('strings', pa.binary(3))])
    table = pa.Table.from_pandas(df, schema=schema)

    assert table.schema[0].type == schema[0].type
    assert table.schema[0].name == schema[0].name

    result = table.to_pandas()
    tm.assert_frame_equal(result, df)
开发者ID:NonVolatileComputing,项目名称:arrow,代码行数:9,代码来源:test_convert_pandas.py
示例19: test_schema_from_tuples
def test_schema_from_tuples():
    """Build a schema from ``(name, type)`` tuples and check its accessors.

    Covers ``names``, ``types``, ``len``, ``repr`` and the ``TypeError``
    raised when a tuple carries ``None`` as its type.
    """
    fields = [
        ('foo', pa.int32()),
        ('bar', pa.string()),
        ('baz', pa.list_(pa.int8())),
    ]
    sch = pa.schema(fields)

    assert sch.names == ['foo', 'bar', 'baz']
    assert sch.types == [pa.int32(), pa.string(), pa.list_(pa.int8())]
    assert len(sch) == 3

    # NOTE(review): the "child 0" line below has no leading spaces; pyarrow's
    # repr presumably indents it, and the whitespace may have been lost in
    # extraction together with the code indentation -- confirm upstream.
    assert repr(sch) == """\
foo: int32
bar: string
baz: list<item: int8>
child 0, item: int8"""

    with pytest.raises(TypeError):
        pa.schema([('foo', None)])
开发者ID:rok,项目名称:arrow,代码行数:18,代码来源:test_schema.py
示例20: test_table_from_batches_and_schema
def test_table_from_batches_and_schema():
    """Check ``pa.Table.from_batches`` when an explicit schema is supplied.

    Covers a matching batch and schema, a schema with fewer fields than
    the batch, and a batch with fewer columns than the schema.
    """
    schema = pa.schema([
        pa.field('a', pa.int64()),
        pa.field('b', pa.float64()),
    ])
    batch = pa.RecordBatch.from_arrays([pa.array([1]), pa.array([3.14])],
                                       names=['a', 'b'])
    table = pa.Table.from_batches([batch], schema)
    assert table.schema.equals(schema)
    # NOTE(review): ``pa.column`` may not exist in newer pyarrow releases --
    # confirm against the pinned version.
    assert table.column(0) == pa.column('a', pa.array([1]))
    assert table.column(1) == pa.column('b', pa.array([3.14]))

    incompatible_schema = pa.schema([pa.field('a', pa.int64())])
    with pytest.raises(pa.ArrowInvalid):
        pa.Table.from_batches([batch], incompatible_schema)

    incompatible_batch = pa.RecordBatch.from_arrays([pa.array([1])], ['a'])
    with pytest.raises(pa.ArrowInvalid):
        pa.Table.from_batches([incompatible_batch], schema)
开发者ID:emkornfield,项目名称:arrow,代码行数:19,代码来源:test_table.py
注:本文中的pyarrow.schema函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论