本文整理汇总了Python中messytables.type_guess函数的典型用法代码示例。如果您正苦于以下问题：Python type_guess函数的具体用法？Python type_guess怎么用？Python type_guess使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了type_guess函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_guessing_uses_first_in_case_of_tie
def test_guessing_uses_first_in_case_of_tie(self):
    """Non-strict guessing returns the first listed type for this column.

    The same single-column sample is guessed twice with the candidate
    order swapped; each time the result must be the candidate listed first.
    """
    source = StringIO.StringIO('''
2
1.1
1500''')
    table = CSVTableSet(source).tables[0]
    orderings = (
        [DecimalType, IntegerType],
        [IntegerType, DecimalType],
    )
    for candidates in orderings:
        first_choice = candidates[0]
        result = type_guess(table.sample, types=candidates, strict=False)
        assert_equal(result, [first_choice()])
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:13,代码来源:test_guessing.py
示例2: generate_mapping
def generate_mapping(fileobj, sample=2000):
    """Build a column mapping for the CSV readable from ``fileobj``.

    The first sampled row is treated as the header row; the remaining
    sampled rows feed ``frequent_values`` and ``type_guess``.

    :param fileobj: file-like object handed to ``CSVRowSet``.
    :param sample: size of the sampling window passed to ``CSVRowSet``.
    :returns: dict keyed by the lower-cased slug of each header, each value
        holding ``label``, ``column``, ``common_values``, ``datatype`` and
        ``type``.
    """
    row_set = CSVRowSet('data', fileobj, window=sample)
    rows = list(row_set.sample)
    headers = rows[0]
    body = rows[1:]
    values = frequent_values(body)
    types = type_guess(body)

    numeric_names = ('decimal', 'integer', 'float')
    mapping = {}
    for header, cell_type, common in zip(headers, types, values):
        datatype = repr(cell_type).lower()
        if datatype in numeric_names:
            # Every numeric flavour is reported as a float measure.
            kind = 'measure'
            datatype = 'float'
        elif datatype == 'date':
            kind = 'date'
        else:
            kind = 'value'
        key = slugify(header.value).lower()
        mapping[key] = {
            'label': header.value,
            'column': header.value,
            'common_values': common,
            'datatype': datatype,
            'type': kind,
        }
    return mapping
开发者ID:jagarcias,项目名称:openspending.etl,代码行数:26,代码来源:mapgen.py
示例3: get_schema
def get_schema(self, filename):
    """
    Guess a schema for ``filename`` using messytables.

    Returns a list with one ``[header, type, sample value]`` entry per
    guessed header, where the type and the sample value are rendered as
    strings. Returns an empty list when ``self.read_file`` yields ``None``.
    """
    table_set = self.read_file(filename)
    if table_set is None:
        # The file could not be read, so there is nothing to describe.
        return []

    def as_text(value):
        # Leave strings alone and stringify everything else.
        return value if isinstance(value, str) else str(value)

    # Only the first table of the set is inspected.
    row_set = table_set.tables[0]
    offset, headers = headers_guess(row_set.sample)
    row_set.register_processor(headers_processor(headers))
    row_set.register_processor(offset_processor(offset + 1))
    types = type_guess(row_set.sample, strict=True)

    # One data row is used to give an example value for every column.
    first_row = next(row_set.sample)
    return [
        [header, str(types[index]), as_text(first_row[index].value)]
        for index, header in enumerate(headers)
    ]
开发者ID:purnima215,项目名称:dgit,代码行数:29,代码来源:tableformat.py
示例4: test_strict_type_guessing_with_large_file
def test_strict_type_guessing_with_large_file(self):
    """Strictly guess the 96 column types of the ``211.csv`` fixture."""
    fh = horror_fobj('211.csv')
    table = CSVTableSet(fh).tables[0]
    offset, _headers = headers_guess(table.sample)
    table.register_processor(offset_processor(offset + 1))
    candidates = [StringType, IntegerType, DecimalType, DateUtilType]
    guessed_types = type_guess(table.sample, types=candidates, strict=True)
    assert_equal(len(guessed_types), 96)

    # Every column is expected to be a string except the zero-based
    # positions listed below.
    non_string = {}
    for position in (0, 6, 27, 54, 70):
        non_string[position] = IntegerType
    for position in (29, 30):
        non_string[position] = DecimalType
    for position in (89, 90, 91, 92):
        non_string[position] = DateUtilType
    expected = [non_string.get(position, StringType)()
                for position in range(96)]
    assert_equal(guessed_types, expected)
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:34,代码来源:test_guessing.py
示例5: analyze_csv
def analyze_csv(url, sample=1000):
    """Fetch the CSV at ``url`` and propose a column mapping for it.

    The first sampled row is treated as the header row. Column types are
    guessed with ``type_guess`` restricted to ``LIMITED_TYPES``.

    :param url: location handed to ``urlopen``.
    :param sample: size of the sampling window passed to ``CSVRowSet``.
    :returns: ``{'columns': [...], 'mapping': {...}}`` on success, or
        ``{'error': <message>}`` when anything goes wrong.
    """
    try:
        fileobj = urlopen(url)
        try:
            row_set = CSVRowSet('data', fileobj, window=sample)
            rows = list(row_set.sample)
        finally:
            # The sample is fully materialised, so the handle can be
            # released right away instead of leaking until collection.
            fileobj.close()
        headers, data = rows[0], rows[1:]
        # Types are guessed from the rows after the first 500 data rows.
        # When the sample is shorter than that, fall back to the whole
        # sample so that small files still get a mapping instead of an
        # empty one.
        types = type_guess(data[500:] or data, types=LIMITED_TYPES)
        mapping = {}
        for header, type_ in zip(headers, types):
            type_ = repr(type_).lower()
            name = slugify(header.value).lower()
            meta = {
                'label': header.value,
                'column': header.value,
                'datatype': type_
            }
            if type_ in ['decimal', 'integer', 'float']:
                meta['type'] = 'measure'
                meta['datatype'] = 'float'
            elif type_.startswith('date'):
                meta['type'] = 'date'
                meta['datatype'] = 'date'
            else:
                meta['type'] = 'attribute'
            mapping[name] = meta
        return {'columns': [h.value for h in headers],
                'mapping': mapping}
    except Exception as e:
        # ``except ... as`` is valid on Python 2.6+ and Python 3, unlike the
        # old comma form. Failures are reported to the caller as data.
        return {'error': unicode(e)}
开发者ID:asuffield,项目名称:openspending,代码行数:30,代码来源:analysis.py
示例6: rowset_as_jts
def rowset_as_jts(rowset, headers=None, types=None):
    ''' Build a json table schema from the sample of ``rowset``.

    NOTE(review): the ``headers`` and ``types`` arguments are accepted but
    never read; both are always replaced by values guessed from
    ``rowset.sample``. Confirm whether callers expect them to be honoured.
    '''
    guessed_headers = messytables.headers_guess(rowset.sample)[1]
    guessed_types = messytables.type_guess(rowset.sample)
    type_names = map(celltype_as_string, guessed_types)
    return headers_and_typed_as_jts(guessed_headers, type_names)
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:7,代码来源:jts.py
示例7: test_null_process
def test_null_process(self):
    """Check which cells are None before and after typing ``null.csv``."""

    def none_mask(rows):
        # True wherever a cell's value is None, row by row.
        return [[cell.value is None for cell in row] for row in rows]

    fh = horror_fobj('null.csv')
    row_set = CSVTableSet(fh).tables[0]
    row_set.register_processor(null_processor(['null']))

    before = none_mask(list(row_set))
    assert_equal(before[0], [False, True, False, False])
    assert_equal(before[1], [False, False, False, True])
    assert_equal(before[2], [False, True, False, False])

    types = type_guess(row_set.sample, strict=True)
    expected_types = [IntegerType(), BoolType(), BoolType(), BoolType()]
    assert_equal(types, expected_types)

    row_set.register_processor(types_processor(types))
    # Once the types are applied, more cells of the third row are expected
    # to be None than before (the original note: '' should become None).
    after = none_mask(list(row_set))
    assert_equal(after[0], [False, True, False, False])
    assert_equal(after[1], [False, False, False, True])
    assert_equal(after[2], [False, True, True, True])
开发者ID:ahlusar1989,项目名称:messytables,代码行数:25,代码来源:test_read.py
示例8: main
def main(argv=None):
    """Guess a schema for a tabular file and upload it to Myria.

    Reads the file named in the parsed arguments (or standard input),
    guesses headers and column types with messytables, builds a Myria
    schema, and then either uploads the data or, on a dry run, writes it
    to standard output.

    :param argv: argument list forwarded to ``parse_args``.
    :raises ValueError: if the input does not contain exactly one table.
    """
    args = parse_args(argv)
    if args.file is None:
        # slurp the whole input since there seems to be a bug in messytables
        # which should be able to handle streams but doesn't
        args.file = cStringIO.StringIO(sys.stdin.read())
    relation_key = args_to_relation_key(args)
    table_set = any_tableset(args.file)
    if len(table_set.tables) != 1:
        raise ValueError(
            "Can only handle files with a single table, not %s"
            % len(table_set.tables))
    row_set = table_set.tables[0]
    # guess header names and the offset of the header:
    offset, headers = headers_guess(row_set.sample)
    row_set.register_processor(strip_processor())
    row_set.register_processor(headers_processor(headers))
    # Temporarily, mark the offset of the header
    row_set.register_processor(offset_processor(offset + 1))
    # guess types and register them
    types = type_guess(
        replace_empty_string(row_set.sample),
        strict=True,
        types=[StringType, DecimalType, IntegerType])
    row_set.register_processor(types_processor(types))
    # Messytables seems to not handle the case where there are no headers.
    # Work around this as follows:
    # 1) offset must be 0
    # 2) if the types of the data match the headers, assume there are
    #    actually no headers
    if offset == 0:
        try:
            for cell_type, header in zip(types, headers):
                cell_type.cast(header)
        except Exception:
            # At least one header does not cast to its column type, so the
            # first row really is a header row: keep the processors as
            # registered. Catching Exception rather than using a bare
            # except lets KeyboardInterrupt and SystemExit propagate.
            pass
        else:
            # We don't need the headers_processor or the offset_processor
            row_set._processors = []
            row_set.register_processor(strip_processor())
            row_set.register_processor(types_processor(types))
            headers = None
    # Construct the Myria schema
    schema = messy_to_schema(types, headers)
    logging.info("Myria schema: {}".format(json.dumps(schema)))
    # Prepare data for writing to Myria
    data, kwargs = write_data(row_set, schema)
    if not args.dry:
        # Connect to Myria and send the data
        connection = myria.MyriaConnection(
            hostname=args.hostname, port=args.port, ssl=args.ssl)
        ret = connection.upload_file(
            relation_key, schema, data, args.overwrite, **kwargs)
        sys.stdout.write(pretty_json(ret))
    else:
        sys.stdout.write(data)
开发者ID:helgag,项目名称:myria-python,代码行数:59,代码来源:upload_file.py
示例9: test_non_strict_guessing_handles_padding
def test_non_strict_guessing_handles_padding(self):
    """Non-strict guessing on space-padded cells yields integer, string, decimal."""
    source = StringIO.StringIO('''
1, , 2.1
2, , 1.1
foo, , 1500''')
    table = CSVTableSet(source).tables[0]
    result = type_guess(table.sample, strict=False)
    assert_equal(len(result), 3)
    expected = [IntegerType(), StringType(), DecimalType()]
    assert_equal(result, expected)
开发者ID:rossjones,项目名称:messytables,代码行数:9,代码来源:test_guessing.py
示例10: test_strict_guessing_handles_padding
def test_strict_guessing_handles_padding(self):
    """Strict guessing on space-padded cells yields string, string, decimal."""
    source = io.BytesIO(b'''
1, , 2
2, , 1.1
foo, , 1500''')
    table = CSVTableSet(source).tables[0]
    result = type_guess(table.sample, strict=True)
    assert_equal(len(result), 3)
    expected = [StringType(), StringType(), DecimalType()]
    assert_equal(result, expected)
开发者ID:MikeData,项目名称:messytables,代码行数:10,代码来源:test_guessing.py
示例11: parse
def parse(stream, excel_type='xls', sheet=1, guess_types=True, **kwargs):
    '''Parse Excel (xls or xlsx) to structured objects.

    :param stream: file-like object handed to the table set's
        ``from_fileobj``.
    :param excel_type: xls | xlsx
    :param sheet: index of sheet in spreadsheet to convert (starting from
        index = 1)
    :param guess_types: when true, a ``type`` entry is added to every field.
    :returns: a tuple ``(rows, metadata)`` where ``rows`` is a generator of
        dicts keyed by column id and ``metadata`` is ``{'fields': [...]}``.
    :raises Exception: if the requested sheet does not exist, including
        sheet numbers below 1.
    '''
    sheet_number = int(sheet) - 1
    if sheet_number < 0:
        # A negative index would silently pick a sheet counted from the
        # end instead of reporting that the requested sheet is missing.
        raise Exception('This file does not have sheet number %d' %
                        (sheet_number + 1))
    xlsclass = XLSXTableSet if excel_type == 'xlsx' else XLSTableSet
    table_set = xlsclass.from_fileobj(stream)
    try:
        row_set = table_set.tables[sheet_number]
    except IndexError:
        raise Exception('This file does not have sheet number %d' %
                        (sheet_number + 1))
    offset, headers = headers_guess(row_set.sample)

    fields = []
    dup_columns = {}
    noname_count = 1
    if guess_types:
        candidate_types = [StringType, IntegerType, FloatType, DecimalType,
                           DateUtilType]
        row_types = type_guess(row_set.sample, candidate_types)

    for index, field in enumerate(headers):
        field_dict = {}
        if "" == field:
            # Unnamed columns get a generated name, which is written back
            # into ``headers`` so the duplicate check below sees it.
            field = '_'.join(['column', str(noname_count)])
            headers[index] = field
            noname_count += 1
        if headers.count(field) == 1:
            field_dict['id'] = field
        else:
            # Repeated names are made unique with a running suffix.
            dup_columns[field] = dup_columns.get(field, 0) + 1
            field_dict['id'] = u'_'.join([field, str(dup_columns[field])])
        if guess_types:
            if isinstance(row_types[index], DateUtilType):
                field_dict['type'] = 'DateTime'
            else:
                field_dict['type'] = str(row_types[index])
        fields.append(field_dict)

    row_set.register_processor(headers_processor([x['id'] for x in fields]))
    row_set.register_processor(offset_processor(offset + 1))

    def row_iterator():
        for row in row_set:
            data_row = {}
            for cell in row:
                data_row[cell.column] = cell.value
            yield data_row

    return row_iterator(), {'fields': fields}
开发者ID:Web5design,项目名称:dataconverters,代码行数:54,代码来源:xls.py
示例12: test_json_type
def test_json_type(self):
    """Columns of JSON objects and arrays are guessed as JSON; the rest as string."""
    source = StringIO.StringIO('''
"{""a"":""b"", ""c"":""d""}", "[1, 2, 3]", 12a
"[""a"", [1, 2, {""a"":""b""}]]", "{""a"": 1, ""b"":[1, 2]}", abc
,, "abc"
''')
    table = CSVTableSet(source).tables[0]
    result = type_guess(table.sample)
    expected = [JsonType(), JsonType(), StringType()]
    assert_equal(result, expected)
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:11,代码来源:test_guessing.py
示例13: test_wkt_type
def test_wkt_type(self):
    """Strict guessing yields EWKB for the first column and EWKT for the second."""
    source = StringIO.StringIO('''
"0102000020e6100000020000000000000000002640000000000000474000000000000024400000000000804640",
"0102000020787f0000020000000000000000002640000000000000474000000000000024400000000000804640", "SRID=4326;LINESTRING(11 46,10 45)"
"0101000020e610000000000000000026400000000000004740", "SRID=4326;LINESTRING(11 46,10 45)"
, "SRID=4326;POINT(11 46)"
''')
    table = CSVTableSet(source).tables[0]
    result = type_guess(table.sample, strict=True)
    expected = [EWKB(), EWKT()]
    assert_equal(result, expected)
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:12,代码来源:test_guessing.py
示例14: rowset_as_schema
def rowset_as_schema(rowset):
    """Return a ``JSONTableSchema`` describing the sample of ``rowset``.

    One field is added per guessed header, using the header both as field
    id and as label, with the guessed cell type rendered by
    ``celltype_as_string``.
    """
    headers = messytables.headers_guess(rowset.sample)[1]
    guessed_types = messytables.type_guess(rowset.sample)
    type_names = map(celltype_as_string, guessed_types)
    schema = jsontableschema.JSONTableSchema()
    for name, type_name in zip(headers, type_names):
        schema.add_field(field_id=name, label=name, field_type=type_name)
    return schema
开发者ID:mk270,项目名称:messytables-jts,代码行数:12,代码来源:messytables_jts.py
示例15: proc
def proc(f, database_name, table_name):
    """Load the first table found in ``f`` into a Hive table.

    Headers and column types (string or date only) are guessed with
    messytables. The rows are written to a temporary data file, a
    temporary script that recreates and loads the table is generated, and
    ``hive`` is run on that script.

    :param f: file object handed to ``messytables.any_tableset``.
    :param database_name: Hive database passed via ``--database``.
    :param table_name: name of the table to drop, create and load.
    :returns: the exit status of the ``hive`` command.
    """
    table_set = messytables.any_tableset(f)
    row_set = table_set.tables[0]
    # guess header names and the offset of the header:
    offset, headers = messytables.headers_guess(row_set.sample)
    row_set.register_processor(messytables.headers_processor(headers))
    row_set.register_processor(messytables.offset_processor(offset + 1))
    types = messytables.type_guess(row_set.sample, types=[
        messytables.types.StringType,
        messytables.types.DateType,
    ], strict=True)
    fields_ddl = ','.join([
        ' {0} {1}\n'.format(
            canonicalize_column_name(col_name),
            hive_column_type(col_type)
        )
        for col_name, col_type in zip(headers, types)
    ])
    # Both temporary files must outlive the hive run; the ``with`` block
    # guarantees they are closed (and thereby removed) afterwards, even if
    # writing the rows or launching hive fails.
    with tempfile.NamedTemporaryFile(mode='w') as hive_data_file, \
            tempfile.NamedTemporaryFile(mode='w') as hive_cmd_file:
        hive_sql = '''
DROP TABLE IF EXISTS {0};
CREATE TABLE {0} (
{1}
)
STORED AS TEXTFILE
TBLPROPERTIES ("comment"="add_messytable on {3}");
LOAD DATA LOCAL INPATH '{2}' OVERWRITE INTO TABLE {0};
'''.format(table_name, fields_ddl, hive_data_file.name,
           datetime.datetime.now().isoformat())
        print(hive_sql, file=hive_cmd_file)
        hive_cmd_file.flush()
        row_set.register_processor(messytables.types_processor(types))
        for row in row_set:
            # Fields are joined with the \001 control character.
            print('\001'.join(map(str, [c.value for c in row])),
                  file=hive_data_file)
        hive_data_file.flush()
        # Hand the exit status back instead of discarding it so callers
        # can detect a failed load.
        return subprocess.call([
            'hive',
            '--database', database_name,
            '-f', hive_cmd_file.name,
        ])
开发者ID:Bridg,项目名称:bridg-messytable,代码行数:51,代码来源:add_messytable.py
示例16: test_type_guess_forced
def test_type_guess_forced(self):
    """A type given in ``forced_types`` is returned for its column.

    The third column is forced to string; the first two are left to be
    guessed (``None``).
    """
    source = StringIO.StringIO('''
1, aaa, true
2, bbb, false
3, ccc,
4, , yes
5, ddd, no
''')
    table = CSVTableSet(source).tables[0]
    forced = [None, None, StringType()]
    result = type_guess(table.sample, forced_types=forced)
    expected = [IntegerType(), StringType(), StringType()]
    assert_equal(result, expected)
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:14,代码来源:test_guessing.py
示例17: test_type_guess
def test_type_guess(self):
    """Default guessing yields decimal, date, integer, date, bool, bool."""
    source = io.BytesIO(b'''
1, 2012/2/12, 2, 02 October 2011, yes, 1
2, 2012/2/12, 2, 02 October 2011, true, 1
2.4, 2012/2/12, 1, 1 May 2011, no, 0
foo, bar, 1000, , false, 0
4.3, , 42, 24 October 2012,,
, 2012/2/12, 21, 24 December 2013, true, 1''')
    table = CSVTableSet(source).tables[0]
    result = type_guess(table.sample)
    expected = [
        DecimalType(),
        DateType('%Y/%m/%d'),
        IntegerType(),
        DateType('%d %B %Y'),
        BoolType(),
        BoolType(),
    ]
    assert_equal(result, expected)
开发者ID:MikeData,项目名称:messytables,代码行数:14,代码来源:test_guessing.py
示例18: create_new_model
def create_new_model(self, modelname, app_label):
    """ Use messytables to guess field types and build a new model

    Column names come from the first row of ``self.csvfile``. If any name
    is empty, all names are replaced by generated ``col_<n>`` names.

    :param modelname: name of the model to generate.
    :param app_label: application label; the table is named
        ``<app_label>_<modelname>``.
    :returns: whatever ``MakeModel.model_from_table`` returns, or ``None``
        after recording a message in ``self.errors`` and clearing
        ``self.modelname`` when messytables is unavailable or fails.
    """
    cols = self.csvfile[0]
    if any(not col for col in cols):
        # One generated name per column. The upper bound is inclusive, so
        # the last column is no longer dropped.
        cols = ['col_%s' % num for num in range(1, len(cols) + 1)]
        print('No column names for %s columns' % len(cols))
    else:
        cols = [cleancol.sub('_', col).lower() for col in cols]
    try:
        from messytables import any_tableset, type_guess
    except Exception:
        # Best effort: any failure to load messytables is reported rather
        # than raised, but KeyboardInterrupt/SystemExit still propagate.
        self.errors.append(
            'If you want to inspect CSV files to generate model code, you must install https://messytables.readthedocs.org')
        self.modelname = ''
        return
    try:
        table_set = any_tableset(self.filehandle)
        row_set = table_set.tables[0]
        types = type_guess(row_set.sample)
        types = [str(typeobj) for typeobj in types]
    except Exception as err:
        self.errors.append('messytables could not run due to error')
        self.errors.append(str(err))
        self.modelname = ''
        return
    fieldset = []
    maximums = self.get_maxlengths(cols)
    for i, col in enumerate(cols):
        length = maximums[i]
        if types[i] == 'String' and length > 255:
            types[i] = 'Text'
        integer = length
        # Half of the length, capped at 10.
        decimal = min(int(length / 2), 10)
        blank = True
        default = True
        column = (col, types[i], length, length, integer, decimal, blank, default)
        fieldset.append(column)
    # Import here so that messytables is not a dependency for just using csvimport cmd
    from csvimport.make_model import MakeModel
    maker = MakeModel()
    return maker.model_from_table('%s_%s' % (app_label, modelname), fieldset)
开发者ID:edcrewe,项目名称:django-csvimport,代码行数:50,代码来源:inspectcsv.py
示例19: test_type_guess
def test_type_guess(self):
    """Default guessing yields decimal, date, integer, date, bool, time."""
    source = StringIO.StringIO('''
1, 2012/2/12, 2, 02 October 2011, yes, 11
2, 2012/2/12, 2, 02 October 2011, true, 9 am
2.4, 2012/2/12, 1, 1 May 2011, no, 23:00.123
foo, bar, 1000, , false, 12:00
4.3, , 42, 24 October 2012, , 7.12
, 2012/2/12, 21, 24 December 2013, true, 11PM ''')
    table = CSVTableSet(source).tables[0]
    result = type_guess(table.sample)
    expected = [
        DecimalType(),
        DateType('%Y/%m/%d'),
        IntegerType(),
        DateType('%d %B %Y'),
        BoolType(),
        TimeType(),
    ]
    assert_equal(result, expected)
开发者ID:MPBAUnofficial,项目名称:messytables,代码行数:14,代码来源:test_guessing.py
示例20: test_read_type_guess_simple
def test_read_type_guess_simple(self):
    """Guess the types of ``simple.csv`` and check they are applied to cells.

    After registering ``types_processor``, the cells of the first row are
    expected to be strings and the cells of the third row are expected to
    carry the guessed types.
    """
    fh = horror_fobj("simple.csv")
    table_set = CSVTableSet(fh)
    row_set = table_set.tables[0]
    types = type_guess(row_set.sample)
    expected_types = [DateType("%Y-%m-%d"), IntegerType(), StringType()]
    assert_equal(types, expected_types)
    row_set.register_processor(types_processor(types))
    data = list(row_set)
    # Real lists are built here: on Python 3 ``map`` returns a lazy
    # iterator, which never compares equal to a list.
    header_types = [cell.type for cell in data[0]]
    assert_equal(header_types, [StringType()] * 3)
    row_types = [cell.type for cell in data[2]]
    assert_equal(expected_types, row_types)
开发者ID:bearrito,项目名称:messytables,代码行数:14,代码来源:test_read.py
注：本文中的messytables.type_guess函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论