本文整理汇总了Python中utils.datasets.Datasets类的典型用法代码示例。如果您正苦于以下问题:Python Datasets类的具体用法?Python Datasets怎么用?Python Datasets使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Datasets类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: mlt_query
def mlt_query(request):
    """Run a more-like-this search and render the matching documents."""
    logger = LogManager(__name__, 'SEARCH MLT')
    es_params = request.POST
    mlt_fields = [json.loads(raw_field)['path'] for raw_field in es_params.getlist('mlt_fields')]
    handle_negatives = request.POST['handle_negatives']
    docs_accepted = [doc.strip() for doc in request.POST['docs'].split('\n') if doc]
    docs_rejected = [doc.strip() for doc in request.POST['docs_rejected'].split('\n') if doc]

    # Collect stopwords from every selected lexicon.
    stopwords = []
    for lexicon_id in request.POST.getlist('mlt_stopword_lexicons'):
        lexicon = Lexicon.objects.get(id=int(lexicon_id))
        stopwords.extend(word.wrd for word in Word.objects.filter(lexicon=lexicon))

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)

    response = es_m.more_like_this_search(
        mlt_fields,
        docs_accepted=docs_accepted,
        docs_rejected=docs_rejected,
        handle_negatives=handle_negatives,
        stopwords=stopwords)

    documents = [{'id': hit['_id'], 'content': get_fields_content(hit, mlt_fields)}
                 for hit in response['hits']['hits']]

    template_params = {'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'documents': documents}
    template = loader.get_template('mlt_results.html')
    return HttpResponse(template.render(template_params, request))
开发者ID:cbentes,项目名称:texta,代码行数:35,代码来源:views.py
示例2: run
def run(self, task_id):
    """Execute the preprocessor task identified by *task_id*.

    Loads the task and its JSON parameters, marks it RUNNING, activates
    the requested datasets, builds an ES manager with the stored query,
    validates the request and hands off to the preprocessor worker.
    Cancellation deletes the task; any other failure marks it FAILED
    with the exception recorded in the task result.
    """
    self.task_id = task_id
    task = Task.objects.get(pk=self.task_id)
    params = json.loads(task.parameters)
    task.update_status(Task.STATUS_RUNNING)
    try:
        ds = Datasets().activate_datasets_by_id(params['dataset'])
        es_m = ds.build_manager(ES_Manager)
        es_m.load_combined_query(self._parse_query(params))
        self.es_m = es_m
        self.params = params
        valid, msg = self._check_if_request_bad(self.params)
        if valid:
            self._preprocessor_worker()
        else:
            # Invalid request: surface the validation message through the
            # generic failure path below.
            raise UserWarning(msg)
    except TaskCanceledException as e:
        # If here, task was canceled while processing
        # Delete task
        task = Task.objects.get(pk=self.task_id)
        task.delete()
        logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'PROCESSOR WORK', 'event': 'processor_worker_canceled', 'data': {'task_id': self.task_id}}), exc_info=True)
        print("--- Task canceled")
    except Exception as e:
        logging.getLogger(ERROR_LOGGER).exception(json.dumps(
            {'process': 'PROCESSOR WORK', 'event': 'processor_worker_failed', 'data': {'task_id': self.task_id}}), exc_info=True)
        # declare the job as failed.
        task = Task.objects.get(pk=self.task_id)
        task.result = json.dumps({'error': repr(e)})
        task.update_status(Task.STATUS_FAILED, set_time_completed=True)
开发者ID:ekt68,项目名称:texta,代码行数:34,代码来源:preprocessor_worker.py
示例3: save
def save(request):
    """Persist the current search (query, description, content) for the user."""
    logger = LogManager(__name__, 'SAVE SEARCH')

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_params = request.POST
    es_m.build(es_params)
    combined_query = es_m.get_combined_query()

    try:
        q = combined_query
        desc = request.POST['search_description']
        # Only the free-text match fields are stored as search content.
        s_content = json.dumps([request.POST[key] for key in request.POST.keys() if 'match_txt' in key])
        search = Search(author=request.user,
                        search_content=s_content,
                        description=desc,
                        dataset=Dataset.objects.get(pk=int(request.session['dataset'])),
                        query=json.dumps(q))
        search.save()
        logger.set_context('user_name', request.user.username)
        logger.set_context('search_id', search.id)
        logger.info('search_saved')
    except Exception as e:
        print('-- Exception[{0}] {1}'.format(__name__, e))
        logger.set_context('es_params', es_params)
        logger.exception('search_saving_failed')

    return HttpResponse()
开发者ID:cbentes,项目名称:texta,代码行数:26,代码来源:views.py
示例4: run
def run(self, task_id):
    """Execute the management task identified by *task_id*.

    Loads the task and its JSON parameters, marks it RUNNING, activates
    the requested datasets, builds an ES manager and delegates to a
    subworker whose result is stored on the task.  Cancellation deletes
    the task; any other failure marks it FAILED with the exception
    recorded in the task result.
    """
    self.task_id = task_id
    self.task_obj = Task.objects.get(pk=self.task_id)
    params = json.loads(self.task_obj.parameters)
    self.task_obj.update_status(Task.STATUS_RUNNING)
    try:
        ds = Datasets().activate_datasets_by_id(params['dataset'])
        es_m = ds.build_manager(ES_Manager)
        # es_m.load_combined_query(self._parse_query(params))
        self.es_m = es_m
        self.params = params
        result = self._start_subworker()
        self.task_obj.result = result
        self.task_obj.update_status(Task.STATUS_COMPLETED, set_time_completed=True)
    except TaskCanceledException as e:
        # If here, task was canceled while processing
        # Delete task
        self.task_obj.delete()
        logging.getLogger(INFO_LOGGER).info(json.dumps({'process': 'PROCESSOR WORK', 'event': 'management_worker_canceled', 'data': {'task_id': self.task_id}}))
        print("--- Task canceled")
    except Exception as e:
        logging.getLogger(ERROR_LOGGER).exception(json.dumps(
            {'process': 'PROCESSOR WORK', 'event': 'manager_worker_failed', 'data': {'task_id': self.task_id}}), exc_info=True)
        # declare the job as failed.
        self.task_obj.result = json.dumps({'error': repr(e)})
        self.task_obj.update_status(Task.STATUS_FAILED, set_time_completed=True)
    print('Done with management task')
开发者ID:ekt68,项目名称:texta,代码行数:31,代码来源:management_worker.py
示例5: index
def index(request):
    """Render the searcher page with fields, saved searches, lexicons and models."""
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)
    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type=TaskTypes.TRAIN_MODEL.value).filter(status__iexact=Task.STATUS_COMPLETED).order_by('-pk')

    # Hide fact graph if no fact_str_val type is present among the fields.
    has_fact_field = any(json.loads(field['data'])['type'] == "fact_str_val" for field in fields)
    display_fact_graph = '' if has_fact_field else 'hidden'

    template_params = {
        'display_fact_graph': display_fact_graph,
        'STATIC_URL': STATIC_URL,
        'URL_PREFIX': URL_PREFIX,
        'fields': fields,
        'searches': Search.objects.filter(author=request.user),
        'lexicons': Lexicon.objects.all().filter(author=request.user),
        'language_models': language_models,
        'allowed_datasets': datasets,
    }
    template = loader.get_template('searcher.html')
    return HttpResponse(template.render(template_params, request))
开发者ID:ekt68,项目名称:texta,代码行数:28,代码来源:views.py
示例6: api_document_tags_list
def api_document_tags_list(request, user, params):
    """ Get document tags (via auth_token)
    """
    dataset_id = params.get('dataset', None)
    document_ids = params.get('document_ids', None)

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        return HttpResponse(json.dumps(error), status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    resp = MassHelper(es_m).get_document_by_ids(document_ids)

    # Collect every TEXTA_TAG fact attached to the requested documents.
    data = [
        {'document_id': doc['_id'], 'field': fact['doc_path'], 'tag': fact['str_val']}
        for doc in resp['hits']['hits']
        for fact in doc['_source'].get('texta_facts', [])
        if fact['fact'] == 'TEXTA_TAG'
    ]
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:29,代码来源:api_v1.py
示例7: get_all_rows
def get_all_rows(es_params, request):
    """Stream all matching documents as CSV text chunks.

    Generator intended for a StreamingHttpResponse: yields the header row
    followed by batches of document rows fetched via the Elasticsearch
    scroll API.
    """
    # Sort the features BEFORE writing the header.  The original wrote the
    # header in the request's raw order but emitted row values in sorted
    # feature order, so the header columns did not line up with the data.
    features = sorted(es_params['features'])

    buffer_ = StringIO()
    writer = csv.writer(buffer_)
    writer.writerow(features)

    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    response = es_m.scroll()
    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']
    while hits:
        process_hits(hits, features, write=True, writer=writer)
        # Hand the buffered CSV text to the caller, then reset the buffer
        # so the next batch starts from an empty buffer.
        buffer_.seek(0)
        data = buffer_.read()
        buffer_.seek(0)
        buffer_.truncate()
        yield data
        response = es_m.scroll(scroll_id=scroll_id)
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
开发者ID:cbentes,项目名称:texta,代码行数:31,代码来源:export_pages.py
示例8: index
def index(request):
    """Render the searcher page.

    Collects the active dataset's fields, the user's saved searches and
    lexicons, completed language models, enabled preprocessors and task
    parameters, then renders 'searcher.html'.
    """
    ds = Datasets().activate_dataset(request.session)
    es_m = ds.build_manager(ES_Manager)
    fields = get_fields(es_m)
    datasets = Datasets().get_allowed_datasets(request.user)
    language_models = Task.objects.filter(task_type='train_model').filter(status__iexact='completed').order_by('-pk')
    preprocessors = collect_map_entries(preprocessor_map)
    # A plain copy suffices; the original's element-by-element identity
    # comprehension added nothing.
    enabled_preprocessors = list(preprocessors)
    # Hide fact graph if no fact_str_val type is present among the fields.
    display_fact_graph = 'hidden'
    for field in fields:
        if json.loads(field['data'])['type'] == "fact_str_val":
            display_fact_graph = ''
            break
    template_params = {'display_fact_graph': display_fact_graph,
                       'STATIC_URL': STATIC_URL,
                       'URL_PREFIX': URL_PREFIX,
                       'fields': fields,
                       'searches': Search.objects.filter(author=request.user),
                       'lexicons': Lexicon.objects.all().filter(author=request.user),
                       'dataset': ds.get_index(),
                       'language_models': language_models,
                       'allowed_datasets': datasets,
                       'enabled_preprocessors': enabled_preprocessors,
                       'task_params': task_params}
    template = loader.get_template('searcher.html')
    return HttpResponse(template.render(template_params, request))
开发者ID:cbentes,项目名称:texta,代码行数:33,代码来源:views.py
示例9: api_tag_list
def api_tag_list(request, user, params):
    """ Get list of available tags for API user (via auth_token)
    """
    dataset_id = params['dataset']
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Reject the request outright when the dataset could not be activated.
    if not ds.is_active():
        error = {'error': 'invalid dataset parameter'}
        return HttpResponse(json.dumps(error), status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    mass_helper = MassHelper(es_m)
    tag_set = mass_helper.get_unique_tags()
    tag_frequency = mass_helper.get_tag_frequency(tag_set)
    # Descriptions of tags that already have a trained tagger model.
    tag_models = {tagger.description for tagger in Task.objects.filter(task_type=TaskTypes.TRAIN_TAGGER.value)}

    data = [
        {'description': tag,
         'count': tag_frequency[tag],
         'has_model': tag in tag_models}
        for tag in sorted(tag_frequency.keys())
    ]
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:28,代码来源:api_v1.py
示例10: api_mass_train_tagger
def api_mass_train_tagger(request, user, params):
    """ Apply mass train tagger (via auth_token)
    """
    # Read all params
    dataset_id = params.get('dataset', None)
    selected_tags = set(params.get('tags', []))
    field = params.get("field", None)
    normalizer_opt = params.get("normalizer_opt", "0")
    classifier_opt = params.get("classifier_opt", "0")
    reductor_opt = params.get("reductor_opt", "0")
    extractor_opt = params.get("extractor_opt", "0")
    # NOTE(review): retrain_only is read but never used below — confirm intent.
    retrain_only = params.get("retrain_only", False)

    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    if not ds.is_active():
        # Bail out early on an unknown/inaccessible dataset.
        payload = json.dumps({'error': 'invalid dataset parameter'})
        return HttpResponse(payload, status=400, content_type='application/json')

    es_m = ds.build_manager(ES_Manager)
    helper = MassHelper(es_m)
    schedule_result = helper.schedule_tasks(selected_tags, normalizer_opt, classifier_opt, reductor_opt, extractor_opt, field, dataset_id, user)
    return HttpResponse(json.dumps(schedule_result), status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:27,代码来源:api_v1.py
示例11: export_matched_data
def export_matched_data(request):
    """Stream documents matched by the inclusive grammar as a CSV attachment."""
    search_id = request.GET['search_id']
    inclusive_metaquery = json.loads(request.GET['inclusive_grammar'])

    ds = Datasets().activate_dataset(request.session)
    component_query = ElasticGrammarQuery(inclusive_metaquery, None).generate()
    es_m = ds.build_manager(ES_Manager)

    if search_id != '-1':
        # Merge the grammar query into the previously saved search.
        saved_query = json.loads(Search.objects.get(pk=search_id).query)
        es_m.load_combined_query(saved_query)
        es_m.merge_combined_query_with_query_dict(component_query)
    else:
        # Full search: use the grammar query on its own.
        es_m.combined_query = component_query

    inclusive_instructions = generate_instructions(inclusive_metaquery)
    response = StreamingHttpResponse(
        get_all_matched_rows(es_m.combined_query['main'], request, inclusive_instructions),
        content_type='text/csv')
    response['Content-Disposition'] = 'attachment; filename="%s"' % ('extracted.csv')
    return response
开发者ID:cbentes,项目名称:texta,代码行数:25,代码来源:views.py
示例12: api_search_list
def api_search_list(request, user, params):
    """ Get list of available searches for API user (via auth_token)
    """
    # Read all params
    dataset_id = int(params['dataset'])
    ds = Datasets()
    ds.activate_datasets_by_id(dataset_id, use_default=False)
    # Check if dataset_id is valid
    if not ds.is_active():
        payload = json.dumps({'error': 'invalid dataset parameter'})
        return HttpResponse(payload, status=400, content_type='application/json')

    # Build response structure
    dataset = Dataset(pk=dataset_id)
    data = [
        {'dataset': dataset_id,
         'search': search.id,
         'description': search.description}
        for search in Search.objects.filter(dataset=dataset)
    ]
    return HttpResponse(json.dumps(data), status=200, content_type='application/json')
开发者ID:ekt68,项目名称:texta,代码行数:29,代码来源:api_v1.py
示例13: get_all_rows
def get_all_rows(es_params, request):
    """Yield CSV text chunks for every document matching *es_params*.

    Generator for a streaming response; uses the Elasticsearch scroll API
    to page through all hits.
    """
    selected_features = es_params['features']

    # In-memory CSV writer; the header row goes out first.
    csv_buffer = StringIO()
    csv_writer = csv.writer(csv_buffer)
    csv_writer.writerow(selected_features)

    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    es_m.set_query_parameter('size', ES_SCROLL_BATCH)

    # Initial scroll request.
    response = es_m.scroll()
    scroll_id = response['_scroll_id']
    hits = response['hits']['hits']
    while hits:
        process_hits(hits, selected_features, write=True, writer=csv_writer)
        # Flush whatever accumulated in the buffer to the streaming response.
        yield _get_buffer_data(csv_buffer)
        # Fetch the next scroll batch.
        response = es_m.scroll(scroll_id=scroll_id)
        hits = response['hits']['hits']
        scroll_id = response['_scroll_id']
开发者ID:ekt68,项目名称:texta,代码行数:29,代码来源:export_pages.py
示例14: get_query
def get_query(request):
    """Return (as JSON) the main part of the combined ES query built from POST params."""
    es_params = request.POST
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    # GET ONLY MAIN QUERY
    main_query = es_m.combined_query['main']
    return HttpResponse(json.dumps(main_query))
开发者ID:ekt68,项目名称:texta,代码行数:8,代码来源:views.py
示例15: __init__
def __init__(self, params):
    """Initialise the manager for the selected field, saved search and dataset."""
    field_descriptor = json.loads(params['field'])
    self.field = field_descriptor['path']
    saved_search = Search.objects.get(pk=int(params['search']))
    saved_query = json.loads(saved_search.query)
    # Define selected mapping
    ds = Datasets().activate_dataset_by_id(params['dataset'])
    self.es_m = ds.build_manager(ES_Manager)
    self.es_m.load_combined_query(saved_query)
开发者ID:cbentes,项目名称:texta,代码行数:8,代码来源:data_manager.py
示例16: get_grammar_listing
def get_grammar_listing(request):
    """List the user's grammars for the active dataset/mapping, newest first, as JSON."""
    ds = Datasets().activate_dataset(request.session)
    dataset = ds.get_index()
    mapping = ds.get_mapping()
    grammars = Grammar.objects.filter(
        author=request.user,
        dataset__index=dataset,
        dataset__mapping=mapping).order_by('-last_modified')
    listing = [
        {'id': grammar.id,
         'name': grammar.name,
         'last_modified': grammar.last_modified.strftime("%d/%m/%y %H:%M:%S")}
        for grammar in grammars
    ]
    return HttpResponse(json.dumps(listing))
开发者ID:cbentes,项目名称:texta,代码行数:9,代码来源:views.py
示例17: get_example_texts
def get_example_texts(request, field, value):
    """Return up to 10 highlighted snippets where *field* matches *value*."""
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    query = {"size": 10, "highlight": {"fields": {field: {}}}, "query": {"match": {field: value}}}
    response = es_m.perform_query(query)
    # First highlighted fragment of every highlighted field of every hit.
    return [fragments[0]
            for hit in response['hits']['hits']
            for fragments in hit['highlight'].values()]
开发者ID:ekt68,项目名称:texta,代码行数:10,代码来源:views.py
示例18: parse_request
def parse_request(self, request):
    """Populate lookup settings and the ES manager from the POST request."""
    post = request.POST
    self.lookup_types = post['lookup_types'].split(',')
    self.key_constraints = post['key_constraints'].split(',')
    # Only the last submitted line is the text being autocompleted.
    self.content = post['content'].split('\n')[-1].strip()
    ds = Datasets().activate_datasets(request.session)
    self.es_m = ds.build_manager(ES_Manager)
    self.user = request.user
开发者ID:ekt68,项目名称:texta,代码行数:10,代码来源:autocomplete.py
示例19: remove_by_query
def remove_by_query(request):
    """Remove all documents matching the posted query and return the result."""
    es_params = request.POST
    ds = Datasets().activate_datasets(request.session)
    es_m = ds.build_manager(ES_Manager)
    es_m.build(es_params)
    # Process(target=remove_worker,args=(es_m,'notimetothink')).start()
    result = remove_worker(es_m, 'notimetothink')
    return HttpResponse(result)
开发者ID:ekt68,项目名称:texta,代码行数:10,代码来源:views.py
示例20: __init__
def __init__(self, parameters, callback_progress=None):
    """Set up the language-model manager.

    parameters: dict with 'dataset' (dataset id), 'field' (JSON string
        containing a 'path' key) and whatever self._parse_query consumes.
    callback_progress: optional progress reporter; when given, its total
        is initialised to the number of matching documents.
    """
    ds = Datasets().activate_dataset_by_id(parameters['dataset'])
    query = self._parse_query(parameters)
    self.field = json.loads(parameters['field'])['path']
    self.es_m = ds.build_manager(ES_Manager)
    self.es_m.load_combined_query(query)
    self.callback_progress = callback_progress
    if self.callback_progress:
        total_elements = self.get_total_documents()
        callback_progress.set_total(total_elements)
开发者ID:cbentes,项目名称:texta,代码行数:12,代码来源:language_model_manager.py
注:本文中的utils.datasets.Datasets类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论