本文整理汇总了Python中user_portrait.global_utils.ES_CLUSTER_FLOW1类的典型用法代码示例。如果您正苦于以下问题:Python ES_CLUSTER_FLOW1类的具体用法?Python ES_CLUSTER_FLOW1怎么用?Python ES_CLUSTER_FLOW1使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ES_CLUSTER_FLOW1类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_user_influence
def get_user_influence(uid, date):
    """Return a user's influence (BCI) fields and rank for one day.

    The index queried is ``pre_index`` followed by ``date`` with its dashes
    removed.

    Args:
        uid: document id of the user in the day's index.
        date: day to look up; converted with ``str()`` and stripped of "-".

    Returns:
        dict with one entry per key of ``BCI_LIST`` (0 when the key or the
        whole document is missing), plus ``total_count`` (number of documents
        in the index) and ``order_count`` (1 + the number of documents whose
        ``user_index`` is strictly greater than this user's).

    Raises:
        KeyError: if ``BCI_LIST`` does not contain ``"user_index"``.
    """
    index_name = pre_index + str(date).replace("-", "")
    try:
        bci_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        # Best effort: a missing document/index is reported as all-zero
        # values.  Narrowed from a bare ``except`` so that KeyboardInterrupt
        # and SystemExit are no longer swallowed.
        bci_info = {}

    result = {key: bci_info.get(key, 0) for key in BCI_LIST}
    user_index = result["user_index"]

    # Counts the documents ranked strictly above this user.
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "range": {
                        "user_index": {"gt": user_index}
                    }
                }
            }
        }
    }
    total_count = es_cluster.count(index=index_name, doc_type=influence_doctype)['count']
    order_count = es_cluster.count(index=index_name, doc_type=influence_doctype, body=query_body)['count']
    result["total_count"] = total_count
    result["order_count"] = order_count + 1
    return result
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:32,代码来源:personal_influence.py
示例2: query_brust
def query_brust(index_name, field_name, range_1=0, range_2=50000, count=0):
    """Count or list the "bci" documents whose field lies in [range_1, range_2).

    Args:
        index_name: index to query.
        field_name: numeric field the range filter is applied to.
        range_1: inclusive lower bound.
        range_2: exclusive upper bound.
        count: when equal to 1 only the number of matches is returned.

    Returns:
        The match count when ``count == 1``; otherwise the ``_id`` values of
        the hits returned by a search limited to 1000 documents.
    """
    interval = {"gte": range_1, "lt": range_2}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {field_name: interval}},
            }
        }
    }

    if count == 1:
        return es.count(index=index_name, doc_type="bci", body=query_body)['count']

    query_body['size'] = 1000
    hits = es.search(index=index_name, doc_type="bci", body=query_body)['hits']['hits']
    return [hit['_id'] for hit in hits]
开发者ID:ztybuaa,项目名称:user_portrait,代码行数:32,代码来源:search_user_index_function.py
示例3: search_portrait_history_active_info
def search_portrait_history_active_info(uid, date, index_name="copy_user_portrait", doctype="user"):
    """Look up a user's portrait document and the per-field maxima of the index.

    Args:
        uid: document id of the user.
        date: passed to ``time_series``; example format: 20130901.
        index_name: index holding the portrait documents.
        doctype: document type inside that index.

    Returns:
        "NotFound" when the user's document does not exist, ``None`` when the
        lookup fails for any other reason.
        NOTE(review): the visible excerpt ends after the maxima are collected,
        so a successful run currently also falls off the end and returns
        ``None`` -- confirm against the full original function.
    """
    # date format example: 20130901
    date_list = time_series(date)
    try:
        # NOTE(review): ``result`` is not used in the visible part of the
        # function; kept because the lookup decides the early returns.
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except Exception:
        return None

    date_max = {}
    for date_str in date_list:
        # Top document when sorting descending on this field.
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{date_str: {'order': 'desc'}}]
        }
        # The former ``except Exception, e: raise e`` wrapper was a no-op
        # (and Python-2-only syntax); errors simply propagate.
        max_item = es.search(index=index_name, doc_type=doctype, body=query_body)['hits']['hits']
        date_max[date_str] = max_item[0]['_source'][date_str]
开发者ID:taozhiiq,项目名称:user_portrait,代码行数:25,代码来源:search_user_index_function.py
示例4: search_top_index
def search_top_index(index_name, top_k=1, index_type="bci", top=False, sort_order="user_index"):
    """Return the top-ranked users of an index, or just the top value.

    Args:
        index_name: index to search.
        top_k: number of hits requested.
        index_type: document type to search.
        top: when true, return only the ``sort_order`` value of the first hit.
        sort_order: field used for the descending sort.

    Returns:
        With ``top`` true: the ``sort_order`` value of the best document.
        Otherwise a list of rows ``[rank, photo_url, uid, nick_name, ...]``.
        For the plain score fields the row is extended with the value and a
        "1"/"0" flag telling whether the uid was found in ``user_portrait``;
        for the two ``*_top_number`` fields a weibo URL is inserted before
        that flag.  Any other ``sort_order`` leaves the row at four items.
        An empty list is returned when the search has no hits.

    Raises:
        IndexError: with ``top`` true and no hits at all.
    """
    query_body = {
        "query": {
            "match_all": {}
        },
        "size": top_k,
        "sort": [{sort_order: {"order": "desc"}}]
    }
    hits = es.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
    if top:
        return hits[0]['_source'][sort_order]
    if not hits:
        # A multi-get with an empty id list is pointless (and is rejected by
        # the server), so stop here.
        return []

    uid_list = [hit['_id'] for hit in hits]
    profile_result = es_profile.mget(index="weibo_user", doc_type="user", body={"ids": uid_list}, _source=True)['docs']
    portrait_result = es_portrait.mget(index="user_portrait", doc_type="user", body={"ids": uid_list}, _source=True)['docs']

    plain_fields = ("user_index", "origin_weibo_retweeted_brust_average", "origin_weibo_comment_brust_average")
    # sort field -> field holding the mid of the corresponding top weibo
    top_weibo_fields = {
        "origin_weibo_retweeted_top_number": "origin_weibo_top_retweeted_id",
        "origin_weibo_comment_top_number": "origin_weibo_top_comment_id",
    }

    result = []
    for rank, (hit, profile, portrait) in enumerate(zip(hits, profile_result, portrait_result), 1):
        photo_url = ''
        nick_name = ''
        if profile['found']:
            photo_url = profile['_source'].get('photo_url', '')
            nick_name = profile['_source'].get('nick_name', '')
        uid = hit.get('_id', '')
        info = [rank, photo_url, uid, nick_name]

        source = hit['_source']
        if sort_order in plain_fields:
            info.append(source[sort_order])
            info.append("1" if portrait['found'] else "0")
        elif sort_order in top_weibo_fields:
            info.append(source[sort_order])
            mid = source[top_weibo_fields[sort_order]]
            info.append(weiboinfo2url(uid, mid))
            info.append("1" if portrait['found'] else "0")
        result.append(info)
    return result
开发者ID:ztybuaa,项目名称:user_portrait,代码行数:57,代码来源:search_user_index_function.py
示例5: update_record_index
def update_record_index(uid_list):
    """Reset ``low_number`` to 0 for every uid with one bulk request.

    Each uid is turned into ``{'uid': str(uid), 'low_number': 0}`` and passed
    to ``expand_update_action``; the first two items it returns are queued as
    the bulk action line and its payload.

    NOTE(review): the source lost its indentation; the bulk call is assumed
    to run once after the loop, not once per uid -- confirm.

    Args:
        uid_list: iterable of user ids.

    Returns:
        None.  Nothing is sent when ``uid_list`` is empty.
    """
    bulk_action = []
    for each in uid_list:
        info = {'uid': str(each), 'low_number': 0}
        xdata = expand_update_action(info)
        bulk_action.extend([xdata[0], xdata[1]])

    if not bulk_action:
        # An empty bulk body is not a valid request; skip the round trip.
        return
    es.bulk(bulk_action, index=index_destination, doc_type=index_destination_doctype, timeout=30)
开发者ID:huxiaoqian,项目名称:revised_user_portrait,代码行数:10,代码来源:update_activeness_record.py
示例6: influenced_detail
def influenced_detail(uid, date, style):
    """Return the text details of a user's top weibo for one influence style.

    Args:
        uid: document id of the user in the day's influence index.
        date: day to look up; dashes are removed to build the index name.
        style: selects the JSON field that is decoded and handed to
            ``get_text``: 0 -> ``origin_weibo_retweeted_top``,
            1 -> ``origin_weibo_comment_top``,
            2 -> ``retweeted_weibo_retweeted_top``,
            anything else -> ``retweeted_weibo_comment_top``.

    Returns:
        Whatever ``get_text`` returns, or ``{}`` when the user's document
        cannot be fetched.
    """
    index_name = pre_index + str(date).replace("-", "")
    style = int(style)
    try:
        user_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        # Narrowed from a bare ``except``; a failed lookup still yields {}.
        return {}

    top_fields = (
        "origin_weibo_retweeted_top",
        "origin_weibo_comment_top",
        "retweeted_weibo_retweeted_top",
        "retweeted_weibo_comment_top",
    )
    field = top_fields[style] if style in (0, 1, 2) else top_fields[3]
    # Only the requested field is decoded; the other three were parsed and
    # thrown away before.
    return get_text(json.loads(user_info[field]), date, user_info, style)
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:29,代码来源:personal_influence.py
示例7: tag_vector
def tag_vector(uid, date):
    """Return a one-element list holding the user's influence tag for a day.

    The tag is chosen from ``influence_tag`` by comparing the total number of
    retweets with ``retweeted_threshold`` and the total number of comments
    with ``comment_threshold``:

        retweets >= threshold, comments >= threshold -> '3'
        retweets >= threshold only                   -> '1'
        comments >= threshold only                   -> '2'
        neither                                      -> '4'

    When the user's document cannot be fetched the tag is
    ``influence_tag["0"]``.

    Args:
        uid: document id of the user in the day's influence index.
        date: day to look up; dashes are removed to build the index name.
    """
    index_name = pre_index + str(date).replace('-', '')
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        return [influence_tag["0"]]

    def _total(field):
        # Each *_detail field is a JSON object; its values are summed.
        return sum(json.loads(bci_result[field]).values())

    # Fix: the retweet total used to add the origin *comment* counts and the
    # comment total used to add the retweeted *retweet* counts.  Retweets and
    # comments are now summed separately over origin and retweeted weibo.
    sum_retweeted = _total("origin_weibo_retweeted_detail") + _total("retweeted_weibo_retweeted_detail")
    sum_comment = _total("origin_weibo_comment_detail") + _total("retweeted_weibo_comment_detail")

    many_retweets = sum_retweeted >= retweeted_threshold
    many_comments = sum_comment >= comment_threshold
    if many_retweets:
        tag_key = '3' if many_comments else '1'
    else:
        tag_key = '2' if many_comments else '4'
    return [influence_tag[tag_key]]
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:32,代码来源:personal_influence.py
示例8: statistics_influence_people
def statistics_influence_people(uid, date, style):
    """Describe the people who retweeted or commented on a user's weibo.

    Args:
        uid: document id of the user in the day's influence index.
        date: day to look up (string; also appended to ``pre_text_index``).
        style: ``0`` selects retweets, any other value selects comments.

    Returns:
        The result of ``influenced_user_detail`` with an added
        ``total_number`` entry (sum of the selected counters), or ``{}`` when
        the user's influence document cannot be fetched.
    """
    results = {}
    index_name = pre_index + str(date).replace("-", "")
    index_flow_text = pre_text_index + date
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        return results

    origin_retweeted = json.loads(bci_result["origin_weibo_retweeted_detail"])
    retweeted_retweeted = json.loads(bci_result["retweeted_weibo_retweeted_detail"])
    origin_comment = json.loads(bci_result["origin_weibo_comment_detail"])
    retweeted_comment = json.loads(bci_result["retweeted_weibo_comment_detail"])

    if int(style) == 0:  # retweeted
        origin_counts, retweeted_counts, message_type = origin_retweeted, retweeted_retweeted, 3
    else:  # comment
        origin_counts, retweeted_counts, message_type = origin_comment, retweeted_comment, 2
    total_number = sum(origin_counts.values()) + sum(retweeted_counts.values())

    origin_mid = filter_mid(origin_counts) if origin_counts else []
    retweeted_mid = filter_mid(retweeted_counts) if retweeted_counts else []

    # Map each retweeted weibo back to the mid of its root weibo.
    retweeted_origin = []
    if retweeted_mid:
        text_result = es.mget(
            index=index_flow_text, doc_type=flow_text_index_type, body={"ids": retweeted_mid}
        )["docs"]
        for item in text_result:
            # Fix: multi-get documents carry their body under "_source";
            # reading "source" made every root mid fall back to "0".
            retweeted_origin.append(item.get("_source", {}).get("root_mid", "0"))

    results = influenced_user_detail(uid, date, origin_mid, retweeted_origin, message_type)
    results["total_number"] = total_number
    return results
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:59,代码来源:personal_influence.py
示例9: get_user_detail
def get_user_detail(date, input_result, status):
    """Build display rows (profile + influence) for a set of users.

    Args:
        date: "YYYY-MM-DD"-style day, or ``'all'`` to use today's index.
        input_result: for ``'show_in'`` a sequence of uids; for
            ``'show_compute'`` a mapping uid -> JSON list whose first two
            items are the in-date and the compute status; for
            ``'show_in_history'`` a mapping uid -> in-status.
        status: one of ``'show_in'``, ``'show_compute'``,
            ``'show_in_history'``.

    Returns:
        A list of rows starting with
        ``[uid, uname, location, fansnum, statusnum, influence]``;
        ``'show_compute'`` appends ``in_date, compute_status`` (a status of
        '1' is reported as '3') and ``'show_in_history'`` appends
        ``in_status``.  Missing profile/influence values are ``''``.

    Raises:
        ValueError: for an unknown ``status`` (this used to surface as an
            unbound-variable error further down).
    """
    results = []
    if status == 'show_in':
        uid_list = list(input_result)
    elif status in ('show_compute', 'show_in_history'):
        uid_list = list(input_result.keys())
    else:
        raise ValueError("unknown status: %r" % (status,))
    if not uid_list:
        # Nothing to look up; also avoids a multi-get with no ids.
        return results

    if date != 'all':
        index_name = 'bci_' + ''.join(date.split('-'))
    else:
        now_date = ts2datetime(time.time())
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)

    for uid, bci_dict, profile_dict in zip(uid_list, user_bci_result, user_profile_result):
        bci_source = bci_dict.get('_source')
        if bci_source:
            # float() keeps the ratio from being truncated by Python 2
            # integer division when both values are ints.
            ratio = float(bci_source['user_index']) / max_evaluate_influ['user_index']
            influence = math.log(ratio * 9 + 1, 10) * 100
        else:
            influence = ''

        profile_source = profile_dict.get('_source')
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = location = fansnum = statusnum = ''

        row = [uid, uname, location, fansnum, statusnum, influence]
        if status == 'show_compute':
            record = json.loads(input_result[uid])
            in_date = record[0]
            compute_status = record[1]
            if compute_status == '1':
                compute_status = '3'
            row.extend([in_date, compute_status])
        elif status == 'show_in_history':
            row.append(input_result[uid])
        results.append(row)
    return results
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:59,代码来源:utils.py
示例10: get_recommentation
def get_recommentation(submit_user):
    """List the users recommended by ``submit_user`` over the last 7 days.

    For each of the seven days ending at "now" (``time.time()`` when
    ``RUN_TYPE`` is truthy, otherwise ``RUN_TEST_TIME``) the redis hash
    ``recomment_<submit_user>_<date>`` supplies the uids; their influence is
    read from the BCI index of the previous day.  Days whose BCI index does
    not exist, or that have no recommendations, are skipped.

    Args:
        submit_user: name used to build the redis hash key.

    Returns:
        A list of rows ``[date, uid, uname, location, fansnum, statusnum,
        influence, in_portrait]`` where ``in_portrait`` is "1" when the uid
        is a key of the redis hash "compute", else "0".  Missing
        profile/influence values are ``''``.
    """
    if RUN_TYPE:
        now_ts = time.time()
    else:
        now_ts = datetime2ts(RUN_TEST_TIME)
    in_portrait_set = set(r.hkeys("compute"))

    result = []
    for day_offset in range(7):
        iter_ts = now_ts - day_offset * DAY
        iter_date = ts2datetime(iter_ts)
        recomment_key = "recomment_" + submit_user + "_" + str(iter_date)
        bci_date = ts2datetime(iter_ts - DAY)
        recommended_uids = r.hkeys(recomment_key)
        bci_index_name = "bci_" + bci_date.replace('-', '')
        if not es_cluster.indices.exists(index=bci_index_name):
            continue
        if not recommended_uids:
            continue

        user_bci_result = es_cluster.mget(index=bci_index_name, doc_type="bci", body={'ids': recommended_uids}, _source=True)['docs']
        user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': recommended_uids}, _source=True)['docs']
        max_evaluate_influ = get_evaluate_max(bci_index_name)

        for uid, bci_dict, profile_dict in zip(recommended_uids, user_bci_result, user_profile_result):
            bci_source = bci_dict.get('_source')
            if bci_source:
                # float() avoids Python 2 integer division truncating the
                # ratio to 0 when both values are ints.
                ratio = float(bci_source['user_index']) / max_evaluate_influ['user_index']
                influence = math.log(ratio * 9 + 1, 10) * 100
            else:
                influence = ''

            profile_source = profile_dict.get('_source')
            if profile_source:
                uname = profile_source['nick_name']
                location = profile_source['user_location']
                fansnum = profile_source['fansnum']
                statusnum = profile_source['statusnum']
            else:
                uname = location = fansnum = statusnum = ''

            in_portrait = "1" if uid in in_portrait_set else "0"
            result.append([iter_date, uid, uname, location, fansnum, statusnum, influence, in_portrait])
    return result
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:58,代码来源:utils.py
示例11: search_k
def search_k(es, index_name, index_type, start, field="user_index", size=100):
    """Fetch one page of documents sorted descending on ``field``.

    Args:
        es: client object exposing ``search(index=, doc_type=, body=)``.
        index_name: index to search.
        index_type: document type to search.
        start: offset of the first hit (the query's ``from``).
        field: field used for the descending sort.
        size: page size.

    Returns:
        The ``_source`` of every returned hit, in hit order.
    """
    page_query = {
        "query": {"match_all": {}},
        "size": size,
        "from": start,
        "sort": [{field: {"order": "desc"}}],
    }
    response = es.search(index=index_name, doc_type=index_type, body=page_query)
    return [hit['_source'] for hit in response['hits']['hits']]
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:17,代码来源:rank_portrait_in_active_user.py
示例12: get_evaluate_max
def get_evaluate_max(index_name):
    """Return the largest value of each evaluated field in a BCI index.

    Args:
        index_name: index to search (document type "bci").

    Returns:
        dict mapping each evaluated field (currently only ``'user_index'``)
        to the value held by the top document of a descending sort.

    Raises:
        IndexError: when the index contains no documents.
    """
    max_result = {}
    index_type = 'bci'
    evaluate_index = ['user_index']
    for evaluate in evaluate_index:
        query_body = {
            'query': {
                'match_all': {}
            },
            'size': 1,
            'sort': [{evaluate: {'order': 'desc'}}]
        }
        # The former ``except Exception, e: raise e`` wrapper was a no-op
        # (and Python-2-only syntax); errors simply propagate.
        result = es_cluster.search(index=index_name, doc_type=index_type, body=query_body)['hits']['hits']
        max_result[evaluate] = result[0]['_source'][evaluate]
    # Callers subscript the result (``...['user_index']``), so the dict has
    # to be returned; the excerpt stopped before this line.
    return max_result
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:18,代码来源:utils.py
示例13: comment_on_influence
def comment_on_influence(uid, date):
    """Build the textual conclusions about a user's influence for one day.

    Args:
        uid: document id of the user in the day's influence index.
        date: day to look up; dashes are removed to build the index name.

    Returns:
        ``[result, underline]``.  ``result`` starts with the overall
        conclusion chosen from ``CURRENT_INFLUENCE_CONCLUSION`` by comparing
        ``user_index`` with the five ``CURRNET_INFLUENCE_THRESHOULD`` bounds,
        followed by two entries per indicator (total conclusion, burst
        conclusion; ``''`` where a threshold is not exceeded).  ``underline``
        has one entry per indicator.  When the user's document cannot be
        fetched, ``result`` holds only conclusion '0' and ``underline`` is
        empty.
    """
    # The unused ``pre_text_index + date`` concatenation was dropped: it
    # raised TypeError for a non-string ``date`` without serving any purpose.
    index_name = pre_index + str(date).replace('-', '')
    result = []
    underline = []
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        result.append(CURRENT_INFLUENCE_CONCLUSION['0'])
        return [result, underline]

    user_index = bci_result['user_index']
    # Level = position of the first bound the value stays below; 5 when it
    # reaches the last bound.
    level = 5
    for candidate in range(5):
        if user_index < CURRNET_INFLUENCE_THRESHOULD[candidate]:
            level = candidate
            break
    result.append(CURRENT_INFLUENCE_CONCLUSION[str(level)])

    for i in range(4):
        if bci_result[INFLUENCE_TOTAL_LIST[i]] > INFLUENCE_TOTAL_THRESHOULD[i]:
            result.append(INFLUENCE_TOTAL_CONCLUSION[i])
            if bci_result[INFLUENCE_BRUST_LIST[i]] > INFLUENCE_BRUST_THRESHOULD[i]:
                result.append(INFLUENCE_BRUST_CONCLUSION[i])
                underline.append(UNDERLINE_CONCLUSION[i])
            else:
                result.append('')
                underline.append('')
        else:
            result.extend(['', ''])
            underline.append('')
    return [result, underline]
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:43,代码来源:personal_influence.py
示例14: comment_on_influence
def comment_on_influence(uid, date):
    """Summarise a user's influence for one day as a list of conclusions.

    Args:
        uid: document id of the user in the day's influence index.
        date: day to look up; dashes are removed to build the index name.

    Returns:
        ``[result, underline]``.  ``result[0]`` is the overall conclusion
        picked from ``CURRENT_INFLUENCE_CONCLUSION`` ("0".."5") according to
        where ``user_index`` falls among the five
        ``CURRNET_INFLUENCE_THRESHOULD`` bounds.  For each of the four
        indicators two more entries follow (total conclusion and burst
        conclusion, ``""`` when the respective threshold is not exceeded) and
        one entry is added to ``underline``.  If the user's document cannot
        be fetched, only conclusion "0" is returned with an empty
        ``underline``.
    """
    # The unused ``pre_text_index + date`` concatenation was removed; it
    # could only fail (TypeError for a non-string ``date``).
    index_name = pre_index + str(date).replace("-", "")
    result = []
    underline = []
    try:
        bci_result = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        result.append(CURRENT_INFLUENCE_CONCLUSION["0"])
        return [result, underline]

    user_index = bci_result["user_index"]
    # Walk the bounds in order; the first one the value stays below decides
    # the level, and reaching the last bound means level 5.
    level = 5
    for position in range(5):
        if user_index < CURRNET_INFLUENCE_THRESHOULD[position]:
            level = position
            break
    result.append(CURRENT_INFLUENCE_CONCLUSION[str(level)])

    for i in range(4):
        total_exceeded = bci_result[INFLUENCE_TOTAL_LIST[i]] > INFLUENCE_TOTAL_THRESHOULD[i]
        if not total_exceeded:
            result.extend(["", ""])
            underline.append("")
            continue
        result.append(INFLUENCE_TOTAL_CONCLUSION[i])
        if bci_result[INFLUENCE_BRUST_LIST[i]] > INFLUENCE_BRUST_THRESHOULD[i]:
            result.append(INFLUENCE_BRUST_CONCLUSION[i])
            underline.append(UNDERLINE_CONCLUSION[i])
        else:
            result.append("")
            underline.append("")
    return [result, underline]
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:43,代码来源:personal_influence.py
示例15: count_es
def count_es(es, index_name, doctype, sort_order="user_index", range_1=0, range_2=3000):
    """Count the documents whose field value lies in [range_1, range_2).

    Args:
        es: client object exposing ``count(index=, doc_type=, body=)``.
        index_name: index to query.
        doctype: document type to query.
        sort_order: name of the field the range filter is applied to.
        range_1: inclusive lower bound.
        range_2: exclusive upper bound.

    Returns:
        The ``'count'`` entry of the client's response.
    """
    bounds = {"gte": range_1, "lt": range_2}
    query_body = {
        "query": {
            "filtered": {
                "query": {"match_all": {}},
                "filter": {"range": {sort_order: bounds}},
            }
        }
    }
    response = es.count(index=index_name, doc_type=doctype, body=query_body)
    return response['count']
开发者ID:ztybuaa,项目名称:user_portrait,代码行数:23,代码来源:search_user_index_function.py
示例16: search_portrait_history_active_info
def search_portrait_history_active_info(uid, date, index_name="copy_user_portrait", doctype="user"):
    """Return a user's recent influence series and a short description of it.

    The fields named by ``time_series(date)`` are read from the user's
    portrait document (0 where a field is missing) in sorted field order.

    Args:
        uid: document id of the user.
        date: passed to ``time_series``; example format: 20130901.
        index_name: index holding the portrait documents.
        doctype: document type inside that index.

    Returns:
        ``[in_list, description]`` where ``in_list`` is the value series and
        ``description`` is a two-item list (prefix, label).  The label
        combines stability (max - min <= 400 is stable, otherwise
        fluctuating) with the level of the mean (>= 900 high, >= 500
        general, otherwise low).
        "NotFound" when the document does not exist, ``None`` when the
        lookup fails for any other reason.

    Raises:
        ValueError: when ``time_series`` yields no fields.
    """
    # date format example: 20130901
    date_list = time_series(date)
    try:
        result = es.get(index=index_name, doc_type=doctype, id=uid, _source=True)['_source']
    except NotFoundError:
        return "NotFound"
    except Exception:
        return None

    in_list = [result.get(day, 0) for day in sorted(date_list)]
    max_influence = max(in_list)
    min_influence = min(in_list)
    # Average over the actual number of samples instead of a hard-coded 7,
    # so the mean stays correct if the series length ever differs.
    ave_influence = sum(in_list) / float(len(in_list))

    stability = u'平稳' if max_influence - min_influence <= 400 else u'波动'
    if ave_influence >= 900:
        level = u'高影响力'
    elif ave_influence >= 500:
        level = u'一般影响力'
    else:
        level = u'低影响力'
    mark = stability + level

    description = [u'该用户为', mark]
    return [in_list, description]
开发者ID:ztybuaa,项目名称:user_portrait,代码行数:36,代码来源:search_user_index_function.py
示例17: search_influence_detail
def search_influence_detail(uid_list, index_name, doctype):
    """Return the ``_source`` of the first document of a multi-get.

    Args:
        uid_list: ids sent to the multi-get; only the first returned
            document is used.
        index_name: index to read from.
        doctype: document type to read.

    Returns:
        The ``_source`` dict of the first returned document.
    """
    response = es.mget(index=index_name, doc_type=doctype, body={"ids": uid_list}, _source=True)
    first_doc = response["docs"][0]
    return first_doc['_source']
开发者ID:ztybuaa,项目名称:user_portrait,代码行数:4,代码来源:search_user_index_function.py
示例18: influenced_detail
def influenced_detail(uid, date, style):
    """Return text details of a user's 20 most influential weibo of one kind.

    Args:
        uid: document id of the user in the day's influence index.
        date: day string; appended to "flow_text_" for the text index and,
            with dashes removed, to ``pre_index`` for the influence index.
        style: selects the counter that is ranked:
            0 -> retweets of original weibo, 1 -> comments on original weibo,
            2 -> retweets of retweeted weibo, anything else -> comments on
            retweeted weibo.

    Returns:
        Whatever ``get_text`` returns for the (at most 20) highest-count
        ``(mid, count)`` pairs whose mid is one of the user's own weibo of
        the matching message type, or ``{}`` when the user's influence
        document cannot be fetched.
    """
    index_name = pre_index + str(date).replace('-', '')
    index_text = "flow_text_" + date
    style = int(style)
    try:
        user_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        return {}

    # (counter field, message_type of the weibo the counter refers to)
    selectors = (
        ("origin_weibo_retweeted_detail", 1),
        ("origin_weibo_comment_detail", 1),
        ("retweeted_weibo_retweeted_detail", 3),
        ("retweeted_weibo_comment_detail", 3),
    )
    field, message_type = selectors[style] if style in (0, 1, 2) else selectors[3]

    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"message_type": message_type}},
                            {"term": {"uid": uid}}
                        ]
                    }
                }
            }
        },
        "size": 10000
    }
    hits = es.search(index=index_text, doc_type="text", body=query_body)['hits']['hits']
    # Fix: the retweeted-id set used to be filled by iterating over itself
    # (always empty), so every retweeted candidate was discarded.
    own_mids = set(hit['_id'] for hit in hits)

    counts = json.loads(user_info[field])
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    # Fix: filtering builds a new list; removing items from the list while
    # iterating over it skipped the element following each removal.
    ranked = [pair for pair in ranked if pair[0] in own_mids]

    return get_text(ranked[:20], date, user_info, style)
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:97,代码来源:personal_influence.py
示例19: get_user_detail
def get_user_detail(date, input_result, status, user_type="influence", auth=""):
    """Build display rows (profile + influence) for a set of users.

    Args:
        date: day string, or ``'all'`` to read today's BCI index; for any
            other value the BCI index of the previous day is used.
        input_result: for ``'show_in'`` a sequence of uids; for
            ``'show_compute'`` a mapping uid -> JSON list whose first two
            items are the in-date and the compute status; for
            ``'show_in_history'`` a mapping uid -> in-status.
        status: one of ``'show_in'``, ``'show_compute'``,
            ``'show_in_history'``.
        user_type: ``"sensitive"`` appends the user's sensitive words (keys
            of the JSON stored in the redis hash ``sensitive_<ts>``) to
            ``'show_in'`` and ``'show_in_history'`` rows.
        auth: when truthy, ``'show_in'`` rows are extended with the
            ``system`` value, the distinct recommenders and the number of
            recommendations read from ``submit_recomment_<date>``.

    Returns:
        A list of rows starting with
        ``[uid, uname, location, fansnum, statusnum, influence]`` followed by
        the status-specific extras.  Missing profile/influence values are
        ``''``.

    Raises:
        ValueError: for an unknown ``status`` (this used to surface as an
            unbound-variable error further down).
    """
    results = []
    if status == 'show_in':
        uid_list = list(input_result)
    elif status in ('show_compute', 'show_in_history'):
        uid_list = list(input_result.keys())
    else:
        raise ValueError("unknown status: %r" % (status,))
    if not uid_list:
        # Nothing to look up; also avoids a multi-get with no ids.
        return results

    if date != 'all':
        # Only computed here: converting the literal 'all' to a timestamp
        # was unnecessary work on a value that is not a date.
        bci_date = ts2datetime(datetime2ts(date) - DAY)
        index_name = 'bci_' + ''.join(bci_date.split('-'))
    else:
        now_date = ts2datetime(time.time())
        index_name = 'bci_' + ''.join(now_date.split('-'))
    index_type = 'bci'
    user_bci_result = es_cluster.mget(index=index_name, doc_type=index_type, body={'ids': uid_list}, _source=True)['docs']
    user_profile_result = es_user_profile.mget(index='weibo_user', doc_type='user', body={'ids': uid_list}, _source=True)['docs']
    max_evaluate_influ = get_evaluate_max(index_name)

    def _sensitive_words(user_id):
        # Keys of the JSON stored for the user on the previous day, or []
        # when redis has no entry.
        tmp_ts = datetime2ts(date) - DAY
        tmp_data = r_cluster.hget("sensitive_" + str(tmp_ts), user_id)
        if tmp_data:
            return list(json.loads(tmp_data).keys())
        return []

    for uid, bci_dict, profile_dict in zip(uid_list, user_bci_result, user_profile_result):
        bci_source = bci_dict.get('_source')
        if bci_source:
            # float() keeps the ratio from being truncated by Python 2
            # integer division when both values are ints.
            ratio = float(bci_source['user_index']) / max_evaluate_influ['user_index']
            influence = math.log(ratio * 9 + 1, 10) * 100
        else:
            influence = ''

        profile_source = profile_dict.get('_source')
        if profile_source:
            uname = profile_source['nick_name']
            location = profile_source['user_location']
            fansnum = profile_source['fansnum']
            statusnum = profile_source['statusnum']
        else:
            uname = location = fansnum = statusnum = ''

        row = [uid, uname, location, fansnum, statusnum, influence]
        if status == 'show_in':
            if user_type == "sensitive":
                row.append(_sensitive_words(uid))
            results.append(row)
            if auth:
                hashname_submit = "submit_recomment_" + date
                tmp_data = json.loads(r.hget(hashname_submit, uid))
                recommend_list = (tmp_data['operation']).split('&')
                results[-1].extend([
                    tmp_data['system'],
                    list(set(recommend_list)),
                    len(recommend_list),
                ])
        elif status == 'show_compute':
            record = json.loads(input_result[uid])
            in_date = record[0]
            compute_status = record[1]
            if compute_status == '1':
                compute_status = '3'
            row.extend([in_date, compute_status])
            results.append(row)
        else:  # 'show_in_history'
            row.append(input_result[uid])
            if user_type == "sensitive":
                # Fix: with no redis entry the word list used to be unbound
                # (or left over from the previous user); it is now [].
                row.append(_sensitive_words(uid))
            results.append(row)
    return results
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:87,代码来源:utils.py
示例20: statistics_influence_people
def statistics_influence_people(uid, date, style):
    """Describe the people who retweeted or commented on a user's weibo.

    The user's original weibo (message_type 1) and the root mids of the
    user's retweets (message_type 3) are collected from the day's flow-text
    index and handed to ``influenced_user_detail``.

    Args:
        uid: document id of the user in the day's influence index.
        date: day string; appended to ``pre_text_index`` for the text index
            and, with dashes removed, to ``pre_index`` for the influence
            index.
        style: ``0`` selects retweets (message type 3), any other value
            selects comments (message type 2).

    Returns:
        The result of ``influenced_user_detail``, or ``{}`` when the user's
        influence document cannot be fetched.
    """
    results = {}
    index_name = pre_index + str(date).replace('-', '')
    index_flow_text = pre_text_index + date
    try:
        # Only an existence check: users without an influence document for
        # the day get an empty result.
        es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)
    except Exception:
        return results

    def _own_weibo(message_type):
        # Up to 1000 of the user's weibo of one message type.
        query_body = {
            "query": {
                "filtered": {
                    "filter": {
                        "bool": {
                            "must": [
                                {"term": {"message_type": message_type}},
                                {"term": {"uid": uid}}
                            ]
                        }
                    }
                }
            },
            "size": 1000
        }
        return es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_body)["hits"]["hits"]

    # origin weibo mid
    origin_mid = [hit['_id'] for hit in _own_weibo(1)]
    # retweeted weibo mid.  Fix: the message_type 3 terms used to be appended
    # to the first query body after it had been sent, so this search ran
    # with an empty filter and matched every document.
    retweeted_mid = [
        hit['_source']['root_mid']
        for hit in _own_weibo(3)
        if hit['_source'].get('root_mid', '')
    ]

    message_type = 3 if int(style) == 0 else 2  # 3: retweeted, 2: comment
    results = influenced_user_detail(uid, date, origin_mid, retweeted_mid, message_type)
    return results
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:87,代码来源:personal_influence.py
注:本文中的user_portrait.global_utils.ES_CLU |
请发表评论