本文整理汇总了Python中user_portrait.global_utils.es_flow_text.search函数的典型用法代码示例。如果您正苦于以下问题:Python search函数的具体用法?Python search怎么用?Python search使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了search函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: query_hot_mid
def query_hot_mid(ts, keywords_list, text_type,size=100):
    """Aggregate the most frequent ``root_mid`` values for a keyword window.

    The window is ``[ts - time_interval, ts)``. Documents must match one of
    ``keywords_list`` in ``keywords_string`` and have ``message_type`` "0".

    Only one daily index is queried: the index of ``ts`` when the window
    starts and ends on the same day, otherwise the index of the day the
    window starts on. If the chosen index does not exist, nothing is queried.

    ``text_type`` is accepted but not used by this function.

    Returns a list of ``[root_mid, doc_count]`` pairs in the order returned
    by the terms aggregation.
    """
    window_start = ts - time_interval
    must_filters = [
        {"range": {"timestamp": {"gte": window_start, "lt": ts}}},
        {"terms": {"keywords_string": keywords_list}},
        {"term": {"message_type": "0"}},
    ]
    request = {
        "query": {"filtered": {"filter": {"bool": {"must": must_filters}}}},
        "aggs": {
            "all_interests": {"terms": {"field": "root_mid", "size": size}}
        },
    }

    end_day = ts2datetime(ts)
    start_day = ts2datetime(window_start)
    end_index = flow_text_index_name_pre + end_day
    end_index_exists = es_text.indices.exists(end_index)
    start_index = flow_text_index_name_pre + start_day
    start_index_exists = es_text.indices.exists(start_index)
    print('%s %s' % (end_day, start_day))

    # Pick the single index to query, or None when it is missing.
    same_day = end_day == start_day
    if same_day and end_index_exists:
        target_index = end_index
    elif not same_day and start_index_exists:
        target_index = start_index
    else:
        target_index = None

    if target_index is None:
        return []

    response = es_text.search(index=target_index, doc_type=flow_text_index_type, body=request)
    buckets = response["aggregations"]["all_interests"]["buckets"]

    pairs = []
    for bucket in buckets or ():
        print(bucket)
        pairs.append([bucket['key'], bucket['doc_count']])
    return pairs
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:53,代码来源:full_text_serach.py
示例2: query_mid_list
def query_mid_list(ts, keywords_list, time_segment, social_sensors=None):
    """Collect the ids of flow-text documents matching keywords in a window.

    The window is ``[ts - time_segment, ts)``. When ``social_sensors`` is a
    non-empty list, hits are additionally restricted to those uids.

    The window may start on the day before ``ts``, so both the index of
    ``ts`` and the index of ``ts - time_segment`` are queried (once each when
    they differ). Indices that do not exist are skipped.

    Returns a list of unique document ``_id`` values, in no particular order.
    """
    must_filters = [
        {"range": {"timestamp": {"gte": ts - time_segment, "lt": ts}}},
        {"terms": {"keywords_string": keywords_list}},
    ]
    if social_sensors:
        must_filters.append({"terms": {"uid": social_sensors}})
    query_body = {
        "query": {"filtered": {"filter": {"bool": {"must": must_filters}}}},
        "size": 10000,
    }

    # Day of ts first, then the day the window starts on if it is different.
    day_names = [ts2datetime(ts)]
    window_start_day = ts2datetime(ts - time_segment)
    if window_start_day != day_names[0]:
        day_names.append(window_start_day)

    origin_mid_set = set()
    for day_name in day_names:
        index_name = flow_text_index_name_pre + day_name
        if not es_text.indices.exists(index_name):
            continue
        hits = es_text.search(index=index_name, doc_type=flow_text_index_type,
                              body=query_body, fields=["root_mid"])["hits"]["hits"]
        origin_mid_set.update(hit["_id"] for hit in hits)
    return list(origin_mid_set)
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:53,代码来源:full_text_serach.py
示例3: search_group_sentiment_weibo
def search_group_sentiment_weibo(task_name, start_ts, sentiment):
    """List one day of a group's weibo that carry the requested sentiment.

    task_name: id of the group document that holds ``uid_list``.
    start_ts:  start of the queried range; the range is
               ``[start_ts, start_ts + DAY)`` and the index is the daily
               flow-text index of ``start_ts``.
    sentiment: sentiment label; the value '2' is expanded to every label in
               ``SENTIMENT_SECOND``.

    Returns 'task name invalid' when the group document cannot be read,
    'task uid list null' when it has no uid list, otherwise a list of dicts
    with keys uid, uname, ip, geo, text, timestamp and sentiment, ordered by
    ascending timestamp. Users without a portrait entry get uname 'unknown'.
    """
    # step1: get the uid list of the group
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: map uid -> uname for the users that have a portrait
    uid2uname = {}
    try:
        user_portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                                     body={'ids': uid_list}, _source=False, fields=['uname'])['docs']
    except Exception:
        user_portrait_result = []
    for item in user_portrait_result:
        if item.get('found'):
            uid2uname[item['_id']] = item['fields']['uname'][0]

    # step3: build the query for the requested sentiment
    flow_text_index_name = flow_text_index_name_pre + str(ts2datetime(start_ts))
    if sentiment != '2':
        sentiment_clause = {'term': {'sentiment': sentiment}}
    else:
        sentiment_clause = {'terms': {'sentiment': SENTIMENT_SECOND}}
    must_clauses = [
        {'terms': {'uid': uid_list}},
        sentiment_clause,
        {'range': {'timestamp': {'gte': start_ts, 'lt': start_ts + DAY}}},
    ]

    # step4: search the day's index; a failing search yields no weibo
    try:
        flow_text_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': must_clauses}},
                  'sort': [{'timestamp': {'order': 'asc'}}],
                  'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_result = []

    weibo_list = []
    for flow_text_item in flow_text_result:
        source = flow_text_item['_source']
        weibo = {}
        weibo['uid'] = source['uid']
        # Only users found in the portrait index are in uid2uname.
        weibo['uname'] = uid2uname.get(weibo['uid'], 'unknown')
        weibo['ip'] = source['ip']
        try:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        except (KeyError, AttributeError):
            weibo['geo'] = ''
        weibo['text'] = source['text']
        weibo['timestamp'] = source['timestamp']
        weibo['sentiment'] = source['sentiment']
        weibo_list.append(weibo)
    return weibo_list
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:60,代码来源:utils.py
示例4: get_repost_weibo
def get_repost_weibo(mid, weibo_timestamp):
    """Return documents that reference ``mid`` as their root, with user info.

    Searches the daily flow-text index of ``weibo_timestamp`` for documents
    with ``root_mid == mid``, ``message_type == 2`` and a timestamp not
    earlier than ``weibo_timestamp``. A failing search is treated as "no
    results".

    Returns a list of the hits' ``_source`` dicts. Each gets an extra
    ``'user'`` key, taken from ``get_user_profile_weibo`` and indexed by the
    document's uid.
    """
    index_name = flow_text_index_name_pre + ts2datetime(weibo_timestamp)
    query_body = {
        'query': {
            'bool': {
                'must': [
                    {'term': {'root_mid': mid}},
                    {'range': {'timestamp': {'gte': weibo_timestamp}}},
                    {'term': {'message_type': 2}},
                ]
            }
        }
    }
    try:
        flow_text_result = es_flow_text.search(index=index_name, doc_type=flow_text_index_type,
                                               body=query_body)['hits']['hits']
    except Exception:
        flow_text_result = []

    repost_uid_list = [item['_source']['uid'] for item in flow_text_result]
    repost_user_info_dict = get_user_profile_weibo(repost_uid_list)

    statuses = []
    for item in flow_text_result:
        item_source = item['_source']
        # The uid lives inside _source, not on the hit envelope.
        item_source['user'] = repost_user_info_dict[item_source['uid']]
        statuses.append(item_source)
    return statuses
开发者ID:huxiaoqian,项目名称:revised_user_portrait,代码行数:29,代码来源:new_search.py
示例5: get_psycho_status
def _sentiment_ratio(counts, label, total):
    """Return counts[label] / total as a float, or 0 when it cannot be computed."""
    if total and label in counts:
        return float(counts[label]) / total
    return 0


def get_psycho_status(uid_list):
    """Compute per-user sentiment ratios over the WEEK days before "today".

    "Today" is the current date, or ``RUN_TEST_TIME`` when ``RUN_TYPE == 0``.
    For each of the preceding ``WEEK`` daily flow-text indices, the sentiment
    labels of the users' documents are counted. A failing search counts as an
    empty day.

    Returns ``{uid: {'first': {...}, 'second': {...}}}``.
    ``'second'`` maps every label of ``SENTIMENT_SECOND`` to its share of the
    user's documents. ``'first'`` does the same for ``SENTIMENT_FIRST``, where
    label '7' stands for the sum of all ``SENTIMENT_SECOND`` labels. Users
    without documents get 0 for every label.
    """
    now_date_ts = datetime2ts(ts2datetime(time.time()))
    # run_type
    if RUN_TYPE == 0:
        now_date_ts = datetime2ts(RUN_TEST_TIME)
    start_date_ts = now_date_ts - DAY * WEEK

    # uid -> {sentiment label (as str): document count}
    uid_sentiment_dict = {}
    for i in range(0, WEEK):
        flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_date_ts + DAY * i)
        try:
            flow_text_exist = es_flow_text.search(
                index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'filtered': {'filter': {'terms': {'uid': uid_list}}}}, 'size': MAX_VALUE},
                _source=False, fields=['uid', 'sentiment'])['hits']['hits']
        except Exception:
            flow_text_exist = []
        for flow_text_item in flow_text_exist:
            fields = flow_text_item['fields']
            uid = fields['uid'][0]
            label = str(fields['sentiment'][0])
            user_counts = uid_sentiment_dict.setdefault(uid, {})
            user_counts[label] = user_counts.get(label, 0) + 1

    results = {}
    for uid in uid_list:
        # Work on a copy: the synthetic '7' bucket added below must not leak
        # into the shared counts (uid_list may contain the same uid twice).
        user_sentiment_result = dict(uid_sentiment_dict.get(uid, {}))
        all_count = sum(user_sentiment_result.values())

        # second level sentiment
        second_sentiment_all_count = sum(
            count for label, count in user_sentiment_result.items()
            if label in SENTIMENT_SECOND)
        second = {}
        for sentiment_item in SENTIMENT_SECOND:
            second[sentiment_item] = _sentiment_ratio(user_sentiment_result, sentiment_item, all_count)

        # first level sentiment; '7' aggregates every second level label
        user_sentiment_result['7'] = second_sentiment_all_count
        first = {}
        for sentiment_item in SENTIMENT_FIRST:
            first[sentiment_item] = _sentiment_ratio(user_sentiment_result, sentiment_item, all_count)

        results[uid] = {'first': first, 'second': second}
    return results
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:55,代码来源:utils.py
示例6: get_activity_weibo
def get_activity_weibo(task_name, start_ts):
    """List a group's weibo published in the FOUR_HOUR segment after start_ts.

    task_name: id of the group document that holds ``uid_list``.
    start_ts:  start of the segment; the range is
               ``[start_ts, start_ts + FOUR_HOUR)`` and the index is the
               daily flow-text index of ``start_ts``.

    Returns 'task name invalid' when the group document cannot be read,
    'task uid list null' when it has no uid list, otherwise a list of dicts
    with keys timestamp (formatted with ``ts2date``), ip, text and geo,
    sorted by timestamp. A failing search yields an empty list.
    """
    # step1: get the uid list of the group
    try:
        group_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                           id=task_name, _source=False, fields=['uid_list'])
    except Exception:
        group_result = {}
    if not group_result:
        return 'task name invalid'
    try:
        uid_list = group_result['fields']['uid_list']
    except (KeyError, TypeError):
        uid_list = []
    if not uid_list:
        return 'task uid list null'

    # step2: search the time segment
    end_ts = start_ts + FOUR_HOUR
    flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_ts)
    query = [
        {'terms': {'uid': uid_list}},
        {'range': {'timestamp': {'gte': start_ts, 'lt': end_ts}}},
    ]
    try:
        flow_text_es_result = es_flow_text.search(
            index=flow_text_index_name, doc_type=flow_text_index_type,
            body={'query': {'bool': {'must': query}}, 'sort': 'timestamp', 'size': MAX_VALUE})['hits']['hits']
    except Exception:
        flow_text_es_result = []

    results = []
    for item in flow_text_es_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            # geo is an '&'-separated string; joining it directly would put a
            # tab between every single character.
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        else:
            weibo['geo'] = ''
        results.append(weibo)
    return results
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:54,代码来源:utils.py
示例7: new_get_user_weibo
def new_get_user_weibo(uid, sort_type):
    """Return up to 100 weibo of ``uid`` from the last seven daily indices.

    uid:       user whose documents are searched.
    sort_type: ``_source`` field used for sorting, both in each daily query
               and for the final merge. Forced to 'timestamp' when
               ``RUN_TYPE == 0``, in which case "today" is ``RUN_TEST_TIME``.

    Each of the seven days before "today" contributes at most 100 hits; a
    failing search contributes none. The merged hits are sorted by
    ``sort_type`` in descending order and cut to 100.

    Returns a list of rows
    ``[mid, uid, text, geo, city, timestamp, date, retweet_count,
    comment_count, sensitive_score]`` where ``city`` is ``ip2city(geo)`` and
    the three counters are 0 unless ``RUN_TYPE == 1``. Returns an empty list
    when nothing is found.
    """
    now_date = ts2datetime(time.time())
    # run_type
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'
    now_date_ts = datetime2ts(now_date)

    # collect the per-day hits
    weibo_list = []
    for i in range(7, 0, -1):
        index_name = flow_text_index_name_pre + ts2datetime(now_date_ts - i * DAY)
        try:
            weibo_result = es_flow_text.search(
                index=index_name, doc_type=flow_text_index_type,
                body={'query': {'filtered': {'filter': {'term': {'uid': uid}}}},
                      'sort': sort_type, 'size': 100})['hits']['hits']
        except Exception:
            weibo_result = []
        weibo_list.extend(weibo_result)

    sort_weibo_list = sorted(weibo_list, key=lambda x: x['_source'][sort_type], reverse=True)[:100]

    results = []
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        timestamp = source['timestamp']
        geo = source['geo']
        # run_type
        if RUN_TYPE == 1:
            retweet_count = source['retweet_count']
            comment_count = source['comment_count']
            sensitive_score = source['sensitive']
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        results.append([source['mid'], source['uid'], source['text'], geo, ip2city(geo),
                        timestamp, ts2date(timestamp),
                        retweet_count, comment_count, sensitive_score])
    return results
开发者ID:huxiaoqian,项目名称:revised_user_portrait,代码行数:53,代码来源:new_search.py
示例8: get_social_inter_content
def get_social_inter_content(uid1, uid2, type_mark):
    """List the weibo exchanged between two users over the last seven days.

    uid1, uid2: the two user ids; both must be convertible with ``int()``
                because ``directed_uid`` is queried as an integer.
    type_mark:  with 'out', documents in both directions are returned
                (uid1 -> uid2 and uid2 -> uid1); with any other value only
                uid1 -> uid2.

    "Today" is the current date when ``RUN_TYPE == 1``, otherwise
    ``RUN_TEST_TIME``. Each of the seven preceding daily indices is searched
    in chronological order; a failing search contributes nothing.

    Returns a list of dicts with keys timestamp, ip, geo, text, uid, uname,
    directed_uid and directed_uname. Names that cannot be resolved from the
    portrait index are reported as 'unknown'.
    """
    weibo_list = []
    now_ts = int(time.time())
    # run_type
    if RUN_TYPE == 1:
        now_date_ts = datetime2ts(ts2datetime(now_ts))
    else:
        now_date_ts = datetime2ts(RUN_TEST_TIME)

    # uid2uname
    uid2uname = {}
    try:
        portrait_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                                body={'ids': [uid1, uid2]}, _source=False,
                                                fields=['uid', 'uname'])['docs']
    except Exception:
        portrait_result = []
    for item in portrait_result:
        if item.get('found'):
            uid2uname[item['_id']] = item['fields']['uname'][0]
        else:
            uid2uname[item['_id']] = 'unknown'

    # The directions to match do not depend on the day; build them once.
    query = [{'bool': {'must': [{'term': {'uid': uid1}}, {'term': {'directed_uid': int(uid2)}}]}}]
    if type_mark == 'out':
        query.append({'bool': {'must': [{'term': {'uid': uid2}}, {'term': {'directed_uid': int(uid1)}}]}})
    search_body = {'query': {'bool': {'should': query}},
                   'sort': [{'timestamp': {'order': 'asc'}}],
                   'size': MAX_VALUE}

    # iterate the dates to search the weibo list
    for i in range(7, 0, -1):
        iter_date = ts2datetime(now_date_ts - i * DAY)
        flow_text_index_name = flow_text_index_name_pre + str(iter_date)
        try:
            flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                                                   body=search_body)['hits']['hits']
        except Exception:
            flow_text_result = []
        for flow_text in flow_text_result:
            source = flow_text['_source']
            directed_uid = str(source['directed_uid'])
            weibo = {}
            weibo['timestamp'] = source['timestamp']
            weibo['ip'] = source['ip']
            weibo['geo'] = source['geo']
            # NOTE(review): '&' is replaced inside the text while geo is
            # passed through untouched -- confirm this is intended.
            weibo['text'] = '\t'.join(source['text'].split('&'))
            weibo['uid'] = source['uid']
            # uid2uname is empty when the portrait lookup failed, so never
            # index it directly.
            weibo['uname'] = uid2uname.get(weibo['uid'], 'unknown')
            weibo['directed_uid'] = directed_uid
            weibo['directed_uname'] = uid2uname.get(directed_uid, 'unknown')
            weibo_list.append(weibo)
    return weibo_list
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:53,代码来源:utils.py
示例9: read_flow_text_sentiment
def read_flow_text_sentiment(uid_list):
    """
    Read the users' weibo together with their sentiment labels.

    Input:  uid_list (list of uid strings)
    Output: word_dict (per-user keyword frequencies), weibo_list (weibo rows)
    word_dict example:  {uid1: {'w1': f1, 'w2': f2, ...}, ...}
    weibo_list example: [[uid1, text1, s1, ts1], [uid2, text2, s2, ts2], ...]
    (every row holds four values: uid, text, sentiment, timestamp)

    The WEEK daily indices before the fixed reference date 2013-09-08 are
    read. A failing search is treated as an empty day.
    """
    word_dict = dict()  # keyword frequencies per uid
    weibo_list = []  # weibo rows
    # The reference date is pinned; the current time is not used.
    now_date_ts = datetime2ts("2013-09-08")
    start_date_ts = now_date_ts - DAY * WEEK
    for i in range(0, WEEK):
        flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_date_ts + DAY * i)
        try:
            flow_text_exist = es_flow_text.search(
                index=flow_text_index_name,
                doc_type=flow_text_index_type,
                body={"query": {"filtered": {"filter": {"terms": {"uid": uid_list}}}}, "size": MAX_VALUE},
                _source=False,
                fields=["text", "uid", "sentiment", "keywords_dict", "timestamp"],
            )["hits"]["hits"]
        except Exception:
            flow_text_exist = []
        for flow_text_item in flow_text_exist:
            fields = flow_text_item["fields"]
            uid = fields["uid"][0].encode("utf-8")
            text = fields["text"][0].encode("utf-8")
            sentiment = int(fields["sentiment"][0])
            ts = fields["timestamp"][0]
            # Decode the stored JSON and re-key it with UTF-8 byte strings,
            # like uid and text above. Never eval() text read from the index.
            raw_keywords = json.loads(fields["keywords_dict"][0])
            keywords_dict = dict((word.encode("utf-8"), freq) for word, freq in raw_keywords.items())
            if uid in word_dict:
                word_dict[uid] = dict(Counter(word_dict[uid]) + Counter(keywords_dict))
            else:
                word_dict[uid] = keywords_dict
            weibo_list.append([uid, text, sentiment, ts])
    return word_dict, weibo_list
开发者ID:jianjian0dandan,项目名称:sensitive_user_portrait,代码行数:50,代码来源:weibo_api.py
示例10: get_influence_content
def get_influence_content(uid, timestamp_from, timestamp_to):
    """List the weibo of ``uid`` published in [timestamp_from, timestamp_to).

    The range is split at day boundaries because every day lives in its own
    flow-text index; each piece is searched in the index of its day, sorted
    by ascending timestamp. A failing search contributes nothing.

    Returns a list of dicts with keys timestamp (formatted with ``ts2date``),
    ip, text and geo (the '&'-separated value joined with tabs, or '').
    """
    from_date_ts = datetime2ts(ts2datetime(timestamp_from))
    to_date_ts = datetime2ts(ts2datetime(timestamp_to))

    # One range per calendar day touched by the request, clamped to the
    # requested bounds. The day of timestamp_to is included (<=).
    new_range_dict_list = []
    iter_date_ts = from_date_ts
    while iter_date_ts <= to_date_ts:
        range_from = max(iter_date_ts, timestamp_from)
        range_to = min(iter_date_ts + DAY, timestamp_to)
        if range_from < range_to:  # skip empty pieces, e.g. range ends at midnight
            new_range_dict_list.append({'range': {'timestamp': {'gte': range_from, 'lt': range_to}}})
        iter_date_ts += DAY

    # iterate the dates to search flow_text
    iter_result = []
    for range_item in new_range_dict_list:
        range_from_date = ts2datetime(range_item['range']['timestamp']['gte'])
        flow_text_index_name = flow_text_index_name_pre + range_from_date
        query = [{'term': {'uid': uid}}, range_item]
        try:
            flow_text_exist = es_flow_text.search(
                index=flow_text_index_name, doc_type=flow_text_index_type,
                body={'query': {'bool': {'must': query}}, 'sort': [{'timestamp': 'asc'}]})['hits']['hits']
        except Exception:
            flow_text_exist = []
        iter_result.extend(flow_text_exist)

    # build the weibo list from the hits of every day
    weibo_list = []
    for item in iter_result:
        source = item['_source']
        weibo = {}
        weibo['timestamp'] = ts2date(source['timestamp'])
        weibo['ip'] = source['ip']
        weibo['text'] = source['text']
        if source['geo']:
            weibo['geo'] = '\t'.join(source['geo'].split('&'))
        else:
            weibo['geo'] = ''
        weibo_list.append(weibo)
    return weibo_list
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:47,代码来源:utils.py
示例11: read_flow_text_sentiment
def read_flow_text_sentiment(uid_list):
    '''
    Read the users' weibo together with their sentiment labels.

    Input:  uid_list (list of uid strings)
    Output: word_dict (per-user keyword frequencies), weibo_list (weibo rows)
    word_dict example:  {uid1: {'w1': f1, 'w2': f2, ...}, ...}
    weibo_list example: [[uid1, text1, s1, ts1], [uid2, text2, s2, ts2], ...]
    (every row holds four values: uid, text, sentiment, timestamp)

    The WEEK daily indices before the fixed reference date 2013-09-08 are
    read. A failing search is treated as an empty day.
    '''
    word_dict = dict()  # keyword frequencies per uid
    weibo_list = []  # weibo rows
    # The reference date is pinned; the current time is not used.
    start_date_ts = datetime2ts('2013-09-08') - DAY * WEEK
    search_body = {'query': {'filtered': {'filter': {'terms': {'uid': uid_list}}}}, 'size': MAX_VALUE}
    wanted_fields = ['text', 'uid', 'sentiment', 'keywords_dict', 'timestamp']
    for i in range(0, WEEK):
        flow_text_index_name = flow_text_index_name_pre + ts2datetime(start_date_ts + DAY * i)
        try:
            flow_text_exist = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                                                  body=search_body, _source=False,
                                                  fields=wanted_fields)['hits']['hits']
        except Exception:
            flow_text_exist = []
        for flow_text_item in flow_text_exist:
            fields = flow_text_item['fields']
            uid = fields['uid'][0].encode('utf-8')
            text = fields['text'][0].encode('utf-8')
            sentiment = int(fields['sentiment'][0])
            ts = fields['timestamp'][0]
            # Parse the stored JSON and key it with UTF-8 byte strings, like
            # uid and text above. eval() must not run on text from the index.
            keywords_dict = {}
            for word, freq in json.loads(fields['keywords_dict'][0]).items():
                keywords_dict[word.encode('utf-8')] = freq
            if uid in word_dict:
                word_dict[uid] = dict(Counter(word_dict[uid]) + Counter(keywords_dict))
            else:
                word_dict[uid] = keywords_dict
            weibo_list.append([uid, text, sentiment, ts])
    return word_dict, weibo_list
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:45,代码来源:weibo_api.py
示例12: group_user_weibo
def group_user_weibo(task_name, submit_user, sort_type):
    """Return up to 100 weibo written by the members of a group.

    task_name, submit_user: combined as ``submit_user + '-' + task_name`` to
               form the id of the group document that holds ``uid_list``.
    sort_type: ``_source`` field used for sorting, both in each daily query
               and for the final merge. Forced to 'timestamp' when
               ``RUN_TYPE == 0``, in which case "today" is ``RUN_TEST_TIME``.

    Each of the seven days before "today" contributes at most 100 hits; a
    failing search contributes none. The merged hits are sorted by
    ``sort_type`` in descending order and cut to 100.

    Returns 'group no exist' when the group document cannot be read,
    otherwise a list of rows
    ``[mid, uid, uname, text, geo, city, timestamp, date, retweet_count,
    comment_count, sensitive_score, weibo_url]``. ``city`` is
    ``ip2city(geo)``; the three counters are 0 unless ``RUN_TYPE == 1``;
    uname is 'unknown' for users that cannot be resolved.
    """
    now_date = ts2datetime(time.time())
    # run_type
    if RUN_TYPE == 0:
        now_date = RUN_TEST_TIME
        sort_type = 'timestamp'

    # step1: get group user
    task_id = submit_user + '-' + task_name
    try:
        group_exist_result = es_group_result.get(index=group_index_name, doc_type=group_index_type,
                                                 id=task_id)['_source']
    except Exception:
        group_exist_result = {}
    if not group_exist_result:
        return 'group no exist'

    # step2: get user weibo list
    uid_list = group_exist_result['uid_list']
    now_date_ts = datetime2ts(now_date)
    found_weibo = []
    for i in range(7, 0, -1):
        index_name = flow_text_index_name_pre + ts2datetime(now_date_ts - i * DAY)
        try:
            weibo_result = es_flow_text.search(
                index=index_name, doc_type=flow_text_index_type,
                body={'query': {'filtered': {'filter': {'terms': {'uid': uid_list}}}},
                      'sort': sort_type, 'size': 100})['hits']['hits']
        except Exception:
            weibo_result = []
        found_weibo.extend(weibo_result)
    sort_weibo_list = sorted(found_weibo, key=lambda x: x['_source'][sort_type], reverse=True)[:100]

    # step3: get user name
    try:
        portrait_exist_result = es_user_portrait.mget(index=portrait_index_name, doc_type=portrait_index_type,
                                                      body={'ids': uid_list})['docs']
    except Exception:
        portrait_exist_result = []
    uid2uname_dict = {}
    for portrait_item in portrait_exist_result:
        if portrait_item.get('found'):
            uid2uname_dict[portrait_item['_id']] = portrait_item['_source']['uname']
        else:
            uid2uname_dict[portrait_item['_id']] = 'unknown'

    # step4: build the result rows
    weibo_list = []
    for weibo_item in sort_weibo_list:
        source = weibo_item['_source']
        mid = source['mid']
        uid = source['uid']
        # The name map is empty when the portrait lookup failed.
        uname = uid2uname_dict.get(uid, 'unknown')
        geo = source['geo']
        timestamp = source['timestamp']
        # run_type
        if RUN_TYPE == 1:
            retweet_count = source['retweet_count']
            comment_count = source['comment_count']
            sensitive_score = source['sensitive_score']
        else:
            retweet_count = 0
            comment_count = 0
            sensitive_score = 0
        weibo_list.append([mid, uid, uname, source['text'], geo, ip2city(geo),
                           timestamp, ts2date(timestamp),
                           retweet_count, comment_count, sensitive_score,
                           weiboinfo2url(uid, mid)])
    return weibo_list
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:68,代码来源:utils.py
示例13: search_retweet_network_keywords
def _count_flow_text_hits(must_clauses, date_list, count_field):
    """Count hits per value of ``count_field`` over the daily flow-text indices.

    At most 100 hits are read from each index.
    """
    counts = {}
    query_body = {'query': {'bool': {'must': must_clauses}}, 'size': 100}
    for iter_date in date_list:
        flow_text_index_name = flow_text_index_name_pre + iter_date
        flow_text_result = es_flow_text.search(index=flow_text_index_name, doc_type=flow_text_index_type,
                                               body=query_body)['hits']['hits']
        for item in flow_text_result:
            key = item['_source'][count_field]
            counts[key] = counts.get(key, 0) + 1
    return counts


def search_retweet_network_keywords(task_id, uid):
    """Build the keyword-restricted retweet network around ``uid``.

    The task document ``task_id`` supplies ``start_date``, ``end_date`` and
    ``query_keywords`` (keywords separated by '&'). Every daily flow-text
    index from the start date to the end date (inclusive) is searched for
    documents whose text contains at least one keyword.

    Returns a dict with two entries, both produced by
    ``retweet_dict2results``:
      'retweet'    -- documents written by ``uid``, counted per
                      ``directed_uid``;
      'be_retweet' -- documents directed at ``uid``, counted per author
                      ``uid``.
    """
    task_results = es_network_task.get(index=network_keywords_index_name,
                                       doc_type=network_keywords_index_type, id=task_id)['_source']
    start_ts = datetime2ts(task_results['start_date'])
    end_ts = datetime2ts(task_results['end_date'])

    # step1: the dates to query, e.g. ['2013-09-01', '2013-09-02']
    iter_query_date_list = []
    iter_date_ts = start_ts
    while iter_date_ts <= end_ts:
        iter_query_date_list.append(ts2datetime(iter_date_ts))
        iter_date_ts += DAY

    # step2: one wildcard clause per keyword, any of which may match
    keywords_list = task_results['query_keywords'].split('&')
    keyword_nest_body_list = [{'wildcard': {'text': '*' + keywords_item + '*'}}
                              for keywords_item in keywords_list]
    keyword_clause = {'bool': {'should': keyword_nest_body_list}}

    network_results = {}

    # retweet: uid is the author, count the users the documents point at.
    # Each direction gets its own clause list so the two queries do not
    # contaminate each other.
    retweet_query = [keyword_clause, {'term': {'uid': uid}}]
    retweet_counts = _count_flow_text_hits(retweet_query, iter_query_date_list, 'directed_uid')
    network_results['retweet'] = retweet_dict2results(uid, retweet_counts)

    # be_retweet: uid is the target, so the informative field is the author.
    be_retweet_query = [keyword_clause, {'term': {'directed_uid': uid}}]
    be_retweet_counts = _count_flow_text_hits(be_retweet_query, iter_query_date_list, 'uid')
    network_results['be_retweet'] = retweet_dict2results(uid, be_retweet_counts)

    return network_results
开发者ID:jianjian0dandan,项目名称:user_portrait_0324,代码行数:77,代码来源:utils.py
示例14: influenced_detail
def _search_user_mids(index_text, uid, message_type):
    """Return the ids of up to 10000 documents of ``uid`` with ``message_type``."""
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"term": {"message_type": message_type}},
                            {"term": {"uid": uid}},
                        ]
                    }
                }
            }
        },
        "size": 10000,
    }
    hits = es.search(index=index_text, doc_type="text", body=query_body)['hits']['hits']
    return set(hit['_id'] for hit in hits)


def influenced_detail(uid, date, style):
    """Return the text details of a user's 20 most influential weibo of a day.

    uid:   user id.
    date:  day as a string; it is appended to "flow_text_" for the text
           index and, with '-' removed, to ``pre_index`` for the influence
           index.
    style: selects the ranking (converted with ``int()``):
           0 -- ``origin_weibo_retweeted_detail``, message_type 1 documents;
           1 -- ``origin_weibo_comment_detail``, message_type 1 documents;
           2 -- ``retweeted_weibo_retweeted_detail``, message_type 3 documents;
           any other value -- ``retweeted_weibo_comment_detail``,
           message_type 3 documents.

    The selected detail field of the user's influence document is a JSON
    object mapping a weibo id to a count. Entries whose id is not among the
    user's documents of the matching message_type are dropped, the rest is
    ranked by count in descending order, and the first 20 are passed to
    ``get_text``.

    Returns ``{}`` when the influence document cannot be read, otherwise the
    value returned by ``get_text``.
    """
    index_name = pre_index + str(date).replace('-', '')
    index_text = "flow_text_" + date
    style = int(style)
    try:
        user_info = es_cluster.get(index=index_name, doc_type=influence_doctype, id=uid)["_source"]
    except Exception:
        return {}

    if style == 0:
        detail_field, message_type = "origin_weibo_retweeted_detail", 1
    elif style == 1:
        detail_field, message_type = "origin_weibo_comment_detail", 1
    elif style == 2:
        detail_field, message_type = "retweeted_weibo_retweeted_detail", 3
    else:
        detail_field, message_type = "retweeted_weibo_comment_detail", 3

    detail_dict = json.loads(user_info[detail_field])
    known_mids = _search_user_mids(index_text, uid, message_type)

    ranked = sorted(detail_dict.items(), key=lambda x: x[1], reverse=True)
    # Filter into a new list; removing while iterating skips elements.
    ranked = [pair for pair in ranked if pair[0] in known_mids]
    return get_text(ranked[:20], date, user_info, style)
开发者ID:ferrero-zhang,项目名称:user_portrait_0324,代码行数:97,代码来源:personal_influence.py
示例15: aggregation_hot_keywords
def aggregation_hot_keywords(start_time, stop_time, keywords_list):
    """Rank the keywords that co-occur with ``keywords_list`` in a time range.

    start_time, stop_time: bounds of the range ``[start_time, stop_time)``;
                           both are converted with ``int()``.
    keywords_list:         documents must contain at least one of these in
                           ``keywords_string``.

    Every daily flow-text index from the day of ``stop_time`` back to the day
    of ``start_time`` is aggregated on ``keywords_string`` (the top
    ``PRE_AGGREGATION_NUMBER`` terms per day) and the daily counts are added
    up. Missing indices are skipped.

    Returns a list of ``(keyword, count)`` tuples sorted by descending count,
    cut to ``AGGRAGATION_KEYWORDS_NUMBER`` entries.
    """
    start_time = int(start_time)
    stop_time = int(stop_time)
    query_body = {
        "query": {
            "filtered": {
                "filter": {
                    "bool": {
                        "must": [
                            {"terms": {"keywords_string": keywords_list}},
                            {"range": {
                                "timestamp": {
                                    "gte": start_time,
                                    "lt": stop_time
                                }
                            }}
                        ]
                    }
                }
            }
        },
        "aggs": {
            "all_keywords": {
                "terms": {"field": "keywords_string", "size": PRE_AGGREGATION_NUMBER}
            }
        }
    }

    start_day = ts2datetime(float(start_time))
    keywords_dict = dict()
    ts = float(stop_time)
    while True:
        current_day = ts2datetime(ts)
        index_name = flow_text_index_name_pre + current_day
        if es_text.indices.exists(index_name):
            buckets = es_text.search(index=index_name, doc_type=flow_text_index_type,
                                     body=query_body)["aggregations"]['all_keywords']['buckets']
            for item in buckets or []:
                keyword = item['key']
                keywords_dict[keyword] = keywords_dict.get(keyword, 0) + item['doc_count']
        # Stop once the start day has been processed. The second test ends
        # the walk when the start lies after the stop, in which case the
        # start day would never be reached going backwards.
        if current_day == start_day or ts < start_time:
            break
        ts -= day_time

    return_dict = sorted(keywords_dict.items(), key=lambda x: x[1], reverse=True)[:AGGRAGATION_KEYWORDS_NUMBER]
    return return_dict
开发者ID:huxiaoqian,项目名称:user_portrait,代码行数:66,代码来源:full_text_serach.py
示例16: influenced_people
def influenced_people(uid, mid, influence_style, date, default_number=20):
# uid
# which weibo----mid, retweeted weibo ---seek for root_mid
# influence_style: retweeted(0) or comment(1)
date1 = ts2datetime(datetime2ts(date)).replace('-', '')
index_name = pre_index + date1
index_flow_text = pre_text_index + date
text_result = es.get(index=index_flow_text, doc_type=flow_text_index_type, id=mid)["_source"]
temp_mid = text_result.get("root_mid",'') #判断微博是否是原创微博
if temp_mid:
mid_type = 1 # 非原创微博
else:
mid_type = 0 # 原创微博
query_body = {
"query":{
"filtered":{
"filter":{
"bool":{
"must":[
]
}
}
}
},
"size": 100000
}
if mid_type == 0:
if int(influence_style) == 0: # origin weibo, all retweeted people
query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"root_uid": uid}}, {"term": {"message_type": 3}}, {"term": {"root_mid": mid}}])
else: # commented people
query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 2}}, {"term": {"root_mid": mid}}])
else:
if int(influence_style) == 0: # origin weibo, all retweeted people
query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 3}}, {"term": {"root_mid": temp_mid}}])
else: # commented people
query_body["query"]["filtered"]["filter"]["bool"]["must"].extend([{"term": {"directed_uid": uid}}, {"term": {"message_type": 2}}, {"term": {"root_mid": temp_mid}}])
search_results = es.search(index=index_flow_text, doc_type=flow_text_index_type, body=query_body, fields=["uid"], timeout=30)["hits"]["hits"]
results = []
if search_results:
for item in search_results:
if int(item["fields"]["uid"][0]) == int(uid):
pass
else:
results.append(item["fields"]["uid"][0])
results = list(set(results))
else:
results = []
if results:
portrait_results = es_user_portrait.mget(index=user_portrait, doc_type=portrait_index_type, body={"ids": results}, fields=["domain", "topic_string", "activity_geo_dict","importance", "influence"])["docs"]
else:
portrait_results = {}
in_portrait = []
out_portrait = []
in_portrait_info = []
retweeted_domain = {}
retweeted_topic = {}
retweeted_geo = {}
average_influence = 0
total_influence = 0
count = 0
if portrait_results:
for item in portrait_results:
if item["found"]:
temp = []
count += 1
temp.append(item['_id'])
temp.append(item["fields"]["importance"][0])
in_portrait.append(temp)
temp_domain = item["fields"]["domain"][0].split('&')
temp_topic = item["fields"]["topic_string"][0].split('&')
temp_geo = json.loads(item["fields"]["activity_geo_dict"][0])[-1].keys()
total_influence += item["fields"]["influence"][0]
retweeted_domain = aggregation(temp_domain, retweeted_domain)
retweeted_topic = aggregation(temp_topic, retweeted_topic)
retweeted_geo = aggregation(temp_geo, retweeted_geo)
else:
out_portrait.append(item['_id'])
retweeted_domain = proportion(retweeted_domain)
retweeted_topic = proportion(retweeted_topic)
retweeted_geo = proportion(retweeted_geo)
try:
average_influence = total_influence/count
except:
average_influence = 0
sorted_retweeted_domain = sorted(retweeted_domain.items(),key=lambda x:x[1], reverse=True)
sorted_retweeted_topic = sorted(retweeted_topic.items(),key=lambda x:x[1], reverse=True)
sorted_retweeted_geo = sorted(retweeted_geo.items(), key=lambda x:x[1], reverse=True)
retweeted_results = dict()
retweeted_results["domian"] = sorted_retweeted_domain[:5]
retweeted_results["topic"] = sorted_retweeted_topic[:5]
retweeted_results["geo"] = sorted_retweeted_geo[:5]
retweeted_results["influence"] = average_influence
in_portrait = sorted(in_portrait, key=lambda x:x[1], reverse=True)
#.........这里部分代码省略.........
|
请发表评论