本文整理汇总了Python中ms_spider_fw.DBSerivce.DBService类的典型用法代码示例。如果您正苦于以下问题:Python DBService类的具体用法?Python DBService怎么用?Python DBService使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了DBService类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: gen_url
def gen_url():
    """Return the unique contact-info URLs derived from stored store hrefs.

    Reads the distinct ``store_href`` values of the ``aliexpress_temp``
    table, rewrites every accepted href to
    ``<prefix>/contactinfo/<last segment>.html`` on the ``//www`` host and
    returns the non-empty results as a de-duplicated list.
    """
    # Host markers that are accepted; each one is rewritten to '//www'.
    host_markers = ('//www', '//pt', '//ru', '//es')

    def to_contact_url(href):
        # Hrefs that already contain '.html' are rejected.
        if '.html' in href:
            return None
        parts = href.rsplit('/', 1)
        return parts[0] + '/contactinfo/' + parts[1] + '.html'

    def normalize(href):
        # The first marker found decides the rewrite; unknown hosts give None.
        for marker in host_markers:
            if marker in href:
                return to_contact_url(href.replace(marker, '//www'))
        return None

    store_table = DBService(dbName=db_name, tableName='aliexpress_temp', **connect_dict)
    rows = store_table.getData(var='store_href', distinct=True)
    candidates = (normalize(row[0]) for row in rows)
    return list({url for url in candidates if url})
开发者ID:yangmingsong,项目名称:python,代码行数:28,代码来源:contact_info_aliexpress_nops_addcookies.py
示例2: push2DB
def push2DB():
    """Write the rows returned by ``getKeyword()`` to ``taobaodata.keyword``.

    The table is created with the three category columns before the rows
    are inserted.
    """
    from ms_spider_fw.DBSerivce import DBService

    keyword_rows = getKeyword()
    keyword_table = DBService('taobaodata', 'keyword')
    keyword_table.createTable(
        tableTitle=['categoryFi', 'categorySe', 'categoryTi'])
    keyword_table.data2DB(data=keyword_rows)
开发者ID:yangmingsong,项目名称:python,代码行数:7,代码来源:taobaoKeyword.py
示例3: spiderMain
def spiderMain():
    """
    Main program.

    Calls ``Dler().downLoad(100)``, creates the ``thirdPartShopInfo`` table
    and then loops forever: every ``(url, src)`` pair taken from
    ``DBN.queueForDownLoad`` is parsed with ``PPer`` and stored as
    ``[url] + parsed fields``.  The loop has no exit condition.

    :return: does not return normally
    """
    downloader = Dler()
    downloader.downLoad(100)
    shop_table = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    shop_table.createTable(tableTitle=[
        'productHref', 'companyName', 'shopName', 'shopHref',
        'scoreSum', 'scoreProduct', 'scoreProductAvg',
        'scoreService', 'scoreServiceAvg',
        'scoreExpress', 'scoreExpressAvg', 'gradeHref',
    ])
    while True:
        pending = DBN.queueForDownLoad
        if pending.empty():
            # Nothing to parse yet: wait a second before polling again.
            time.sleep(1)
            continue
        url, src = pending.get()
        parsed = PPer(src).pageParser()
        print(parsed[0])
        shop_table.data2DB(data=[url] + parsed)
开发者ID:yangmingsong,项目名称:python,代码行数:29,代码来源:spider.py
示例4: productInfo
def productInfo():
    """Return a dict mapping column 0 to column 1 of the queried rows.

    Rows come from ``jdproductbaseinfo2database`` (columns requested:
    ``productHref,commentCount``, at most 200000 rows).  When a key occurs
    more than once, the last row wins.
    """
    product_table = DBService(dbName='jddata', tableName='jdproductbaseinfo2database')
    rows = product_table.getData(var='productHref,commentCount', limit=200000)
    return {row[0]: row[1] for row in rows}
开发者ID:yangmingsong,项目名称:python,代码行数:7,代码来源:jdDataAna.py
示例5: craweldhref
def craweldhref():
    """Return the stored ``href`` values with one trailing '/' removed.

    Reads the ``href`` column of ``yms_tmall_shopinfo_com_withoutjudge``,
    prints how many values were read and returns them as a list.

    :return: list of hrefs, each without a single trailing slash
    """
    db = DBService('elec_platform', 'yms_tmall_shopinfo_com_withoutjudge')
    rows = db.getData(var='href')
    # endswith() is safe on an empty string, where indexing [-1] would raise
    # IndexError.  A real list (not a lazy map object) keeps len() working
    # on Python 3 as well.
    hrefs = [
        value[:-1] if value.endswith('/') else value
        for value in (row[0] for row in rows)
    ]
    print(len(hrefs))
    return hrefs
开发者ID:yangmingsong,项目名称:python,代码行数:8,代码来源:spider.py
示例6: companyInfo
def companyInfo():
    """Return company information as a dictionary.

    Reads up to 200000 rows from ``thirdPartShopInfo``, drops every row
    whose column 2 equals ``'-'`` and maps column 1 of each remaining row
    to the row slice starting at column 1.
    """
    shop_table = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    rows = shop_table.getData(limit=200000)
    # NOTE(review): column 0 is presumably an auto-generated id - confirm
    # against DBService.createTable.
    return {row[1]: row[1:] for row in rows if row[2] != '-'}
开发者ID:yangmingsong,项目名称:python,代码行数:9,代码来源:jdDataAna.py
示例7: gen_url
def gen_url():
    """Return the distinct, non-empty ``credit_detail_href`` values.

    Reads the column from ``alibaba_cow_powder_3`` and returns the first
    field of every row as a list.

    Empty rows are skipped.  The previous implementation substituted the
    placeholder ``" "`` for them, which is truthy and therefore ended up in
    the returned URL list.
    """
    DB = DBService(dbName="alibaba", tableName="alibaba_cow_powder_3")
    rows = DB.getData(var="credit_detail_href", distinct=True)
    return [row[0] for row in rows if row and row[0]]
开发者ID:yangmingsong,项目名称:python,代码行数:9,代码来源:spider_third_page.py
示例8: run
def run(thread_count=1000):
    """Run the proxy test and store everything left in ``qu_proxy_ok``.

    :param thread_count: value passed straight to ``run_test``
    """
    run_test(thread_count)
    db_server_c = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    db_server_c.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    # Single-argument print(...) emits the same text on Python 2 and is also
    # valid syntax on Python 3, unlike the former print statements.
    print('#' * 100)
    print(qu_proxy_ok.qsize())
    # NOTE(review): the table has two columns but queue items are stored
    # as-is; presumably each item is already a [proxy_port, test_time]
    # pair - confirm against the producer.
    res = []
    while qu_proxy_ok.qsize():
        res.append(qu_proxy_ok.get())
    db_server_c.data2DB(data=res)
开发者ID:yangmingsong,项目名称:python,代码行数:10,代码来源:proxy_ok(daily).py
示例9: proxy_collection
def proxy_collection():
    """Return the de-duplicated union of website and locally stored proxies.

    Website proxies come from ``pc.get_proxies_from_website()``.  Local ones
    are the ``proxy_port`` column of the two proxy tables in the ``base``
    database.
    """
    website_proxies = pc.get_proxies_from_website()
    local_proxies = []
    for table_name in ('proxy_other_source', 'proxy_you_dai_li'):
        table = DBService(dbName='base', tableName=table_name, **connect_dict)
        local_proxies.extend(row[0] for row in table.getData(var='proxy_port'))
    return list(set(website_proxies + local_proxies))
开发者ID:yangmingsong,项目名称:python,代码行数:10,代码来源:contact_info_aliexpress_nops_addcookies.py
示例10: commentHrefList
def commentHrefList():
    """Return the unique ``(name, href, judgepage_href)`` rows to process.

    Rows whose ``judgepage_href`` (column 2) contains ``'http'`` or is
    purely numeric are dropped.  The number of remaining unique rows is
    printed before they are returned as a list of tuples.
    """
    table = DBService('elec_platform', 'tmall_baseinfo_everyweek')
    rows = table.getData(var='name,href,judgepage_href')
    unique_rows = set()
    for row in rows:
        judge_href = row[2]
        if 'http' in judge_href or judge_href.isnumeric():
            continue
        unique_rows.add(tuple(row))
    result = list(unique_rows)
    print(len(result))
    return result
开发者ID:yangmingsong,项目名称:python,代码行数:10,代码来源:spider.py
示例11: run
def run(thread_count=20000):
    """Run the multi-threaded proxy test and store the working proxies.

    Every item still in ``qu_proxy_ok`` is paired with the local time at
    which it was taken from the queue and written to the ``proxy_ok`` table.

    :param thread_count: value passed straight to ``muti_thread_test``
    """
    muti_thread_test(thread_count)
    proxy_table = DBService(dbName=db_name, tableName='proxy_ok', **connect_dict)
    proxy_table.createTable(tableTitle=['proxy_port', 'test_time'], x='Y')
    rows = []
    while qu_proxy_ok.qsize():
        proxy = qu_proxy_ok.get()
        tested_at = time.strftime('%Y-%m-%d %X', time.localtime())
        rows.append([proxy, tested_at])
    proxy_table.data2DB(data=rows)
开发者ID:yangmingsong,项目名称:python,代码行数:11,代码来源:httpbin_proxy_test.py
示例12: begin
def begin():
    """Export the de-duplicated ``thirdPartShopInfo`` rows to a CSV file.

    The first column and the last two columns are cut from both the table
    title and every row before duplicates are removed.  The result is
    written through ``CSV.writeCsv`` as ``jdData`` under ``D:/spider``.
    """
    shop_table = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    rows = shop_table.getData()
    header = shop_table.getTableTitle()[1:-2]
    # Tuples are hashable, so a set removes exact duplicate rows.
    unique_rows = {tuple(row[1:-2]) for row in rows}
    export_rows = [list(row) for row in unique_rows]
    writer = CSV()
    writer.writeCsv(savePath='D:/spider', fileTitle=header, data=export_rows, fileName='jdData')
开发者ID:yangmingsong,项目名称:python,代码行数:12,代码来源:jdDataAna.py
示例13: sumCommentCount
def sumCommentCount():
    """Sum the comment counts per shop name and export the totals as CSV.

    Reads ``shopName,commnetCount`` from
    ``thirdPartShopInfoAddCommnetCount``, adds up ``int(count)`` for every
    shop name and writes ``[shopName, total]`` rows through
    ``CSV.writeCsv`` as ``jdDataSum`` under ``D:/spider``.
    """
    db = DBService(dbName='jddata', tableName='thirdPartShopInfoAddCommnetCount')
    rows = db.getData(var='shopName,commnetCount')
    # A plain local name avoids shadowing the builtin ``dict``; .get()
    # replaces the former ``in dict.keys()`` test, which is a linear scan
    # per row on Python 2.
    totals = {}
    for row in rows:
        totals[row[0]] = totals.get(row[0], 0) + int(row[1])
    data = [[shop_name, total] for shop_name, total in totals.items()]
    writer = CSV()
    writer.writeCsv(savePath='D:/spider', fileTitle=['shopName', 'commnetCount'], data=data, fileName='jdDataSum')
开发者ID:yangmingsong,项目名称:python,代码行数:15,代码来源:jdDataAna.py
示例14: startUrlList
def startUrlList(self):
    """
    Method override (per the original note): build the list of start URLs.

    Candidates are the ``productHref`` values of
    ``jdproductbaseinfo2database`` whose ``sku`` has at least 10 characters.
    Hrefs already present in ``thirdPartShopInfo`` are removed, and only
    hrefs starting with ``'http'`` are kept.

    :return: list of product hrefs still to be crawled
    """
    product_rows = DBService(dbName='jddata', tableName='jdproductbaseinfo2database').getData(
        var='productHref,sku', distinct=True)
    candidates = [row[0] for row in product_rows if len(row[1]) >= 10]
    crawled_rows = DBService(dbName='jddata', tableName='thirdPartShopInfo').getData(var='productHref')
    if not crawled_rows:
        # NOTE(review): this early return skips the 'http' prefix filter
        # applied below - confirm whether that is intended.
        return candidates
    crawled = {row[0] for row in crawled_rows}
    return [
        href for href in candidates
        if href not in crawled and href[:4] == 'http'
    ]
开发者ID:yangmingsong,项目名称:python,代码行数:16,代码来源:spider.py
示例15: savePicture
def savePicture():
    """Take a screenshot of every distinct ``(name, href)`` pair.

    Reads ``name,href`` from ``tmall_baseinfo_realtime``, prints the number
    of pairs, then calls ``saveScreenShot`` for each one.  The driver
    returned by one call is passed to the next, with a random pause after
    each call.
    """
    from screenShot import saveScreenShot
    from ms_spider_fw.DBSerivce import DBService
    import time
    import random

    db = DBService(dbName='tmalldata', tableName='tmall_baseinfo_realtime')
    rows = db.getData(var='name,href', distinct=True)
    # Keep each name with its own href.  The old data.index(url) lookup
    # returned the first matching name for repeated hrefs, was quadratic,
    # and fails on Python 3 map objects.
    pairs = [(row[0], row[1]) for row in rows]
    print(len(pairs))
    dri = None
    for name, url in pairs:
        print(name)
        dri = saveScreenShot(url, driver=dri, title=name)
        # abs() keeps the gauss-distributed delay non-negative.
        time.sleep(abs(random.gauss(3, 2)))
开发者ID:yangmingsong,项目名称:python,代码行数:17,代码来源:scrennShot_test.py
示例16: spiderMain
def spiderMain():
    """
    Main program.

    Calls ``Dler().downLoad(10)``, creates the ``alibaba_cow_powder_3``
    table and then loops forever: every ``(url, src)`` pair taken from
    ``DBN.queueForDownLoad`` is parsed with ``PPer``; each parsed row gets
    the page url appended and is stored.  A success or failure line is
    printed per url.  The loop has no exit condition.

    :return: does not return normally
    """
    downloader = Dler()
    downloader.downLoad(10)
    # Only dbName/tableName are passed; the other connection arguments were
    # commented out in the original call.
    page_table = DBService(dbName='alibaba', tableName='alibaba_cow_powder_3')
    page_table.createTable(tableTitle=[
        'company_name',
        'keyword',
        'sale',
        'href',
        'member_id',
        'offer_id',
        'cxt_year',
        'credit_detail_href',
        'goods_from',
        'product_title_sample',
        'product_detail_sample',
        'location',
        'url_base',
    ])
    while True:
        pending = DBN.queueForDownLoad
        if pending.empty():
            # Nothing to parse yet: wait a second before polling again.
            time.sleep(1)
            continue
        url, src = pending.get()
        rows = PPer(src).pageParser()
        if not rows:
            print(u'--失败:%s'%url)
            continue
        # Append the source url as the last column ('url_base') of each row.
        rows = map(lambda row: row + [url], rows)
        page_table.data2DB(data=rows)
        print(u'++成功:%s'%url)
开发者ID:yangmingsong,项目名称:python,代码行数:44,代码来源:second_page_2.py
示例17: get_parser
def get_parser(url, driver):
    """Scrape the contact details of one page and store them.

    After a random pause the page is loaded with ``driver``, the contact
    name/sex/job and the phone/fax/address entries are read through CSS
    selectors, and one row is written to ``alibaba_cow_powder_phone``.
    Every field that cannot be read stays ``"-"``.

    :param url: page address to load; also stored as the last column
    :param driver: browser driver exposing ``get``, ``title``,
        ``find_element_by_css_selector`` and ``find_elements_by_css_selector``
    """
    import random

    # abs() keeps the gauss-distributed delay non-negative.
    time.sleep(abs(random.gauss(5, 5)))
    driver.get(url)
    print(driver.title)

    contacts_name = "-"
    contacts_sex = "-"
    contacts_job = "-"
    try:
        contacts_name = driver.find_element_by_css_selector(".contact-info .membername").text
        contact_line = driver.find_element_by_css_selector(".contact-info>dl>dd").text
        contacts_sex = contact_line.split(" ")[1]
        contacts_job = contact_line.split("(")[1].split(")")[0]
    except Exception:
        # Best effort: a missing element or an unexpected text layout leaves
        # the remaining fields at "-".  Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass

    # Label text shown in each <dt> -> field that receives the <dd> text.
    field_by_label = {
        u"移动电话:": "cell_phone",
        u"电 话:": "tel_phone",
        u"传 真:": "fax_phone",
        u"地 址:": "shop_addr",
    }
    details = {
        "cell_phone": "-",
        "tel_phone": "-",
        "fax_phone": "-",
        "shop_addr": "-",
    }
    phone_frames = driver.find_elements_by_css_selector(".contcat-desc dl")
    for position in range(1, len(phone_frames) + 1):
        # nth-child() is 1-based.
        row_selector = ".contcat-desc dl:nth-child(" + str(position) + ")"
        label = driver.find_element_by_css_selector(row_selector + " dt").text.strip()
        field = field_by_label.get(label)
        if field is not None:
            details[field] = driver.find_element_by_css_selector(row_selector + " dd").text

    spider_time = time.strftime("%Y-%m-%d %X", time.localtime())
    result = [
        contacts_name, contacts_sex, contacts_job,
        details["cell_phone"], details["tel_phone"], details["fax_phone"],
        details["shop_addr"], spider_time, url,
    ]
    DB = DBService(dbName="alibaba", tableName="alibaba_cow_powder_phone")
    DB.data2DB(data=result)
开发者ID:yangmingsong,项目名称:python,代码行数:39,代码来源:spider_third_page.py
示例18: getCategoryAndStartUrl
def getCategoryAndStartUrl():
    """Fetch the category tree, store it and queue the leaf categories.

    The callback wrapper around the response is cut off, the payload is
    decoded as GBK and parsed as JSON.  For every fourth-level node a row
    with the second-, third- and fourth-level titles is collected; each
    title is split on ``'|'`` and stored as (part 1, part 0).  The
    fourth-level pair is also put on the module-level
    ``queue_for_url_targetBase`` queue, which this function re-creates.

    :return: list of the collected category rows
    """
    import json
    global queue_for_url_targetBase
    queue_for_url_targetBase = Queue(0)

    raw = myUrlOpen.requestByProxy('http://dc.3.cn/category/get?callback=getCategoryCallback')
    # Keep what follows the first '(' and drop the final character.
    payload = raw.split('(', 1)[1][:-1]
    payload = payload.decode('gbk', 'ignore')

    category = []
    for first in json.loads(payload)['data']:
        for second in first['s']:
            second_title = second['n']
            for third in second['s']:
                third_title = third['n']
                for fourth in third['s']:
                    se_parts = second_title.split('|')
                    ti_parts = third_title.split('|')
                    fo_parts = fourth['n'].split('|')
                    category.append([
                        se_parts[1], se_parts[0],
                        ti_parts[1], ti_parts[0],
                        fo_parts[1], fo_parts[0],
                    ])
                    queue_for_url_targetBase.put((fo_parts[1], fo_parts[0]))

    keyword_table = DBService(dbName='jddata', tableName='jdkeyword')
    keyword_table.createTable(tableTitle=[
        'category_fi_name', 'category_fi',
        'category_se_name', 'category_se',
        'category_ti_name', 'category_ti',
    ])
    keyword_table.data2DB(data=category)
    return category
开发者ID:yangmingsong,项目名称:python,代码行数:36,代码来源:jdNew.py
示例19: dataGen
def dataGen():
    """Join company rows with product values and store the result.

    For every key of ``companyInfo()`` that is also a key of
    ``productInfo()``, the product value is appended to the company row.
    The joined rows are written to ``thirdPartShopInfoAddtest``, whose
    title is the ``thirdPartShopInfo`` title plus ``'commnetCount'``.
    """
    comDict = companyInfo()
    proDict = productInfo()
    # A plain local name avoids shadowing the builtin ``dict``, and
    # ``key in proDict`` is a hash lookup instead of the linear
    # ``in proDict.keys()`` scan on Python 2.
    merged = {}
    for key, company_row in comDict.items():
        if key in proDict:
            merged[key] = company_row + [proDict[key]]
    data = list(merged.values())
    db1 = DBService(dbName='jddata', tableName='thirdPartShopInfo')
    title = db1.getTableTitle() + ['commnetCount']
    print(title)
    db2 = DBService(dbName='jddata', tableName='thirdPartShopInfoAddtest')
    db2.createTable(tableTitle=title)
    db2.data2DB(data=data)
开发者ID:yangmingsong,项目名称:python,代码行数:17,代码来源:jdDataAna.py
示例20: int
#coding:utf8
__author__ = '613108'
from ms_spider_fw.DBSerivce import DBService
# Count the weekly Tmall rows whose second-to-last column, read as an
# integer, is at least 35.
dbs = DBService(dbName='elec_platform', tableName='tmall_baseinfo_everyweek')
data = [row for row in dbs.getData() if not int(row[-2]) < 35]
print(len(data))
开发者ID:yangmingsong,项目名称:python,代码行数:7,代码来源:tmallDataWeekly.py
注:本文中的ms_spider_fw.DBSerivce.DBService类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论