This page collects typical usage examples of the Python function user_agent.generate_user_agent. If you have been struggling with questions such as: what exactly does generate_user_agent do? How is it called? What does it look like in real code? Then the curated examples below should help.
A total of 20 code examples of generate_user_agent are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
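Before diving in, here is a minimal, self-contained sketch of the calls the examples below make. One caveat worth flagging: the examples mix two keyword names for the operating-system filter. Older releases of the user_agent library accepted platform= (Examples 2, 5, and 8), while newer releases renamed it to os (Examples 4 and 6), so match the keyword to the library version you have installed. The argument values shown here all come from the examples below.

from user_agent import generate_user_agent

# Fully random user agent string
print(generate_user_agent())

# Constrain operating system and browser
# (newer releases use os=; older releases spelled it platform=)
print(generate_user_agent(os='win', navigator='chrome'))

# Constrain the device type, as in Example 3 below
print(generate_user_agent(device_type='smartphone', navigator='chrome'))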
Example 1: test_navigator_option
def test_navigator_option(self):
    for x in range(100):
        ua = generate_user_agent(navigator='firefox')
        self.assertTrue('firefox' in ua.lower())
        ua = generate_user_agent(navigator='chrome')
        self.assertTrue('chrome' in ua.lower())
Developer: jamb0ss, Project: user_agent, Lines: 7, Source: test.py
Example 2: test_platform_option_tuple
def test_platform_option_tuple(self):
    for x in range(100):
        ua = generate_user_agent(platform=('win', 'linux'))
        ua = generate_user_agent(platform=('win', 'linux', 'mac'))
        ua = generate_user_agent(platform=('win',))
        ua = generate_user_agent(platform=('linux',))
        ua = generate_user_agent(platform=('mac',))
Developer: alexfalcucc, Project: user_agent, Lines: 7, Source: test.py
Example 3: test_device_type_smartphone_chrome
def test_device_type_smartphone_chrome():
    for _ in range(50):
        agent = generate_user_agent(device_type='smartphone',
                                    navigator='chrome')
        assert 'Mobile' in agent
        agent = generate_user_agent(device_type='tablet', navigator='chrome')
        assert 'Mobile' not in agent
Developer: lorien, Project: user_agent, Lines: 7, Source: user_agent.py
Example 4: test_platform_option_tuple
def test_platform_option_tuple():
    for _ in range(50):
        generate_user_agent(os=('win', 'linux'))
        generate_user_agent(os=('win', 'linux', 'mac'))
        generate_user_agent(os=('win',))
        generate_user_agent(os=('linux',))
        generate_user_agent(os=('mac',))
Developer: lorien, Project: user_agent, Lines: 7, Source: user_agent.py
Example 5: test_platform_navigator_option
def test_platform_navigator_option(self):
    for x in range(100):
        ua = generate_user_agent(platform='win', navigator='firefox')
        self.assertTrue('firefox' in ua.lower())
        self.assertTrue('windows' in ua.lower())
        ua = generate_user_agent(platform='win', navigator='chrome')
        self.assertTrue('chrome' in ua.lower())
        self.assertTrue('windows' in ua.lower())
Developer: jamb0ss, Project: user_agent, Lines: 9, Source: test.py
Example 6: test_platform_option
def test_platform_option():
    for _ in range(50):
        agent = generate_user_agent(os='linux')
        assert 'linux' in agent.lower()
        agent = generate_user_agent(os='win')
        assert 'windows' in agent.lower()
        agent = generate_user_agent(os='mac')
        assert 'mac' in agent.lower()
Developer: lorien, Project: user_agent, Lines: 10, Source: user_agent.py
Example 7: test_navigator_option
def test_navigator_option():
    for _ in range(50):
        agent = generate_user_agent(navigator='firefox')
        assert 'firefox' in agent.lower()
        agent = generate_user_agent(navigator='chrome')
        assert 'chrome' in agent.lower()
        agent = generate_user_agent(navigator='ie')
        assert 'msie' in agent.lower() or 'rv:11' in agent.lower()
Developer: lorien, Project: user_agent, Lines: 10, Source: user_agent.py
Example 8: test_platform_option
def test_platform_option(self):
    for x in range(100):
        ua = generate_user_agent(platform='linux')
        self.assertTrue('linux' in ua.lower())
        ua = generate_user_agent(platform='win')
        self.assertTrue('windows' in ua.lower())
        ua = generate_user_agent(platform='mac')
        self.assertTrue('mac' in ua.lower())
        self.assertRaises(UserAgentRuntimeError,
                          generate_user_agent,
                          platform=11)
Developer: alexfalcucc, Project: user_agent, Lines: 14, Source: test.py
Example 9: getheadline
def getheadline(companyName, day, firstlink, prevdatelink):
    '''
    scrape headlines from finance.yahoo.com
    '''
    #date = '2016-02-'+str(day)
    searchUrl = 'http://finance.yahoo.com/q/h?s='+companyName+'&t=2016-04-'+str(day)
    # use a fake user agent
    #ua = generate_user_agent()
    head = generate_user_agent().encode('ascii', 'ignore')
    headers = {'User-Agent': head}  # fixed: the source used the key 'useragent', which servers do not recognize
    response = requests.get(searchUrl, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    links = soup.select('div.yfi_quote_headline ul > li > a')
    # write the search results to a file, a new file for each day
    filename = 'links'+str(day)+'.txt'
    with io.open(filename, encoding='utf-8', mode='w+') as ns:
        count = 1
        for link in links:
            nextlinks = link.get('href')+'\n'
            if count == 1:
                ns.write(nextlinks)
                firstlink = nextlinks
            elif prevdatelink == nextlinks:
                print "All unique headlines scraped"
                break
            else:
                ns.write(nextlinks)
            count += 1
    ns.close()  # redundant after the with block, kept from the source
    return firstlink
Developer: aizaazali, Project: StockMarketAnalyzer-Hive_Pig, Lines: 33, Source: get_headlines.py
Example 10: get_proxies
def get_proxies(proxy_type, ip_set, start_page, end_page):
    """extract proxies from page source code, store them in redis
    Args:
        proxy_type (str): base url for the proxy type, like the global variables CHINA and OTHER
        ip_set (str): name of the redis set in which the ips should be stored
        start_page (int): which page to start crawling
        end_page (int): which page to stop crawling
    """
    try:
        conn = get_connection()
    except Exception:
        print 'Error while connecting to redis'
        return
    proxies, curr_proxy = [], None
    for page in xrange(start_page, end_page+1):
        if page % 2 == 0:
            time.sleep(20)
        # get page source code
        headers = {'user-agent': generate_user_agent(), 'referer': 'http://www.xicidaili.com/'}
        text = requests.get(proxy_type+str(page), headers=headers).text
        # extract ips from source code
        soup = BeautifulSoup(text, 'lxml')
        for tr in soup.find_all('tr')[1:]:
            tds = tr.find_all('td')
            # if u'美国' in tds[3].text:  # optional filter: keep only US proxies
            proxy = tds[1].text+':'+tds[2].text
            if is_valid('https://www.amazon.com/', proxy):
                conn.sadd(ip_set, proxy)
                print '%s added to ip set %s' % (proxy, ip_set)
Developer: bdchinacs, Project: AmazonRobot, Lines: 30, Source: GetProxy.py
Example 11: getBaiduDictCate
def getBaiduDictCate():
    """
    Fetch the categories of the Baidu input-method dictionary. There are three levels of
    categories; because the third level is too fine-grained and sparse, it is merged into
    its second-level parent.
    :return: two dicts -- the first maps top-level category IDs to their names, the second
             maps each top-level category in the first dict to all of its subcategories
    """
    bigCateDict = {}
    smallCateDict = {}
    initPageURL = r'https://shurufa.baidu.com/dict'
    cateBaseURL = r'https://shurufa.baidu.com/dict_list?cid='
    # guard against 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer
    # fetch the top-level categories
    try:
        request = urllib2.Request(url=initPageURL, headers=headers)
        response = urllib2.urlopen(request)
        data = response.read()
    except urllib2.HTTPError, e:
        print 'Error while getting the big category, error code:', e.code
        sys.exit()
Developer: WuLC, Project: ThesaurusSpider, Lines: 25, Source: getCategory.py
Example 12: getCategoryPages
def getCategoryPages(caterotyID, downloadDIR):
    """Use the category's initial page to find the total page count for that category,
    and put all page numbers into PAGE_QUEUE for all threads to download.
    :param caterotyID: ID of the dictionary category to download, used to build the correct url
    :param downloadDIR: directory in which downloaded dictionaries are stored
    :return:
    """
    global CATEID, DOWNLOAD_DIR, PAGE_BASE_URL, THREAD_LOCK
    CATEID = caterotyID
    DOWNLOAD_DIR = downloadDIR
    PAGE_BASE_URL = 'https://shurufa.baidu.com/dict_list?cid=%s' % CATEID
    pagePattern = re.compile(r'page=(\d+)#page')  # regex for finding the URLs of the other pages in the page source
    # guard against 502 errors
    userAgent = generate_user_agent()
    referrer = 'http://shurufa.baidu.com/dict.html'
    headers = {}
    headers['User-Agent'] = userAgent
    headers['Referer'] = referrer
    # find the largest page number; the pages then run from 1 to that maximum
    # the server may return 502/500 errors, so retry up to maxTry times
    maxTry = 8
    data = None
    for i in xrange(maxTry):
        try:
            request = urllib2.Request(url=PAGE_BASE_URL, headers=headers)
            response = urllib2.urlopen(request)
            data = response.read()
            break
        except urllib2.HTTPError, e:
            if i == maxTry-1:
                with io.open(DOWNLOAD_LOG.decode('utf8'), mode='a', encoding='utf8') as f:
                    f.write((str(e.code)+' error while parsing url '+PAGE_BASE_URL+'\n').decode('utf8'))
        except:
            pass  # the rest of this function (35 lines in the source) is truncated in the excerpt
Developer: WuLC, Project: ThesaurusSpider, Lines: 35, Source: multiThreadDownload.py
Example 13: getarticle
def getarticle(readfile):
    ''' get the article and save it in a different file '''
    try:
        fileopen = open(readfile)
    except IOError:
        print "file " + readfile + " not in the location specified"
        return
    i = 1
    for line in fileopen:
        try:
            ua = generate_user_agent()
            head = ua.encode('ascii', 'ignore')
            headers = {'User-Agent': head}  # fixed: the source used the key 'useragent'
            print "reading article :"
            print line
            html = requests.get(line, headers=headers).text
            tex = fulltext(html)
            writefile = "201604"+str(j)+"_"+str(i)+".txt"  # `j` is a global defined elsewhere in the source file
            with io.open(writefile, encoding='utf-8', mode='w+') as ns:
                strng = ' '.join(tex.split())
                ns.write(strng)
            ns.close()  # redundant after the with block, kept from the source
            i = i + 1
        except:
            pass
Developer: aizaazali, Project: StockMarketAnalyzer-Hive_Pig, Lines: 27, Source: getarticle.py
Example 14: get_address
def get_address(proxy):
    """fetch an American address from https://fakena.me/random-real-address/
    Args:
        proxy (str): proxy used to visit the target site, ip:port
    Returns:
        format_addr (str): American address in the form of "address_line # city # state # zip"
    """
    ignore_warnings()
    url = r'https://fakena.me/random-real-address/'
    referer = r'https://fakena.me'
    header = {'user-agent': generate_user_agent(), 'referer': referer}
    curr_proxy = {
        'http': 'http://%s' % proxy
    }
    text = requests.get(url, headers=header, proxies=curr_proxy).text
    pattern = re.compile('<strong>(.+)<br>(.+)</strong>')
    result = re.findall(pattern, text)
    if result:  # sometimes the result is empty
        print result[0][0], result[0][1]
        address_line = result[0][0]
        city, state_zip = result[0][1].split(',')
        state, zip = state_zip.split()
        format_addr = address_line+'#'+city+'#'+state+'#'+zip
        return format_addr
    else:
        return ''
Developer: bdchinacs, Project: AmazonRobot, Lines: 29, Source: GetUserInfo.py
Example 15: send_query
def send_query(self, query):
    # TODO: Randomize query, i.e. remove/change unused arguments to vary query signature
    self.queries_sent += 1
    if self.queries_sent % self.queries_change == 0:
        self.queries_change = randint(3, 13)
        ScholarConf.USER_AGENT = generate_user_agent()
    return super(BibDLQuerier, self).send_query(query)
Developer: igsor, Project: bibdl, Lines: 8, Source: bibdl.py
Example 16: on_blocked
def on_blocked(self):
    ScholarConf.USER_AGENT = generate_user_agent()  # Randomize user agent
    self.timeout *= 2.0  # Increase timeout (exponential backoff)
    if self.blocked_cmd is not None:
        status, output = getstatusoutput(self.blocked_cmd)
        if status != 0:
            self.status.error(output)
Developer: igsor, Project: bibdl, Lines: 8, Source: bibdl.py
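Examples 15 and 16 show the same underlying pattern from two angles: regenerate the user agent after every few queries, and regenerate it (plus back off) as soon as the target starts blocking. For readers who want the pattern without the ScholarConf plumbing, here is a minimal sketch built on requests; the RotatingSession class, its name, and the rotate_every threshold are illustrative assumptions, not part of the bibdl source.

import requests
from user_agent import generate_user_agent

class RotatingSession:
    # Hypothetical wrapper: picks a fresh user agent every `rotate_every` requests.
    def __init__(self, rotate_every=10):
        self.session = requests.Session()
        self.rotate_every = rotate_every
        self.sent = 0
        self._rotate()

    def _rotate(self):
        # Replace the session-wide User-Agent header with a newly generated value
        self.session.headers['User-Agent'] = generate_user_agent()

    def get(self, url, **kwargs):
        if self.sent and self.sent % self.rotate_every == 0:
            self._rotate()
        self.sent += 1
        return self.session.get(url, **kwargs)

On a block (for example an HTTP 403), calling _rotate() and doubling a timeout, as Example 16 does, is the usual next step.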
Example 17: get_request
def get_request(url):
    """
    Takes in a url
    Returns the HTTP response for the page of user posts
    """
    headers = {"User-Agent": generate_user_agent()}
    response = requests.get(url, headers=headers)  # fixed: the source passed `headers` positionally, which requests treats as query params
    return response
Developer: millertracy, Project: g-project, Lines: 9, Source: mhf_scrape.py
Example 18: invoke
def invoke(self, url):
    headers = {'User-Agent': generate_user_agent()}
    req = requests.get(url, headers=headers)
    soup = BeautifulSoup(req.text, 'lxml')  # from_encoding="gb2312")
    books = soup.select("div.book_list > ul > li")
    for book in books:
        self.parse_book(book)
Developer: sjtu-cs, Project: service-scraper, Lines: 9, Source: book_parser.py
Example 19: download_images
def download_images(link_file_path, download_dir, log_dir):
    """download images whose links are in the link file
    Args:
        link_file_path (str): path of the file containing links of images
        download_dir (str): directory to store the downloaded images
    Returns:
        None
    """
    print('Start downloading with link file {0}..........'.format(link_file_path))
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    main_keyword = link_file_path.split('/')[-1]
    log_file = log_dir + 'download_selenium_{0}.log'.format(main_keyword)
    logging.basicConfig(level=logging.DEBUG, filename=log_file, filemode="a+", format="%(asctime)-15s %(levelname)-8s %(message)s")
    img_dir = download_dir + main_keyword + '/'
    count = 0
    headers = {}
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    # start to download images
    with open(link_file_path, 'r') as rf:
        for link in rf:
            try:
                o = urlparse(link)
                ref = o.scheme + '://' + o.hostname
                #ref = 'https://www.google.com'
                ua = generate_user_agent()
                headers['User-Agent'] = ua
                headers['referer'] = ref
                print('\n{0}\n{1}\n{2}'.format(link.strip(), ref, ua))
                req = urllib.request.Request(link.strip(), headers=headers)
                response = urllib.request.urlopen(req)
                data = response.read()
                file_path = img_dir + '{0}.jpg'.format(count)
                with open(file_path, 'wb') as wf:
                    wf.write(data)
                print('Process-{0} download image {1}/{2}.jpg'.format(main_keyword, main_keyword, count))
                count += 1
                if count % 10 == 0:
                    print('Process-{0} is sleeping'.format(main_keyword))
                    time.sleep(5)
            # fixed: HTTPError must be caught before URLError (it is a subclass);
            # the source listed URLError first, making the HTTPError branch unreachable
            except urllib.error.HTTPError as e:
                print('HTTPError')
                logging.error('HTTPError while downloading image {0}, http code {1}, reason: {2}'.format(link, e.code, e.reason))
                continue
            except urllib.error.URLError as e:
                print('URLError')
                logging.error('URLError while downloading image {0}, reason: {1}'.format(link, e.reason))
                continue
            except Exception as e:
                print('Unexpected Error')
                logging.error('Unexpected error while downloading image {0}, error type: {1}, args: {2}'.format(link, type(e), e.args))
                continue
Developer: linhanquan, Project: GoogleImagesDownloader, Lines: 56, Source: download_with_selenium.py
Example 20: download_with_time_limit
def download_with_time_limit(link_file_path, download_dir, log_dir, limit_time=10):
    main_keyword = link_file_path.split('/')[-1]
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)
    log_file = log_dir + 'download_selenium_{0}.log'.format(main_keyword)
    logging.basicConfig(level=logging.DEBUG, filename=log_file, filemode="a+", format="%(asctime)-15s %(levelname)-8s %(message)s")
    img_dir = download_dir + main_keyword + '/'
    count = 0
    headers = {}
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)
    signal.signal(signal.SIGALRM, handler)
    with open(link_file_path, 'r') as rf:
        for link in rf:
            try:
                ref = 'https://www.google.com'
                o = urlparse(link)
                ref = o.scheme + '://' + o.hostname
                ua = generate_user_agent()
                headers['User-Agent'] = ua
                headers['referer'] = ref
                # limit the time allowed for downloading an image
                try:
                    signal.alarm(limit_time)  # set a timeout (alarm)
                    req = urllib.request.Request(link.strip(), headers=headers)
                    response = urllib.request.urlopen(req)
                    data = response.read()
                except TimeLimitError as e:
                    print('TimeLimitError: process-{0} encounters {1}'.format(main_keyword, e.value))
                    logging.error('TimeLimitError while downloading image {0}'.format(link))
                    continue
                finally:
                    signal.alarm(0)  # disable the alarm
                file_path = img_dir + '{0}.jpg'.format(count)
                with open(file_path, 'wb') as wf:
                    wf.write(data)
                print('Process-{0} download image {1}/{2}.jpg'.format(main_keyword, main_keyword, count))
                count += 1
                if count % 10 == 0:
                    print('Process-{0} is sleeping'.format(main_keyword))
                    time.sleep(5)
            except urllib.error.HTTPError as e:
                print('HTTPError')
                logging.error('HTTPError while downloading image {0}, http code {1}, reason: {2}'.format(link, e.code, e.reason))
                continue
            except urllib.error.URLError as e:
                print('URLError')
                logging.error('URLError while downloading image {0}, reason: {1}'.format(link, e.reason))
                continue
            except Exception as e:
                print('Unexpected Error')
                logging.error('Unexpected error while downloading image {0}, error type: {1}, args: {2}'.format(link, type(e), e.args))
                continue
Developer: WuLC, Project: GoogleImagesDownloader, Lines: 55, Source: download_images_with_time_limit.py
Note: the user_agent.generate_user_agent examples on this page were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation hosting platforms. The snippets are selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, and redistribution or use should follow the License of the corresponding project. Do not reproduce without permission.