本文整理汇总了Python中util.http.get_html函数的典型用法代码示例。如果您正苦于以下问题:Python get_html函数的具体用法?Python get_html怎么用?Python get_html使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_html函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: snopes
def snopes(inp):
    ".snopes <topic> -- searches snopes for an urban legend about <topic>"
    # Query the snopes search page for the topic.
    results = http.get_html(search_url, sp_q=inp, sp_c="1")
    hits = results.xpath("//a[@target='_self']/@href")
    if not hits:
        return "no matching pages found"
    # Fetch the first hit and pull the Claim/Status lines from its text.
    page_text = http.get_html(hits[0]).text_content()
    claim = re.search(r"Claim: .*", page_text).group(0).strip()
    status_match = re.search(r"Status: .*", page_text)
    if status_match is None:  # new-style statuses
        verdict = re.search(r"FALSE|TRUE|MIXTURE|UNDETERMINED",
                            page_text).group(0).title()
        status = "Status: %s." % verdict
    else:
        status = status_match.group(0).strip()
    # compress whitespace (including non-breaking spaces)
    claim = re.sub(r"[\s\xa0]+", " ", claim)
    status = re.sub(r"[\s\xa0]+", " ", status)
    return "%s %s %s" % (claim, status, hits[0])
开发者ID:APU-CSFC,项目名称:skybot,代码行数:25,代码来源:snopes.py
示例2: get_beer
def get_beer(inp):
    """ search beeradvocate.com """
    search_url = "http://beeradvocate.com/search"
    base_url = "http://beeradvocate.com"
    # beeradvocate expects a POSTed form: query text plus a 'beer' type flag.
    form = urlencode({'q': inp, 'qt': 'beer'})
    listing = http.get_html(search_url, post_data=form)
    matches = listing.xpath("//div[@id='content']/div[@class='pageWidth']/div[@class='pageContent']/div[@class='mainContainer']/div[@class='mainContent']/fieldset/div[@id='baContent']/div[2]/ul/li[1]")
    if not matches:
        return "No Results"
    top = matches[0]
    # Follow the first result to its detail page to read the two scores.
    page_url = base_url + top.xpath('a')[0].get('href')
    scores = http.get_html(page_url).cssselect('.BAscore_big')
    beer_info = [anchor.text_content() for anchor in top.xpath('a')]
    return "%s by %s :: Community Score: %s :: Bros Score: %s :: %s" % (
        beer_info[0], beer_info[1],
        scores[0].text_content(), scores[1].text_content(), page_url)
开发者ID:coaxmetal,项目名称:DL-Skybot-Plugins,代码行数:25,代码来源:beeradvocate.py
示例3: man
def man(inp, say=''):
    """.man <command> [section] - Returns man page for specified command, section defaults to 1 if not specified."""
    raw = inp.split()
    command = raw[0]
    # Section defaults to "1" unless a numeric second argument was given.
    if len(raw) == 2 and raw[1].isdigit():
        page = raw[1]
    else:
        page = "1"
    try:
        manpage = str(http.get_html(base_url, topic=command, section=page))
        # If not specified man page, retry across all sections.
        if re.match(r'.+(\>No matches for ").+', manpage):
            page = "all"
            manpage = str(http.get_html(base_url, topic=command, section=page))
        # If man page exists for command
        # NOTE(review): the "and 1 == 2" guard deliberately disables this
        # branch, so the system-manpage fallback below is always taken.
        if not re.match(r'.+(\>No matches for ").+', manpage) and 1 == 2:
            if page != "all":
                say("{} - {}({})".format(web.try_googl(base_url.format(command, page)),
                                         command, page))
            else:
                # BUG FIX: .format() was called with a single tuple argument
                # (doubled parentheses), which would raise IndexError on the
                # second placeholder; pass the four values individually.
                say("{} - {}({}) (No section {})".format(web.try_googl(base_url.format(command, page)),
                                                         command, page, raw[1]))
        else:
            # Fall back to the locally installed man page, pasted to a
            # hastebin and shortened.
            system_manpage = get_system_manpage(command)
            if system_manpage:
                haste_url = web.haste(system_manpage, ext='txt')
                googl_url = web.try_googl(haste_url)
                say("{} - {}".format(googl_url, command, page))
            else:
                return "There is no man page for {}.".format(command)
    except Exception as e:  # (http.HTTPError, http.URLError) as e:
        print(">>> u'HTTP Error: {}'".format(e))
        return "HTTP Error, please try again in a few minutes."
开发者ID:Cameri,项目名称:Gary,代码行数:35,代码来源:man.py
示例4: timefunction2
def timefunction2(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True
    if '@' in inp:
        # Look up another user's saved location instead of our own.
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if inp:
            # if not location: save = True
            if " dontsave" in inp:
                save = False
            location = inp.split()[0]
        elif not location:
            notice(time.__doc__)
            return
    # now, to get the actual time
    try:
        url = "https://time.is/%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[@id='msgdiv']/h1/a/text()")[0].strip()
        curtime = html.xpath("//div[contains(@id,'twd')]/text()")[0].strip()
        ampm = html.xpath("//div[contains(@id,'twd')]/span/text()")[0].strip()
        date = html.xpath("//h2[contains(@id,'dd')]/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."
    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)
    return u'Time in {} is \x02{} {}\x02 [{}]'.format(prefix, curtime, ampm.upper(), date)
开发者ID:Noclip21,项目名称:uguubot,代码行数:34,代码来源:time.py
示例5: forum_link
def forum_link(inp, bot=None):
    # Bail out silently unless SA credentials are configured.
    config = bot.config
    if 'sa_user' not in config or 'sa_password' not in config:
        return
    login(config['sa_user'], config['sa_password'])
    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
                           cookies=True)
    crumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
    if not crumbs:
        return
    thread_title = crumbs[-1]
    forum_title = forum_abbrevs.get(crumbs[-2], crumbs[-2])
    poster = thread.xpath('//dt[@class="author"]/text()')[0]
    # 1 post per page => n_pages = n_posts
    last_page = thread.xpath('//a[@title="last page"]/@href')
    if last_page:
        num_posts = int(last_page[0].rsplit('=', 1)[1])
    else:
        num_posts = 1
    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
        forum_title, thread_title, poster, num_posts,
        's' if num_posts > 1 else '')
开发者ID:pvenegas,项目名称:skybot,代码行数:31,代码来源:somethingawful.py
示例6: mtg
def mtg(inp):
    """.mtg <name> - Gets information about Magic the Gathering card <name>."""
    url = 'http://magiccards.info/query?v=card&s=cname'
    h = http.get_html(url, q=inp)
    name = h.find('body/table/tr/td/span/a')
    if name is None:
        return "no cards found"
    card = name.getparent().getparent().getparent()
    # Renamed from 'type' to avoid shadowing the builtin.
    card_type = card.find('td/p').text.replace('\n', '')
    # The rules text embeds <br> tags; round-trip through '$' markers to turn
    # line breaks into sentence breaks. this is ugly
    text = http.html.tostring(card.xpath("//p[@class='ctext']/b")[0])
    text = text.replace('<br>', '$')
    text = http.html.fromstring(text).text_content()
    text = re.sub(r'(\w+\s*)\$+(\s*\w+)', r'\1. \2', text)
    text = text.replace('$', ' ')
    text = re.sub(r'\(.*?\)', '', text)  # strip parenthetical explanations
    text = re.sub(r'\.(\S)', r'. \1', text)  # fix spacing
    name.make_links_absolute(base_url=url)
    link = name.attrib['href']
    name = name.text_content().strip()
    card_type = card_type.strip()
    text = ' '.join(text.split())
    return (
        ' | '.join(
            (" ".join(name.split()),
             " ".join(card_type.split()),
             " ".join(text.split()),
             link))
    )
开发者ID:Cameri,项目名称:Gary,代码行数:35,代码来源:mtg.py
示例7: readtitle
def readtitle(match, say=None, nick=None):
    """Fetch a matched URL and announce its shortened link plus page title."""
    parsed_url = match.group().split(' ')[0]
    # Skip domains we never want to resolve.
    if any(word in parsed_url for word in skipurls):
        return
    try:
        page = http.get_html(parsed_url)
    except http.HTTPError as e:
        errors = {400: 'bad request (ratelimited?) 400',
                  401: 'unauthorized 401 ',
                  403: 'forbidden 403',
                  404: 'invalid user/id 404',
                  500: 'something is broken 500',
                  502: 'something is down ("getting upgraded?") 502',
                  503: 'something is overloaded 503',
                  410: 'something something 410'}
        if e.code == 404:
            return 'bad url?'
        if e.code in errors:
            return 'error: ' + errors[e.code]
        return 'error: unknown %s' % e.code
    # Pages without a <title> still get their short URL announced.
    try:
        titleuni = " - " + unicode(page.xpath('//title/text()')[0].strip())
    except IndexError:
        titleuni = ""
    shorturl = web.try_googl(parsed_url)
    say(shorturl + titleuni)
开发者ID:Cameri,项目名称:Gary,代码行数:30,代码来源:readtitle.py
示例8: fixchewy
def fixchewy(match, chan='', input=None):
    """Look up MTG card prices on deckbrew when chewy's bot reports an error."""
    if match.group(1) is not None:
        # Only react when the other bot reported an error without prices.
        if "Error" not in input.lastparam or "L: " in input.lastparam:
            return
        card = match.group(1)
    else:
        # A card URL was matched; scrape the card name from the page title.
        url = match.group(2)
        result = http.get_html(url)
        card = result.xpath('//title')[0].text.split("(")[0].strip()
    result = http.get_json("http://api.deckbrew.com/mtg/cards", name=card)
    if len(result) == 0:
        return
    for cards in result:
        if cards['name'].lower() == card.lower():
            card = cards
            break
    # BUG FIX: 'price' was previously unbound (NameError) when no non-promo
    # edition carried a price; initialise it before the loop.
    price = None
    for edition in card['editions']:
        # Skip promo sets (set ids starting with "p").
        if edition['set_id'][:1] != "p" and 'price' in edition:
            price = edition['price']
            break
    if price:
        prices = "L: $%s M: $%s H: $%s" % ('{:.2f}'.format(price['low']/100.), '{:.2f}'.format(price['median']/100.), '{:.2f}'.format(price['high']/100.))
        return "chewy's bot sucks here are prices: %s" % prices
开发者ID:Veste,项目名称:skybot,代码行数:25,代码来源:mtg.py
示例9: define
def define(text):
    """define <word> -- Fetches definition of <word>.
    :type text: str
    """
    url = 'http://ninjawords.com/'
    h = http.get_html(url + http.quote_plus(text))
    # ninjawords has used several markups over time; try all of them.
    definition = h.xpath('//dd[@class="article"] | '
                         '//div[@class="definition"] |'
                         '//div[@class="example"]')
    if not definition:
        return 'No results for ' + text + ' :('
    result = format_output(h, definition, True)
    if len(result) > 450:
        # Too long: retry in short form, then truncate at a word boundary.
        result = format_output(h, definition, False)
        if len(result) > 450:
            result = result[:result.rfind(' ', 0, 450)]
            result = re.sub(r'[^A-Za-z]+\.?$', '', result) + ' ...'
    return result
开发者ID:FurCode,项目名称:RoboCop2,代码行数:25,代码来源:dictionary.py
示例10: amazon
def amazon(inp):
    """az [query] -- Searches amazon for query"""
    href = "http://www.amazon.com/s/url=search-alias%3Daps&field-keywords={}".format(inp.replace(" ","%20"))
    results = http.get_html(href)
    # title = results.xpath('//title/text()')[0]
    # The first listing (result_0) is sometimes missing/an ad; fall back to
    # result_1 in that case.
    try:
        title = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_0']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_0']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    except IndexError:  # BUG FIX: was a bare except that hid unrelated errors
        title = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/h2/text()")[0]
        url = results.xpath("//li[@id='result_1']/div/div/div/div/div/a/@href")[0]
        price = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/a/span/text()")[0]
        rating = results.xpath("//li[@id='result_1']/div/div/div/div/div/div/div/span/span/a/i/span/text()")[0]
    # Extract the ASIN from the product URL for a short amzn.com link.
    azid = re.match(r'^.*\/dp\/([\w]+)\/.*',url).group(1)
    # Build a 5-character star bar filled to the rounded rating.
    star_count = round(float(rating.split(' ')[0]),0)
    stars=""
    for x in xrange(0,int(star_count)):
        stars = "{}{}".format(stars,'★')
    for y in xrange(int(star_count),5):
        stars = "{}{}".format(stars,'☆')
    return '\x02{}\x02 - {} - \x034{}\x02 - http://amzn.com/{}'.format(title, stars, price, azid).decode('utf-8')
开发者ID:FrozenPigs,项目名称:uguubot,代码行数:26,代码来源:amazon.py
示例11: time
def time(inp, nick="", reply=None, db=None, notice=None):
    "time [location] [dontsave] | [@ nick] -- Gets time for <location>."
    save = True
    if '@' in inp:
        # Query another user's stored location.
        nick = inp.split('@')[1].strip()
        location = database.get(db, 'users', 'location', 'nick', nick)
        if not location:
            return "No location stored for {}.".format(nick.encode('ascii', 'ignore'))
    else:
        location = database.get(db, 'users', 'location', 'nick', nick)
        if inp:
            if not location:
                save = True
            if " save" in inp:
                save = True
            location = inp.split()[0]
        elif not location:
            notice(time.__doc__)
            return
    # now, to get the actual time
    try:
        url = "https://www.google.com/search?q=time+in+%s" % location.replace(' ', '+').replace(' save', '')
        html = http.get_html(url)
        prefix = html.xpath("//div[contains(@class,'vk_c vk_gy')]//span[@class='vk_gy vk_sh']/text()")[0].strip()
        curtime = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_bk vk_ans']/text()")[0].strip()
        day = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/text()")[0].strip()
        date = html.xpath("//div[contains(@class,'vk_c vk_gy')]//div[@class='vk_gy vk_sh']/span/text()")[0].strip()
    except IndexError:
        return "Could not get time for that location."
    if location and save:
        database.set(db, 'users', 'location', location, 'nick', nick)
    return u'{} is \x02{}\x02 [{} {}]'.format(prefix, curtime, day, date)
开发者ID:edwinfinch,项目名称:uguubot,代码行数:34,代码来源:time.py
示例12: predb
def predb(inp):
    '.predb <query> -- searches scene releases using orlydb.com'
    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except HTTPError:
        return 'orlydb seems to be down'
    matches = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not matches:
        return "zero results"
    newest = matches[0]
    date, time = newest.xpath("span[@class='timestamp']/text()")[0].split()
    section, = newest.xpath("span[@class='section']//text()")
    name, = newest.xpath("span[@class='release']/text()")
    sizes = newest.xpath("span[@class='inforight']//text()")
    # Size column is optional; prepend the separator only when present.
    size = ' :: ' + sizes[0].split()[0] if sizes else ''
    return '%s - %s - %s%s' % (date, section, name, size)
开发者ID:APU-CSFC,项目名称:skybot,代码行数:26,代码来源:pre.py
示例13: ebay_url
def ebay_url(match,bot):
apikey = bot.config.get("api_keys", {}).get("ebay")
# if apikey:
# # ebay_item_re = (r'http:.+ebay.com/.+/(\d+).+', re.I)
# itemid = re.match('http:.+ebay.com/.+/(\d+).+',match, re.I)
# url = 'http://open.api.ebay.com/shopping?callname=GetSingleItem&responseencoding=JSON&appid={}&siteid=0&version=515&ItemID={}&IncludeSelector=Description,ItemSpecifics'.format(apikey,itemid.group(1))
# print url
# else:
print "No eBay api key set."
item = http.get_html(match)
title = item.xpath("//h1[@id='itemTitle']/text()")[0].strip()
price = item.xpath("//span[@id='prcIsum_bidPrice']/text()")
if not price: price = item.xpath("//span[@id='prcIsum']/text()")
if not price: price = item.xpath("//span[@id='mm-saleDscPrc']/text()")
if price: price = price[0].strip()
else: price = '?'
try: bids = item.xpath("//span[@id='qty-test']/text()")[0].strip()
except: bids = "Buy It Now"
feedback = item.xpath("//span[@class='w2b-head']/text()")
if not feedback: feedback = item.xpath("//div[@id='si-fb']/text()")
if feedback: feedback = feedback[0].strip()
else: feedback = '?'
return http.process_text("\x02{}\x02 - \x02\x033{}\x03\x02 - Bids: {} - Feedback: {}".format(title, price, bids, feedback))
开发者ID:Anonymike,项目名称:pasta-bot,代码行数:27,代码来源:urls.py
示例14: legal
def legal(inp):
    """Report whether a celebrity is 18+, from their rottentomatoes birth date."""
    now = datetime.datetime.now()
    name = inp.replace(' ', '_')
    html = http.get_html('http://rottentomatoes.com/celebrity/%s/' % (name))
    date = html.xpath('//dl[@class="bottom_divider"]/dd/text()')[0]
    # Birth date is rendered as e.g. "January 1, 1990".
    info = date.split(' ')
    month = info[0]
    birth_day = info[1].strip(",")
    birth_year = info[2]
    birth_month = months[month]
    birthdate = datetime.date(int(birth_year), int(birth_month), int(birth_day))
    age = now.year - int(birth_year)
    if age >= 18:
        return "legal - is %s" % (age)
    else:
        year_18 = int(birth_year) + 18
        birthday_18 = "%s %s %s" % (birth_day, full_month[month], year_18)
        # NOTE(review): timeuntil is handed a string for 'now' -- presumably
        # the timesince helper parses it; confirm against that module.
        # BUG FIX: removed the unreachable trailing
        # 'return months[birth_month]' that followed this return.
        return "%s will be 18 in %s" % (inp, timesince.timeuntil(birthdate, now=birthday_18))
开发者ID:limnick,项目名称:siri,代码行数:30,代码来源:legal.py
示例15: urban
def urban(inp):
    '''.u/.urban <phrase> [#] -- looks up <phrase> with [#] definition on urbandictionary.com'''
    args = inp.split(" ")
    # An optional trailing number selects which definition to show (1-based).
    index = 0
    if len(args) > 1:
        try:
            index = int(args[-1]) - 1
            args.pop()
        except ValueError:
            index = 0
    query = " ".join(args)
    url = 'http://www.urbandictionary.com/define.php'
    page = http.get_html(url, term=query)
    words = page.xpath("//*[@id='entries']/div/span")
    defs = page.xpath("//div[@class='definition']")
    if not defs:
        return 'no definitions found'
    # Put together a string from the xpath requests.
    out = words[index].text.strip() + ': ' + ' '.join(defs[index].text.split())
    if len(out) > 400:
        # Truncate at a word boundary to keep IRC lines short.
        out = out[:out.rfind(' ', 0, 400)] + '...'
    return out
开发者ID:NoiSek,项目名称:classhole,代码行数:35,代码来源:dictionary.py
示例16: forum_link
def forum_link(inp, api_key=None):
    """Announce SA thread title, forum, poster and post count for a matched link."""
    if api_key is None or 'user' not in api_key or 'password' not in api_key:
        return
    login(api_key['user'], api_key['password'])
    thread = http.get_html(showthread, threadid=inp.group(1), perpage='1',
                           cookies=True)
    crumbs = thread.xpath('//div[@class="breadcrumbs"]//a/text()')
    if not crumbs:
        return
    thread_title = crumbs[-1]
    forum_title = forum_abbrevs.get(crumbs[-2], crumbs[-2])
    poster = thread.xpath('//dt[contains(@class, author)]//text()')[0]
    # 1 post per page => n_pages = n_posts
    pages = thread.xpath('//a[@title="Last page"]/@href')
    num_posts = int(pages[0].rsplit('=', 1)[1]) if pages else 1
    return '\x02%s\x02 > \x02%s\x02 by \x02%s\x02, %s post%s' % (
        forum_title, thread_title, poster, num_posts,
        's' if num_posts > 1 else '')
开发者ID:Ell,项目名称:Siri,代码行数:30,代码来源:somethingawful.py
示例17: steamcalc
def steamcalc(inp, nick='', chan='', say=None):
    '''Usage: '.steamcalc username'. Grab's selected user's steam accounts monetary worth in USD.'''
    inpEncode = urllib.quote(inp)
    try:
        h = http.get_html("http://steamcalculator.com/id/%s" % inpEncode)
    except urllib2.HTTPError:
        return("Hmm it looks like you entered an incorrect name. Be sure that it has no spaces or non ascii characters.")
    try:
        getAmountText = h.xpath('//div[@id="rightdetail"]/text()')[0]
    except IndexError:
        say("That user doesnt exist or something. Fuck off.")
        # BUG FIX: without this return we fell through and crashed on the
        # next xpath lookup for a nonexistent user.
        return
    getAmountNum = h.xpath('//div[@id="rightdetail"]/h1/text()')[0]
    #getLastGame = h.xpath('//
    # "... N games ..." text -> game count; "$NN.NN ..." heading -> value.
    amountSplit = getAmountText.split(' ')
    amountGameNum = int(amountSplit[1])
    moneySplit = getAmountNum.split(' ')
    amountMonetary = moneySplit[0]
    valueStrip = amountMonetary.strip().lstrip("$")
    value = float(valueStrip)
    output = "\x02%s\x0f owns \x02%i\x0f games on Steam. Their account is worth \x02$%.2f\x0f." % (inp, amountGameNum, value)
    if amountGameNum >= 125:
        output = output + " <--- jesus fuck quit buying games you neckbeard."
    return(output)
开发者ID:limnick,项目名称:siri,代码行数:34,代码来源:steam.py
示例18: check_touhou
def check_touhou(inp, chan=None, bot=None):
    """Poll the TouhouRadio booru for new images for this channel.

    NOTE(review): the bare ``return`` after the loop below makes everything
    past it unreachable -- the booru check appears deliberately disabled,
    leaving only the channel-config debug printing active.
    """
    #if channel[chan]:
    channels = bot.channelconfig.walk(gather_subsection)
    for channel in channels:
        print channel
    return
    # --- unreachable below this point ---
    chan_url = http.quote('{channel|%s}/1' % '#pantsumen') #str(chan)
    url = 'http://booru.touhouradio.com/post/list/%s' % chan_url
    try: html = http.get_html(url)
    except ValueError: return None
    # First thumbnail on the listing page identifies the newest post.
    firstimage = html.xpath("//span[@class='thumb']//img/@src")[0]
    try:
        if firstimage in touhou_list[chan]:
            return "New Activity on TouhouRadio!"
    except:
        pass
    # Remember the newest image so the next poll can detect changes.
    touhou_list[chan] = firstimage
    print touhou_list[chan]
开发者ID:Anonymike,项目名称:pasta-bot,代码行数:25,代码来源:touhouradio.py
示例19: pre
def pre(inp):
    """pre <query> -- searches scene releases using orlydb.com"""
    try:
        h = http.get_html("http://orlydb.com/", q=inp)
    except http.HTTPError as e:
        return 'Unable to fetch results: {}'.format(e)
    rows = h.xpath("//div[@id='releases']/div/span[@class='release']/..")
    if not rows:
        return "No results found."
    first = rows[0]
    date = first.xpath("span[@class='timestamp']/text()")[0]
    section = first.xpath("span[@class='section']//text()")[0]
    name = first.xpath("span[@class='release']/text()")[0]
    # parse date/time
    stamp = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
    date_string = stamp.strftime("%d %b %Y")
    since = timesince.timesince(stamp)
    sizes = first.xpath("span[@class='inforight']//text()")
    # Size column is optional; prepend the separator only when present.
    size = ' - ' + sizes[0].split()[0] if sizes else ''
    return '{} - {}{} - {} ({} ago)'.format(section, name, size, date_string, since)
开发者ID:FurCode,项目名称:RoboCop2,代码行数:31,代码来源:scene.py
示例20: redtube
def redtube(inp, nick='', chan='', say=None):
    """Search redtube and announce the first result's title."""
    search = urllib.quote_plus(inp)
    searchURL = "http://redtube.com/?search=%s" % (search)
    getSearch = http.get_html(searchURL)
    # BUG FIX: the original XPath ended with a trailing '/', which is invalid
    # XPath and makes lxml raise XPathEvalError; select the anchor's text so
    # we announce the video title string.
    videoTitle = getSearch.xpath('/html/body/div/div/div[3]/ul/li/div[2]/h2/a/text()')[0]
    #videoUrl = getSearch.xpath('//div[@class="video"]/a/href/text()')[0]
    #final = "%s :: %s" % (videoTitle, videoUrl)
    say(videoTitle)
开发者ID:limnick,项目名称:siri,代码行数:8,代码来源:porn.py
注:本文中的util.http.get_html函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论