This article collects typical usage examples of the mechanize.Browser class in Python. If you are wondering what the Browser class is for, or how it is used in practice, the curated class examples below may help.
Twenty code examples of the Browser class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
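Before the collected examples, here is a minimal sketch of the typical mechanize.Browser workflow: open a page, fill in the first form, submit it, and read the response. It is not taken from any of the projects below; the URL and the form field names ('username', 'password') are placeholders.

from mechanize import Browser

br = Browser()
br.set_handle_robots(False)  # do not fetch or obey robots.txt
br.addheaders = [('User-agent', 'Mozilla/5.0 (compatible)')]

br.open("http://example.com/login")   # placeholder URL
br.select_form(nr=0)                  # select the first form on the page
br["username"] = "me"                 # placeholder field names and values
br["password"] = "secret"
response = br.submit()
print(response.read())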
Example 1: lookup_offers_isbn
def lookup_offers_isbn(item_id):
    offers = []
    br = Browser()
    res = br.open("http://books.half.ebay.com/ws/web/HalfISBNSearch?isbn=%s" % item_id)
    soup = BeautifulSoup(res.read())
    ratings = soup.findAll('span', {'class': 'Header'})
    for r in ratings:
        rating = r.text
        prices = r.parent.parent.parent.findNextSibling('table').findAll('tr')[1:]
        linktext = r.parent.parent.parent.findNextSiblings('table')[1].find(text=re.compile('View all.*'))
        if linktext:
            all = linktext.parent['href']
            # get link
            res2 = br.open(all)
            soup = BeautifulSoup(res2.read())
            rating2 = soup.findAll('span', {'class': 'Header'})
            prices = rating2[0].parent.parent.parent.parent.findAll('table')[3].findAll('tr')[1:]
        for row in prices:
            m = re.search("itemid=(\d+)", row.find('a', href=re.compile("itemid=\d+"))['href'])
            itemid = m.group(1)
            seller = row.find('a', {'class': 'SellerDisplayLink'}).text
            price = row.find('span', {'class': 'ItemPrice'}).text
            price = string.replace(price, ",", "")
            if price.startswith("$"):
                price = price[1:]
            offers.append({'rating': rating, 'seller': seller, 'listing_id': itemid, 'price': str(price)})
            print rating, seller, itemid, price
    return offers
Author: clarsen, Project: booksell, Lines: 28, Source: halfcom.py
Example 2: down_image
def down_image(self, img):
    print "down image from " + img
    down_br = Browser()
    down_cj = CookieJar()
    down_br.set_cookiejar(down_cj)
    fn = tempfile.mktemp(suffix='.png')
    return down_br.retrieve(img, filename=fn)[0]
Author: lite, Project: yebob_utils, Lines: 7, Source: Yebob.py
Example 3: on_task_start
def on_task_start(self, task, config):
    try:
        from mechanize import Browser
    except ImportError:
        raise PluginError('mechanize required (python module), please install it.', log)
    userfield = config.get('userfield', 'username')
    passfield = config.get('passfield', 'password')
    url = config['url']
    username = config['username']
    password = config['password']
    br = Browser()
    br.set_handle_robots(False)
    try:
        br.open(url)
    except Exception as e:
        # TODO: improve error handling
        raise PluginError('Unable to post login form', log)
    #br.set_debug_redirects(True)
    #br.set_debug_responses(True)
    #br.set_debug_http(True)
    for form in br.forms():
        loginform = form
        try:
            loginform[userfield] = username
            loginform[passfield] = password
            break
        except Exception as e:
            pass
    else:
        received = os.path.join(task.manager.config_base, 'received')
        if not os.path.isdir(received):
            os.mkdir(received)
        filename = os.path.join(received, '%s.formlogin.html' % task.name)
        with open(filename, 'w') as f:
            f.write(br.response().get_data())
        log.critical('I have saved the login page content to %s for you to view' % filename)
        raise PluginError('Unable to find login fields', log)
    br.form = loginform
    br.submit()
    cookiejar = br._ua_handlers["_cookies"].cookiejar
    # Add cookiejar to our requests session
    task.requests.add_cookiejar(cookiejar)
    # Add handler to urllib2 default opener for backwards compatibility
    handler = urllib2.HTTPCookieProcessor(cookiejar)
    if urllib2._opener:
        log.debug('Adding HTTPCookieProcessor to default opener')
        urllib2._opener.add_handler(handler)
    else:
        log.debug('Creating new opener and installing it')
        urllib2.install_opener(urllib2.build_opener(handler))
Author: Anaerin, Project: Flexget, Lines: 60, Source: plugin_formlogin.py
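Example 3 recovers the cookie jar from mechanize internals (br._ua_handlers["_cookies"].cookiejar). A less intrusive alternative, sketched here rather than taken from the plugin, is to create the CookieJar yourself and attach it with set_cookiejar(), as Examples 2, 4 and 17 do; the jar can then be shared without touching private attributes:

from cookielib import CookieJar   # http.cookiejar on Python 3
from mechanize import Browser

cj = CookieJar()
br = Browser()
br.set_cookiejar(cj)   # br now records its cookies in cj
# ... open the login URL, fill and submit the form as in Example 3 ...
# cj can afterwards be handed to other code, e.g. task.requests.add_cookiejar(cj)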
Example 4: login_to_kaggle
def login_to_kaggle(self):
    """ Login to Kaggle website
    Parameters:
    -----------
    None
    Returns:
    browser: Browser
        a mechanize Browser object to be used for further access to site
    """
    if self.verbose:
        print("Logging in to Kaggle..."),
    br = Browser()
    cj = cookielib.LWPCookieJar()
    br.set_cookiejar(cj)
    br.open(self.kag_login_url)
    br.select_form(nr=0)
    br['UserName'] = self.kag_username
    br['Password'] = self.kag_password
    br.submit(nr=0)
    if br.title() == "Login | Kaggle":
        raise KaggleError("Unable to login Kaggle with username %s (response title: %s)" % (self.kag_username, br.title()))
    if self.verbose:
        print("done!")
    return br
Author: joostgp, Project: ml_toolbox, Lines: 32, Source: kaggle.py
Example 5: GetXboxLiveFriends
def GetXboxLiveFriends(self):
    """Return a list of tuples (gamer_tag, gamer_presence)."""
    br = Browser()
    br.open('http://live.xbox.com/en-US/profile/Friends.aspx')
    br.select_form(name='f1')
    br['login'] = self.login
    br['passwd'] = self.passwd
    br.submit()  # Submit login form.
    br.select_form(name='fmHF')
    response = br.submit()  # Submit redirect form.
    friend_list = response.read()
    response.close()
    soup = BeautifulSoup(friend_list)
    friend_table = soup.find('table', {'class': FRIEND_TABLE_CLASS})
    if friend_table is None:
        raise XboxLiveError('Parsing failure.')
    friends = []
    for row in friend_table.contents[1:]:  # Skip header row.
        gamer_tag = row.find('td', {'class': GAMER_TAG_CLASS})
        gamer_tag = str(gamer_tag.find('a').contents[0])
        gamer_presence = row.find('td', {'class': GAMER_PRESENCE_CLASS})
        gamer_presence = str(gamer_presence.find('h4').contents[0])
        friends.append((gamer_tag, gamer_presence))
    return friends
Author: damonkohler, Project: pypert, Lines: 26, Source: xbox_live.py
Example 6: newBrowser
def newBrowser(self):
    # Create new browsers all the time because its data structures grow
    # unboundedly (texas#135)
    br = Browser()
    br.add_password(self.hostname, self.username, self.password)
    br.set_handle_robots(None)
    return br
Author: abubeck, Project: cob_bringup_overlays, Lines: 7, Source: ddwrt.py
Example 7: fetch_laws_page_from_year
def fetch_laws_page_from_year(year, temporaryDirectory):
    lawsDirectory = os.path.join(temporaryDirectory, 'all_laws')
    if not os.path.exists(lawsDirectory):
        os.makedirs(lawsDirectory)
        print('The laws directory did not exist so I created it')
        print(lawsDirectory)
    fileToWriteLawsListIn = os.path.join(lawsDirectory, year + '.html')
    print('File to write in is ' + fileToWriteLawsListIn)
    lawWasNotDownloaded = not os.path.isfile(fileToWriteLawsListIn)
    if lawWasNotDownloaded:
        startDownload = int(round(time.time() * 1000))
        print('Getting laws from year ' + year)
        url = get_ugly_url_for_laws(year)
        browser = Browser()
        browser.open(url)
        html = browser.response().get_data()
        with open(fileToWriteLawsListIn, 'a') as f:
            f.write(html)
        endDownload = int(round(time.time() * 1000))
        print('Finished downloading laws for year ' + year + '. It took only '
              + str(endDownload - startDownload) + ' milliseconds')
    else:
        print('This year was already fetched ' + year
              + '. Skipping to the next year')
Author: andreicristianpetcu, Project: pubdocs, Lines: 28, Source: 1_clrro_fetch_laws_per_year.py
Example 8: __init__
def __init__(self):
    Browser.__init__(self)
    self.set_handle_robots(False)
    self.addheaders = [(
        'Accept',
        'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    )]
Author: vinceau, Project: browserplus, Lines: 7, Source: browserplus.py
Example 9: mrsc
def mrsc(gid):
    mech = Browser()
    url = "http://espn.go.com/ncf/playbyplay?gameId=" + gid + "&period=0"
    #print url
    page = mech.open(url)
    html = page.read()
    print url
    if html.count('Play-by-play not currently available.') == 0:
        soup = BeautifulSoup(html)
        table = soup.findAll("table")[-1]
        rows = table.findAll('tr')[::-1]
        c = 0
        toret = ''
        keepgoing = True
        cup = html[::-1][:html[::-1].find(' left; font: 700 14px/25px Helvetica,Arial,sans-serif;" colspan="3"><div style="margin-right: 6px;"'[::-1])][::-1]
        cup = cup[cup.find('a name="') + len('a name="'):]
        cup = cup[:cup.find('"')]
        while c < 7 and keepgoing and c < len(rows):
            cols = rows[c].findAll('td')
            #print rows[c]
            if len(cols) > 2:
                #if str(cols[2]) != '<td> </td>' and str(cols[3]) != '<td> </td>':
                toret = str(' '.join(cols[0].findAll(text=True))) + '. ' + str(' '.join(cols[1].findAll(text=True)))
                keepgoing = False
            c = c + 1
        toret = toret.replace(' ', ' ').strip()
        if toret != '': toret = toret + ' '
        poss = ''
        if cup != '' and len(cup) < 30: poss = cup
    else:
        toret = ''
        poss = ''
    return [toret, poss]
Author: epmatsw, Project: FootballBot, Lines: 33, Source: following.py
Example 10: getLastEntries
def getLastEntries(self, url, lastDate):
    """ get all entries from an HTML table list if it is newer
    than prevEntry. Format is from graz FF site """
    mech = Browser()
    mech.set_handle_robots(False)
    try:
        page = mech.open(url)
    except urllib2.HTTPError:
        if url == None:
            url = "(empty url)"
        self.logger.error("Could not read url " + url)
        return []
    html = page.read()
    soup = BeautifulSoup(html)
    link = soup.findAll('a')
    if len(link) == 0:
        logger.error('No links in the page: %s', url)
        return []
    returnLinks = []
    for l in link:
        try:
            date = datetime.strptime(l.string, "topo-%Y-%m-%d-%H:%M.tsv.gz")
        except ValueError:
            continue
        if date > lastDate:
            returnLinks.append(url + l.string)
        else:
            break
    return returnLinks
Author: leonardomaccari, Project: communityNetworkMonitor, Lines: 32, Source: FFWien.py
Example 11: main
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--total-jobs', metavar='<total-jobs>', help='total number of jobs downloading documents', type=int)
    parser.add_argument('--job', metavar='<job>', help='job number between 1 and <total-jobs>', type=int)
    args = parser.parse_args()
    check_args(parser, args)
    br = Browser()
    br.set_handle_robots(False)
    # br.set_debug_responses(True)
    data = urlencode({'user': USERNAME, 'pass': getpass()})
    document_urls = [LOGIN_PREFIX + url.strip() + '&view=etext' for url in file(DOCUMENT_URLS_FILE)]
    start = args.job - 1
    step = args.total_jobs
    for url in iterview(document_urls[start::step]):
        try:
            get_document_pages(br, url, data)
        except Exception as e:
            print >> sys.stderr, '\n', (url, e)
Author: hannawallach, Project: declassified-documents, Lines: 26, Source: get_document_pages.py
Example 12: gen_path
def gen_path(request):
    x = json.loads(request.POST['data'])  # fetches data
    print x
    adj_mat = []  # creates empty adjacency matrix
    i1 = j1 = 0
    num_cities = len(x)
    for i in x:
        tmp_mat = []
        for j in x:
            if i != j:
                API_KEY = "AIzaSyDBOSr6_XxvISPGX54P9bPnooE3RUpRTp0"
                orig_coord = x[i]
                dest_coord = x[j]
                br = Browser()  # creates mechanize instance
                br.set_handle_robots(False)
                # print "https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)
                result = br.open("https://maps.googleapis.com/maps/api/distancematrix/json?origins={0}&destinations={1}&key={2}".format(orig_coord, dest_coord, API_KEY)).read()  # makes a call to GoogleMapsAPI
                json_result = json.loads(result)
                tmp_mat.append(int(json_result['rows'][0]['elements'][0]['distance']['value']))
            else:
                tmp_mat.append(0)
        adj_mat.append(tmp_mat)
    obj = ArpanDaErCode()
    ans = ""
    ans = ArpanDaErCode.solve(obj, adj_mat, num_cities)  # gets sequence from model
    print ans
    ret = {'data': [str(ii) for ii in ans]}
    return HttpResponse(str(json.dumps(ret)))  # returns the sequence in JSON format for the JS to handle
Author: RijuSen1996, Project: bppimt_hackon, Lines: 32, Source: views.py
Example 13: scrap_query
def scrap_query(query, bang=None):
    r = ddg_query('imbd ' + query, bang=bang)
    if 'redirect' in dir(r) and 'primary' in dir(r.redirect):
        url = r.redirect.primary
    else:
        logger.info('Could not find imdb searchpage from DuckDuckGo bang')
        return None
    br = Browser()
    br.set_handle_robots(False)
    br.addheaders = [('User-agent', 'Mozilla/5.0 (Windows NT 6.2;\
        WOW64) AppleWebKit/537.11 (KHTML, like Gecko)\
        Chrome/23.0.1271.97 Safari/537.11')]
    r = br.open(url)
    soup = BeautifulSoup(r)
    for link in soup.find_all('a'):
        href = link.get('href', '')
        match = re.search(r"imdb\.com/.*tt(?P<number>[^/]*)", href)
        if match:
            imdb_id = check_imdb(match.group('number'))
            return imdb_id
    return None
Author: getzze, Project: imdbfetcher, Lines: 27, Source: searchengine_api.py
Example 14: __init__
def __init__(self, options):
    (self.configfile, self.config, self.moduleconfig) = self.initialize_config(options)
    # If we have a particular log level for this module, use that,
    # otherwise use the global log level. If that isn't defined
    # either, use the INFO loglevel.
    if 'log' in self.moduleconfig:
        loglevel = self.moduleconfig['log']
    else:
        loglevel = self.config.get('log', 'INFO')
    self.log = self.setup_logger(self.module_dir, loglevel)
    self.base_dir = self.config['datadir']
    if self.browser_use_robustfactory:
        self.browser = Browser(factory=RobustFactory())
    else:
        self.browser = Browser()
    self.browser.addheaders = [('User-agent', 'lagen.nu-bot ([email protected])')]
    # logger = logging.getLogger("mechanize")
    # logger.addHandler(logging.StreamHandler(sys.stdout))
    # logger.setLevel(logging.DEBUG)
    # self.browser.set_debug_http(True)
    # self.browser.set_debug_responses(True)
    # self.browser.set_debug_redirects(True)
    self.ns = {'rinfo': Namespace(Util.ns['rinfo']),
               'rinfoex': Namespace(Util.ns['rinfoex']),
               'dct': Namespace(Util.ns['dct'])}
Author: staffanm, Project: legacy.lagen.nu, Lines: 30, Source: DocumentRepository.py
Example 15: num_itens
def num_itens(self, busca, data_inicial, data_final):
    br = Browser()
    response1 = \
        br.open("http://portal.in.gov.br/in/imprensa1/pesquisa_avancada")
    br.select_form(name="formBusca")
    br["texto_todas"] = busca
    br["dataPublicacaoInicial"] = data_inicial[:5]
    br["dataPublicacaoFinal"] = data_final[:5]
    br["ano"] = [data_final[-4:]]
    br["idJornal"] = ["1", "2", "3", "4"]
    # print(br.form)
    br.form.action = \
        "http://www.in.gov.br/imprensa/pesquisa/pesquisaresultado.jsp"
    res = br.submit()
    texto = res.read()
    x1, x2, x3 = texto.partition("ite")
    x1, x2, x3 = x1.rpartition(">")
    try:
        arq = open(self.retornar_html(), "w")
        arq.write(texto)
        arq.close()
    except:
        print("Erro ao tentar salvar página de buscas!")
    x3 = x3.replace(",", "")
    x3 = x3.strip()
    # Return the number of items found
    if x3 == "Um":
        return 1
    if len(x3) > 0:
        return int(x3)
    else:
        return 0
Author: andresmrm, Project: trazdia, Lines: 35, Source: proc.py
Example 16: name
def name(request, string):
    movie = string.replace("_", "+")
    br = Browser()
    br.open("http://www.imdb.com/find?s=tt&q=" + movie)
    link = br.find_link(url_regex=re.compile(r"/title/tt.*"))
    data = br.follow_link(link)
    soup = BeautifulSoup(data.read())
    title = soup.find('h1').contents[0].strip()
    name = title.replace(" ", "")
    rating = soup.find('span', itemprop='ratingValue').contents[0]
    duration = soup.find('time', itemprop='duration').contents[0].strip()
    releaseDate = soup.find('a', title='See more release dates').contents[0]
    director = soup.find('span', itemprop='director').getText()
    actor_all = []
    actors = soup.findAll('span', itemprop='actors')
    for i in range(len(actors)):
        actor_all.append((actors[i].contents[1]).getText())
    genres_all = []
    genres = soup.findAll('span', itemprop='genre')
    for i in range(len(genres)):
        genres_all.append(genres[i].getText())
    jsonObject = {}
    jsonObject['Name:'] = name
    jsonObject['IMDB Rating:'] = rating
    jsonObject['Duration'] = duration
    jsonObject["Actors: "] = actor_all
    jsonObject['Director:'] = director
    jsonObject['Genres'] = genres_all
    jsonObject['Release Date'] = releaseDate
    movie_details = json.dumps(jsonObject)
    return HttpResponse(movie_details)
Author: manimanjari, Project: imdbMovieFinder, Lines: 34, Source: views.py
Example 17: __init__
def __init__(self, request, username, password, context=''):
    """
    When the class is instantiated it logs in with the supplied username and password.
    If browser_login is non-empty, it holds the saved cookies of the last logged-in session.
    We reload those cookies into a new Browser, which saves the seconds a fresh login would take.
    """
    self.context = context
    self.request = request
    registry = self.request.registry
    self.epitool = registry.getUtility(IEPIUtility)
    self.username = username
    self.password = password
    self.browser_login, elk = self.epitool.recoverBrowserSession(self.request, self.username, 'presencia')
    if self.browser_login:
        self.br = Browser()
        self.br.set_handle_robots(False)
        cj = LWPCookieJar()
        self.br.set_cookiejar(cj)
        for co in self.browser_login:
            ck = Cookie(version=co['version'], name=co['name'], value=co['value'], port=co['port'], port_specified=co['port_specified'], domain=co['domain'], domain_specified=co['domain_specified'], domain_initial_dot=co['domain_initial_dot'], path=co['path'], path_specified=co['path_specified'], secure=co['secure'], expires=co['expires'], discard=co['discard'], comment=co['comment'], comment_url=co['comment_url'], rest=co['rest'])
            cj.set_cookie(ck)
        print "Logging-in into presencia via browser"
    else:
        self.br = Browser()
        self.br.set_handle_equiv(False)
        self.login(message=u"Logging-in into presencia via regular login")
    return
Author: sneridagh, Project: epi, Lines: 28, Source: presencia.py
Example 18: parseFeeds
def parseFeeds(self):
    mech = Browser()
    mech.addheaders = [('User-agent', 'Mozilla/5.0 (compatible)')]
    mech.set_handle_robots(False)
    for url in self.feedUrls:
        #url = "http://feeds.feedburner.com/PurdueEngNews?format=xml"
        page = mech.open(url)
        html = page.read()
        soup = BeautifulStoneSoup(html)
        headlines = []
        descriptions = []
        i = 0
        self.newsList = []
        for item in soup.findAll('item'):
            if (i > 20):
                break
            date = item.find('pubdate')
            title = item.find('title')
            link = item.find('link')
            desc = item.find('description')
            if (len(title.contents) > 0):
                title2 = title.contents[0]
            else:
                title2 = 'None'
            self.newsList.append(NewsStory(date.contents[0], title2, link.contents[0], \
                desc.contents[0]))
            i += 1
        for story in self.newsList:
            headlines.append(story.title)
            descriptions.append(story.link)
            #story.display()
        self.headlineList.append(headlines)
        self.descList.append(descriptions)
    self.populateTopicList()
Author: jevinskie, Project: purdue-kiosk, Lines: 34, Source: newshelper.py
Example 19: google
def google(query):
    print("\n\t[!] Searching on Google...\n")
    print("[QUERY] >> " + query)
    try:
        query = query.replace(" ", "+")
        req = "https://www.google.com.br/search?q=%s&num=50&start=0" % query
        br = Browser()
        br.set_handle_robots(False)
        br.addheaders = [("User-agent", "chrome")]
        html = br.open(req).read()
        soup = BeautifulSoup(html, "html5lib")
        with open("./output/google-%s.txt" % query[8:], "w") as log:
            for results in soup.findAll(attrs={"class": "g"}):
                for title in results.findAll("h3", attrs={"class": "r"}):
                    t = title.text
                    t = t.title()
                for link in results.findAll(attrs={"class": "s"}):
                    l = link.cite.text
                    print(t)
                    print(l + '\n')
                    log.write(str(l) + '\n')
    except Exception as e:
        print(e)
Author: Pedro-Souza, Project: My-Scripts, Lines: 26, Source: GoogleSearch.py
Example 20: __init__
class Du8Doc:
    def __init__(self):
        self.br = Browser()

    def from_html(self, html):
        text = re.sub("<.+>\n", "", html)
        text = re.sub("</.+>\n", "", text)
        text = re.sub('(<br/?>\s*)+', '\n', text)
        text = re.sub(' ', ' ', text)
        return text

    def get_links(self, url):
        res = self.br.open(url)
        data = res.get_data()
        soup = BeautifulSoup(data, "html5lib")
        div_content = soup.find('table')
        urls = div_content.find_all("a")
        return [url.get('href') for url in urls]

    def get_content(self, link):
        res = self.br.open(link)
        data = res.get_data()
        soup = BeautifulSoup(data, "html5lib")
        title, chapter = soup.html.title.string.split("-")[0:2]
        div_content = soup.find(id="content").prettify()
        content = self.from_html(div_content)
        return title, chapter, content
Author: lite, Project: du8_utils, Lines: 28, Source: du8.py
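A hypothetical usage sketch for the Du8Doc class above; the index URL is a placeholder and the sketch assumes the links returned by get_links() are absolute:

doc = Du8Doc()
links = doc.get_links("http://www.example.com/book/index.html")   # placeholder URL
if links:
    title, chapter, content = doc.get_content(links[0])
    print(title)
    print(chapter)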
Note: The mechanize.Browser class examples in this article were compiled by 纯净天空 from source code and documentation hosted on GitHub, MSDocs and similar platforms; the snippets were selected from open-source projects contributed by their respective authors. The code remains the copyright of its original authors; consult each project's license before redistributing or reusing it. Do not republish without permission.