
Python urllib.basejoin Function Code Examples


This article collects typical usage examples of the urllib.basejoin function in Python. If you have been wondering what basejoin does, how to call it, or what real-world uses look like, the curated code examples here should help.



The sections below present 20 code examples of the basejoin function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
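
Before turning to the examples, a quick orientation: in Python 2, urllib.basejoin is an alias for urlparse.urljoin, which resolves a (possibly relative) link against a base URL; in Python 3 the same functionality lives in urllib.parse.urljoin. The minimal sketch below, using made-up example.com URLs, shows the three cases the examples rely on: document-relative links, root-relative links, and absolute links.

import urllib

base = 'http://example.com/videos/page.html'

# A document-relative link resolves against the base document's directory.
print urllib.basejoin(base, 'clip.mp4')
# -> http://example.com/videos/clip.mp4

# A root-relative link ('/...') resolves against the scheme and host only.
print urllib.basejoin(base, '/static/clip.mp4')
# -> http://example.com/static/clip.mp4

# An absolute link replaces the base entirely.
print urllib.basejoin(base, 'http://cdn.example.com/clip.mp4')
# -> http://cdn.example.com/clip.mp4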

Example 1: findVideoFrameLink

import re
import urllib
import urlparse

def findVideoFrameLink(page, data):

    minheight = 300
    minwidth = 300

    frames = findFrames(data)  # findFrames is defined elsewhere in scrapingUtils.py
    if not frames:
        return None

    # Find candidate (i)frame tags together with their declared pixel heights
    iframes = re.findall("(frame[^>]* height=[\"']*(\d+)[\"']*[^>]*>)", data)

    if iframes:
        for iframe in iframes:

            height = int(iframe[1])
            if height > minheight:
                m = re.findall("[\"' ]width=[\"']*(\d+[%]*)[\"']*", iframe[0])
                if m:
                    if m[0] == '100%':
                        width = minwidth+1
                    else:
                        width = int(m[0])
                    if width > minwidth:
                        m = re.findall('[\'"\s]src=["\']*\s*([^"\' ]+)\s*["\']*', iframe[0])
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                up = urlparse.urlparse(urllib.unquote(page))
                                if link.startswith('/'):
                                    link = urllib.basejoin(up[0] + '://' + up[1],link)
                                else:
                                    link = urllib.basejoin(up[0] + '://' + up[1] + '/' + up[2],link)
                            return link.strip()

    # Alternative 1
    iframes = re.findall("(frame[^>]*[\"; ]height:\s*(\d+)[^>]*>)", data)
    if iframes:
        for iframe in iframes:
            height = int(iframe[1])
            if height > minheight:
                m = re.findall("[\"; ]width:\s*(\d+)", iframe[0])
                if m:
                    width = int(m[0])
                    if width > minwidth:
                        m = re.findall('[ ]src=["\']*\s*([^"\' ]+)\s*["\']*', iframe[0])
                        if m:
                            link = m[0]
                            if not link.startswith('http://'):
                                link = urllib.basejoin(page,link)
                            return link.strip()

    # Alternative 2 (Frameset)
    iframes = re.findall('<FRAMESET[^>]+100%[^>]+>\s*<FRAME[^>]+src="([^"]+)"', data)
    if iframes:
        link = iframes[0]
        if not link.startswith('http://'):
            link = urllib.basejoin(page,link)
        return link.strip()
        
    return None
Developer ID: Anniyan, Project: SportsDevil-Fixes, Lines: 60, Source: scrapingUtils.py


Example 2: _parseLinks

   def _parseLinks(self,url):
      c=httplib2.Http()
      resp,content=c.request(url)
      tree=libxml2.htmlParseDoc(content,"utf-8")
      links=htmltools.find_elements(tree.getRootElement(),"a")
      reflink=""
      magnet=None
      for i in links:
         if i.getContent().lstrip().rstrip()=="Download torrent":
            reflink=urllib.basejoin(url,i.prop('href'))
         if i.getContent().lstrip().rstrip()=="magnet link":
            magnet=urllib.basejoin(url,i.prop('href'))
            if "&" in magnet:
               j=magnet.index("&")
               magnet=magnet[:j]
      return reflink,magnet
Developer ID: Mektub, Project: hconfig, Lines: 31, Source: BTSCENE.py


Example 3: __init__

    def __init__(self):
        baseurl = 'http://164.100.47.132/LssNew/psearch/'
        date2num = {\
            (datetime.date(1998, 03, 23),      \
             datetime.date(1999, 04, 24)): 12, \
            (datetime.date(1999, 10, 20),      \
             datetime.date(2004, 02, 05)): 13, \
            (datetime.date(2004, 06, 02),      \
             datetime.date(2009, 02, 26)): 14, \
            (datetime.date(2009, 06, 01),      \
             datetime.date(2014, 06, 01)): 15, \
          }
        num2webform = { 12: 'DebateAdvSearch12.aspx', \
                        13: 'DebateAdvSearch13.aspx', \
                        14: 'DebateAdvSearch14.aspx', \
                        15: 'DebateAdvSearch15.aspx', \
                      }

        num2dateqry = { 12: 'DebateAdvSearch12.aspx', \
                        13: 'DebateAdvSearch13.aspx', \
                        14: 'DebateAdvSearch14.aspx', \
                        15: 'DebateAdvSearch15.aspx', \
                      }
        self.webformUrls = {}
        for k in date2num.keys():
            self.webformUrls[k] = urllib.basejoin(baseurl, \
                                                  num2webform[date2num[k]]) 
        self.dateqryUrls = {}
        for k in date2num.keys():
            self.dateqryUrls[k] = urllib.basejoin(baseurl, \
                                                  num2dateqry[date2num[k]]) 
Developer ID: edudemy, Project: judis-re, Lines: 31, Source: loksabha.py


Example 4: _get_magnet

 def _get_magnet(self,url):
    i=len(url)-1
    while url[i]!='/':
       i-=1
    url=url[:i+1]+urllib.quote_plus(url[i+1:])
    c=httplib2.Http()
    resp,content=c.request(url)
    if "set-cookie" in resp:
       cookie=resp['set-cookie']
    else:
       cookie=None
    tree=libxml2.htmlParseDoc(content,"utf-8")
    form=htmltools.find_elements(tree.getRootElement(),"form",id="frmAdultDisclaimer")
    if form:
       form=form[0]
       inputs=htmltools.find_elements(form,"input")
       body={}
       for i in inputs:
          body[i.prop('name')]=i.prop('value')
       del body['btn_Decline']
       body=urllib.urlencode(body)
       headers={'Content-type':"application/x-www-form-urlencoded"}
       if cookie:
          headers['Cookie']=cookie
       url=urllib.basejoin(url,form.prop('action'))
       resp,content=c.request(url,"POST",body,headers)
       if "set-cookie" in resp:
          cookie=resp['set-cookie']
       if cookie:
          headers['Cookie']=cookie
       url=urllib.basejoin(url,resp["location"])
       resp,content=c.request(url,headers=headers)
       tree=libxml2.htmlParseDoc(content,"utf-8")
    return htmltools.find_elements(tree.getRootElement(),"a",**{'class':'dwld_links'})[0].prop('href')
Developer ID: Mektub, Project: hconfig, Lines: 34, Source: SUMO.py


Example 5: search

    def search(self, pattern=''):
        pattern = urllib.quote(pattern)

        url = '?s=%(pattern)s' % {'pattern': pattern}

        search = []

        search_elem = self.get_html_tree(url)

        if not search_elem or search_elem.find('div', {'class': 'alert alert-warning'}):
            # Sorry, no results were found.
            return search

        div_elems = search_elem.findAll(
            'div', {'class': 'col-lg-3 col-xs-3 col-sm-3 item'})
        for div_elem in div_elems:
            a_elem = div_elem.findAll('a')[-1]
            img_elem = div_elem.find('img')

            tv_show = re.sub(urllib.basejoin(self.main_url, 'watch-'), '',
                             a_elem.get('href'))

            item = {'label': a_elem.getText(),
                    'tv_show': tv_show,
                    'thumbnail': urllib.basejoin(self.main_url, img_elem.get('src'))}

            search.append(item)

        return search
Developer ID: nabilbendafi, Project: plugin.video.beetv, Lines: 29, Source: beetv.py


Example 6: __iter__

    def __iter__(self):
        if self.target:
            basepath = xmlrpclib.ServerProxy(self.target).getPhysicalPath()
        for item in self.previous:
            if not self.target:
                yield item
                continue
            keys = item.keys()
            type_, path = item.get(self.typekey(*keys)[0]), item.get(self.pathkey(*keys)[0])
            
            if not (type_ and path):             # not enough info
                yield item; continue

            #fti = self.ttool.getTypeInfo(type_)
            #if fti is None:                           # not an existing type
            #    msg = "constructor: no type found %s:%s" % (type_,path)
            #    logger.log(logging.ERROR, msg)
            #    yield item; continue

            elems = path.strip('/').rsplit('/', 1)
            
            for attempt in range(0, 3):
                try:
                
                    url = urllib.basejoin(self.target, path)
                    proxy = xmlrpclib.ServerProxy(url)
                    container, id = (len(elems) == 1 and ('', elems[0]) or elems)
                    #if id == 'index.html':
                    try:
                        #test paths in case of acquition
                        rpath = proxy.getPhysicalPath()
                        #TODO: should check type to see if it's correct?
                        rpath = rpath[len(basepath):]
                        if path == '/'.join(rpath):
                            self.logger.debug("%s already exists. Not creating"% ('/'.join(rpath)) )
                            break
                    except xmlrpclib.Fault:
                        # Doesn't already exist
                        pass
                    purl = urllib.basejoin(self.target,container)
                    pproxy = xmlrpclib.ServerProxy(purl)
                    try:
                        pproxy.invokeFactory(type_, id)
                        self.logger.info("%s Created with type=%s"% (path, type_) )
                    except xmlrpclib.ProtocolError,e:
                        if e.errcode == 302:
                            pass
                        else:
                            raise
                    except xmlrpclib.Fault:
                        self.logger.warning("Failure while creating '%s' of type '%s'"% (path, type_) )
                        pass
                    break
                except xmlrpclib.ProtocolError,e:
                    if e.errcode == 503:
                        continue
                    else:
                        raise
Developer ID: mitchellrj, Project: transmogrify.ploneremote, Lines: 60, Source: remoteconstructor.py


Example 7: _parse_result

 def _parse_result(self, page_url, result_line):
    
    torrent_link, category, title, size, seeders, leechers, health = TorrentSearch.htmltools.find_elements(result_line, "td")
    torrent_url = urllib.basejoin(page_url, TorrentSearch.htmltools.find_elements(torrent_link, "a")[0].prop('href').replace('/torrent_download/','/download/'))
    if len(TorrentSearch.htmltools.find_elements(title, "a"))==2:
       details_link = TorrentSearch.htmltools.find_elements(title, "a")[0]
    else:
       details_link = TorrentSearch.htmltools.find_elements(title, "a")[1]
    title = details_link.getContent()
    details_link = urllib.basejoin(page_url, details_link.prop('href'))
    size=size.getContent()
    size=size[:-4]+" "+size[-2:]
    seeders=eval(seeders.getContent())
    leechers=eval(leechers.getContent())
    
    category=self._parse_category(TorrentSearch.htmltools.find_elements(category, "a")[0].prop('href').split('/')[-2])
    
    c=httplib2.Http()
    resp,content=self.http_queue_request(details_link)
    tree=libxml2.htmlParseDoc(content,"utf-8")
    lines=TorrentSearch.htmltools.find_elements(TorrentSearch.htmltools.find_elements(tree, "td", **{'class':'tabledata0'})[0].parent.parent,"tr")
    for i in lines:
       cells=TorrentSearch.htmltools.find_elements(i, "td")
       if cells[0].getContent()=="Info hash:":
          hashvalue=cells[1].getContent()
       elif cells[0].getContent()=="Torrent added:":
          date=cells[1].getContent().split(" ")[0]
          date=time.strptime(date,"%Y-%m-%d")
          date=datetime.date(date.tm_year, date.tm_mon, date.tm_mday)
    
    self.add_result(ExtraTorrentPluginResult(title, date, size, seeders, leechers, torrent_url, hashvalue, category))
Developer ID: Mektub, Project: hconfig, Lines: 31, Source: extratorrent.py


Example 8: paso_a_paso

def paso_a_paso():
    url = "%s/tips/recetas" % BASE_URL
    html = urllib.urlopen(url).read()
    dom = lxml.html.document_fromstring(html)
    answer = []
    serie = models.Serie()
    serie.title = 'Paso a paso'
    serie.description = "por Martiniano Molina"
    serie.url = 'rss://%s/content/elgourmet/paso_a_paso' % settings.MY_BASE_URL
    serie.thumbnail = dom.cssselect("#cab_logo img")[0].get("src")
    serie.episodes = []
    serie.show_name = 'paso_a_paso'
    for a in dom.cssselect("#contenedor a"):
        try:
            url2 = a.get('href')
            if not url2.startswith('receta'): continue
            url2 = urllib.basejoin(BASE_URL, url2)

            episode = models.Episode()
            episode.title = a.cssselect("h2")[0].text_content()
            print "\t%s" % episode.title
            html2 = urllib.urlopen(url2).read()
            episode.url = url2
            episode.thumbnail = urllib.basejoin(BASE_URL, dom.cssselect("img")[0].get('src'))
            x = re.findall('"file": ?"(.*?)"', html2)
            episode.video_url = get_video_url(x[0], STREAMING_URL)
            serie.episodes.append(episode)
        except Exception,e:
            print "Error: %s" % e
Developer ID: dbenders, Project: boxee-server, Lines: 29, Source: elgourmet_old.py


Example 9: _make_img_urls

    def _make_img_urls(self, product_key, img_count):
        """
        the keyworld `RLLZ` in url  meaning large size(about 800*1000), `RLLD` meaning small size (about 400 *500)
        http://www.ruelala.com/images/product/131385/1313856984_RLLZ_1.jpg
        http://www.ruelala.com/images/product/131385/1313856984_RLLZ_2.jpg

        http://www.ruelala.com/images/product/131385/1313856984_RLLZ_1.jpg
        http://www.ruelala.com/images/product/131385/1313856984_RLLZ_2.jpg
        """
        urls = []
        prefix = 'http://www.ruelala.com/images/product/'
        for i in range(0, img_count):
            subfix = '%s/%s_RLLZ_%d.jpg' %(product_key[:6], product_key, i+1)
            url = urllib.basejoin(prefix, subfix)
            urls.append(url)

        # If num_image_urls() returned 0, the RLLZ and RLLA variants don't work; probe RLLDE images instead.
        if img_count == 0:
            for j in xrange(0, 1000):
                sub = '%s/%s_RLLDE_%d.jpg' %(product_key[:6], product_key, j+1)
                url = urllib.basejoin(prefix, sub)
                status = self.net.fetch_image(url)
                if status != 404:
                    urls.append(url)
                else:
                    return urls
        return urls
Developer ID: mobishift2011, Project: amzn, Lines: 27, Source: server.py


Example 10: __init__

 def __init__(self, name, rawdir, metadir, statsdir, updateMeta = False):
     lobis.Lobis.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
     self.baseurl   = 'http://lobis.nic.in/'
     self.courturl  = urllib.basejoin(self.baseurl, '/phhc/')
     self.cookieurl = urllib.basejoin(self.baseurl, \
                                      '/phhc/juddt.php?scode=28')
     self.dateurl   = urllib.basejoin(self.baseurl, \
                                      '/phhc/juddt1.php?dc=28&fflag=1')
Developer ID: edudemy, Project: judis-re, Lines: 8, Source: punjab.py


Example 11: __init__

 def __init__(self, name, rawdir, metadir, statsdir, updateMeta = False):
     utils.BaseCourt.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
     self.cookiefile  = tempfile.NamedTemporaryFile()
     self.baseurl = 'http://ldemo.mp.nic.in'
     self.cookieurl = urllib.basejoin(self.baseurl, \
                                      'causelist/ciskiosk/ordermain.php')
     self.dateurl = urllib.basejoin(self.baseurl, \
                             '/causelist/ciskiosk/order_action.php?as9=ok3')
Developer ID: edudemy, Project: judis-re, Lines: 8, Source: madhyapradesh.py


Example 12: _run_search

 def _run_search(self,pattern,href=None,page=0):
    if href==None:
       href="http://mononoke-bt.org/browse2.php?search="+urllib.quote_plus(pattern)
    resp,content=self.http_queue_request(href,headers={'Cookie':self._app.parse_cookie(self.login_cookie)})
    tree=libxml2.htmlParseDoc(content,"utf-8")
    pager=htmltools.find_elements(tree.getRootElement(),"div",**{'class':'animecoversfan'})[0].parent.next
    try:
       data=htmltools.find_elements(pager,"b")[-1].getContent()
       i=len(data)-1
       while data[i] in "0123456789":
          i-=1
       self.results_count=eval(data[i+1:])
    except:
       pass
    restable=pager.next.next
    lines=htmltools.find_elements(restable,"tr",1)[1:-2]
    for i in lines:
       try:
          cells=htmltools.find_elements(i,"td")
          team, show, stype, name, torrent_link, nbfiles, nbcmt, rate, date, size, views, dl, seeders, leechers, ratio=cells
          link=htmltools.find_elements(name,"a")[0]
          label=link.getContent()
          link=urllib.basejoin(href,link.prop('href'))
          torrent_link=urllib.basejoin(href,htmltools.find_elements(torrent_link,"a")[0].prop('href'))+"&r=1"
          date=htmltools.find_elements(date,"nobr")[0].children.getContent()
          date=time.strptime(date,"%Y-%m-%d")
          date=datetime.date(date.tm_year,date.tm_mon,date.tm_mday)
          strsize=""
          cell=size.children
          while cell:
             if cell.name=="text":
                if strsize:
                   strsize+=" "
                strsize+=cell.getContent().upper()
             cell=cell.next
          size=strsize.replace('O','B')
          seeders=eval(seeders.getContent())
          leechers=eval(leechers.getContent())
          resp,content=self.http_queue_request(link,headers={'Cookie':self._app.parse_cookie(self.login_cookie)})
          itemtree=libxml2.htmlParseDoc(content,"utf-8")
          tds=htmltools.find_elements(itemtree.getRootElement(),"td")
          hashvalue=None
          for j in tds:
             if j.getContent()=="Info hash":
                hashvalue=j.next.next.getContent()
          self.add_result(MononokeBTPluginResult(label,date,size,seeders,leechers,torrent_link,hashvalue))
       except:
          pass
       if self.stop_search:
          return
    if not self.stop_search:
       try:
          b=htmltools.find_elements(pager,"b")[-1]
          if b.parent.name=="a":
             url="http://mononoke-bt.org/browse2.php?search=%s&page=%d"%(urllib.quote_plus(pattern),page+1)
             self._run_search(pattern,url,page+1)
       except:
          pass
Developer ID: Mektub, Project: hconfig, Lines: 58, Source: mononoke-bt.py


Example 13: download_oneday

    def download_oneday(self, relpath, dateobj):
        dateurl = urllib.basejoin(self.baseurl, '/hcjudge/date_output.php')
        postdata = [('d1', dateobj.day), ('m1', dateobj.month),  \
                    ('y1', dateobj.year), ('d2', dateobj.day),   \
                    ('m2', dateobj.month), ('y2', dateobj.year), \
                    ('button', 'Submit')]

        webpage = self.download_url(dateurl, postdata = postdata)

        if not webpage:
            self.logger.warning(u'No webpage for %s date: %s' % \
                                 (dateurl, dateobj))
            return []

        d = utils.parse_webpage(webpage)

        if not d:
            self.logger.error(u'HTML parsing failed for date: %s' %  dateobj)
            return []

        newdls = []

        for link in d.findAll('a'):
            href = link.get('href')
            title = utils.get_tag_contents(link)

            if (not href) or (not title):
                self.logger.warning(u'Could not process %s' % link)
                continue

            words = href.split('/')
            filename = words[-1]

            url = urllib.basejoin(dateurl, href)

            self.logger.info(u'link: %s title: %s' % (href, title))

            relurl = os.path.join (relpath, filename)
            filepath = os.path.join(self.rawdir, relurl)
            metapath = os.path.join(self.metadir, relurl)

            if not os.path.exists(filepath):
                webpage = self.download_url(url)

                if not webpage:
                    self.logger.warning(u'No webpage %s' % url)
                else:
                    utils.save_file(filepath, webpage)
                    self.logger.info(u'Saved %s' % url)
                    newdls.append(relurl)

            if os.path.exists(filepath) and \
                    (self.updateMeta or not os.path.exists(metapath)):
                metainfo = self.get_meta_info(title, dateobj)
                if metainfo:
                    utils.print_tag_file(metapath, metainfo)

        return newdls     
Developer ID: edudemy, Project: judis-re, Lines: 58, Source: jharkhand.py


Example 14: __iter__

    def __iter__(self):
    
        self.checkOptions()
                            
        for item in self.previous:
            if not self.target:
                yield item
                continue
            
            keys = item.keys()
            
            # Apply defaultMatcher() function to extract necessary data
            # 1) which item will be transitioned
            # 2) with which transition
            pathkey = self.pathkey(*keys)[0]
            transitionskey = self.transitionskey(*keys)[0]

            if not (pathkey and transitionskey): # not enough info
                yield item
                continue
            
            path, transitions = item[pathkey], item[transitionskey]
            if isinstance(transitions, basestring):
                transitions = (transitions,)
                            
            remote_url = urllib.basejoin(self.target, path)
            if not remote_url.endswith("/"):
                remote_url += "/"
                

            for transition in transitions:
    
                transition_trigger_url = urllib.basejoin(remote_url, "content_status_modify?workflow_action=" + transition)
                self.logger.info("%s performing transition '%s'" % (path, transition))
                
                from httplib import HTTPException
                
                try:
                
                    f= urllib.urlopen(transition_trigger_url)
                    data = f.read()
                    
                    # Use Plone not found page signature to detect bad URLs
                    if "Please double check the web address" in data:
                        raise RuntimeError("Bad remote URL:" + transition_trigger_url)

                except HTTPException, e:
                    # Other than HTTP 200 OK should end up here,
                    # unless URL is broken in which case Plone shows
                    # "Your content was not found page"
                    self.logger.error("fail")
                    msg = "Remote workflow transition failed %s->%s" %(path,transition)
                    self.logger.log(logging.ERROR, msg, exc_info=True)
            
            yield item
Developer ID: mamico, Project: transmogrify.ploneremote, Lines: 56, Source: remoteworkflowupdater.py


Example 15: _run_search

 def _run_search(self,pattern,href=None):
    if href==None:
       href="http://www.torrent411.com/search/"+urllib.quote_plus(pattern)
    resp,content=self.http_queue_request(href)
    content=_codecs.utf_8_encode(_codecs.latin_1_decode(content)[0])[0]
    tree=libxml2.htmlParseDoc(content,"utf-8")
    pager=htmltools.find_elements(htmltools.find_elements(tree.getRootElement(),"table",**{'class':'NB-frame'})[1],"p")[0]
    try:
       b=htmltools.find_elements(pager,"b")[-1]
       data=b.getContent()
       i=len(data)-1
       while data[i] in "0123456789":
          i-=1
       self.results_count=eval(data[i+1:])
    except:
       pass
    restable=htmltools.find_elements(pager.next.next,"table")[0]
    restable=htmltools.find_elements(restable,"table")[1]
    body=htmltools.find_elements(restable,"tbody")[0]
    lines=htmltools.find_elements(body,"tr",1)
    for i in lines:
       try:
          cat,link,a,date,b,c,d,e,f,g,h,i,size,j,seeders,leechers=htmltools.find_elements(i,"td")
          date=date.getContent().replace(chr(194)+chr(160)+"at"+chr(194)+chr(160)," ")
          date=time.strptime(date,"%Y-%m-%d %H:%M:%S")
          date=datetime.date(date.tm_year,date.tm_mon,date.tm_mday)
          size=size.getContent().replace(chr(194)+chr(160)," ")
          seeders=eval(seeders.getContent())
          leechers=eval(leechers.getContent())
          link=htmltools.find_elements(link,"a")[0]
          label=link.prop('title')
          link=urllib.basejoin("http://www.torrent411.com",link.prop('href'))
          resp,content=self.http_queue_request(link)
          content=_codecs.utf_8_encode(_codecs.latin_1_decode(content)[0])[0]
          itemtree=libxml2.htmlParseDoc(content,"utf-8")
          table=htmltools.find_elements(itemtree.getRootElement(),"table",**{'cellpadding':'3'})[1]
          desc,name,torrent,cat,siz,hashvalue=htmltools.find_elements(table,"tr")[:6]
          torrent=htmltools.find_elements(torrent,"a")[0].prop('href')
          hashvalue=htmltools.find_elements(hashvalue,"td")[1].getContent()
          self.add_result(Torrent411PluginResult(label,date,size,seeders,leechers,torrent,hashvalue))
       except:
          pass
       if self.stop_search:
          return
    if not self.stop_search:
       try:
          links=htmltools.find_elements(pager,"a")
          next_link=None
          for i in links:
             if i.getContent()=="Next"+chr(194)+chr(160)+">>":
                next_link=i
          if next_link:
             link=urllib.basejoin("http://www.torrent411.com",next_link.prop('href'))
             self._run_search(pattern,link)
       except:
          pass
Developer ID: Mektub, Project: hconfig, Lines: 56, Source: torrent411.py


Example 16: __init__

    def __init__(self, name, rawdir, metadir, statsdir, updateMeta=False):
        utils.BaseCourt.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
        self.baseurl = "http://patnahighcourt.bih.nic.in"
        self.hostname = "patnahighcourt.bih.nic.in"
        self.dateurl = urllib.basejoin(self.baseurl, "/judgment/judgDateWise.aspx")
        self.formaction = "judgDateWise.aspx"

        self.cookiefile = tempfile.NamedTemporaryFile()
        self.cookieurl = urllib.basejoin(self.baseurl, "/judgment/default.aspx")
        self.download_url(self.cookieurl, savecookies=self.cookiefile.name)
Developer ID: sushant354, Project: judis-re, Lines: 10, Source: patna.py


Example 17: __init__

    def __init__(self, name, rawdir, metadir, statsdir, updateMeta = False):
        utils.BaseCourt.__init__(self, name, rawdir, metadir, statsdir, updateMeta)
        self.baseurl = 'http://rti.india.gov.in'

        self.dateurl = urllib.basejoin(self.baseurl, \
                                       '/decision_categorywise.php')
        self.posturl = self.dateurl
        self.resulturl = urllib.basejoin(self.dateurl, \
                                         '/result_decision_categorywise.php')
        self.cookiefile  = tempfile.NamedTemporaryFile()
Developer ID: edudemy, Project: judis-re, Lines: 10, Source: cic.py


Example 18: get_all_mp3

import re
import urllib
import urllib2

def get_all_mp3(url):
    '''get all mp3 links from a url'''
    data = urllib2.urlopen(url).read()
    # absolute .mp3 links appearing anywhere in the page
    re_com = re.compile('http://.*?\.mp3')
    all = re_com.findall(data)
    # .mp3 links in href attributes, possibly relative to the page URL
    re_com = re.compile('<a href=\"(.*?\.mp3)\"')
    ll = re_com.findall(data)
    for i in ll:
        if urllib.basejoin(url,i) not in all:
            all.append(urllib.basejoin(url,i))
    return list(set(all)) # remove duplicate songs
Developer ID: Jarrey, Project: fmpi, Lines: 11, Source: get_sogou_mp3.py


Example 19: urlMerge

import urllib
import urlparse

def urlMerge(params, src):
    paramArr = __parseParams(params)  # __parseParams is defined elsewhere in customConversions.py
    paramTrunk = paramArr[0].replace('%s', src).replace("\t","")
    paramFile = paramArr[1].replace('%s', src).replace("\t","")

    if not paramFile.startswith('http'):
        up = urlparse.urlparse(urllib.unquote(paramTrunk))
        if paramFile.startswith('/'):
            return urllib.basejoin(up[0] + '://' + up[1], paramFile)
        else:
            return urllib.basejoin(up[0] + '://' + up[1] + '/' + up[2],paramFile)
    return src
Developer ID: mrknow, Project: filmkodi, Lines: 12, Source: customConversions.py


Example 20: _run_search

 def _run_search(self,pattern,href=None):
    if href==None:
       href="http://linuxtracker.org/index.php?page=torrents&search="+urllib.quote_plus(pattern)
    resp,content=self.http_queue_request(href)
    tree=libxml2.htmlParseDoc(content,"utf-8")
    try:
       pager=htmltools.find_elements(tree.getRootElement(),"form",name="change_page")[0]
       options=htmltools.find_elements(pager,"option")
       self.results_count=50*len(options)
    except:
       pager=None
       self.results_count=50
    restable=htmltools.find_elements(tree.getRootElement(),"table",**{'class':'lista'})[1]
    lines=htmltools.find_elements(restable,"tr")[1:]
    for i in lines:
       try:
          cat,link,torrent_link,date,seeders,leechers,a,b=htmltools.find_elements(i,"td")
          label=link.getContent()
          link=urllib.basejoin(href,htmltools.find_elements(link,"a")[0].prop('href'))
          torrent_link=urllib.basejoin(href,htmltools.find_elements(torrent_link,"a")[0].prop('href'))
          date=time.strptime(date.getContent(),"%d/%m/%Y")
          date=datetime.date(date.tm_year,date.tm_mon,date.tm_mday)
          seeders=eval(seeders.getContent())
          leechers=eval(leechers.getContent())
          resp,content=self.http_queue_request(link)
          itemtree=libxml2.htmlParseDoc(content,"utf-8")
          table=htmltools.find_elements(itemtree.getRootElement(),"table",**{'class':'coltable'})[0]
          size=None
          hashvalue=None
          for td in htmltools.find_elements(table,"td"):
             if td.getContent()=="Size" and size==None:
                size=td.next.next.getContent()
             if td.getContent()=="Info Hash" and hashvalue==None:
                hashvalue=td.next.next.getContent()
          self.add_result(linuxTRACKERPluginResult(label,date,size,seeders,leechers,torrent_link,hashvalue))
       except:
          pass
       if self.stop_search:
          return
    if not self.stop_search:
       try:
          if pager:
             spans=htmltools.find_elements(pager,"span")
             i=0
             while i<len(spans) and spans[i].prop('class')!='pagercurrent':
                i+=1
             i+=1
             if i<len(spans):
                link=htmltools.find_elements(spans[i],"a")[0]
                link=urllib.basejoin(href,link.prop('href'))
                self._run_search(pattern,link)
       except:
          pass
Developer ID: Mektub, Project: hconfig, Lines: 53, Source: linuxtracker.py



Note: The urllib.basejoin examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets were selected from open-source projects contributed by various developers; copyright remains with the original authors, and distribution or use should follow each project's License. Please do not reproduce without permission.

