
Python urlparse.urlparse Function Code Examples


This article collects typical usage examples of the urlparse function from Python's urlparse module. If you have been struggling with questions like: What exactly does Python's urlparse do? How do I call it? What does it look like in real code? Then the hand-picked examples below may be just the help you need.



The following shows 20 code examples of the urlparse function, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
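Before the examples, here is a minimal, self-contained sketch of the pattern nearly all of them share: parse a URL with urlparse.urlparse, then pull individual query parameters out of the query string with urlparse.parse_qs (Python 2; the URL is a hypothetical stand-in for the Taobao-style item URLs used below).

import urlparse

url = 'http://item.example.com/item.htm?id=12345&cm_id=67890'  # hypothetical item URL
url_info = urlparse.urlparse(url)
print url_info.netloc    # item.example.com
print url_info.query     # id=12345&cm_id=67890
item_id = urlparse.parse_qs(url_info.query, True)['id'][0]  # True keeps blank values
print item_id            # 12345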

Example 1: searchcrawler

def searchcrawler(url, keyword=''):
    """
    Taobao search-results page crawler
    """
    # Assumed context for this excerpt: urlparse and BeautifulSoup 3
    # (from BeautifulSoup import BeautifulSoup) are imported elsewhere;
    # get_html and judge_site are helpers defined in the same project.
    html = get_html(url)
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'row item icon-datalink'})
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
                item_info = item.find('div', {'class': 'col title'}).h3.a
                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_url
                print item_id
                judge_site(item_url, keyword)
        items_col = soup.findAll('div', {'class': 'col item icon-datalink'})
        if items_col:
            print '=======================row search col=========================='
            for item in items_col:
                item_info = item.find('div', {'class': 'item-box'}).h3.a
                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_url
                print item_id
                judge_site(item_url, keyword)
Author: chu888chu888 | Project: Crawler-python-tbcrawler | Lines: 32 | Source: crawler.py


Example 2: rendered_wall_posts

def rendered_wall_posts( wall_posts ):
	# Assumed context for this excerpt: re, urlparse, httplib and
	# opengraph are imported elsewhere in the module.
	for wall_post in wall_posts:
		title = ''
		desc = ''
		site_image = ''
		article_title = ''
		urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', wall_post.data['post_content'])
		for url in urls: 
			parse_obj = urlparse.urlparse(url)
			site = parse_obj.netloc
			path = parse_obj.path
			conn = httplib.HTTPConnection(site)
			conn.request('HEAD',path)
			response = conn.getresponse()
			conn.close()
			ctype = response.getheader('Content-Type')
			if response.status < 400 and ctype.startswith('image'):
				wall_post.data['post_content'] = wall_post.data['post_content']+"<br/><a href='"+url+"' target='_blank'><img width=300 src='"+url+"' target = '_blank'/></a>"
			else:
				og = opengraph.OpenGraph(url)
				if not len(og.items()) == 2:
					for x,y in og.items():
						if x == 'type' and y == 'video':
							for k,l in og.items():
								if k == 'site_name' and l == 'YouTube':
							
									url_data = urlparse.urlparse(url)
									query = urlparse.parse_qs(url_data.query)
									video = query["v"][0]
									wall_post.data['post_content'] = wall_post.data['post_content'].replace(url,"")+"<br/><iframe width='300' height='200' src='//www.youtube.com/embed/"+video+"' frameborder='0' allowfullscreen></iframe>"
								elif k == 'site_name' and l == 'Vimeo':
									url_data = urlparse.urlparse(url)
									video = url_data.path
									wall_post.data['post_content'] = wall_post.data['post_content'].replace(url,"")+"<br/><iframe src='//player.vimeo.com/video"+video+"' width='300' height='200' frameborder='0' webkitallowfullscreen mozallowfullscreen allowfullscreen></iframe> <p></p>"
						elif x == 'type' and y == 'article':
							for k,l in og.items():
								if k == 'title':
									article_title = l
								elif k == 'site_name':
									title = l
								elif k=='description':
									desc = l
								elif k=='image':
									site_image = l
							wall_post.data['post_content'] = wall_post.data['post_content'] +"<br/><table><tr><td><img width='50' src='"+site_image+"'</td><td><a href='"+url+"' target='_blank'/>"+article_title+"</a><br/>"+title+"</td></td></table>"
						elif x=='type':
							for k,l in og.items():
								if k == 'site_name':
									title = l
								elif k=='description':
									desc = l
								elif k=='image':
									site_image = l
							wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<table><tr><td><img width='50' src='"+site_image+"'</td><td><a href='"+url+"' target='_blank'/>"+title+"</a><br/>"+desc+"</td></td></table>")
				else:
					wall_post.data['post_content'] = wall_post.data['post_content'].replace(url, "<a href='"+url+"' target='_blank'>"+url+"</a>")	
	return wall_posts	
Author: dithua | Project: collato | Lines: 57 | Source: views.py


Example 3: searchcrawler

def searchcrawler(url):
    # Assumed context for this excerpt: urlparse and BeautifulSoup 3 are
    # imported elsewhere; get_html and judge_site are project helpers.
    html = get_html(url)
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'item-box st-itembox'})
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
                item_info = item.find('h3', {'class': 'summary'}).a
                item_url = item_info['href']

                sid_info = item.find('div', {'class': 'col seller feature-dsi-tgr'}).a
                print sid_info
                sid_item_url = sid_info['href']
                sid_url_info = urlparse.urlparse(sid_item_url)
                sid_id = urlparse.parse_qs(sid_url_info.query, True)['user_number_id'][0]
                print sid_id

                judge_site(item_url, sid_id)

        items_col = soup.findAll('div', {'class': 'product-item row icon-datalink'})
        if items_col:
            print '=======================row search col=========================='
            for item in items_col:
                item_info = item.find('div', {'class': 'title'}).a
                item_url = item_info['href']
                print item_url

                sid_info = item.find('div', {'class': 'seller'}).a
                print sid_info
                sid_item_url = sid_info['href']
                sid_url_info = urlparse.urlparse(sid_item_url)
                sid_id = urlparse.parse_qs(sid_url_info.query, True)['user_number_id'][0]
                print sid_id

                judge_site(item_url, sid_id)
Author: fubendong | Project: wangw | Lines: 50 | Source: tb.py


Example 4: post

    def post(self):
        # Assumed context for this excerpt (a Google App Engine handler):
        # urllib, urllib2, base64, urlparse, logging, settings, MicroTopic,
        # ReatiweError and DownloadError are imported/defined elsewhere.
        try:
            name = self.request.POST['name']
            topic = MicroTopic.all().filter('name =', name).get()
            if not topic:
                raise ReatiweError("Topic %s does not exist." % name)
            if self.request.POST['mode']:
                mode = self.request.POST['mode']
            else:
                mode = "subscribe"
            form_fields = {"hub.mode": mode,
                           "hub.callback": "%s/callback/%s" % (settings.SITE_URL, topic.name),
                           "hub.topic": topic.url,
                           "hub.verify": "sync",
                           "hub.verify_token": topic.name}
            result = 200
            url = self.request.POST['hub']
            req = urllib2.Request(url, urllib.urlencode(form_fields))
            o = urlparse.urlparse(url)
            # superfeedr support: credentials embedded in the hub URL become
            # an HTTP Basic Authorization header
            if o.username and o.password:
                base64string = base64.encodestring('%s:%s' % (o.username, o.password))[:-1]
                authheader = "Basic %s" % base64string
                new_url = "%s://%s%s" % (o.scheme, o.hostname, o.path)
                req = urllib2.Request(new_url, urllib.urlencode(form_fields))
                req.add_header("Authorization", authheader)
            urllib2.urlopen(req)
        except DownloadError, e:
            logging.error('DownloadError: %s' % repr(e))
Author: zh | Project: ReaTiWe | Lines: 30 | Source: webhooks.py


Example 5: searchcrawler

def searchcrawler(url):
    # Assumed context for this excerpt: urlparse, logging and BeautifulSoup 3
    # are imported elsewhere; get_html and download_reply_by_id are project
    # helpers.
    html = get_html(url)
    if html:
        soup = BeautifulSoup(html, fromEncoding='gbk')
        items_row = soup.findAll('div', {'class': 'product-iWrap'})
        if items_row:
            print '=======================row search row=========================='
            for item in items_row:
                try:
                    item_info = item.find('p', {'class': 'productTitle'}).a
                except AttributeError:
                    item_info = item.find('div', {'class': 'productTitle productTitle-spu'}).a

                item_url = item_info['href']
                url_info = urlparse.urlparse(item_url)
                item_id = urlparse.parse_qs(url_info.query, True)['id'][0]
                print item_id
                logging.warning(item_id)

                download_reply_by_id(item_id)
Author: fubendong | Project: test | Lines: 29 | Source: phone_get.py


Example 6: gensitemap

def gensitemap(server, urlformat):
    '''
    Build the sitemap-index URL for the given file server.
    Connects to the second-level indexes and reads their modification date.

    @type server: dict-like
    @param server: server document as it comes from MongoDB

    @rtype: tuple (str, datetime) or None
    @return: tuple with the URL and its modification date, or None if the
             URL cannot be retrieved.
    '''
    # Assumed imports for this excerpt: urlparse, httplib, time, datetime.
    subdomain = server["ip"].split(".")[0]
    serverno = int(subdomain[6:])
    url = urlformat % serverno
    domain = urlparse.urlparse(url)[1]
    con = httplib.HTTPConnection(domain)
    con.request("HEAD", url)
    response = con.getresponse()

    if response.status == 200:
        mtime = time.mktime(time.strptime(
            response.getheader("Last-Modified"),
            "%a, %d %b %Y %H:%M:%S %Z"))
        return (url, datetime.datetime.fromtimestamp(mtime))

    return None
Author: kultus | Project: foofind-web | Lines: 27 | Source: index.py


Example 7: startupagent

    def startupagent(self, sender, **kwargs):
        # Assumed context for this excerpt: re, os, gevent, _log,
        # WebApplicationWrapper and WSGIServer come from the surrounding module.
        if not self.bind_web_address:
            _log.info('Web server not started.')
            return
        import urlparse
        parsed = urlparse.urlparse(self.bind_web_address)
        hostname = parsed.hostname
        port = parsed.port

        _log.info('Starting web server binding to {}:{}.' \
                   .format(hostname, port))
        self.registeredroutes.append((re.compile('^/discovery/$'), 'callable',
                                      self._get_discovery))
        self.registeredroutes.append((re.compile('^/discovery/allow$'),
                                      'callable',
                                      self._allow))
        self.registeredroutes.append((re.compile('^/$'), 'callable',
                                      self._redirect_index))
        port = int(port)
        vhome = os.environ.get('VOLTTRON_HOME')
        logdir = os.path.join(vhome, "log")
        if not os.path.exists(logdir):
            os.makedirs(logdir)

        self.appContainer = WebApplicationWrapper(self, hostname, port)
        svr = WSGIServer((hostname, port), self.appContainer)
        self._server_greenlet = gevent.spawn(svr.serve_forever)
Author: schandrika | Project: volttron | Lines: 28 | Source: web.py


Example 8: fps_ipn_handler

    def fps_ipn_handler(self, request):
        # Assumed context for this excerpt: urlparse, urllib, time, datetime,
        # Django's HttpResponse/HttpResponseForbidden, AmazonFPSResponse and
        # the transaction signals are imported elsewhere in the module.
        uri = request.build_absolute_uri()
        parsed_url = urlparse.urlparse(uri)
        resp = self.fps_connection.verify_signature(UrlEndPoint="%s://%s%s" % (parsed_url.scheme,
                                                                  parsed_url.netloc,
                                                                  parsed_url.path),
                                                    HttpParameters=request.body)
        if not resp.VerifySignatureResult.VerificationStatus == "Success":
            return HttpResponseForbidden()

        data = dict(map(lambda x: x.split("="), request.body.split("&")))
        for (key, val) in data.items():
            data[key] = urllib.unquote_plus(val)
        if AmazonFPSResponse.objects.filter(transactionId=data["transactionId"]).count():
            resp = AmazonFPSResponse.objects.get(transactionId=data["transactionId"])
        else:
            resp = AmazonFPSResponse()
        for (key, val) in data.items():
            attr_exists = hasattr(resp, key)
            if attr_exists and not callable(getattr(resp, key, None)):
                if key == "transactionDate":
                    val = datetime.datetime(*time.localtime(float(val))[:6])
                setattr(resp, key, val)
        resp.save()
        if resp.statusCode == "Success":
            transaction_was_successful.send(sender=self.__class__,
                                            type=data["operation"],
                                            response=resp)
        else:
            if not "Pending" in resp.statusCode:
                transaction_was_unsuccessful.send(sender=self.__class__,
                                                  type=data["operation"],
                                                  response=resp)
        # Return a HttpResponse to prevent django from complaining
        return HttpResponse(resp.statusCode)
Author: BrajeshKhare | Project: merchant | Lines: 35 | Source: amazon_fps_integration.py


Example 9: startupagent

    def startupagent(self, sender, **kwargs):
        # Assumed context for this excerpt: re, os, pywsgi and _log come
        # from the surrounding module.
        if not self.bind_web_address:
            _log.info('Web server not started.')
            return
        import urlparse
        parsed = urlparse.urlparse(self.bind_web_address)
        hostname = parsed.hostname
        port = parsed.port

        _log.info('Starting web server binding to {}:{}.' \
                   .format(hostname, port))
        self.registeredroutes.append((re.compile('^/discovery/$'), 'callable',
                                      self._get_discovery))
        self.registeredroutes.append((re.compile('^/discovery/allow$'),
                                      'callable',
                                      self._allow))
        self.registeredroutes.append((re.compile('^/$'), 'callable',
                                      self._redirect_index))
        port = int(port)
        vhome = os.environ.get('VOLTTRON_HOME')
        logdir = os.path.join(vhome, "log")
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        with open(os.path.join(logdir, 'web.access.log'), 'wb') as accesslog:
            with open(os.path.join(logdir, 'web.error.log'), 'wb') as errlog:
                server = pywsgi.WSGIServer((hostname, port), self.app_routing,
                                       log=accesslog, error_log=errlog)
                server.serve_forever()
Author: cbs-iiith | Project: volttron | Lines: 29 | Source: web.py


Example 10: judge_site

def judge_site(url, keyword=''):
    """
    Decide whether the item is a Taobao (tb) or Tmall (tm) listing
    """
    # Assumed context for this excerpt: urlparse and traceback are imported
    # elsewhere; check_item_update_time, getTmallItemInfo and
    # getTaobaoItemInfo are project helpers.
    url_info = urlparse.urlparse(url)
    urlkey = urlparse.parse_qs(url_info.query, True)
    iid = int(urlkey['id'][0])
    try:
        if url_info[1] == 'detail.tmall.com':
            print 'it is a tm item'
            if check_item_update_time(iid, 'tm'):
                return
            data = getTmallItemInfo(iid, keyword)
        elif urlkey.get('cm_id'):
            print 'it is a tm item'
            if check_item_update_time(iid, 'tm'):
                return
            data = getTmallItemInfo(iid, keyword)
        else:
            print 'it is a tb item'
            if check_item_update_time(iid, 'tb'):
                return
            data = getTaobaoItemInfo(iid, keyword)
    except Exception, e:
        traceback.print_exc()
        return
Author: chu888chu888 | Project: Crawler-python-tbcrawler | Lines: 27 | Source: crawler.py


Example 11: judge_site

def judge_site(url, sid_id):
    """
    Decide whether the item is a Taobao (tb) or Tmall (tm) listing
    """
    # Assumed context for this excerpt: urlparse and traceback are imported
    # elsewhere; download_tb_reply_by_id is a project helper.
    url_info = urlparse.urlparse(url)
    urlkey = urlparse.parse_qs(url_info.query, True)
    iid = int(urlkey['id'][0])
    print iid
    try:
        if url_info[1] == 'detail.tmall.com':
            print 'it is a tm item'
            # data = download_tm_reply_by_id(iid)
        elif urlkey.get('cm_id'):
            print 'it is a tm item cm_id'
            # data = download_tm_reply_by_id(iid)
        else:
            print 'it is a tb item'
            data = download_tb_reply_by_id(iid, sid_id)
    except Exception, e:
        traceback.print_exc()
        return
Author: fubendong | Project: wangw | Lines: 25 | Source: tb.py


Example 12: is_local_service

def is_local_service(name):
    """
    Determine if a service definition describes a service running on
    the local node. This is true if the service URL is for localhost,
    matches the machine's name, or its EC2 public name.
    """
    # Assumed context for this excerpt: urlparse and platform are imported
    # elsewhere; public_name is a helper defined in the same module.
    if name is None:
        return False
    if "://" in name:
        url = urlparse.urlparse(name)
        if ":" in url.netloc:
            name = url.netloc.split(":")[0]
        else:
            name = url.netloc
    elif ":" in name:
        name = name.split(":")[0]

    if name == "localhost":
        return True

    if '.' in name:
        name = name.split('.')[0]
    node = platform.node()
    if '.' in node:
        node = node.split('.')[0]

    if name == node:
        return True
    pn = public_name()
    if pn is not None and pn.split(".")[0] == name:
        return True
    return False
Author: bbockelm | Project: globus-toolkit | Lines: 32 | Source: __init__.py
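A quick usage sketch under the assumptions noted in the excerpt: a URL whose host is localhost is always reported as local.

>>> is_local_service("http://localhost:8080/service")
True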


Example 13: wait_for_servers

def wait_for_servers(urls, timeout):
    import time, urlparse, httplib
    from ssl import SSLError

    for u in urls:
        parsed = urlparse.urlparse(u.lower(), "https")
        netloc = parsed.hostname
        if parsed.port:
            netloc = "%s:%s" % (netloc, parsed.port)
        if parsed.scheme == "http":
            cnxn = httplib.HTTPConnection(netloc)
        elif parsed.scheme == "https":
            cnxn = httplib.HTTPSConnection(netloc)
        else:
            raise Exception("Don't know how to handle scheme %s" % parsed.scheme)
        i = 0
        while i < timeout:
            try:
                cnxn.connect()
            except SSLError:
                break  # an SSL error means the server answered the handshake
            except Exception as e:
                if "Connection refused" in str(e):
                    time.sleep(1)
                    i += 1  # count the retry; the original decremented i here, so the loop could never time out
                elif "SSL" in str(e):
                    break
                else:
                    raise
            else:
                break
Author: CarolinaFernandez | Project: ocf-expedient-ro | Lines: 30 | Source: utils.py


Example 14: _extracturls

    def _extracturls(self):
        # Assumed context for this excerpt: urlparse and HTMLParser are
        # imported elsewhere; ExtractLinks is the project's link parser.
        urls = []
        htmlsrc, charset, parenturl = self.htmlSrcTuple
        if htmlsrc is not None:
            resulturls = []
            urlExtractor = ExtractLinks(resulturls)
            try:
                if charset is None:
                    urlExtractor.feed(htmlsrc)
                else:
                    urlExtractor.feed(htmlsrc.decode(charset))
            except HTMLParser.HTMLParseError:
                pass
            try:
                urlExtractor.reset()  # close() would need special treatment here
            except HTMLParser.HTMLParseError:
                urlExtractor.reset()
            # Join network-location-free URIs against the parent URL to form
            # full URLs
            for i in xrange(len(resulturls)):  # xrange instead of range for performance
                urlres = urlparse.urlparse(resulturls[i], "http")
                if urlres.netloc == "":
                    resulturls[i] = urlparse.urljoin(parenturl, resulturls[i])
            urls.extend(resulturls)  # extend once, outside the loop; the original extended on every iteration, duplicating entries
        return urls
Author: dpritsos | Project: Synergy-Crawler | Lines: 26 | Source: linkextractors.py


Example 15: getParams

def getParams(path):
    # Assumed context for this excerpt: urlparse is imported and Params is a
    # small container (e.g. a namedtuple) defined elsewhere in the module.
    query = urlparse.urlparse(path).query
    queryDict = dict([x.split('=') for x in query.split('&')])

    width = queryDict['WIDTH']
    height = queryDict['HEIGHT']
    bbox = queryDict['BBOX']
    return Params(int(width), int(height), map(float, bbox.split(',')))
Author: gumik | Project: google-maps-wms | Lines: 8 | Source: google_maps_wms.py
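A usage sketch under the assumption that Params is a simple container such as a namedtuple (the request path is hypothetical):

p = getParams('/wms?WIDTH=256&HEIGHT=128&BBOX=1.0,2.0,3.0,4.0')
# p == Params(256, 128, [1.0, 2.0, 3.0, 4.0])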


Example 16: get_sub_domain_from_http_host

def get_sub_domain_from_http_host(http_host):
    '''
    @note: extract the subdomain prefix from an HTTP host
    '''
    import urlparse
    if http_host:
        http_host = ('http://%s' % http_host) if not http_host.startswith('http') else http_host
        prefix = urlparse.urlparse(http_host)[1].split('.', 1)[0]
        return prefix
Author: lantianlz | Project: cheka | Lines: 9 | Source: utils.py
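A usage sketch (hypothetical hosts): the helper keeps only the text before the first dot of the host, with or without a scheme.

>>> get_sub_domain_from_http_host('blog.example.com')
'blog'
>>> get_sub_domain_from_http_host('http://shop.example.com:8080/path')
'shop'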


Example 17: relative_uri

def relative_uri(base, target):
    """
    >>> relative_uri(u"http://example.com/foo/", u"http://example.com/foo/bar")
    u'bar'

    >>> relative_uri(u"http://example.com/baz/", u"http://example.com/foo/bar")
    u'../foo/bar'

    >>> relative_uri(u"http://example2.com/baz/", u"http://example.com/foo/bar")
    u'http://example.com/foo/bar'

    """
    # Assumed imports for this excerpt: urlparse, posixpath.
    base_bits = urlparse.urlparse(base)
    target_bits = urlparse.urlparse(target)
    if base_bits.netloc != target_bits.netloc:
        return target
    base_dir = '.' + posixpath.dirname(base_bits.path)
    target = '.' + target_bits.path
    return posixpath.relpath(target, start=base_dir)
Author: ericmoritz | Project: static-ld | Lines: 19 | Source: cmd.py


Example 18: extract_query_params

def extract_query_params(url, *names):
    """
    Extracts the query parameters listed in names from url
    @param url: URL to parse
    @param names: parameter names to keep
    @return: dict
    """
    # Assumed import for this excerpt: urlparse.
    parsed_res = urlparse.urlparse(url)
    d = urlparse.parse_qs(parsed_res.query)

    return {key: value[0] for (key, value) in d.iteritems() if key in names}
Author: beforebeta | Project: dealfu | Lines: 11 | Source: utils.py
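A usage sketch (hypothetical URL); only the requested names survive, each reduced to its first value:

params = extract_query_params('http://example.com/page?a=1&b=2&c=3', 'a', 'c')
# params == {'a': '1', 'c': '3'}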


Example 19: get

    def get(self):
        # Assumed context for this excerpt (a Google App Engine handler):
        # os, urlparse and the webapp template module are imported elsewhere.
        self.response.headers['Content-Type'] = 'text/html'
        path = os.path.join(os.path.dirname(__file__), 'admin.html')
        u = urlparse.urlparse(self.request.url)
        dashboard = ""
        if u.netloc.startswith("localhost"):
            dashboard = "/_ah/admin"
        else:
            appname = u.netloc[:u.netloc.find(".")]
            dashboard = "https://appengine.google.com/dashboard?&app_id=s~" + appname
        self.response.out.write(template.render(path, {"dashboard": dashboard}))
Author: glukagen | Project: EnglishQualityGame | Lines: 11 | Source: admin.py


Example 20: add_params

def add_params(url, params):
    import urllib
    import urlparse

    url_parts = list(urlparse.urlparse(url))
    query = dict(urlparse.parse_qsl(url_parts[4]))
    query.update(params)

    url_parts[4] = urllib.urlencode(query)

    return urlparse.urlunparse(url_parts)
Author: rcoh | Project: oauth-shim | Lines: 11 | Source: authshim.py
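A usage sketch (hypothetical URL): existing parameters are preserved and the new ones merged in, though urlencode may emit them in any order.

new_url = add_params('http://example.com/search?q=python', {'page': '2'})
# e.g. 'http://example.com/search?q=python&page=2'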



Note: the urlparse.urlparse function examples in this article were compiled by 纯净天空 from GitHub, MSDocs and other source-code and documentation platforms. The snippets are selected from open-source projects contributed by the community; copyright of the source code remains with the original authors. For distribution and use, please refer to the corresponding project's License; do not reproduce without permission.



