
Python urlparse.urlsplit Function Code Examples


This article collects typical usage examples of the urlsplit function from Python's urllib2.urlparse module. If you have been wondering how exactly to use urlsplit, how to call it, or what real-world usage looks like, the hand-picked code samples below should help.



A total of 20 urlsplit code examples are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
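
Before the examples, here is a minimal sketch of what urlsplit returns (the URL and printed values are illustrative only; under Python 2 the function lives in the urlparse module, which urllib2 re-exports, while Python 3 moved it to urllib.parse):

import urlparse  # Python 2; in Python 3 use: from urllib import parse as urlparse

parts = urlparse.urlsplit('http://www.example.com/path/page?q=1#top')
print parts.scheme    # 'http'
print parts.netloc    # 'www.example.com'
print parts.path      # '/path/page'
print parts.query     # 'q=1'
print parts.fragment  # 'top'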

Example 1: validate_links

def validate_links(data):
    widgets = [Bar(), SimpleProgress()]
    pbar = ProgressBar(widgets=widgets, maxval=len(data)).start()
    for i, element in enumerate(data):
        url = element['url']
        if url == '':
            continue
        scheme = urlparse.urlsplit(url).scheme
        host = urlparse.urlsplit(url).netloc
        if scheme in ('http', 'https') and \
            url_status_cache.get(url) is not True:
            try:
                request = head(url, timeout=10)
                # some web sites do not handle HEAD requests
                if request.status_code in (403, 405, 500) or \
                    host in ('mobil.morgenpost.de',):
                    request = get(url)
            except Timeout as e:
                stderr.write('Connection to <%s> timed out.\n' % url)
                exit(1)
            except ConnectionError as e:
                stderr.write('Connection to <%s> failed.\n' % url)
                stderr.write(str(e) + '\n')
                exit(1)
            if request.ok:
                url_status_cache.set(url, request.ok)
            else:
                stderr.write('<%s> is unreachable.\n' % url)
                exit(1)
        pbar.update(i+1)
Developer: erlehmann, Project: redokast, Lines: 30, Source: validate-links.py


Example 2: _send_header

    def _send_header(self, header_pieces, headers, body, is_request):

        if not self.headers_prepared:
            body_length = len(body)
            had_length = False
            had_host = False
            if is_request:
                resource = header_pieces[1]
                splitted = urlparse.urlsplit(resource)
                url = splitted.path
                if splitted.query:
                    url += '?' + splitted.query
                header_line = '%s %s HTTP/%s\r\n' % (header_pieces[0], url, header_pieces[2])
            else:
                header_line = 'HTTP/%s %s %s\r\n' % header_pieces

            io_request = StringIO()
            io_request.write(header_line)
            for name, value in headers.iteritems():
                if name == 'content-length':
                    io_request.write('%s: %s\r\n' % (name.title(), body_length))
                    had_length = True
                else:
                    io_request.write('%s: %s\r\n' % (name.title(), value))
                if name == 'host':
                    had_host = True

            if not had_length and body_length > 0:
                io_request.write('%s: %s\r\n' % ('Content-Length', body_length))

            if not had_host and is_request:
                splitted = urlparse.urlsplit(resource)
                io_request.write('%s: %s\r\n' % ('Host', splitted.hostname))

            io_request.write('\r\n')
            self.buffer = io_request.getvalue()
            io_request.close()
            self.headers_prepared = True
            self.to_write = len(self.buffer)
            self.written = 0

        if not self.headers_sent:
            while self.to_write > 0:
                written = self.csock.send(self.buffer[self.written:])
                self.written += written
                self.to_write -= written

            self.headers_sent = True
Developer: g-fleischer, Project: wtfy, Lines: 48, Source: HttpWriter.py


Example 3: download_metadata

def download_metadata(target_directory):
    """
    Downloads XML files for PMCIDs on stdin into given directory.
    """
    stderr.write('Input PMCIDs, delimited by whitespace: ')
    pmcids = stdin.read().split()
    if len(pmcids) == 0:
        raise RuntimeError, 'No PMCIDs found.'

    # delete files from earlier invocations
    listing = listdir(target_directory)
    for filename in listing:
        file_path = path.join(target_directory, filename)
        stderr.write("Removing “%s” … " % file_path)
        remove(file_path)
        stderr.write("done.\n")

    # chunk function by nosklo, source:
    # <http://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks#answer-434328>
    def chunker(seq, size):
        return (seq[pos:pos + size] for pos in xrange(0, len(seq), size))

    for i, chunk in enumerate(chunker(pmcids, 365)):
        url = _get_query_url_from_pmcids(chunk)
        yield { 'url': url, 'completed': 0, 'total': 1 }

        url_path = urlparse.urlsplit(url).path
        local_filename = path.join(target_directory, \
            url_path.split('/')[-1] + str(i))
        with open(local_filename, 'wb') as local_file:
            content = _get_file_from_pmcids(chunk)
            local_file.write(content.read())
            yield { 'url': url, 'completed': 1, 'total': 1 }
Developer: npettiaux, Project: open-access-media-importer, Lines: 33, Source: pmc_pmcid.py


Example 4: validateURL

    def validateURL (cls, full_url, video_item=True):
        """Make sure the url passed is in a valid form and return a video parser object"""
        if not isinstance (full_url, str):
            raise TypeError ("Argument must be a string")

        spliturl = urlparse.urlsplit (full_url)
        hostname = spliturl.hostname
#        print len (cls.parsers.keys ())

        if not hostname:
            return None
        elif hostname.startswith ("www."):
            hostname = hostname[len ("www."):]  # lstrip("www.") would strip characters, not the prefix

        if hostname not in cls.parsers:
            return None

        page_parser = cls.parsers[hostname].checkURL (full_url)
        if page_parser and video_item:
            youtube_video = VideoItem (page_parser)
        elif page_parser:
            youtube_video = page_parser
        else:
            youtube_video = None

        return youtube_video
Developer: Ryochan7, Project: YouTubed-2x, Lines: 26, Source: parsermanager.py


Example 5: set_language_ex

def set_language_ex(request):
    next = request.POST.get('next', request.GET.get('next'))
    if not is_safe_url(url=next, host=request.get_host()):
        next = request.META.get('HTTP_REFERER')
        if not is_safe_url(url=next, host=request.get_host()):
            next = '/'

    # remove lang from query
    scheme, netloc, path, query, fragment = urlparse.urlsplit(next)
    parsed_query = urlparse.parse_qsl(query)
    altered = False
    for k, v in parsed_query[:]:
        if LANG_GET_KEY == k:
            parsed_query.remove((k, v))
            altered = True
    if altered:
        query = urllib.urlencode(parsed_query)
        next = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

    response = http.HttpResponseRedirect(next)
    if request.method == 'POST':
        lang_code = request.POST.get('language', None)
        if lang_code and check_for_language(lang_code):
            if hasattr(request, 'session'):
                request.session[LANGUAGE_SESSION_KEY] = lang_code
            else:
                response.set_cookie(settings.LANGUAGE_COOKIE_NAME, lang_code,
                                    max_age=settings.LANGUAGE_COOKIE_AGE,
                                    path=settings.LANGUAGE_COOKIE_PATH,
                                    domain=settings.LANGUAGE_COOKIE_DOMAIN)
    return response
Developer: Nnonexistent, Project: chemphys, Lines: 31, Source: views.py


Example 6: download_content_list

def download_content_list(detail_url, headers, timeout):
    """
    sample url: http://weixin.sogou.com/gzhjs?openid=oIWsFt86NKeSGd_BQKp1GcDkYpv0&ext=D4y5Z3wUwj5uk6W7Yk9BqC3LAaFqirWHT5QFje14y0dip_leVhZF6qjo9Mm_UUVg&cb=sogou.weixin_gzhcb&page=1&gzhArtKeyWord=&tsn=0&t=1459425446419&_=1459425446169

    The openid parameter is fixed,
    ext is fixed as well,
    and cb=sogou.weixin_gzhcb is also fixed.
    The only fields that change are t and _, which appear to be timestamps of when the page was opened.
    """
    global start_flag
    total_records = 0
    context_lst = []
    _t = start_flag 
    now = int(time.time() * 1000)
    url_netloc = urlparse.urlsplit(detail_url)
    cur_url = 'http://%s/gzhjs?%s' % (url_netloc.netloc, url_netloc.query)
    params = "cb=sogou.weixin_gzhcb&page=%s&gzhArtKeyWord=&tsn=0&t=%s&_=%s"
    query_url = cur_url + '&' + params

    for i in range(1, 11):
        target_url = query_url % (i, now, _t)
        print target_url
        resp = download_page(target_url, headers, timeout=DEFAULT_TIMEOUT)
        strip_text = resp.text.replace('sogou.weixin_gzhcb(', '')
        strip_text = strip_text[:len(strip_text)-1]
        context_lst.extend(json.loads(strip_text).get('items', []))
        if not total_records:
            total_records = json.loads(strip_text).get('totalItems', 0)
        _t = _t + 1
        time.sleep(2)

    return context_lst
Developer: seraphln, Project: wheel, Lines: 32, Source: crawler.py


Example 7: submit

    def submit(self, opener, res):
        """submit WAYF form with IDP

        :param opener: the urllib2 opener
        :param data: the form data as a dictionary
        :param res: the response object

        """
        log.info("Submitting form to wayf")
        # Set IDP to correct IDP
        wayf_data = {}
        idp = self.idp
        data = self.data
        idps = {}
        for d in data["user_idp"]:
            if isinstance(data["user_idp"][d], dict):
                idps.update(data["user_idp"][d])
        if not idp.get_idp() in idps:
            raise WAYFException("Can't find IdP '%s' in WAYF's IdP list" % idp)
        wayf_data["user_idp"] = idps[idp.get_idp()]
        wayf_data["Select"] = "Select"
        if data["form"]["action"].startswith("?"):
            urlsp = urlparse.urlsplit(res.url)
            urlsp = urlparse.urlunsplit((urlsp[0], urlsp[1], urlsp[2], "", ""))
            url = res.url + data["form"]["action"]
        else:
            url = urlparse.urljoin(res.url, data["form"]["action"])
        data = urllib.urlencode(wayf_data)
        request = Request(url, data)
        log.debug("POST: %s" % request.get_full_url())
        response = opener.open(request)
        return request, response
Developer: russell, Project: sibboleth, Lines: 32, Source: forms.py


Example 8: open_url

def open_url(url, **kwargs):
    """
    open_url(url, **kwargs) - open url and return file descriptor

    url - local file path or full url path. Allowed protocols are local file
    path, file, http and ftp

    kwargs - additional attributes according to protocol, 'mode' for local
    path and file protocol, 'proxy', 'data' and 'timeout' (Python >= 2.6)
    for http and ftp protocols

    Examples:

    open_url('/home/praetorian/secret.txt')
    open_url('file:///home/praetorian/secret.txt', mode='r')
    open_url('http://domain.tld/secret.txt', proxy='172.16.1.100:8000')
    open_url('ftp://domain.tld/secret.txt')
    """
    bits = urlparse.urlsplit(url)
    attrs = kwargs

    if bits.scheme in ('', 'file'):
        url = bits.netloc + bits.path
        opener = open
    elif bits.scheme in ('http', 'ftp'):
        handlers = []
        if 'proxy' in attrs:
            handlers.append(ProxyHandler({bits.scheme: attrs.pop('proxy')}))

        url =  bits.geturl()
        opener = build_opener(*handlers).open
    else:
        raise URLError("Unsupported protocol '%s'" % bits.scheme)

    return opener(url, **attrs)
Developer: centrumholdings, Project: yowie, Lines: 35, Source: fileutils.py


Example 9: _do_request

    def _do_request(self, request_id, parameters={}):
        """
        """
        if request_id is None:
            # Generate a new request identifier using the class' default generator
            request_id = self.idgenerator.id()
        
        req_params = dict(parameters)
        req_params.update(dict(
            partner = self.partner,
            vendor = self.vendor,
            user = self.username,
            pwd = self.password,            
        ))
        
        parmlist = self._build_parmlist(req_params)
        
        headers = {
            'Host': urlparse.urlsplit(self.url_base)[1],
            'X-VPS-REQUEST-ID': str(request_id),
            'X-VPS-CLIENT-TIMEOUT': str(self.timeout), # Doc says to do this
            'X-VPS-Timeout': str(self.timeout), # Example says to do this
            'X-VPS-INTEGRATION-PRODUCT': self.CLIENT_IDENTIFIER,
            'X-VPS-INTEGRATION-VERSION': self.API_VERSION,
            'X-VPS-VIT-OS-NAME': sys.platform,
            'Connection': 'close',
            'Content-Type': 'text/namevalue',            
            }

        self.log.debug(u'Request Headers: %s' % headers)
            
        try_count = 0
        results = None
        while (results is None and try_count < self.MAX_RETRY_COUNT):
            try:
                try_count += 1
                request = Request(
                    url = self.url_base, 
                    data = parmlist.encode('utf-8'), 
                    headers = headers)
                    
                response = urlopen(request)
                result_parmlist = response.read()
                response.close()
                
                self.log.debug(
                    u'Result text: %s' % result_parmlist.decode('utf-8')
                )
                
                results = self._parse_parmlist(result_parmlist)
            except Exception, e:
                
                if try_count < self.MAX_RETRY_COUNT:
                    self.log.warn(
                        u'API request attempt %s of %s failed - %%s' % (
                            try_count, self.MAX_RETRY_COUNT), e
                        )
                else:
                    self.log.exception(u'Final API request failed - %s', e)
                    raise e
Developer: briang1, Project: python-payflowpro, Lines: 60, Source: client.py


Example 10: version_matcher

 def version_matcher(self, url):
     fname = os.path.basename(urlparse.urlsplit(url).path)
     version_match = re.search(r"([0-9]{2}.[0-9]{0,2}.[0-9]{0,2})", fname)
     if version_match is None:
         raise ProcessorError("Something went wrong matching FMP update to full version.")
     else:
         return version_match.group(1)
Developer: grahampugh, Project: recipes, Lines: 7, Source: FilemakerProAdvancedUpdateURLProcessor.py


Example 11: victimise

def victimise(victim, uri):
    raw_url = victim + uri
    scheme, netloc, path, raw_query, fragment = urlparse.urlsplit(raw_url)
    query = urlparse.parse_qs(raw_query)
    url = urlparse.urlunsplit((scheme, netloc, path, urlencode(query, True), fragment))
    print url
    http_client.fetch(url, fetch, use_gzip=False)
Developer: rodders, Project: darklaunch, Lines: 7, Source: simplesender.py


Example 12: download_metadata

def download_metadata(target_directory):
    """
    Downloads XML files for DOIs on stdin into given directory.
    """
    stderr.write('Input DOIs, delimited by whitespace: ')
    dois = stdin.read().split()
    if len(dois) == 0:
        raise RuntimeError, 'No DOIs found.'

    stderr.write('Getting PubMed Central IDs for given DOIs … ')
    pmcids = _get_pmcids_from_dois(dois)
    if len(pmcids) == 0:
        raise RuntimeError, 'No PubMed Central IDs for given DOIs found.'
    stderr.write('found: %s\n' % ', '.join(pmcids))

    url = _get_query_url_from_pmcids(pmcids)
    yield { 'url': url, 'completed': 0, 'total': 1 }

    url_path = urlparse.urlsplit(url).path
    local_filename = path.join(target_directory, \
        url_path.split('/')[-1])
    with open(local_filename, 'wb') as local_file:
        content = _get_file_from_pmcids(pmcids)
        local_file.write(content.read())
        yield { 'url': url, 'completed': 1, 'total': 1 }
Developer: npettiaux, Project: open-access-media-importer, Lines: 25, Source: pmc_doi.py


Example 13: login_proceed

def login_proceed(request):
    """View that handles the successful login.
    """

    template_name = '_user_login.html'

    # Check if the request came from logout page, if so set
    # authentication to redirect to home page
    referer_path = urlparse.urlsplit(request.META['HTTP_REFERER'])[2]
    if referer_path == reverse('auth_logout'):
      response = {
        'authentication': 'success',
        'redirect': reverse('home_page'),
        }
    elif referer_path == reverse('registration_activation_complete'):
      response = {
        'authentication': 'success',
        'redirect': reverse('view_profile'),
        }
    else:
        response = {
          'authentication': 'success',
          'markup': loader.render_to_string(template_name,
                                            RequestContext(request, {}))
        }

    json_response = json.dumps(response)
    return http.HttpResponse(json_response)
Developer: arvindkhadri, Project: pytask, Lines: 28, Source: views.py


Example 14: make_requests_from_url

    def make_requests_from_url(self, url):

        kw = self.macro.query(url)
        us = urlparse.urlsplit(url)
        qstr = dict(urlparse.parse_qsl(us.query))
        base = urlparse.urlunsplit(us._replace(query=''))
        meta = {'keyword':kw}
        return FormRequest(base, formdata=qstr, method=self.start_method, headers=self.headers, cookies=self.cookies, dont_filter=True, meta=meta)
Developer: BlankRain, Project: webbot, Lines: 8, Source: webbot_spider.py


Example 15: testIndexRedirect

 def testIndexRedirect(self):
     if settings.SET_URL_ROOT_HANDLER:
         response = self.client.get('/')
         self.assertEquals(response.status_code, 302)
         # Documentation says that we must get response.headers, but
         # instead we have HttpResponseRedirect object here
         self.assertEquals(urlparse.urlsplit(response['Location'])[2],
                           '/' + settings.BLOG_URLCONF_ROOT)
Developer: gvidon, Project: blombum, Lines: 8, Source: tests.py


Example 16: generate_urls

def generate_urls(obj, macro):
    try:
        if type(obj)==list:
            for url in obj:
                yield macro.expand(url)

        elif type(obj)==dict:
            base = macro.expand(obj['base'].encode('utf-8'))
            us = urlparse.urlsplit(base)
            qstr = dict(urlparse.parse_qsl(us.query))
            qstr.update(obj.get('qstr', {}))
            base = urlparse.urlunsplit(us._replace(query=''))

            for k,v in qstr.iteritems():
                if type(v)==dict and type(v['val'])==unicode:
                    v = v['val'].encode(v.get('enc', 'utf-8'), errors='ignore')
                qstr[k] = macro.expand(v)

            if 'keywords' in obj:
                kw_obj = obj['keywords']

                sub = kw_obj.get('sub')
                if sub:
                    frm = sub.get('from')
                    to = sub.get('to')
                    sub = functools.partial(re.sub, frm, to)
                else:
                    sub = lambda x:x

                for kw in load_keywords(kw_obj):

                    if kw==MAGIC:
                        yield 'http://0.0.0.0'
                        continue

                    key = kw_obj['name'].encode('utf-8')
                    val = kw
                    col = kw_obj.get('col', 0)
                    sep = kw_obj.get('sep')
                    if col>0:
                        val = val.split(sep)[col-1]
                    val = sub(val)
                    if kw_obj.get('query', True):
                        qstr.update({key:val})
                        url = base+'?'+urlencode(qstr)
                    else:
                        val = val.encode(kw_obj.get('enc', 'utf-8'), errors='ignore') if type(val)==unicode else str(val)
                        url = base.replace(key, val)+'?'+urlencode(qstr)
                    macro.update({'sep':sep})
                    macro.bind(url, kw)
                    yield url
            else:
                url = base+'?'+urlencode(qstr)
                yield url

    except Exception as ex:
        log.msg(u'cannot generate urls: {}'.format(ex), level=log.ERROR)
        raise CloseSpider()
Developer: BlankRain, Project: webbot, Lines: 58, Source: utils.py


Example 17: urlStringToServers

def urlStringToServers(urlString):
    " convert |-sep list of urls to list of hostnames "
    servers = set()
    urls = urlString.split("|")
    for url in urls:
        parts = urlparse.urlsplit(url)
        server = parts[1]
        server = server.replace("www.", "").strip()
        if server!="" and not "pubmedcentral" in server:
            servers.add(server)
    return servers
Developer: Moxikai, Project: pubMunch, Lines: 11, Source: pubResolvePublishers.py


Example 18: getapodlist

def getapodlist(url, picpath):
    feed = feedparser.parse(url)
    for item in feed["items"]:
        pic = item["description"]
        parseurl = urlparse.urlsplit(pic)
        outfile = parseurl.path.split("/")[3]
        picfile = os.path.join(picpath, outfile)
        if os.path.isfile(picfile):
            pass
        else:
            urlretrieve(pic, picfile)
Developer: mabeltron, Project: apodpaper, Lines: 11, Source: main.py


Example 19: delete_remote_file

        def delete_remote_file():
            remote_filename = os.path.basename(urlparse.urlsplit(remote_url)[2])
            remote_file = os.path.join(cfg_remotepath, remote_filename)

            ssh_retval = subprocess.call(["ssh", "-o", "PasswordAuthentication=no",
                                          "-o", "StrictHostKeyChecking=no",
                                          remotehost, "-f", "rm", "-f", remote_file],
                                          stdout=open(os.devnull),
                                          stderr=open(os.devnull))
            if 0 != ssh_retval:
                print "Failed to delete remote file"
Developer: Bobfrat, Project: coi-services, Lines: 11, Source: test_register_process_definition.py


Example 20: parse_recipe

 def parse_recipe(cls, url):
     maker_dict = {'www.manjulaskitchen.com':ManjulasMaker,
                   'www.101cookbooks.com':OneCookMaker,
                   'www.gourmet.com':GourmetMaker}    
     target_maker = urlparse.urlsplit(url)[1]
     current_maker = maker_dict[target_maker]
     
     #create child and call child's process_url method        
     current_recipe = current_maker(url).process_url()
     
     #passes back to the caller what the child class passes back        
     return current_recipe
Developer: BethanyG, Project: NANA, Lines: 12, Source: RecipeMaker.py



Note: The urllib2.urlparse.urlsplit examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright remains with the original authors. Please consult the corresponding project's license before redistributing or using the code, and do not reproduce without permission.

