本文整理汇总了Python中urllib2.urlparse.urlsplit函数的典型用法代码示例。如果您正苦于以下问题：Python urlsplit函数的具体用法？Python urlsplit怎么用？Python urlsplit使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了urlsplit函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: validate_links
def validate_links(data):
    """Check that every HTTP(S) URL in ``data`` can be fetched.

    ``data`` is a sized sequence of mappings that each carry a ``'url'`` key.
    Entries whose URL is empty are skipped outright (the progress bar is not
    advanced for them).  URLs with a scheme other than http/https, and URLs
    that ``url_status_cache`` already records as ``True``, are not probed.

    Every remaining URL is probed with ``head()``; when the answer is 403,
    405 or 500, or the host is known not to support HEAD, it is fetched again
    with ``get()``.  A successful probe is stored in ``url_status_cache``.

    The process exits with status 1 on the first URL that times out, cannot
    be connected to, or answers with a non-OK status.
    """
    widgets = [Bar(), SimpleProgress()]
    pbar = ProgressBar(widgets=widgets, maxval=len(data)).start()
    for i, element in enumerate(data):
        url = element['url']
        if url == '':
            continue
        # Parse once and reuse both components.
        parts = urlparse.urlsplit(url)
        scheme = parts.scheme
        host = parts.netloc
        if scheme in ('http', 'https') and \
                url_status_cache.get(url) is not True:
            try:
                request = head(url, timeout=10)
                # Some web sites cannot handle HEAD requests.  The host list
                # has to be a real tuple (note the trailing comma): a bare
                # parenthesised string would turn ``in`` into a substring
                # test, which also matches '' and partial host names.
                if request.status_code in (403, 405, 500) or \
                        host in ('mobil.morgenpost.de',):
                    request = get(url)
            except Timeout:
                stderr.write('Connection to <%s> timeouted.\n' % url)
                exit(1)
            except ConnectionError as e:
                stderr.write('Connection to <%s> failed.\n' % url)
                stderr.write(str(e) + '\n')
                exit(1)
            if request.ok:
                url_status_cache.set(url, request.ok)
            else:
                stderr.write('<%s> is unreachable.\n' % url)
                exit(1)
        pbar.update(i+1)
开发者ID:erlehmann,项目名称:redokast,代码行数:30,代码来源:validate-links.py
示例2: _send_header
def _send_header(self, header_pieces, headers, body, is_request):
    """Build the HTTP header block once, then send it over ``self.csock``.

    header_pieces -- for a request ``(method, resource, http_version)``; for
        a response a 3-tuple rendered as ``HTTP/<p0> <p1> <p2>``.
    headers -- object providing ``iteritems()`` that yields (name, value)
        pairs.  Names are compared against the lower-case strings
        ``'content-length'`` and ``'host'`` and written title-cased.
    body -- message body; only ``len(body)`` is used here.
    is_request -- true for a request start line, false for a status line.

    For requests only the path and query of the resource go into the start
    line.  A ``Content-Length`` header always carries the real body length,
    and one is added when missing and the body is non-empty.  When a request
    has no ``host`` header, one is derived from the resource's authority.

    State kept on ``self``: ``headers_prepared``, ``buffer``, ``to_write``,
    ``written`` and ``headers_sent``.
    """
    if not self.headers_prepared:
        body_length = len(body)
        had_length = False
        had_host = False
        if is_request:
            splitted = urlparse.urlsplit(header_pieces[1])
            url = splitted.path
            if splitted.query:
                url += '?' + splitted.query
            header_line = '%s %s HTTP/%s\r\n' % (header_pieces[0], url, header_pieces[2])
        else:
            header_line = 'HTTP/%s %s %s\r\n' % header_pieces
        io_request = StringIO()
        io_request.write(header_line)
        for name, value in headers.iteritems():
            if name == 'content-length':
                # Trust the actual body size over the supplied value.
                io_request.write('%s: %s\r\n' % (name.title(), body_length))
                had_length = True
            else:
                io_request.write('%s: %s\r\n' % (name.title(), value))
                if name == 'host':
                    had_host = True
        if not had_length and body_length > 0:
            io_request.write('%s: %s\r\n' % ('Content-Length', body_length))
        if not had_host and is_request:
            # Use the authority minus any "user:password@" prefix so that an
            # explicit port survives (``.hostname`` would drop it).  Skip the
            # header when the resource has no authority at all, instead of
            # emitting the literal text "None".
            host = splitted.netloc.rpartition('@')[2]
            if host:
                io_request.write('%s: %s\r\n' % ('Host', host))
        io_request.write('\r\n')
        self.buffer = io_request.getvalue()
        io_request.close()
        self.headers_prepared = True
        self.to_write = len(self.buffer)
        self.written = 0
    if not self.headers_sent:
        # send() may accept only part of the buffer; keep going until the
        # whole header block has been handed over.
        while self.to_write > 0:
            written = self.csock.send(self.buffer[self.written:])
            self.written += written
            self.to_write -= written
        self.headers_sent = True
开发者ID:g-fleischer,项目名称:wtfy,代码行数:48,代码来源:HttpWriter.py
示例3: download_metadata
def download_metadata(target_directory):
    """
    Fetch XML files for the PMCIDs read from stdin into target_directory.

    Generator.  Every entry already present in target_directory is removed
    first.  The PMCIDs are then processed in batches of 365; for each batch a
    progress dict with 'completed': 0 is yielded before the download and one
    with 'completed': 1 after the file has been written.

    Raises RuntimeError when stdin contains no PMCIDs.
    """
    stderr.write('Input PMCIDs, delimited by whitespace: ')
    pmcids = stdin.read().split()
    if not pmcids:
        raise RuntimeError('No PMCIDs found.')

    # start from a clean slate: drop whatever earlier invocations left behind
    for stale_name in listdir(target_directory):
        stale_path = path.join(target_directory, stale_name)
        stderr.write("Removing “%s” … " % stale_path)
        remove(stale_path)
        stderr.write("done.\n")

    # slice-based chunking, after nosklo's answer:
    # <http://stackoverflow.com/questions/434287/what-is-the-most-pythonic-way-to-iterate-over-a-list-in-chunks#answer-434328>
    chunk_size = 365
    for i, start in enumerate(xrange(0, len(pmcids), chunk_size)):
        chunk = pmcids[start:start + chunk_size]
        url = _get_query_url_from_pmcids(chunk)
        yield {'url': url, 'completed': 0, 'total': 1}

        # last path component of the query URL, made unique per batch
        basename = urlparse.urlsplit(url).path.split('/')[-1]
        local_filename = path.join(target_directory, basename + str(i))
        with open(local_filename, 'wb') as local_file:
            local_file.write(_get_file_from_pmcids(chunk).read())
        yield {'url': url, 'completed': 1, 'total': 1}
开发者ID:npettiaux,项目名称:open-access-media-importer,代码行数:33,代码来源:pmc_pmcid.py
示例4: validateURL
def validateURL (cls, full_url, video_item=True):
    """Validate a URL and return a parser object for it.

    full_url -- the URL to check; must be a ``str``.
    video_item -- when true, a successful parse result is wrapped in a
        ``VideoItem``; when false the parser's own result is returned.

    The URL's host name (without a leading "www.") is looked up in
    ``cls.parsers`` and the matching entry's ``checkURL`` is called with the
    full URL.  Returns ``None`` when the URL has no host name, the host has no
    registered parser, or ``checkURL`` returns a false value.

    Raises TypeError when ``full_url`` is not a ``str``.
    """
    if not isinstance (full_url, str):
        raise TypeError ("Argument must be a string")

    hostname = urlparse.urlsplit (full_url).hostname
    if not hostname:
        return None

    # Remove only the literal "www." prefix.  str.lstrip() would be wrong
    # here: it strips any run of the characters 'w' and '.', which turns
    # "www.wikipedia.org" into "ikipedia.org".
    if hostname.startswith ("www."):
        hostname = hostname[len ("www."):]

    if hostname not in cls.parsers:
        return None

    page_parser = cls.parsers[hostname].checkURL (full_url)
    if not page_parser:
        return None
    if video_item:
        return VideoItem (page_parser)
    return page_parser
开发者ID:Ryochan7,项目名称:YouTubed-2x,代码行数:26,代码来源:parsermanager.py
示例5: set_language_ex
def set_language_ex(request):
    """Redirect back to the referring page, optionally switching language.

    The redirect target is the ``next`` POST/GET parameter, falling back to
    the ``HTTP_REFERER`` header and finally to ``'/'`` whenever
    ``is_safe_url`` rejects the candidate.  Any ``LANG_GET_KEY`` parameter is
    removed from the target's query string.

    For POST requests carrying a ``language`` accepted by
    ``check_for_language``, the code is stored in the session when the request
    has one, otherwise in the language cookie of the response.
    """
    target = request.POST.get('next', request.GET.get('next'))
    if not is_safe_url(url=target, host=request.get_host()):
        target = request.META.get('HTTP_REFERER')
        if not is_safe_url(url=target, host=request.get_host()):
            target = '/'

    # remove lang from query
    scheme, netloc, path, query, fragment = urlparse.urlsplit(target)
    pairs = urlparse.parse_qsl(query)
    kept = [(k, v) for k, v in pairs if k != LANG_GET_KEY]
    if len(kept) != len(pairs):
        # only rebuild the URL when something was actually dropped
        query = urllib.urlencode(kept)
        target = urlparse.urlunsplit((scheme, netloc, path, query, fragment))

    response = http.HttpResponseRedirect(target)
    if request.method != 'POST':
        return response

    lang_code = request.POST.get('language', None)
    if lang_code and check_for_language(lang_code):
        if hasattr(request, 'session'):
            request.session[LANGUAGE_SESSION_KEY] = lang_code
        else:
            response.set_cookie(settings.LANGUAGE_COOKIE_NAME, lang_code,
                                max_age=settings.LANGUAGE_COOKIE_AGE,
                                path=settings.LANGUAGE_COOKIE_PATH,
                                domain=settings.LANGUAGE_COOKIE_DOMAIN)
    return response
开发者ID:Nnonexistent,项目名称:chemphys,代码行数:31,代码来源:views.py
示例6: download_content_list
def download_content_list(detail_url, headers, timeout):
    """Download ten pages of an article list and return the collected items.

    sample url: http://weixin.sogou.com/gzhjs?openid=oIWsFt86NKeSGd_BQKp1GcDkYpv0&ext=D4y5Z3wUwj5uk6W7Yk9BqC3LAaFqirWHT5QFje14y0dip_leVhZF6qjo9Mm_UUVg&cb=sogou.weixin_gzhcb&page=1&gzhArtKeyWord=&tsn=0&t=1459425446419&_=1459425446169
    In that URL ``openid`` is fixed, ``ext`` is fixed and
    ``cb=sogou.weixin_gzhcb`` is fixed as well.  The only fields that change
    are ``t`` and ``_``, which look like the timestamp at which the page was
    opened.

    detail_url -- URL whose host and query string seed every page request.
    headers -- passed through to ``download_page``.
    timeout -- per-request timeout handed to ``download_page``; when ``None``
        the module's ``DEFAULT_TIMEOUT`` is used.

    Pages 1 to 10 are requested with a two second pause after each one.  The
    ``_`` field starts at the module-level ``start_flag`` and is increased by
    one per page.  Returns the concatenated ``items`` lists of all responses.
    """
    context_lst = []
    _t = start_flag
    now = int(time.time() * 1000)
    request_timeout = DEFAULT_TIMEOUT if timeout is None else timeout

    url_netloc = urlparse.urlsplit(detail_url)
    cur_url = 'http://%s/gzhjs?%s' % (url_netloc.netloc, url_netloc.query)
    params = "cb=sogou.weixin_gzhcb&page=%s&gzhArtKeyWord=&tsn=0&t=%s&_=%s"

    for i in range(1, 11):
        # Interpolate into the parameter template only: the query copied from
        # detail_url may contain percent-escapes, which would break (or be
        # consumed by) %-formatting of the complete URL.
        target_url = cur_url + '&' + (params % (i, now, _t))
        print(target_url)
        resp = download_page(target_url, headers, timeout=request_timeout)

        # Unwrap the JSONP envelope: drop the callback prefix and the final
        # character (the closing parenthesis).
        strip_text = resp.text.replace('sogou.weixin_gzhcb(', '')
        strip_text = strip_text[:-1]
        payload = json.loads(strip_text)
        context_lst.extend(payload.get('items', []))

        _t = _t + 1
        time.sleep(2)
    return context_lst
开发者ID:seraphln,项目名称:wheel,代码行数:32,代码来源:crawler.py
示例7: submit
def submit(self, opener, res):
    """submit WAYF form with IDP

    :param opener: the urllib2 opener
    :param res: the response object of the page that contained the form
    :returns: ``(request, response)`` for the POST that was sent
    :raises WAYFException: when ``self.idp`` is not among the IdPs listed in
        the form data (``self.data["user_idp"]``)
    """
    log.info("Submitting form to wayf")
    # Set IDP to correct IDP
    wayf_data = {}
    idp = self.idp
    data = self.data

    # Merge every dict-valued entry under "user_idp" into one lookup table.
    idps = {}
    for d in data["user_idp"]:
        if isinstance(data["user_idp"][d], dict):
            idps.update(data["user_idp"][d])
    if not idp.get_idp() in idps:
        raise WAYFException("Can't find IdP '%s' in WAYF's IdP list" % idp)
    wayf_data["user_idp"] = idps[idp.get_idp()]
    wayf_data["Select"] = "Select"

    action = data["form"]["action"]
    if action.startswith("?"):
        # Query-only action: attach it to the page URL with that URL's own
        # query and fragment removed, so the result has exactly one query.
        urlsp = urlparse.urlsplit(res.url)
        base_url = urlparse.urlunsplit((urlsp[0], urlsp[1], urlsp[2], "", ""))
        url = base_url + action
    else:
        url = urlparse.urljoin(res.url, action)

    data = urllib.urlencode(wayf_data)
    request = Request(url, data)
    log.debug("POST: %s" % request.get_full_url())
    response = opener.open(request)
    return request, response
开发者ID:russell,项目名称:sibboleth,代码行数:32,代码来源:forms.py
示例8: open_url
def open_url(url, **kwargs):
    """
    open_url(url, **kwargs) - open url and return file descriptor

    url - a local file path or a full URL.  Supported forms are a plain local
          path and the file, http and ftp protocols.
    kwargs - extra arguments forwarded to the opener that ends up being used:
          'mode' for a local path or the file protocol; 'proxy', 'data' and
          'timeout' (Python >= 2.6) for http and ftp.  'proxy' is consumed
          here and installed as a proxy handler for the URL's scheme.

    Raises URLError for any other protocol.

    Examples:
    open_url('/home/praetorian/secret.txt')
    open_url('file:///home/praetorian/secret.txt', mode='r')
    open_url('http://domain.tld/secret.txt', proxy='172:16:1:100:8000')
    open_url('ftp://domain.tld/secret.txt')
    """
    bits = urlparse.urlsplit(url)
    scheme = bits.scheme

    if scheme in ('http', 'ftp'):
        if 'proxy' in kwargs:
            handlers = [ProxyHandler({scheme: kwargs.pop('proxy')})]
        else:
            handlers = []
        return build_opener(*handlers).open(bits.geturl(), **kwargs)

    if scheme in ('', 'file'):
        # local file: everything after the scheme is the file system path
        return open(bits.netloc + bits.path, **kwargs)

    raise URLError("Unsupported protocol '%s'" % scheme)
开发者ID:centrumholdings,项目名称:yowie,代码行数:35,代码来源:fileutils.py
示例9: _do_request
def _do_request(self, request_id, parameters={}):
    """Send one API request, retrying on failure, and return the result.

    request_id -- identifier sent as ``X-VPS-REQUEST-ID``; when ``None`` a
        new one is obtained from ``self.idgenerator``.
    parameters -- request parameters.  The mapping is copied before the
        credentials are merged in, so the caller's object (and the shared
        default) is never modified.

    The request is POSTed to ``self.url_base`` up to
    ``self.MAX_RETRY_COUNT`` times.  A failed attempt is logged as a warning
    and retried; the exception of the final failed attempt is logged and
    re-raised.

    Returns whatever ``self._parse_parmlist`` produced for the response body.
    """
    if request_id is None:
        # Generate a new request identifier using the class' default generator
        request_id = self.idgenerator.id()

    req_params = dict(parameters)
    req_params.update(dict(
        partner = self.partner,
        vendor = self.vendor,
        user = self.username,
        pwd = self.password,
    ))
    parmlist = self._build_parmlist(req_params)

    headers = {
        'Host': urlparse.urlsplit(self.url_base)[1],
        'X-VPS-REQUEST-ID': str(request_id),
        'X-VPS-CLIENT-TIMEOUT': str(self.timeout), # Doc says to do this
        'X-VPS-Timeout': str(self.timeout), # Example says to do this
        'X-VPS-INTEGRATION-PRODUCT': self.CLIENT_IDENTIFIER,
        'X-VPS-INTEGRATION-VERSION': self.API_VERSION,
        'X-VPS-VIT-OS-NAME': sys.platform,
        'Connection': 'close',
        'Content-Type': 'text/namevalue',
    }
    self.log.debug(u'Request Headers: %s' % headers)

    try_count = 0
    results = None
    while (results is None and try_count < self.MAX_RETRY_COUNT):
        try:
            try_count += 1
            request = Request(
                url = self.url_base,
                data = parmlist.encode('utf-8'),
                headers = headers)
            response = urlopen(request)
            result_parmlist = response.read()
            response.close()
            self.log.debug(
                u'Result text: %s' % result_parmlist.decode('utf-8')
            )
            results = self._parse_parmlist(result_parmlist)
        except Exception as e:
            if try_count < self.MAX_RETRY_COUNT:
                self.log.warn(
                    u'API request attempt %s of %s failed - %%s' % (
                        try_count, self.MAX_RETRY_COUNT), e
                )
            else:
                self.log.exception(u'Final API request failed - %s', e)
                # Bare raise keeps the original traceback.
                raise
    # Hand the parsed response to the caller; without this the computed
    # result was discarded.
    return results
开发者ID:briang1,项目名称:python-payflowpro,代码行数:60,代码来源:client.py
示例10: version_matcher
def version_matcher(self, url):
    """Return the version string found in the file name of ``url``.

    Only the last component of the URL's path is searched.  The pattern is
    two digits followed by two more groups of up to two digits, each group
    preceded by one separator character.

    Raises ProcessorError when the file name contains no such sequence.
    """
    fname = os.path.basename(urlparse.urlsplit(url).path)
    # NOTE(review): the dots in the pattern are unescaped and therefore match
    # any character, not just "." - confirm whether that is intended.
    found = re.search(r"([0-9]{2}.[0-9]{0,2}.[0-9]{0,2})", fname)
    if found is not None:
        return found.group(1)
    raise ProcessorError("Something went wrong matching FMP update to full version.")
开发者ID:grahampugh,项目名称:recipes,代码行数:7,代码来源:FilemakerProAdvancedUpdateURLProcessor.py
示例11: victimise
def victimise(victim, uri):
    """Fetch ``victim + uri`` after normalising its query string.

    The query is parsed with ``parse_qs`` and re-encoded with
    ``urlencode(..., True)`` (one key=value pair per value); every other URL
    component is left as is.  The resulting URL is printed and then requested
    through ``http_client.fetch`` with the module-level ``fetch`` as second
    argument and gzip disabled.
    """
    parts = urlparse.urlsplit(victim + uri)
    requoted_query = urlencode(urlparse.parse_qs(parts.query), True)
    url = urlparse.urlunsplit(parts._replace(query=requoted_query))
    print(url)
    http_client.fetch(url, fetch, use_gzip=False)
开发者ID:rodders,项目名称:darklaunch,代码行数:7,代码来源:simplesender.py
示例12: download_metadata
def download_metadata(target_directory):
    """
    Fetch the XML file for the DOIs read from stdin into target_directory.

    Generator.  The DOIs are mapped to PubMed Central IDs first; a progress
    dict with 'completed': 0 is yielded before the download and one with
    'completed': 1 after the file has been written.  The local file is named
    after the last path component of the query URL.

    Raises RuntimeError when stdin contains no DOIs or when no PubMed Central
    ID is found for them.
    """
    stderr.write('Input DOIs, delimited by whitespace: ')
    dois = stdin.read().split()
    if not dois:
        raise RuntimeError('No DOIs found.')

    stderr.write('Getting PubMed Central IDs for given DOIs … ')
    pmcids = _get_pmcids_from_dois(dois)
    if len(pmcids) == 0:
        raise RuntimeError('No PubMed Central IDs for given DOIs found.')
    stderr.write('found: %s\n' % ', '.join(pmcids))

    url = _get_query_url_from_pmcids(pmcids)
    yield {'url': url, 'completed': 0, 'total': 1}

    basename = urlparse.urlsplit(url).path.split('/')[-1]
    local_filename = path.join(target_directory, basename)
    with open(local_filename, 'wb') as local_file:
        local_file.write(_get_file_from_pmcids(pmcids).read())
    yield {'url': url, 'completed': 1, 'total': 1}
开发者ID:npettiaux,项目名称:open-access-media-importer,代码行数:25,代码来源:pmc_doi.py
示例13: login_proceed
def login_proceed(request):
    """View that handles the successful login.

    Returns an HttpResponse whose body is a JSON object with
    ``'authentication': 'success'`` plus either a ``'redirect'`` URL or the
    rendered ``_user_login.html`` markup, depending on the referring page:

    * coming from the logout page -> redirect to the home page
    * coming from the activation-complete page -> redirect to the profile
    * anything else, including a missing Referer header -> rendered markup
    """
    template_name = '_user_login.html'

    # Check if the request came from logout page, if so set
    # authentication to redirect to home page.  The Referer header is
    # optional, so read it with a default instead of indexing META directly
    # (which raised KeyError for requests that do not send it).
    referer_path = urlparse.urlsplit(request.META.get('HTTP_REFERER', ''))[2]

    if referer_path == reverse('auth_logout'):
        response = {
            'authentication': 'success',
            'redirect': reverse('home_page'),
        }
    elif referer_path == reverse('registration_activation_complete'):
        response = {
            'authentication': 'success',
            'redirect': reverse('view_profile'),
        }
    else:
        response = {
            'authentication': 'success',
            'markup': loader.render_to_string(template_name,
                                              RequestContext(request, {}))
        }

    json_response = json.dumps(response)
    return http.HttpResponse(json_response)
开发者ID:arvindkhadri,项目名称:pytask,代码行数:28,代码来源:views.py
示例14: make_requests_from_url
def make_requests_from_url(self, url):
    """Turn ``url`` into a FormRequest that carries its query as form data.

    The query string is parsed into a dict and passed as ``formdata``; the
    request itself targets the URL with the query removed.  The value
    returned by ``self.macro.query(url)`` travels along in ``meta`` under
    ``'keyword'``.  Method, headers and cookies come from the spider, and
    duplicate filtering is disabled for the request.
    """
    keyword = self.macro.query(url)
    parts = urlparse.urlsplit(url)
    form_fields = dict(urlparse.parse_qsl(parts.query))
    bare_url = urlparse.urlunsplit(parts._replace(query=''))
    return FormRequest(
        bare_url,
        formdata=form_fields,
        method=self.start_method,
        headers=self.headers,
        cookies=self.cookies,
        dont_filter=True,
        meta={'keyword': keyword},
    )
开发者ID:BlankRain,项目名称:webbot,代码行数:8,代码来源:webbot_spider.py
示例15: testIndexRedirect
def testIndexRedirect(self):
    """The site root redirects (302) to the blog root.

    Only checked when ``settings.SET_URL_ROOT_HANDLER`` is enabled; the
    redirect target is compared by path against
    ``'/' + settings.BLOG_URLCONF_ROOT``.
    """
    if not settings.SET_URL_ROOT_HANDLER:
        return
    response = self.client.get('/')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(response.status_code, 302)
    # Documentation says that we must get response.headers, but
    # instead we have HttpResponseRedirect object here
    self.assertEqual(urlparse.urlsplit(response['Location'])[2],
                     '/' + settings.BLOG_URLCONF_ROOT)
开发者ID:gvidon,项目名称:blombum,代码行数:8,代码来源:tests.py
示例16: generate_urls
def generate_urls(obj, macro):
    """Generate the URLs described by ``obj``.

    obj -- either a list of URL templates, or a dict with a ``'base'`` URL
        and the optional keys ``'qstr'`` (extra query parameters) and
        ``'keywords'`` (keyword settings, see below).
    macro -- object providing ``expand``, ``update`` and ``bind``.

    For a list, every entry is yielded after ``macro.expand``.  For a dict,
    one URL is yielded per keyword from ``load_keywords`` when ``'keywords'``
    is present, otherwise a single URL built from the base and the query
    parameters.  An ``obj`` of any other type yields nothing.

    Keys read from the keyword settings: ``'name'``, ``'sub'`` (with
    ``'from'`` / ``'to'``), ``'col'``, ``'sep'``, ``'query'`` and ``'enc'``.

    Any exception is logged and converted into ``CloseSpider``.
    """
    try:
        if type(obj)==list:
            for url in obj:
                yield macro.expand(url)
        elif type(obj)==dict:
            base = macro.expand(obj['base'].encode('utf-8'))
            us = urlparse.urlsplit(base)
            # query parameters of the base URL, overridden by obj['qstr']
            qstr = dict(urlparse.parse_qsl(us.query))
            qstr.update(obj.get('qstr', {}))
            # from here on ``base`` is the URL without its query string
            base = urlparse.urlunsplit(us._replace(query=''))
            for k,v in qstr.iteritems():
                # a dict value carries the text in 'val' and, optionally, the
                # target encoding in 'enc'
                if type(v)==dict and type(v['val'])==unicode:
                    v = v['val'].encode(v.get('enc', 'utf-8'), errors='ignore')
                qstr[k] = macro.expand(v)
            if 'keywords' in obj:
                kw_obj = obj['keywords']
                # ``sub`` becomes a one-argument callable: a regex
                # substitution when configured, the identity otherwise
                sub = kw_obj.get('sub')
                if sub:
                    frm = sub.get('from')
                    to = sub.get('to')
                    sub = functools.partial(re.sub, frm, to)
                else:
                    sub = lambda x:x
                for kw in load_keywords(kw_obj):
                    # NOTE(review): MAGIC looks like a sentinel keyword that
                    # maps to a placeholder URL - confirm against
                    # load_keywords
                    if kw==MAGIC:
                        yield 'http://0.0.0.0'
                        continue
                    key = kw_obj['name'].encode('utf-8')
                    val = kw
                    col = kw_obj.get('col', 0)
                    sep = kw_obj.get('sep')
                    # col is 1-based: pick that column of the keyword line
                    if col>0:
                        val = val.split(sep)[col-1]
                    val = sub(val)
                    if kw_obj.get('query', True):
                        # keyword goes into the query string; qstr is shared
                        # across iterations, so the key is overwritten each
                        # time
                        qstr.update({key:val})
                        url = base+'?'+urlencode(qstr)
                    else:
                        # keyword replaces the placeholder ``key`` inside the
                        # base URL instead
                        val = val.encode(kw_obj.get('enc', 'utf-8'), errors='ignore') if type(val)==unicode else str(val)
                        url = base.replace(key, val)+'?'+urlencode(qstr)
                    macro.update({'sep':sep})
                    macro.bind(url, kw)
                    yield url
            else:
                url = base+'?'+urlencode(qstr)
                yield url
    except Exception as ex:
        log.msg(u'cannot generate urls: {}'.format(ex), level=log.ERROR)
        raise CloseSpider()
开发者ID:BlankRain,项目名称:webbot,代码行数:58,代码来源:utils.py
示例17: urlStringToServers
def urlStringToServers(urlString):
    """ convert |-sep list of urls to a set of hostnames

    Every "www." is removed from the network location of each URL and the
    result is stripped of surrounding whitespace.  Empty hosts and hosts
    containing "pubmedcentral" are left out.
    """
    servers = set()
    for url in urlString.split("|"):
        server = urlparse.urlsplit(url)[1].replace("www.", "").strip()
        if server == "" or "pubmedcentral" in server:
            continue
        servers.add(server)
    return servers
开发者ID:Moxikai,项目名称:pubMunch,代码行数:11,代码来源:pubResolvePublishers.py
示例18: getapodlist
def getapodlist(url, picpath):
    """Download the pictures referenced by a feed into ``picpath``.

    url -- feed location handed to ``feedparser.parse``.
    picpath -- directory in which the pictures are stored.

    Each item's ``description`` is used as the picture URL.  The local file
    name is the component at index 3 of that URL's path split on "/".
    Pictures whose local file already exists are not fetched again.
    """
    feed = feedparser.parse(url)
    for item in feed["items"]:
        pic = item["description"]
        # The split result exposes the path as ``.path``; it has no ``.parse``
        # attribute, so the previous spelling raised AttributeError on the
        # first item.
        outfile = urlparse.urlsplit(pic).path.split("/")[3]
        picfile = os.path.join(picpath, outfile)
        if not os.path.isfile(picfile):
            urlretrieve(pic, picfile)
开发者ID:mabeltron,项目名称:apodpaper,代码行数:11,代码来源:main.py
示例19: delete_remote_file
def delete_remote_file():
    """Delete the uploaded file on the remote host via ssh.

    The file name is the last path component of ``remote_url``; it is looked
    for below ``cfg_remotepath`` on ``remotehost`` and removed with
    ``rm -f``.  ssh runs without password authentication and without strict
    host key checking, and its output is discarded.  A message is printed
    when ssh exits with a non-zero status; nothing is raised or returned.
    """
    remote_filename = os.path.basename(urlparse.urlsplit(remote_url)[2])
    remote_file = os.path.join(cfg_remotepath, remote_filename)
    # Open the null device once, for writing (it receives the child's
    # stdout/stderr), and close it again when ssh is done.
    with open(os.devnull, 'w') as devnull:
        ssh_retval = subprocess.call(["ssh", "-o", "PasswordAuthentication=no",
                                      "-o", "StrictHostKeyChecking=no",
                                      remotehost, "-f", "rm", "-f", remote_file],
                                     stdout=devnull,
                                     stderr=devnull)
    if 0 != ssh_retval:
        print("Failed to delete remote file")
开发者ID:Bobfrat,项目名称:coi-services,代码行数:11,代码来源:test_register_process_definition.py
示例20: parse_recipe
def parse_recipe(cls, url):
    """Parse the recipe at ``url`` with the maker registered for its site.

    The maker class is selected by the URL's network location, instantiated
    with the URL, and the result of its ``process_url()`` is returned
    unchanged.  Raises KeyError for a site without a registered maker.
    """
    makers_by_host = {
        'www.manjulaskitchen.com': ManjulasMaker,
        'www.101cookbooks.com': OneCookMaker,
        'www.gourmet.com': GourmetMaker,
    }
    host = urlparse.urlsplit(url)[1]
    maker_class = makers_by_host[host]
    # create the site-specific child and hand back whatever its
    # process_url method returns
    return maker_class(url).process_url()
开发者ID:BethanyG,项目名称:NANA,代码行数:12,代码来源:RecipeMaker.py
注:本文中的urllib2.urlparse.urlsplit函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论