本文整理汇总了Python中w3lib.html.get_meta_refresh函数的典型用法代码示例。如果您正苦于以下问题:Python get_meta_refresh函数的具体用法?Python get_meta_refresh怎么用?Python get_meta_refresh使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_meta_refresh函数的19个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_inside_script
def test_inside_script(self):
    """A meta refresh that only occurs inside <script> is skipped by default.

    Passing ``ignore_tags=()`` makes the same markup yield the redirect.
    """
    base_url = 'http://example.org'
    markup = """
<html>
<head><script>if(!foobar()){ $('<meta http-equiv="refresh" content="0;url=http://example.org/foobar_required" />').appendTo('body'); }</script></head>
</html>"""
    default_result = get_meta_refresh(markup, base_url)
    self.assertEqual(default_result, (None, None))
    # With no ignored tags the refresh inside the script text is reported.
    unfiltered_result = get_meta_refresh(markup, base_url, ignore_tags=())
    self.assertEqual(unfiltered_result, (0.0, "http://example.org/foobar_required"))
开发者ID:Preetwinder,项目名称:w3lib,代码行数:8,代码来源:test_html.py
示例2: test_inside_noscript
def test_inside_noscript(self):
    """A meta refresh wrapped in <noscript> is skipped by default.

    Passing ``ignore_tags=()`` makes the same markup yield the redirect.
    """
    base_url = 'http://example.org'
    markup = """
<html>
<head><noscript><meta http-equiv="refresh" content="0;url=http://example.org/javascript_required" /></noscript></head>
</html>"""
    default_result = get_meta_refresh(markup, base_url)
    self.assertEqual(default_result, (None, None))
    # With no ignored tags the refresh inside <noscript> is reported.
    unfiltered_result = get_meta_refresh(markup, base_url, ignore_tags=())
    self.assertEqual(unfiltered_result, (0.0, "http://example.org/javascript_required"))
开发者ID:Preetwinder,项目名称:w3lib,代码行数:8,代码来源:test_html.py
示例3: test_float_refresh_intervals
def test_float_refresh_intervals(self):
    """Fractional refresh delays are returned as floats."""
    base_url = 'http://example.com'
    # (markup, expected delay) pairs, checked in the order listed.
    cases = (
        ("""<meta http-equiv="refresh" content=".1;URL=index.html" />""", 0.1),
        ("""<meta http-equiv="refresh" content="3.1;URL=index.html" />""", 3.1),
    )
    for markup, delay in cases:
        self.assertEqual(get_meta_refresh(markup, base_url),
                         (delay, 'http://example.com/index.html'))
开发者ID:Preetwinder,项目名称:w3lib,代码行数:8,代码来源:test_html.py
示例4: test_without_url
def test_without_url(self):
    """A refresh with no url yields (None, None); one with a url is parsed."""
    base_url = 'http://example.org'

    no_target = """<meta http-equiv="refresh" content="5" />"""
    self.assertEqual(get_meta_refresh(no_target, base_url), (None, None))

    # The url part sits on the line after the delay.
    with_target = """<meta http-equiv="refresh" content="5;
url=http://example.org/newpage" /></head>"""
    expected = (5, 'http://example.org/newpage')
    self.assertEqual(get_meta_refresh(with_target, base_url), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:9,代码来源:test_html.py
示例5: get_meta_refresh
def get_meta_refresh(response):
    """Parse the http-equiv refresh parameter from the given response.

    The result of ``html.get_meta_refresh`` is stored in ``_metaref_cache``
    keyed by the response, so each response is parsed at most once. Only the
    first 4096 characters of the response text are examined, and content
    inside ``script`` and ``noscript`` tags is ignored.
    """
    if response in _metaref_cache:
        return _metaref_cache[response]
    head = response.text[:4096]
    _metaref_cache[response] = html.get_meta_refresh(
        head,
        response.url,
        response.encoding,
        ignore_tags=('script', 'noscript'),
    )
    return _metaref_cache[response]
开发者ID:ArturGaspar,项目名称:scrapy,代码行数:7,代码来源:response.py
示例6: get_meta_refresh
def get_meta_refresh(response):
    """Parse the http-equiv refresh parameter from the given response.

    The result of ``html.get_meta_refresh`` is stored in ``_metaref_cache``
    keyed by the response, so each response is parsed at most once. Only the
    first 4096 characters of the decoded body are examined.
    """
    if response in _metaref_cache:
        return _metaref_cache[response]
    head = response.body_as_unicode()[:4096]
    _metaref_cache[response] = html.get_meta_refresh(
        head, response.url, response.encoding)
    return _metaref_cache[response]
开发者ID:bihicheng,项目名称:scrapy,代码行数:7,代码来源:response.py
示例7: test_tag_name
def test_tag_name(self):
    """A tag whose name merely starts with "meta" is not treated as a meta refresh."""
    base_url = 'http://example.org'
    markup = """
<html>
<head><title>Dummy</title><metafoo http-equiv="refresh" content="5;url=http://example.org/newpage" /></head>
<body>blahablsdfsal&</body>
</html>"""
    outcome = get_meta_refresh(markup, base_url)
    self.assertEqual(outcome, (None, None))
开发者ID:Preetwinder,项目名称:w3lib,代码行数:8,代码来源:test_html.py
示例8: test_multiline
def test_multiline(self):
    """A meta refresh tag spread over several lines is still parsed."""
    base_url = 'http://example.org'
    markup = """<html><head>
<META
HTTP-EQUIV="Refresh"
CONTENT="1; URL=http://example.org/newpage">"""
    expected = (1, 'http://example.org/newpage')
    self.assertEqual(get_meta_refresh(markup, base_url), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:8,代码来源:test_html.py
示例9: test_leading_newline_in_url
def test_leading_newline_in_url(self):
    """A line break between ``URL=`` and the address does not end up in the result."""
    base_url = 'http://example.org'
    markup = """
<html>
<head><title>Dummy</title><meta http-equiv="refresh" content="0; URL=
http://www.example.org/index.php" />
</head>
</html>"""
    expected = (0.0, 'http://www.example.org/index.php')
    self.assertEqual(get_meta_refresh(markup, base_url), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:9,代码来源:test_html.py
示例10: get_meta_refresh
def get_meta_refresh(response):
    """Parse the http-equiv refresh parameter from the given response.

    The result of ``html.get_meta_refresh`` is stored in ``_metaref_cache``
    keyed by the response, so each response is parsed at most once. Only the
    first 4096 characters of the response text are examined, after removing
    whatever ``_noscript_re`` and then ``_script_re`` match.
    """
    if response in _metaref_cache:
        return _metaref_cache[response]
    head = response.text[:4096]
    # Same order as before: noscript pattern first, then script pattern.
    for pattern in (_noscript_re, _script_re):
        head = pattern.sub(u'', head)
    _metaref_cache[response] = html.get_meta_refresh(
        head, response.url, response.encoding)
    return _metaref_cache[response]
开发者ID:390218462,项目名称:scrapy,代码行数:9,代码来源:response.py
示例11: get_url
def get_url(self, response):
    """Record the meta-refresh target of *response* on its result and yield the parsed item.

    The result dict is read from ``response.meta['result']``. Its ``'url'``
    entry is set to the meta-refresh URL for an ``HtmlResponse`` and to
    ``None`` otherwise.
    """
    # NOTE(review): the source had its indentation stripped; the nesting below
    # is reconstructed (only the refresh lookup and the spam flag are taken to
    # be conditional) -- confirm against the upstream file.
    item = response.meta['result']
    refresh_url = None
    if isinstance(response, HtmlResponse):
        _, refresh_url = get_meta_refresh(
            response.body, response.url, response.encoding, ignore_tags=())
    item['url'] = refresh_url
    # mark probable spam
    if self.isredditspam_link(item['url']):
        item['spam'] = 'url'
    yield self.parse_result(SearchResultItem(item))
开发者ID:nyov,项目名称:reddit_info_bot,代码行数:14,代码来源:search.py
示例12: test_nonascii_url_latin1
def test_nonascii_url_latin1(self):
    """A latin1-encoded byte in the redirect URL comes back percent-escaped."""
    base_url = 'http://example.com'
    raw = b"""<meta http-equiv="refresh" content="3; url=http://example.com/to\xa3">"""
    outcome = get_meta_refresh(raw, base_url, 'latin1')
    self.assertEqual(outcome, (3, 'http://example.com/to%A3'))
开发者ID:christwell,项目名称:w3lib,代码行数:5,代码来源:test_html.py
示例13: test_nonascii_url_utf8
def test_nonascii_url_utf8(self):
    """UTF-8 bytes in the redirect URL are percent-escaped when no encoding is passed."""
    base_url = 'http://example.com'
    raw = b"""<meta http-equiv="refresh" content="3; url=http://example.com/to\xc2\xa3">"""
    outcome = get_meta_refresh(raw, base_url)
    self.assertEqual(outcome, (3, 'http://example.com/to%C2%A3'))
开发者ID:Preetwinder,项目名称:w3lib,代码行数:5,代码来源:test_html.py
示例14: test_entities_in_redirect_url
def test_entities_in_redirect_url(self):
    """A redirect URL wrapped in single quotes is returned without the quotes."""
    # NOTE(review): the test name refers to entities, but the fixture as shown
    # contains literal quote characters -- possibly decoded during extraction.
    base_url = 'http://example.org'
    markup = """<meta http-equiv="refresh" content="3; url='http://www.example.com/other'">"""
    expected = (3, 'http://www.example.com/other')
    self.assertEqual(get_meta_refresh(markup, base_url), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:5,代码来源:test_html.py
示例15: test_relative_redirects
def test_relative_redirects(self):
    """A relative redirect target is resolved against the base URL."""
    base_url = 'http://example.com/page/this.html'
    markup = """<meta http-equiv="refresh" content="3; url=other.html">"""
    expected = (3, 'http://example.com/page/other.html')
    self.assertEqual(get_meta_refresh(markup, base_url), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:5,代码来源:test_html.py
示例16: test_html_comments_with_uncommented_meta_refresh
def test_html_comments_with_uncommented_meta_refresh(self):
    """An HTML comment before the tag must not hide an uncommented meta refresh."""
    base_url = 'http://example.com'
    markup = """<!-- commented --><meta http-equiv="refresh" content="3; url=http://example.com/">-->"""
    expected = (3, 'http://example.com/')
    self.assertEqual(get_meta_refresh(markup, base_url), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:5,代码来源:test_html.py
示例17: test_commented_meta_refresh
def test_commented_meta_refresh(self):
    """A meta refresh inside an HTML comment must not produce a redirect."""
    base_url = 'http://example.com'
    markup = """<!--<meta http-equiv="refresh" content="3; url=http://example.com/">-->"""
    outcome = get_meta_refresh(markup, base_url)
    self.assertEqual(outcome, (None, None))
开发者ID:Preetwinder,项目名称:w3lib,代码行数:5,代码来源:test_html.py
示例18: test_nonascii_url_latin1_query
def test_nonascii_url_latin1_query(self):
    """Non-ASCII latin1 bytes in both the path and the query of the redirect URL.

    Only the query part is expected to stay latin1-encoded before percent
    escaping; the path is expected as ``%C2%A3``.
    """
    base_url = 'http://example.com'
    raw = b"""<meta http-equiv="refresh" content="3; url=http://example.com/to\xa3?unit=\xb5">"""
    expected = (3, 'http://example.com/to%C2%A3?unit=%B5')
    self.assertEqual(get_meta_refresh(raw, base_url, 'latin1'), expected)
开发者ID:Preetwinder,项目名称:w3lib,代码行数:6,代码来源:test_html.py
示例19: test_get_meta_refresh
def test_get_meta_refresh(self):
    """Exercise get_meta_refresh over the common meta-refresh variations.

    Fixtures that stand for raw bytes (the utf-16 body and the two bodies
    with non-ASCII URL bytes) are written as ``b"..."`` literals. On Python 2
    that is the same as the previous plain literals; on Python 3 it keeps them
    as bytes, where a plain literal has no ``.decode`` and ``\\xc2\\xa3``
    would mean two code points, not one UTF-8 sequence.
    """
    baseurl = 'http://example.org'
    body = """
<html>
<head><title>Dummy</title><meta http-equiv="refresh" content="5;url=http://example.org/newpage" /></head>
<body>blahablsdfsal&</body>
</html>"""
    self.assertEqual(get_meta_refresh(body, baseurl), (5, 'http://example.org/newpage'))

    # refresh without url should return (None, None)
    body = """<meta http-equiv="refresh" content="5" />"""
    self.assertEqual(get_meta_refresh(body, baseurl), (None, None))

    body = """<meta http-equiv="refresh" content="5;
url=http://example.org/newpage" /></head>"""
    self.assertEqual(get_meta_refresh(body, baseurl), (5, 'http://example.org/newpage'))

    # meta refresh in multiple lines
    body = """<html><head>
<META
HTTP-EQUIV="Refresh"
CONTENT="1; URL=http://example.org/newpage">"""
    self.assertEqual(get_meta_refresh(body, baseurl), (1, 'http://example.org/newpage'))

    # entities in the redirect url
    body = """<meta http-equiv="refresh" content="3; url='http://www.example.com/other'">"""
    self.assertEqual(get_meta_refresh(body, baseurl), (3, 'http://www.example.com/other'))

    baseurl = 'http://example.com/page/this.html'
    # relative redirects
    body = """<meta http-equiv="refresh" content="3; url=other.html">"""
    self.assertEqual(get_meta_refresh(body, baseurl), (3, 'http://example.com/page/other.html'))

    # non-standard encodings (utf-16)
    baseurl = 'http://example.com'
    # Bytes literal so .decode() exists on Python 3 as well as Python 2.
    body = b"""<meta http-equiv="refresh" content="3; url=http://example.com/redirect">"""
    body = body.decode('ascii').encode('utf-16')
    self.assertEqual(get_meta_refresh(body, baseurl, 'utf-16'), (3, 'http://example.com/redirect'))

    # non-ascii chars in the url (utf8 - default)
    body = b"""<meta http-equiv="refresh" content="3; url=http://example.com/to\xc2\xa3">"""
    self.assertEqual(get_meta_refresh(body, baseurl), (3, 'http://example.com/to%C2%A3'))

    # non-ascii chars in the url (latin1)
    body = b"""<meta http-equiv="refresh" content="3; url=http://example.com/to\xa3">"""
    self.assertEqual(get_meta_refresh(body, baseurl, 'latin1'), (3, 'http://example.com/to%C2%A3'))

    # html commented meta refresh header must not directed
    body = """<!--<meta http-equiv="refresh" content="3; url=http://example.com/">-->"""
    self.assertEqual(get_meta_refresh(body, baseurl), (None, None))

    # html comments must not interfere with uncommented meta refresh header
    body = """<!-- commented --><meta http-equiv="refresh" content="3; url=http://example.com/">-->"""
    self.assertEqual(get_meta_refresh(body, baseurl), (3, 'http://example.com/'))

    # float refresh intervals
    body = """<meta http-equiv="refresh" content=".1;URL=index.html" />"""
    self.assertEqual(get_meta_refresh(body, baseurl), (0.1, 'http://example.com/index.html'))

    body = """<meta http-equiv="refresh" content="3.1;URL=index.html" />"""
    self.assertEqual(get_meta_refresh(body, baseurl), (3.1, 'http://example.com/index.html'))
开发者ID:Dior222,项目名称:w3lib,代码行数:61,代码来源:test_html.py
注:本文中的w3lib.html.get_meta_refresh函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论