本文整理汇总了Python中tests.get_testdata函数的典型用法代码示例。如果您正苦于以下问题:Python get_testdata函数的具体用法?Python get_testdata怎么用?Python get_testdata使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了get_testdata函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_extraction_encoding
def test_extraction_encoding(self):
    """Check link extraction against differently encoded documents.

    The body of ``linkextractor_noenc.html`` is wrapped in two responses: one
    whose Content-Type header declares ``charset=utf-8`` and one with no
    header at all. ``linkextractor_latin1.html`` is wrapped without a header
    as well. Each response is passed to ``BaseSgmlLinkExtractor`` and the
    extracted links are compared with the expected URLs and link texts.
    """
    body = get_testdata('link_extractor', 'linkextractor_noenc.html')
    response_utf8 = HtmlResponse(
        url='http://example.com/utf8',
        body=body,
        headers={'Content-Type': ['text/html; charset=utf-8']},
    )
    response_noenc = HtmlResponse(url='http://example.com/noenc', body=body)
    body = get_testdata('link_extractor', 'linkextractor_latin1.html')
    response_latin1 = HtmlResponse(url='http://example.com/latin1', body=body)

    lx = BaseSgmlLinkExtractor()

    # The expected texts are written as unicode literals instead of
    # ``'<bytes>'.decode(...)``: a plain string literal has no ``decode``
    # method on Python 3, while ``u''`` literals mean the same on 2 and 3.
    # u'\u20ac' is the UTF-8 sequence \xe2\x82\xac; u'\xe1' is latin1 \xe1.
    self.assertEqual(lx.extract_links(response_utf8), [
        Link(url='http://example.com/sample_%C3%B1.html', text=''),
        Link(url='http://example.com/sample_%E2%82%AC.html', text=u'sample \u20ac text'),
    ])

    self.assertEqual(lx.extract_links(response_noenc), [
        Link(url='http://example.com/sample_%C3%B1.html', text=''),
        Link(url='http://example.com/sample_%E2%82%AC.html', text=u'sample \u20ac text'),
    ])

    # document encoding does not affect URL path component, only query part
    # >>> u'sample_ñ.html'.encode('utf8')
    # b'sample_\xc3\xb1.html'
    # >>> u"sample_á.html".encode('utf8')
    # b'sample_\xc3\xa1.html'
    # >>> u"sample_ö.html".encode('utf8')
    # b'sample_\xc3\xb6.html'
    # >>> u"£32".encode('latin1')
    # b'\xa332'
    # >>> u"µ".encode('latin1')
    # b'\xb5'
    self.assertEqual(lx.extract_links(response_latin1), [
        Link(url='http://example.com/sample_%C3%B1.html', text=''),
        Link(url='http://example.com/sample_%C3%A1.html', text=u'sample \xe1 text'),
        Link(url='http://example.com/sample_%C3%B6.html?price=%A332&%B5=unit', text=''),
    ])
开发者ID:01-,项目名称:scrapy,代码行数:34,代码来源:test_linkextractors_deprecated.py
示例2: test_w3c_5_5
def test_w3c_5_5(self):
    """Compare strict microdata extraction of ``microdata.5.5.html`` with its JSON fixture."""
    html = get_testdata("w3c", "microdata.5.5.html")
    raw_expected = get_testdata("w3c", "microdata.5.5.json")
    # The fixture is read as bytes, so decode it before parsing the JSON.
    expected = json.loads(raw_expected.decode("UTF-8"))
    extractor = MicrodataExtractor(strict=True)
    extracted = extractor.extract(html)
    self.assertDictEqual(extracted, expected)
开发者ID:pombredanne,项目名称:extruct,代码行数:7,代码来源:test_microdata.py
示例3: test_w3c_7_1
def test_w3c_7_1(self):
    """Compare strict microdata extraction of ``microdata.7.1.html`` with its JSON fixture.

    The second argument passed to ``extract`` is the page URL
    ``http://blog.example.com/progress-report``.
    """
    html = get_testdata("w3c", "microdata.7.1.html")
    raw_expected = get_testdata("w3c", "microdata.7.1.json")
    # The fixture is read as bytes, so decode it before parsing the JSON.
    expected = json.loads(raw_expected.decode("UTF-8"))
    extractor = MicrodataExtractor(strict=True)
    extracted = extractor.extract(html, "http://blog.example.com/progress-report")
    self.assertDictEqual(extracted, expected)
开发者ID:pombredanne,项目名称:extruct,代码行数:7,代码来源:test_microdata.py
示例4: test_extraction_encoding
def test_extraction_encoding(self):
    """Check link extraction against differently encoded documents.

    The body of ``linkextractor_noenc.html`` is wrapped in two responses: one
    whose Content-Type header declares ``charset=utf-8`` and one with no
    header at all. ``linkextractor_latin1.html`` is wrapped without a header
    as well. Each response is passed to ``BaseSgmlLinkExtractor`` and the
    extracted links are compared with the expected URLs and link texts.
    """
    body = get_testdata("link_extractor", "linkextractor_noenc.html")
    response_utf8 = HtmlResponse(
        url="http://example.com/utf8",
        body=body,
        headers={"Content-Type": ["text/html; charset=utf-8"]},
    )
    response_noenc = HtmlResponse(url="http://example.com/noenc", body=body)
    body = get_testdata("link_extractor", "linkextractor_latin1.html")
    response_latin1 = HtmlResponse(url="http://example.com/latin1", body=body)

    lx = BaseSgmlLinkExtractor()

    # The expected texts are written as unicode literals instead of
    # ``"<bytes>".decode(...)``: a plain string literal has no ``decode``
    # method on Python 3, while ``u""`` literals mean the same on 2 and 3.
    # u"\u20ac" is the UTF-8 sequence \xe2\x82\xac; u"\xe1" is latin1 \xe1.
    self.assertEqual(
        lx.extract_links(response_utf8),
        [
            Link(url="http://example.com/sample_%C3%B1.html", text=""),
            Link(url="http://example.com/sample_%E2%82%AC.html", text=u"sample \u20ac text"),
        ],
    )
    self.assertEqual(
        lx.extract_links(response_noenc),
        [
            Link(url="http://example.com/sample_%C3%B1.html", text=""),
            Link(url="http://example.com/sample_%E2%82%AC.html", text=u"sample \u20ac text"),
        ],
    )
    self.assertEqual(
        lx.extract_links(response_latin1),
        [
            Link(url="http://example.com/sample_%F1.html", text=""),
            Link(url="http://example.com/sample_%E1.html", text=u"sample \xe1 text"),
        ],
    )
开发者ID:RexMao,项目名称:scrapy,代码行数:33,代码来源:test_linkextractors_deprecated.py
示例5: test_w3c_object_element
def test_w3c_object_element(self):
    """Compare strict microdata extraction of ``microdata.object.html`` with its JSON fixture.

    The second argument passed to ``extract`` is the page URL
    ``http://www.example.com/microdata/test``.
    """
    html = get_testdata('w3c', 'microdata.object.html')
    raw_expected = get_testdata('w3c', 'microdata.object.json')
    # The fixture is read as bytes, so decode it before parsing the JSON.
    expected = json.loads(raw_expected.decode('UTF-8'))
    extractor = MicrodataExtractor(strict=True)
    extracted = extractor.extract(html, 'http://www.example.com/microdata/test')
    self.assertDictEqual(extracted, expected)
开发者ID:gz51837844,项目名称:extruct,代码行数:7,代码来源:test_microdata.py
示例6: _test_data
def _test_data(formats):
    """Load the uncompressed sample feed and one response per requested format.

    Args:
        formats: iterable of file-extension strings; each one is appended to
            ``'feed-sample1.'`` to name a fixture in the ``compressed``
            test-data directory.

    Returns:
        A ``(uncompressed_body, test_responses)`` tuple, where
        ``uncompressed_body`` is the content of ``feed-sample1.xml`` and
        ``test_responses`` maps each format to a ``Response`` for
        ``http://foo.com/bar`` whose body is the matching fixture.
    """
    uncompressed_body = get_testdata('compressed', 'feed-sample1.xml')
    # The loop variable is called ``fmt`` so the builtin ``format`` is not shadowed.
    test_responses = {
        fmt: Response(
            'http://foo.com/bar',
            body=get_testdata('compressed', 'feed-sample1.' + fmt),
        )
        for fmt in formats
    }
    return uncompressed_body, test_responses
开发者ID:505555998,项目名称:scrapy,代码行数:7,代码来源:test_downloadermiddleware_decompression.py
示例7: test_w3c_data_element
def test_w3c_data_element(self):
    """Compare strict microdata extraction of ``microdata.4.2.data.html`` with its JSON fixture."""
    html = get_testdata('w3c', 'microdata.4.2.data.html')
    raw_expected = get_testdata('w3c', 'microdata.4.2.data.json')
    # The fixture is read as bytes, so decode it before parsing the JSON.
    expected = json.loads(raw_expected.decode('UTF-8'))
    extractor = MicrodataExtractor(strict=True)
    extracted = extractor.extract(html)
    self.assertDictEqual(extracted, expected)
开发者ID:gz51837844,项目名称:extruct,代码行数:7,代码来源:test_microdata.py
示例8: test_w3c_5_2
def test_w3c_5_2(self):
    """Compare extraction of ``microdata.5.2.html`` with the ``withtext`` JSON fixture.

    The extractor is built with ``add_text_content=True``.
    """
    html = get_testdata('w3c', 'microdata.5.2.html')
    raw_expected = get_testdata('w3c', 'microdata.5.2.withtext.json')
    # The fixture is read as bytes, so decode it before parsing the JSON.
    expected = json.loads(raw_expected.decode('UTF-8'))
    extractor = MicrodataExtractor(add_text_content=True)
    extracted = extractor.extract(html)
    self.assertDictEqual(extracted, expected)
开发者ID:gz51837844,项目名称:extruct,代码行数:7,代码来源:test_microdata.py
示例9: test_schemaorg_Event
def test_schemaorg_Event(self):
    """Compare microdata extraction of the schema.org ``Event`` samples with their JSON fixtures.

    Samples 1, 2, 3, 4 and 8 are checked; a new extractor is created for each.
    """
    for number in (1, 2, 3, 4, 8):
        # Fixture names share a zero-padded stem, e.g. "Event.001".
        stem = "Event.{:03d}".format(number)
        html = get_testdata("schema.org", stem + ".html")
        raw_expected = get_testdata("schema.org", stem + ".json")
        expected = json.loads(raw_expected.decode("UTF-8"))
        extractor = MicrodataExtractor()
        extracted = extractor.extract(html)
        self.assertDictEqual(extracted, expected)
开发者ID:pombredanne,项目名称:extruct,代码行数:8,代码来源:test_microdata.py
示例10: test_schemaorg_MusicRecording
def test_schemaorg_MusicRecording(self):
    """Compare microdata extraction of the schema.org ``MusicRecording`` sample with its JSON fixture.

    Only sample 1 is listed; a new extractor is created for each sample.
    """
    for number in (1,):
        # Fixture names share a zero-padded stem, e.g. "MusicRecording.001".
        stem = 'MusicRecording.{:03d}'.format(number)
        html = get_testdata('schema.org', stem + '.html')
        raw_expected = get_testdata('schema.org', stem + '.json')
        expected = json.loads(raw_expected.decode('UTF-8'))
        extractor = MicrodataExtractor()
        extracted = extractor.extract(html)
        self.assertDictEqual(extracted, expected)
开发者ID:gz51837844,项目名称:extruct,代码行数:8,代码来源:test_microdata.py
示例11: test_schemaorg_CreativeWork
def test_schemaorg_CreativeWork(self):
    """Compare JSON-LD extraction of the schema.org ``CreativeWork`` sample with its ``.jsonld`` fixture.

    Only sample 1 is listed; a new extractor is created for each sample.
    """
    for number in (1,):
        # Fixture names share a zero-padded stem, e.g. "CreativeWork.001".
        stem = 'CreativeWork.{:03d}'.format(number)
        html = get_testdata('schema.org', stem + '.html')
        raw_expected = get_testdata('schema.org', stem + '.jsonld')
        expected = json.loads(raw_expected.decode('UTF-8'))
        extractor = JsonLdExtractor()
        extracted = extractor.extract(html)
        self.assertDictEqual(extracted, expected)
开发者ID:gz51837844,项目名称:extruct,代码行数:8,代码来源:test_jsonld.py
示例12: test_w3c_rdf11primer
def test_w3c_rdf11primer(self):
    """Compare RDFa extraction of the RDF 1.1 primer example with its expanded JSON fixture.

    Only example 14 is listed; a new extractor is created for each example.
    """
    for number in (14,):
        stem = 'w3c.rdf11primer.example{:03d}'.format(number)
        # The HTML fixture is decoded to text before being handed to the extractor.
        html_text = get_testdata('w3crdfa', stem + '.html').decode('UTF-8')
        raw_expected = get_testdata('w3crdfa', stem + '.expanded.json')
        expected = json.loads(raw_expected.decode('UTF-8'))
        extractor = RDFaExtractor()
        # NOTE(review): the host is spelled "exaple" in the original; it is kept
        # as-is because the expected fixture may depend on this URL -- confirm.
        extracted = extractor.extract(html_text, url='http://www.exaple.com/index.html')
        self.assertJsonLDEqual(extracted, expected)
开发者ID:scrapinghub,项目名称:extruct,代码行数:11,代码来源:test_rdfa.py
示例13: test_wikipedia_xhtml_rdfa
def test_wikipedia_xhtml_rdfa(self):
    """Compare RDFa extraction of the ``wikipedia`` ``xhtml+rdfa`` page with its expanded JSON fixture."""
    stem = 'xhtml+rdfa'
    # The HTML fixture is decoded to text before being handed to the extractor.
    html_text = get_testdata('wikipedia', stem + '.html').decode('UTF-8')
    raw_expected = get_testdata('wikipedia', stem + '.expanded.json')
    expected = json.loads(raw_expected.decode('UTF-8'))
    extractor = RDFaExtractor()
    # NOTE(review): the host is spelled "exaple" in the original; it is kept
    # as-is because the expected fixture may depend on this URL -- confirm.
    extracted = extractor.extract(html_text, url='http://www.exaple.com/index.html')
    self.assertJsonLDEqual(extracted, expected)
开发者ID:scrapinghub,项目名称:extruct,代码行数:11,代码来源:test_rdfa.py
示例14: test_songkick
def test_songkick(self):
    """Compare JSON-LD extraction of saved Songkick pages with their ``.jsonld`` fixtures.

    Each page name doubles as the fixture file stem; a new extractor is
    created for each page.
    """
    page_names = [
        "Elysian Fields Brooklyn Tickets, The Owl Music Parlor, 31 Oct 2015",
        #"Maxïmo Park Gigography, Tour History & Past Concerts",
        #"Years & Years Tickets, Tour Dates 2015 & Concerts",
    ]
    for page_name in page_names:
        html = get_testdata('songkick', page_name + '.html')
        raw_expected = get_testdata('songkick', page_name + '.jsonld')
        expected = json.loads(raw_expected.decode('UTF-8'))
        extractor = JsonLdExtractor()
        extracted = extractor.extract(html)
        self.assertDictEqual(extracted, expected)
开发者ID:gz51837844,项目名称:extruct,代码行数:12,代码来源:test_jsonld.py
示例15: test_csviter_encoding
def test_csviter_encoding(self):
    """Check the rows ``csviter`` yields for responses with explicit encodings.

    ``feed-sample4.csv`` is wrapped in a ``TextResponse`` with
    ``encoding='latin1'`` and ``feed-sample5.csv`` in one with
    ``encoding='cp852'``; the rows from each are compared with the expected
    unicode dictionaries.
    """
    latin1_body = get_testdata('feeds', 'feed-sample4.csv')
    cp852_body = get_testdata('feeds', 'feed-sample5.csv')

    latin1_response = TextResponse(url="http://example.com/", body=latin1_body, encoding='latin1')
    latin1_rows = csviter(latin1_response)
    self.assertEqual(
        list(latin1_rows),
        [
            {u'id': u'1', u'name': u'latin1', u'value': u'test'},
            {u'id': u'2', u'name': u'something', u'value': u'\xf1\xe1\xe9\xf3'},
        ],
    )

    cp852_response = TextResponse(url="http://example.com/", body=cp852_body, encoding='cp852')
    cp852_rows = csviter(cp852_response)
    self.assertEqual(
        list(cp852_rows),
        [
            {u'id': u'1', u'name': u'cp852', u'value': u'test'},
            {u'id': u'2', u'name': u'something', u'value': u'\u255a\u2569\u2569\u2569\u2550\u2550\u2557'},
        ],
    )
开发者ID:easyshell,项目名称:scrapy,代码行数:15,代码来源:test_utils_iterators.py
示例16: test_w3c_rdfaprimer
def test_w3c_rdfaprimer(self):
    """Compare RDFa extraction of the RDFa primer examples with their expanded JSON fixtures.

    Examples 5-11 and 15 are checked. For each one the file prefix and both
    the extracted and expected data (in two renderings) are printed before
    the assertion runs.
    """
    for number in (5, 6, 7, 8, 9, 10, 11, 15):
        stem = 'w3c.rdfaprimer.example{:03d}'.format(number)
        print(stem)

        # The HTML fixture is decoded to text before being handed to the extractor.
        html_text = get_testdata('w3crdfa', stem + '.html').decode('UTF-8')
        raw_expected = get_testdata('w3crdfa', stem + '.expanded.json')
        expected = json.loads(raw_expected.decode('UTF-8'))

        extractor = RDFaExtractor()
        extracted = extractor.extract(html_text, url='http://www.example.com/index.html')

        # Diagnostic output: first the tupleized pformat view, then the prettified view.
        print("extracted:\n%s" % pformat(tupleize(extracted)))
        print("expected:\n%s" % pformat(tupleize(expected)))
        print("extracted:\n%s" % self.prettify(extracted))
        print("expected:\n%s" % self.prettify(expected))

        self.assertJsonLDEqual(extracted, expected)
开发者ID:scrapinghub,项目名称:extruct,代码行数:16,代码来源:test_rdfa.py
示例17: test_csviter_delimiter_binary_response_assume_utf8_encoding
def test_csviter_delimiter_binary_response_assume_utf8_encoding(self):
    """Check ``csviter`` with a tab delimiter on a plain ``Response``.

    The commas in ``feed-sample3.csv`` are replaced with tabs, the bytes are
    wrapped in a ``Response`` (no encoding is given), and the rows yielded
    with ``delimiter='\\t'`` are compared with the expected unicode
    dictionaries.
    """
    comma_body = get_testdata('feeds', 'feed-sample3.csv')
    tab_body = comma_body.replace(b',', b'\t')
    response = Response(url="http://example.com/", body=tab_body)
    rows = csviter(response, delimiter='\t')

    self.assertEqual(
        list(rows),
        [
            {u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty', u'value': u''},
        ],
    )
开发者ID:easyshell,项目名称:scrapy,代码行数:10,代码来源:test_utils_iterators.py
示例18: test_csviter_wrong_quotechar
def test_csviter_wrong_quotechar(self):
    """Check the rows ``csviter`` yields for ``feed-sample6.csv`` with default arguments.

    The expected keys and values still carry single-quote characters, i.e.
    the quotes in the fixture are kept as part of the data.
    """
    raw_body = get_testdata('feeds', 'feed-sample6.csv')
    response = TextResponse(url="http://example.com/", body=raw_body)
    rows = csviter(response)

    self.assertEqual(
        list(rows),
        [
            {u"'id'": u"1", u"'name'": u"'alpha'", u"'value'": u"'foobar'"},
            {u"'id'": u"2", u"'name'": u"'unicode'", u"'value'": u"'\xfan\xedc\xf3d\xe9\u203d'"},
            {u"'id'": u"'3'", u"'name'": u"'multi'", u"'value'": u"'foo"},
            {u"'id'": u"4", u"'name'": u"'empty'", u"'value'": u""},
        ],
    )
开发者ID:easyshell,项目名称:scrapy,代码行数:10,代码来源:test_utils_iterators.py
示例19: test_csviter_exception
def test_csviter_exception(self):
    """Check that ``csviter`` raises ``StopIteration`` once its rows are used up.

    Four rows are pulled from the iterator built over ``feed-sample3.csv``;
    a fifth ``next`` call must raise ``StopIteration``.
    """
    body = get_testdata('feeds', 'feed-sample3.csv')
    response = TextResponse(url="http://example.com/", body=body)
    # Named ``rows`` rather than ``iter`` so the builtin is not shadowed.
    rows = csviter(response)

    # Consume the four rows that are expected to be available.
    for _ in range(4):
        next(rows)

    self.assertRaises(StopIteration, next, rows)
开发者ID:easyshell,项目名称:scrapy,代码行数:11,代码来源:test_utils_iterators.py
示例20: test_csviter_falserow
def test_csviter_falserow(self):
    """Check the rows ``csviter`` yields when extra lines are appended to the feed.

    Two lines, ``a,b`` and ``a,b,c,d``, are appended to ``feed-sample3.csv``;
    the expected result contains only the four original rows.
    """
    original_body = get_testdata('feeds', 'feed-sample3.csv')
    appended_lines = [b'a,b', b'a,b,c,d']
    body = b'\n'.join([original_body] + appended_lines)
    response = TextResponse(url="http://example.com/", body=body)
    rows = csviter(response)

    self.assertEqual(
        list(rows),
        [
            {u'id': u'1', u'name': u'alpha', u'value': u'foobar'},
            {u'id': u'2', u'name': u'unicode', u'value': u'\xfan\xedc\xf3d\xe9\u203d'},
            {u'id': u'3', u'name': u'multi', u'value': FOOBAR_NL},
            {u'id': u'4', u'name': u'empty', u'value': u''},
        ],
    )
开发者ID:easyshell,项目名称:scrapy,代码行数:12,代码来源:test_utils_iterators.py
注:本文中的tests.get_testdata函数示例由纯净天空整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论