本文整理汇总了Python中mwparserfromhell.parse函数的典型用法代码示例。如果您正苦于以下问题:Python parse函数的具体用法?Python parse怎么用?Python parse使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: get_captions
def get_captions(title):
    """Collect the per-language captions that belong to ``title``.

    Lists pages in namespace 10 starting at the part of ``title`` after its
    first colon, keeps those whose title begins with ``title`` plus a space,
    and takes the language code from the first parenthesised group of each
    page title. For every language (sorted by code) the first template on
    the page is read, its first positional parameter is expanded and
    stripped of markup, and one ``"<language name>: <caption>"`` line is
    emitted.

    Pages that start with ``#REDIRECT`` or contain no template are skipped.
    Returns the concatenated lines as a single string.
    """
    response = api(
        action='query',
        list='allpages',
        apfrom=title.split(':', 1)[1],
        aplimit='100',
        apnamespace='10',
    )
    wanted_prefix = title + ' '

    # language code -> page title; a later page with the same code wins.
    pages_by_lang = {}
    for entry in response['query']['allpages']:
        page_title = entry['title']
        if not page_title.startswith(wanted_prefix):
            continue
        lang_code = page_title.split('(')[1].split(')')[0]
        pages_by_lang[lang_code] = page_title

    lines = []
    for lang_code in sorted(pages_by_lang):
        lang_name = get_language_name(lang_code)
        wikitext = page_content(pages_by_lang[lang_code])
        if wikitext.strip().startswith('#REDIRECT'):
            # Redirect pages carry no caption of their own.
            continue
        templates = mwparserfromhell.parse(wikitext).filter_templates()
        if not templates:
            continue
        caption_code = templates[0].get(1).value
        # We want templates like {{w|FooBar}} to render, so expand them
        expanded = expand_templates(unicode(caption_code))
        caption = unicode(mwparserfromhell.parse(expanded).strip_code())
        lines.append('%s: %s\n' % (lang_name, caption))
    return ''.join(lines)
开发者ID:Toollabs,项目名称:daily-image-l,代码行数:34,代码来源:dailyimagel.py
示例2: update
def update(self, push=True):
    """Refresh the storm data and write it into the Hurricane Sandy infobox.

    Calls ``fetch_info()`` and ``parse_info()``, prints the parsed values,
    then updates every ``Infobox hurricane current`` template on
    ``self.wikipage`` whose ``name`` parameter is ``Hurricane Sandy``.
    A diff is always shown; the page is saved only when ``push`` is true.
    """
    self.fetch_info()
    self.parse_info()
    print(self.LOCATION)
    print(self.CATEGORY)
    print(self.ABOUT)
    print(self.MOVEMENT)
    print(self.PRESSURE)
    print(self.WINDS)

    old_text = self.wikipage.get()
    parsed = mwparserfromhell.parse(old_text)

    # The season article is fetched and parsed as before; its result is not
    # used by anything below.
    season_page = pywikibot.Page(self.wikipage.site, '2012 Atlantic hurricane season')
    mwparserfromhell.parse(season_page.get())

    wanted_template = 'Infobox hurricane current'.lower()
    for infobox in parsed.filter_templates():
        if infobox.name.lower().strip() != wanted_template:
            continue
        if infobox.get('name').value.strip() != 'Hurricane Sandy':
            continue
        infobox.get('time').value = self.UTC_TIMESTAMP
        infobox.get('category').value = self.CATEGORY
        infobox.get('gusts').value = self.format_wind(self.WINDS)
        infobox.get('lat').value = self.LOCATION['latc']
        infobox.get(1).value = self.LOCATION['latd']
        infobox.get('lon').value = self.LOCATION['lonc']
        infobox.get(2).value = self.LOCATION['lond']
        infobox.get('movement').value = self.format_movement(self.MOVEMENT)
        infobox.get('pressure').value = self.format_pressure(self.PRESSURE)

    new_text = unicode(parsed)
    pywikibot.showDiff(old_text, new_text)
    if push:
        self.wikipage.put(new_text, 'Bot: Updating hurricane infobox. Errors? [[User talk:Legoktm|report them!]]')
开发者ID:Mdann52,项目名称:pywikipedia-scripts,代码行数:33,代码来源:hurricane.py
示例3: parse
def parse():
    """Split the ``rfd`` page into one request dict per section.

    Every entry returned by ``get_sections()`` except the first is turned
    into a dict with these keys:

    * ``section`` - the section's Wikicode
    * ``header``  - the text of the section's first heading
    * ``text``    - the section re-parsed with that heading line removed
    * ``type``    - ``'bulk'`` when a template whose name starts with
      ``Rfd group`` is found first, ``'single'`` when an ``rfd links``
      template is found first; the key is absent when neither occurs
    * ``item``    - a ``pywikibot.ItemPage`` built from the first parameter
      of the ``rfd links`` template, or ``None``

    Returns the list of request dicts in page order.
    """
    page_code = mwparserfromhell.parse(rfd.get())
    requests = []
    # The previous version also evaluated ``get_sections()[2]`` up front and
    # threw the result away, which raised IndexError on pages with fewer
    # than three sections.
    for section in page_code.get_sections()[1:]:
        header = unicode(section.filter_headings()[0])
        body = mwparserfromhell.parse(unicode(section).replace(header + '\n', ''))
        data = {'section': section, 'header': header, 'text': body}
        item = None
        # Only the first recognised template decides the request type.
        for template in body.filter_templates():
            if unicode(template.name).startswith('Rfd group'):
                data['type'] = 'bulk'
                break
            elif template.name == 'rfd links':
                data['type'] = 'single'
                item = template.get(1).value
                break
        if item:
            item = pywikibot.ItemPage(repo, item)
        data['item'] = item
        requests.append(data)
    return requests
开发者ID:HazardSJ,项目名称:wikidata,代码行数:29,代码来源:parser.py
示例4: test_multiple_nodes_newlines
def test_multiple_nodes_newlines(self):
    """Run ``_do_test`` for each of three newline-separated links in turn."""
    snippet = "[[link1]]\n[[link2]]\n[[link3]]"
    # (second argument, third argument) passed to _do_test for each case
    cases = (
        ("[[link1]]", "[[link2]]\n[[link3]]"),
        ("[[link2]]", "[[link1]]\n[[link3]]"),
        ("[[link3]]", "[[link1]]\n[[link2]]"),
    )
    for target, remainder in cases:
        # Parse afresh for every case so each one starts from the full snippet.
        self._do_test(mwparserfromhell.parse(snippet), target, remainder)
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:8,代码来源:test_wikicode.py
示例5: test_multiple_nodes_spaces
def test_multiple_nodes_spaces(self):
    """Run ``_do_test`` for each of three space-separated links in turn."""
    snippet = "foo [[link1]] [[link2]] [[link3]] bar"
    # (second argument, third argument) passed to _do_test for each case
    cases = (
        ("[[link1]]", "foo [[link2]] [[link3]] bar"),
        ("[[link2]]", "foo [[link1]] [[link3]] bar"),
        ("[[link3]]", "foo [[link1]] [[link2]] bar"),
    )
    for target, remainder in cases:
        # Parse afresh for every case so each one starts from the full snippet.
        self._do_test(mwparserfromhell.parse(snippet), target, remainder)
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:8,代码来源:test_wikicode.py
示例6: cleanup_sectionlink
def cleanup_sectionlink(self, section_title):
    """Return ``section_title`` with wiki markup stripped.

    When the title consists of exactly one template named ``Erl``/``erl``,
    that template's first positional parameter is used instead of the
    whole title. The result is passed through ``strip_code`` (normalised
    and collapsed) and stripped of surrounding whitespace.
    """
    templates = mwparser.parse(section_title).filter_templates()
    is_single_erl = len(templates) == 1 and templates[0].name.matches(('Erl', 'erl'))
    if is_single_erl:
        section_title = templates[0].get(1)
    reparsed = mwparser.parse(unicode(section_title))
    return reparsed.strip_code(normalize=True, collapse=True).strip()
开发者ID:edgarskos,项目名称:AsuraBot,代码行数:9,代码来源:Samhlada.py
示例7: wikicode
def wikicode(self):
    """
    Return the parsed wikitext (mwparserfromhell.wikicode.Wikicode object).

    The parse result is stored on ``self._wikicode`` and reused while it is
    truthy. If parsing ``self.wikitext`` raises ``SystemError``, an empty
    parse result is stored instead.
    """
    if self._wikicode:
        return self._wikicode
    try:
        parsed = mwparserfromhell.parse(self.wikitext)
    except SystemError:
        parsed = mwparserfromhell.parse('')
    self._wikicode = parsed
    return self._wikicode
开发者ID:necrop,项目名称:pylib3.2,代码行数:10,代码来源:article.py
示例8: _parse_revs_into_wcode
def _parse_revs_into_wcode(rev_text_dict):
    """Parse every revision text into wikicode.

    :param rev_text_dict: mapping of revision id to revision wikitext
    :returns: list of ``(rev_id, wikicode)`` tuples in the mapping's
        iteration order; a revision that raises ``ParserError`` is logged
        and paired with the parse of an empty string
    """
    parsed_revs = []
    for rev_id, rev_text in rev_text_dict.items():
        try:
            wcode = mwp.parse(rev_text)
        except mwp.parser.ParserError as err:
            logger.warning(err)
            logger.warning('Error parsing {0}'.format(rev_id))
            wcode = mwp.parse('')
        parsed_revs.append((rev_id, wcode))
    return parsed_revs
开发者ID:WikiEducationFoundation,项目名称:academic_classification,代码行数:10,代码来源:cmdline.py
示例9: test_contains
def test_contains(self):
    """test Wikicode.contains()"""
    code = parse("Here is {{aaa|{{bbb|xyz{{ccc}}}}}} and a [[page|link]]")
    outer, middle, inner = code.filter_templates()
    # Same text as the innermost template, but a node from a separate parse.
    foreign = parse("{{ccc}}").filter_templates()[0]

    for own_node in (outer, inner):
        self.assertTrue(code.contains(own_node))
    # The foreign node object is rejected, while its string form is found.
    self.assertFalse(code.contains(foreign))
    self.assertTrue(code.contains(str(foreign)))
    self.assertTrue(code.contains(middle.params[0].value))
开发者ID:earwig,项目名称:mwparserfromhell,代码行数:10,代码来源:test_wikicode.py
示例10: __init__
def __init__(self, title=None, text=None):
    """Initialise the article, loading the wiki page when a title is given.

    :param title: page title; when not ``None`` the page is loaded from
        ``site`` and its text is parsed into ``self.text`` and
        ``self.wikitext`` (two separate parse results)
    :param text: forwarded to the parent initialiser
    """
    super(Article, self).__init__(text=text)
    self.title = title
    self.paragraphs = None
    self.readable_text = None
    self.lede_length = 1
    if title is None:
        return
    self.page = pwb.Page(site, title)
    # Parsed twice on purpose-preserving grounds: the two attributes must
    # remain distinct objects, exactly as before.
    self.text = mwp.parse(self.page.text)
    self.wikitext = mwp.parse(self.page.text)
开发者ID:thewikipedian,项目名称:dykreviewbot,代码行数:10,代码来源:dykbot.py
示例11: page_f
def page_f(pg):
    """Add archive links to dead citations on ``pg`` and offer to save.

    For every template whose name is in ``CITE_TEMPLATES`` and whose ``url``
    is not up (per ``is_up``), an archived copy is looked up with
    ``archive_page`` and ``archiveurl``/``deadurl``/``archivedate`` are
    added. Citations without a ``url``, on ``msnbc.com``, or that already
    have an ``archiveurl`` are left alone.

    Afterwards, inside each ``<ref>...</ref>``, templates named in
    ``DEAD_LINK`` that follow a cite template carrying ``archiveurl`` are
    removed. If the text changed, a diff is shown and the page is saved
    after the operator answers ``y``.

    :param pg: page object providing ``get()``, ``title()`` and ``put()``
    :returns: ``None``
    """
    text = pg.get()
    code = mwparserfromhell.parse(text)
    for template in code.filter_templates(recursive=True):
        if template.name.lower().strip() not in CITE_TEMPLATES:
            continue
        if not template.has_param('url'):
            # Nothing to check or archive; previously this raised ValueError.
            continue
        url = template.get('url').value.strip()
        if 'msnbc.com' in url:
            continue
        if is_up(url):
            continue
        if template.has_param('archiveurl'):
            continue
        # find it on archive.org
        ai_url = archive_page(url)
        if not ai_url:
            print('Not found. :(')
            continue
        # Presumably ai_url looks like
        # http://web.archive.org/web/<YYYYMMDDhhmmss>/...; the 14 characters
        # after the 27-character prefix are read as the timestamp - TODO confirm.
        raw_date = ai_url[27:27 + 14]
        year = int(raw_date[:4])
        month = MONTH_NAMES[int(raw_date[4:6]) - 1]
        day = int(raw_date[6:8])
        template.add('archiveurl', ai_url)
        template.add('deadurl', 'yes')
        template.add('archivedate', '%s %s %s' % (day, month, year))

    # lets remove all the {{dead link}} now
    new_text = unicode(code)
    for tag in re.finditer(r'<ref(.*?)>(.*?)</ref>', new_text):
        ref_code = mwparserfromhell.parse(tag.group(2))
        # Tracked per <ref>, not per template: the flag has to survive from
        # the cite template to the dead-link template that follows it.
        archived = False
        for template in ref_code.filter_templates():
            name = template.name.lower().strip()
            if name in CITE_TEMPLATES:
                if template.has_param('archiveurl'):
                    archived = True
            elif name in DEAD_LINK and archived:
                ref_code.remove(template)
        new_text = new_text.replace(tag.group(2), unicode(ref_code))

    # The comparison used to be inverted, which discarded real changes and
    # prompted to save unchanged pages.
    if text == new_text:
        print('No changes made on %s' % pg.title(asLink=True))
        return
    pywikibot.showDiff(text, new_text)
    if raw_input('Save?').lower() == 'y':
        pg.put(new_text, 'Manually-assisted archive url fetching.')
开发者ID:Mdann52,项目名称:pywikipedia-scripts,代码行数:53,代码来源:archive.py
示例12: test_matches
def test_matches(self):
    """test Wikicode.matches()"""
    plain = parse("Cleanup")
    commented = parse("\nstub<!-- TODO: make more specific -->")

    for accepted in ("Cleanup", "cleanup", " cleanup\n"):
        self.assertTrue(plain.matches(accepted))
    for rejected in ("CLEANup", "Blah"):
        self.assertFalse(plain.matches(rejected))

    for accepted in ("stub", "Stub<!-- no, it's fine! -->"):
        self.assertTrue(commented.matches(accepted))
    self.assertFalse(commented.matches("StuB"))
开发者ID:stanta,项目名称:ipc-parser-1,代码行数:12,代码来源:test_wikicode.py
示例13: process_page
def process_page(page):
    """Tag ``ap.google`` links inside ``<ref>`` tags with ``TAG``.

    The page text is first run through ``AWB.do_page``. Every URL that
    starts with ``http://ap.google`` or ``https://ap.google`` is collected,
    then each ``<ref>...</ref>`` containing such a URL gets ``TAG`` appended
    to its content unless the ref already mentions ``dead link``. URLs that
    were not handled are printed.

    :param page: page object providing ``get()`` and ``put()``
    :returns: ``True`` when the page was saved, ``None`` when nothing changed
    """
    text = page.get()
    text, _ = AWB.do_page(text, date=False)
    code = mwparserfromhell.parse(text)
    urls = []
    for m in urlregex.MATCH_URL.finditer(unicode(code)):
        u = m.group(0)
        if u.startswith(('http://ap.google', 'https://ap.google')):
            urls.append(u)

    # find ref tags
    stop_scan = False
    for tag in re.finditer(r'<ref(.*?)>(.*?)</ref>', unicode(code)):
        # Iterate over a snapshot: entries are removed from ``urls`` inside
        # the loop, and removing from the list being iterated made the loop
        # skip the entry after each removal.
        for url in list(urls):
            if url not in tag.group(2):
                continue
            for template in mwparserfromhell.parse(tag.group(2)).filter_templates():
                if template.has_param('archiveurl'):
                    try:
                        urls.remove(url)
                    except ValueError:
                        # Already removed by an earlier template in this ref.
                        pass
                    stop_scan = True
            if stop_scan:
                break
            if 'dead link' not in tag.group(0).lower():
                code = unicode(code).replace(tag.group(0), '<ref'+tag.group(1)+'>'+tag.group(2)+TAG+'</ref>')
            urls.remove(url)
        if stop_scan:
            # NOTE(review): as before, the first ref that already carries an
            # archiveurl ends the whole scan, so later refs are not tagged.
            # This looks unintended (skipping only this ref seems more
            # likely) - confirm before changing.
            break

    if urls:
        print('STILL HAVE THESE LEFT: ' + ', '.join(urls))
    new_text = unicode(code)
    pywikibot.showDiff(text, new_text)
    if text != new_text:
        page.put(new_text, 'Bot: Tagging ap.google.* links with {{dead link}}')
        return True
    return None
开发者ID:TAP-WP,项目名称:pywikipedia-scripts,代码行数:52,代码来源:ap_dead_link.py
示例14: test_transform
def test_transform(self):
wcode_list = [mwp.parse('{{Infobox something | thing}}'
'{{not-one else}}'
'{{infobox again}}'),
mwp.parse('{{Infobox num1 | thing}}'
'{{not-one else}}'
'{{infobox num2}}')]
result = ifb._transform(wcode_list)
self.assertEqual(len(result), 2)
self.assertEqual(result[0], 'infobox-something infobox-again')
self.assertEqual(result[1], 'infobox-num1 infobox-num2')
开发者ID:WikiEducationFoundation,项目名称:academic_classification,代码行数:13,代码来源:test_infobox.py
示例15: load_stub_templates
def load_stub_templates(self):
    """Fill ``self.stub_templates`` from the stub-type listing pages.

    Reads the ``Stub types`` index page, follows every transclusion whose
    name starts with the index page's title plus ``/``, and on each of
    those pages records the lower-cased first parameter of every ``tl``
    template.
    """
    self.stub_templates = []
    index_page = pywikibot.Page(self.site, 'Wikipedia:WikiProject Stub sorting/Stub types')
    index_code = mwparserfromhell.parse(index_page.get())
    subpage_prefix = 'Wikipedia:WikiProject Stub sorting/Stub types/'
    for transclusion in index_code.filter_templates():
        if not transclusion.name.startswith(subpage_prefix):
            continue
        subpage = pywikibot.Page(self.site, unicode(transclusion.name))
        subpage_code = mwparserfromhell.parse(subpage.get())
        for entry in subpage_code.filter_templates():
            if entry.name.lower() == 'tl':
                self.stub_templates.append(unicode(entry.get(1).value).lower())
开发者ID:Mdann52,项目名称:pywikipedia-scripts,代码行数:13,代码来源:tagger.py
示例16: _test_search
def _test_search(self, meth, expected):
    """Base test for insert_before(), insert_after(), and replace().

    ``meth`` is bound to each Wikicode under test with ``partial`` and then
    called as ``func(obj, value, recursive=...)``. ``expected`` is indexed
    0 through 5; each entry is compared with one of the six Wikicode
    objects below after the calls for that scenario have been applied.
    """
    # Scenario 0: flat templates, searched by string and by node.
    code = parse("{{a}}{{b}}{{c}}{{d}}{{e}}")
    func = partial(meth, code)
    func("{{b}}", "x", recursive=True)
    func("{{d}}", "[[y]]", recursive=False)
    func(code.get(2), "z")
    self.assertEqual(expected[0], code)
    # A string that does not occur in the code must raise ValueError.
    self.assertRaises(ValueError, func, "{{r}}", "n", recursive=True)
    self.assertRaises(ValueError, func, "{{r}}", "n", recursive=False)
    # A node taken from a separate parse must raise ValueError too, even
    # though its text is "{{a}}".
    fake = parse("{{a}}").get(0)
    self.assertRaises(ValueError, func, fake, "n", recursive=True)
    self.assertRaises(ValueError, func, fake, "n", recursive=False)
    # Scenario 1: repeated templates, mixing node and string searches.
    code2 = parse("{{a}}{{a}}{{a}}{{b}}{{b}}{{b}}")
    func = partial(meth, code2)
    func(code2.get(1), "c", recursive=False)
    func("{{a}}", "d", recursive=False)
    func(code2.get(-1), "e", recursive=True)
    func("{{b}}", "f", recursive=True)
    self.assertEqual(expected[1], code2)
    # Scenario 2: targets nested inside template parameters raise
    # ValueError with recursive=False and succeed with recursive=True.
    code3 = parse("{{a|{{b}}|{{c|d={{f}}}}}}")
    func = partial(meth, code3)
    obj = code3.get(0).params[0].value.get(0)
    self.assertRaises(ValueError, func, obj, "x", recursive=False)
    func(obj, "x", recursive=True)
    self.assertRaises(ValueError, func, "{{f}}", "y", recursive=False)
    func("{{f}}", "y", recursive=True)
    self.assertEqual(expected[2], code3)
    # Scenario 3: searching for a run of two adjacent nodes.
    code4 = parse("{{a}}{{b}}{{c}}{{d}}{{e}}{{f}}{{g}}{{h}}{{i}}{{j}}")
    func = partial(meth, code4)
    # A Wikicode from a separate parse is rejected regardless of recursive.
    fake = parse("{{b}}{{c}}")
    self.assertRaises(ValueError, func, fake, "q", recursive=False)
    self.assertRaises(ValueError, func, fake, "q", recursive=True)
    func("{{b}}{{c}}", "w", recursive=False)
    func("{{d}}{{e}}", "x", recursive=True)
    func(wrap(code4.nodes[-2:]), "y", recursive=False)
    func(wrap(code4.nodes[-2:]), "z", recursive=True)
    self.assertEqual(expected[3], code4)
    # After the calls above, "{{c}}{{d}}" must raise ValueError in both modes.
    self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=False)
    self.assertRaises(ValueError, func, "{{c}}{{d}}", "q", recursive=True)
    # Scenario 4: a two-node run and a parameter value nested in templates.
    code5 = parse("{{a|{{b}}{{c}}|{{f|{{g}}={{h}}{{i}}}}}}")
    func = partial(meth, code5)
    self.assertRaises(ValueError, func, "{{b}}{{c}}", "x", recursive=False)
    func("{{b}}{{c}}", "x", recursive=True)
    obj = code5.get(0).params[1].value.get(0).params[0].value
    self.assertRaises(ValueError, func, obj, "y", recursive=False)
    func(obj, "y", recursive=True)
    self.assertEqual(expected[4], code5)
    # Scenario 5: plain-text search strings, one of them including "{{".
    code6 = parse("here is {{some text and a {{template}}}}")
    func = partial(meth, code6)
    self.assertRaises(ValueError, func, "text and", "ab", recursive=False)
    func("text and", "ab", recursive=True)
    self.assertRaises(ValueError, func, "is {{some", "cd", recursive=False)
    func("is {{some", "cd", recursive=True)
    self.assertEqual(expected[5], code6)
开发者ID:stanta,项目名称:ipc-parser-1,代码行数:60,代码来源:test_wikicode.py
示例17: process_page
def process_page(self, page):
    """Tag links matching ``self.matching`` inside ``<ref>`` tags.

    The page text is first run through ``self.AWB.do_page``. Every URL that
    ``self.matching`` finds is collected, then each ``<ref>...</ref>``
    containing such a URL gets ``self.tag`` appended to its content unless
    the ref already mentions ``dead link``, ``wayback`` or ``webcite``.
    URLs that were not handled are printed. With ``self.simulate`` set the
    diff is shown but nothing is saved.

    :param page: page object providing ``get()`` and ``put()``
    :returns: ``True`` when the page was saved, ``None`` otherwise
    """
    text = page.get()
    text, _ = self.AWB.do_page(text, date=False)
    code = mwparserfromhell.parse(text)
    urls = []
    for m in urlregex.MATCH_URL.finditer(unicode(code)):
        u = m.group(0)
        if self.matching.search(u):
            urls.append(u)

    # find ref tags
    already_marked = ('dead link', 'wayback', 'webcite')
    stop_scan = False
    for tag in re.finditer(r'<ref(.*?)>(.*?)</ref>', unicode(code)):
        ref_lower = tag.group(0).lower()
        # Iterate over a snapshot: entries are removed from ``urls`` inside
        # the loop, and removing from the list being iterated made the loop
        # skip the entry after each removal.
        for url in list(urls):
            if url not in tag.group(2):
                continue
            for template in mwparserfromhell.parse(tag.group(2)).filter_templates():
                if template.has_param('archiveurl'):
                    try:
                        urls.remove(url)
                    except ValueError:
                        # Already removed by an earlier template in this ref.
                        pass
                    stop_scan = True
            if stop_scan:
                break
            if not any(marker in ref_lower for marker in already_marked):
                code = unicode(code).replace(tag.group(0), '<ref'+tag.group(1)+'>'+tag.group(2)+self.tag+'</ref>')
            urls.remove(url)
        if stop_scan:
            # NOTE(review): as before, the first ref that already carries an
            # archiveurl ends the whole scan, so later refs are not tagged.
            # This looks unintended (skipping only this ref seems more
            # likely) - confirm before changing.
            break

    if urls:
        print('STILL HAVE THESE LEFT: ' + ', '.join(urls))
    new_text = unicode(code)
    pywikibot.showDiff(text, new_text)
    if text != new_text:
        if self.simulate:
            print('Not editing, just simulating.')
            return None
        page.put(new_text, 'Bot: Tagging %s links with {{dead link}}' %self.domain)
        return True
    return None
开发者ID:Mdann52,项目名称:pywikipedia-scripts,代码行数:50,代码来源:ap_dead_link.py
示例18: test_get_ancestors_parent
def test_get_ancestors_parent(self):
    """test Wikicode.get_ancestors() and Wikicode.get_parent()"""
    code = parse("{{a|{{b|{{d|{{e}}{{f}}}}{{g}}}}}}{{c}}")

    def first_named(wanted):
        # First template in ``code`` whose name equals ``wanted``.
        return code.filter_templates(matches=lambda n: n.name == wanted)[0]

    target = first_named("f")
    inner_parent = first_named("d")
    middle_parent = first_named("b")
    outer_parent = first_named("a")
    # Same text as the target, but a node from a separate parse.
    foreign = parse("{{f}}").get(0)

    self.assertEqual([outer_parent, middle_parent, inner_parent], code.get_ancestors(target))
    self.assertIs(inner_parent, code.get_parent(target))
    # A top-level template has no ancestors and no parent.
    self.assertEqual([], code.get_ancestors(outer_parent))
    self.assertIs(None, code.get_parent(outer_parent))
    self.assertRaises(ValueError, code.get_ancestors, foreign)
    self.assertRaises(ValueError, code.get_parent, foreign)
开发者ID:earwig,项目名称:mwparserfromhell,代码行数:15,代码来源:test_wikicode.py
示例19: section_to_request
def section_to_request(enumerated_section_tuple):
    """Build a ``Request`` from one enumerated ``(header, wikitext)`` section.

    :param enumerated_section_tuple: ``(index, (section_header,
        section_wikitext))``
    :returns: a ``Request`` with ``row_number`` (index + 1), ``title``,
        ``replies`` (number of ``(UTC)`` occurrences minus one) and the
        last editor / last bot-operator editor with their timestamps

    Signatures are found by scanning text nodes that contain ``(UTC)``:
    the last ``TIMESTAMP`` match in the node gives the time, and the
    nearest preceding node matching ``USER`` gives the user. A text node
    with no preceding user link is skipped.
    """
    enum_number, section_tuple = enumerated_section_tuple
    section_header, section_wikitext = section_tuple
    section = mwparserfromhell.parse(section_wikitext)
    r = Request()
    r.row_number = enum_number + 1
    r.title = section_header
    r.replies = unicode(section).count(u"(UTC)") - 1
    signatures = []
    for index, each_node in enumerate(section.nodes):
        if not (isinstance(each_node, mwparserfromhell.nodes.text.Text) and "(UTC)" in each_node):
            continue

        # Keep only the last timestamp-looking match in this node. Reset
        # first so a node without a match cannot reuse the match from an
        # earlier node (or hit an unbound name on the first node).
        timestamp_match = None
        for timestamp_match in TIMESTAMP.finditer(unicode(each_node)):
            pass
        if timestamp_match is None:
            timestamp = "{{unknown}}"
        else:
            try:
                timestamp = datetime.datetime.strptime(timestamp_match.group(0), SIGNATURE_TIME_FORMAT)
            except ValueError:
                timestamp = "{{unknown}}"

        # Use the last user talk page link before the timestamp. The walk
        # stops at node 0; an unbounded countdown would continue into
        # negative indexes, i.e. wrap around to nodes after the timestamp
        # and finally raise IndexError.
        user = None
        for user_index in range(index - 1, -1, -1):
            user_match = USER.search(unicode(section.get(user_index)))
            if user_match:
                user = user_match.group(1)
                break
        if user is None:
            # No user link precedes this timestamp, so there is nobody to
            # attribute it to.
            continue

        # Check for user renames/redirects
        user_page = pywikibot.Page(wiki, "User:" + user)
        if user_page.isRedirectPage():
            redirect_text = user_page.get(get_redirect=True)
            user_wikicode = mwparserfromhell.parse(redirect_text)
            redirect_link = user_wikicode.filter_wikilinks()[0]
            user = redirect_link.title.split(":")[1]
        signatures.append((user, timestamp))

    # Process usernames by removing anchors
    signatures = [(x.partition('#')[0], y) for x, y in signatures]

    # Default values for everything
    r.last_editor, r.last_edit_time = r.last_botop_editor, r.last_botop_time = "{{no result|None}}", "{{n/a}}"
    if signatures:
        r.last_editor, r.last_edit_time = signatures[-1]
        # Most recent signature by a bot operator, if any.
        for user, timestamp in reversed(signatures):
            if is_botop(wiki, user):
                r.last_botop_editor, r.last_botop_time = user, timestamp
                break
    return r
开发者ID:APerson241,项目名称:APersonBot,代码行数:48,代码来源:botreq-status.py
示例20: ensure_flagged_by_template
def ensure_flagged_by_template(wikicode, node, template_name, *template_parameters, overwrite_parameters=True):
    """
    Makes sure that ``node`` in ``wikicode`` is immediately (except for
    whitespace) followed by a template with ``template_name`` and optional
    ``template_parameters``.

    :param wikicode: a :py:class:`mwparserfromhell.wikicode.Wikicode` object
    :param node: a :py:class:`mwparserfromhell.nodes.Node` object
    :param str template_name: the name of the template flag
    :param template_parameters: optional template parameters
    :param overwrite_parameters: when exactly ``True`` an existing adjacent
        flag is replaced by the newly built one; otherwise the existing flag
        is kept as it is and returned
    :returns: the template flag, as a
        :py:class:`mwparserfromhell.nodes.template.Template` object
    """
    parent = get_parent_wikicode(wikicode, node)
    adjacent = get_adjacent_node(parent, node, ignore_whitespace=True)

    # Joining the bare name alone yields just the name, so one expression
    # covers both the parameterless and the parameterised flag.
    flag_text = "{{%s}}" % "|".join((template_name,) + template_parameters)
    flag = mwparserfromhell.parse(flag_text).nodes[0]
    assert(isinstance(flag, mwparserfromhell.nodes.Template))

    already_flagged = (
        isinstance(adjacent, mwparserfromhell.nodes.Template)
        and adjacent.name.matches(template_name)
    )
    if not already_flagged:
        wikicode.insert_after(node, flag)
    elif overwrite_parameters is True:
        wikicode.replace(adjacent, flag)
    else:
        # in case of {{Dead link}} we want to preserve the original parameters
        flag = adjacent

    assert(get_parent_wikicode(wikicode, flag) is parent)
    return flag
开发者ID:lahwaacz,项目名称:wiki-scripts,代码行数:34,代码来源:wikicode.py
注:本文中的mwparserfromhell.parse函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论