本文整理汇总了Python中pupa.scrape.Organization类的典型用法代码示例。如果您正苦于以下问题:Python Organization类的具体用法?Python Organization怎么用?Python Organization使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Organization类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: test_fix_bill_id
def test_fix_bill_id():
    """Verify that an IMPORT_TRANSFORMERS entry normalizes bill identifiers
    (e.g. 'HB1' -> 'HB 1') when a vote event referencing the bill is imported.
    """
    j = create_jurisdiction()
    j.legislative_sessions.create(name='1900', identifier='1900')
    org1 = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Test Bill ID',
                      classification='bill', chamber='lower')

    oi = OrganizationImporter('jid')
    oi.import_data([org1.as_dict()])

    from pupa.settings import IMPORT_TRANSFORMERS
    # Insert a single space between the alpha prefix and the number,
    # dropping leading zeros (count=1: only the first match is rewritten).
    IMPORT_TRANSFORMERS['bill'] = {
        'identifier': lambda x: re.sub(r'([A-Z]*)\s*0*([-\d]+)', r'\1 \2', x, 1)
    }

    bi = BillImporter('jid', oi, DumbMockImporter())
    bi.import_data([bill.as_dict()])

    # The vote event refers to the bill by its unnormalized id 'HB1'.
    ve = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
                         start_date='1900-04-02', classification='passage:bill',
                         result='fail', bill_chamber='lower', bill='HB1',
                         identifier='4',
                         bill_action='passage',
                         organization=org1._id)
    VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
        ve.as_dict(),
    ])

    # Reset the transformer so other tests are unaffected.
    IMPORT_TRANSFORMERS['bill'] = {}

    ve = VoteEvent.objects.get()
    # BUG FIX: the original line was a bare comparison with no effect;
    # it must assert that the transformer resolved 'HB1' to the 'HB 1' bill.
    assert ve.bill.identifier == 'HB 1'
开发者ID:opencivicdata,项目名称:pupa,代码行数:35,代码来源:test_vote_event_importer.py
示例2: scrape_committees
def scrape_committees(self, session):
    """Yield an Organization per committee for the given session,
    with members resolved via the committee_members API endpoint.

    :param session: session name; mapped through SESSION_KEYS to the API key.
    """
    session_key = SESSION_KEYS[session]
    committees_response = self.api_client.get('committees', session=session_key)

    # Map legislator codes -> display names for member resolution.
    legislators = index_legislators(self, session_key)

    for committee in committees_response:
        org = Organization(
            chamber={'S': 'upper', 'H': 'lower',
                     'J': 'legislature'}[committee['HouseOfAction']],
            name=committee['CommitteeName'],
            classification='committee')
        org.add_source(
            'https://olis.leg.state.or.us/liz/{session}'
            '/Committees/{committee}/Overview'.format(session=session_key,
                                                      committee=committee['CommitteeName']))

        members_response = self.api_client.get('committee_members',
                                               session=session_key,
                                               committee=committee['CommitteeCode'])
        for member in members_response:
            try:
                member_name = legislators[member['LegislatorCode']]
            except KeyError:
                # Fall back to the raw code when the legislator is unknown.
                # FIX: logger.warn() is a deprecated alias; use warning().
                logger.warning('Legislator {} not found in session {}'.format(
                    member['LegislatorCode'], session_key))
                member_name = member['LegislatorCode']
            org.add_member(member_name, role=member['Title'] if member['Title'] else '')

        yield org
开发者ID:neelneelpurk,项目名称:openstates,代码行数:29,代码来源:committees.py
示例3: get_organizations
def get_organizations(self):
    """Yield the jurisdiction's governing body plus Posts for the leader
    and each member seat, derived from the OCD division hierarchy.

    Yields Post objects first (so subclasses such as ca_on_toronto can
    intercept/modify them), then the Organization itself last.
    """
    organization = Organization(self.name, classification=self.classification)
    leader_role = styles_of_address[self.division_id]['Leader']
    # A scraper may override the member role; otherwise use the
    # jurisdiction's style of address.
    member_role = self.member_role or styles_of_address[self.division_id]['Member']
    parent = Division.get(self.division_id)
    # Don't yield posts for premiers.
    if parent._type not in ('province', 'territory'):
        # Yield posts to allow ca_on_toronto to make changes.
        post = Post(role=leader_role, label=parent.name, division_id=parent.id, organization_id=organization._id)
        yield post
    children = [child for child in parent.children() if child._type != 'place' and child._type not in self.exclude_types]
    for child in children:
        # NOTE: `and` binds tighter than `or`, so this reads as:
        #   (not skip_null_valid_from and no validFrom)
        #   OR (validFrom present AND (validFrom <= today OR validFrom == self.valid_from))
        # i.e. include divisions with no start date (unless skipped) and
        # divisions whose start date is already in effect or explicitly expected.
        if not self.skip_null_valid_from and not child.attrs.get('validFrom') or child.attrs.get('validFrom') and (child.attrs['validFrom'] <= datetime.now().strftime('%Y-%m-%d') or child.attrs['validFrom'] == self.valid_from):
            if self.use_type_id:
                # Derive a label from the type id, e.g. '.../ward:3' -> 'Ward 3'.
                label = child.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
            else:
                label = child.name
            # Yield posts to allow ca_on_toronto to make changes.
            post = Post(role=member_role, label=label, division_id=child.id, organization_id=organization._id)
            yield post
    if not children and parent.attrs['posts_count']:
        # No child divisions: fall back to numbered at-large seats.
        for i in range(1, int(parent.attrs['posts_count'])):  # exclude Mayor
            organization.add_post(role=member_role, label='{} (seat {})'.format(parent.name, i), division_id=parent.id)
    yield organization
开发者ID:opencivicdata,项目名称:scrapers-ca,代码行数:30,代码来源:utils.py
示例4: test_parent_id_resolution
def test_parent_id_resolution():
    """Importing a parent and child organization together resolves the
    child's scrape-time parent_id to the stored parent record."""
    un = ScrapeOrganization('UN', classification='international')
    unesco = ScrapeOrganization('UNESCO', classification='unknown',
                                parent_id=un._id)

    importer = OrganizationImporter('jurisdiction-id')
    importer.import_data([un.as_dict(), unesco.as_dict()])

    assert Organization.objects.count() == 2
    assert Organization.objects.get(name='UN').children.count() == 1
    assert Organization.objects.get(name='UNESCO').parent.name == 'UN'
开发者ID:johnfelipe,项目名称:pupa,代码行数:7,代码来源:test_organization_importer.py
示例5: test_deduplication_other_name_exists
def test_deduplication_other_name_exists():
    """An incoming organization matching an existing record's other name
    is deduplicated rather than inserted as a new row."""
    create_jurisdictions()
    create_org()

    incoming = ScrapeOrganization('UN', classification='international')
    OrganizationImporter('jid1').import_data([incoming.as_dict()])

    assert Organization.objects.all().count() == 1
开发者ID:opencivicdata,项目名称:pupa,代码行数:7,代码来源:test_organization_importer.py
示例6: scrape_lower_committee
def scrape_lower_committee(self, name, url):
    """Return an Organization for a lower-chamber committee page; the
    first listed member link is treated as the chair.

    :param name: committee name.
    :param url: committee roster page URL.
    """
    page = self.lxmlize(url)

    committee = Organization(chamber='lower', name=name,
                             classification="committee")
    committee.add_source(url)

    # Track member names already added so duplicate links on the page
    # don't create duplicate memberships.
    seen = set()
    member_links = self.get_nodes(
        page,
        '//div[@class="mod-inner"]//a[contains(@href, "mem")]')
    for member_link in member_links:
        member_name = member_link.text
        if member_name is None:
            continue

        # The first link on the page is the chair.
        if member_link == member_links[0]:
            member_role = 'chair'
        else:
            member_role = 'member'

        # BUG FIX: the original tested `name` (the committee name) against
        # `seen`, so the duplicate check could never match; it must test
        # the member's name.
        if member_name not in seen:
            committee.add_member(member_name, member_role)
            seen.add(member_name)

    return committee
开发者ID:sunlightlabs,项目名称:openstates,代码行数:32,代码来源:committees.py
示例7: _scrape_lower_special_committees
def _scrape_lower_special_committees(self):
    """Yield Organizations for the House special-committees page; names
    starting with 'Joint' are classified under the full legislature."""
    url = 'http://house.louisiana.gov/H_Cmtes/SpecialCommittees.aspx'
    page = self.lxmlize(url)

    accordion = page.xpath('//div[@class="accordion"]')[0]
    for heading in accordion.xpath('./h3'):
        name = self._normalize_committee_name(heading.xpath('string()').strip())
        chamber = 'legislature' if name.startswith('Joint') else 'lower'

        committee = Organization(name, chamber=chamber,
                                 classification='committee')
        committee.add_source(url)

        # Members live in the collapsible pane following each heading.
        rows = heading.xpath('./following-sibling::div[@class="pane"]'
                             '//tr[@class="linkStyle2"]')
        for row in rows:
            member = self._normalize_member_name(
                row.xpath('normalize-space(string(./th[1]))'))
            role = self._normalize_member_role(
                row.xpath('normalize-space(string(./th[2]))'))
            committee.add_member(member, role)

        yield committee
开发者ID:neelneelpurk,项目名称:openstates,代码行数:31,代码来源:committees.py
示例8: get_organizations
def get_organizations(self):
    """Yield the governing body plus a leader Post and one member Post per
    child division (or numbered seats when no child divisions exist)."""
    exclude_type_ids = getattr(self, 'exclude_type_ids', [])
    use_type_id = getattr(self, 'use_type_id', False)

    organization = Organization(self.name, classification=self.classification)
    parent = Division.get(self.division_id)

    # Provinces/territories get no leader post (premiers are excluded).
    if parent._type not in ('province', 'territory'):
        yield Post(role=styles_of_address[self.division_id]['Leader'],
                   label=parent.name,
                   division_id=parent.id,
                   organization_id=organization._id)

    children = [division for division in parent.children()
                if division._type != 'place'
                and division._type not in exclude_type_ids]
    for division in children:
        if not division:
            continue
        if use_type_id:
            # Derive a label from the type id, e.g. '.../ward:3' -> 'Ward 3'.
            label = division.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
        else:
            label = division.name
        yield Post(role=styles_of_address[self.division_id]['Member'],
                   label=label,
                   division_id=division.id,
                   organization_id=organization._id)

    if not children and parent.attrs['posts_count']:
        # No child divisions: fall back to numbered at-large seats,
        # starting at 1 to exclude the Mayor.
        for seat in range(1, int(parent.attrs['posts_count'])):
            organization.add_post(
                role=styles_of_address[self.division_id]['Member'],
                label='{} (seat {})'.format(parent.name, seat),
                division_id=parent.id)

    yield organization
开发者ID:ppival,项目名称:scrapers-ca,代码行数:27,代码来源:utils.py
示例9: add_committees
def add_committees(self, legislator_page, legislator, chamber, url):
    """Record this legislator's committee memberships, creating each
    committee Organization on first sight (cached in self.committees)."""
    # As of today, both chambers render committees with the same markup.
    rows = self.get_nodes(
        legislator_page,
        '//div[@id="ContentPlaceHolder1_TabSenator_TabCommittees"]//table/'
        'tr')
    if len(rows) == 0:
        return

    # rows[0] is the header row; skip it.
    for row in rows[1:]:
        committee_name = self.get_node(row, './td[2]').text_content().strip()
        if not committee_name:
            continue
        role = self.get_node(row, './td[3]').text_content().strip()

        if committee_name not in self.committees:
            committee = Organization(
                name=committee_name, chamber=chamber, classification='committee')
            committee.add_source(url)
            self.committees[committee_name] = committee

        self.committees[committee_name].add_member(
            legislator.name,
            role=role,
        )
开发者ID:neelneelpurk,项目名称:openstates,代码行数:30,代码来源:people.py
示例10: scrape_approp_subcommittees
def scrape_approp_subcommittees(self, url):
    """Yield appropriations subcommittees, one per <strong> heading,
    parsing member names and roles from the text after each heading."""
    doc = lxml.html.fromstring(self.get(url).text)

    for heading in doc.xpath('//strong'):
        subcommittee = Organization(
            name=heading.text.strip(),
            parent_id={
                'name': 'Appropriations',
                'classification': 'committee',
            },
            classification='committee',
        )
        subcommittee.add_source(url)

        # Names trail the heading, e.g. "Senators A, B and C (C)".
        roster = heading.getnext().tail.replace('Senators', '').strip()
        for member in re.split(', | and ', roster):
            # Role suffixes: (C) chair, (VC) vice chair, (MVC) minority
            # vice chair; the slice also drops the preceding space.
            if member.endswith('(C)'):
                role = 'chairman'
                member = member[:-4]
            elif member.endswith('(VC)'):
                role = 'vice chairman'
                member = member[:-5]
            elif member.endswith('(MVC)'):
                role = 'minority vice chairman'
                member = member[:-6]
            else:
                role = 'member'
            subcommittee.add_member(member, role=role)

        yield subcommittee
开发者ID:neelneelpurk,项目名称:openstates,代码行数:31,代码来源:committees.py
示例11: scrape_committee
def scrape_committee(self, term, href, name):
    """Yield an Organization for the committee page at `href`, skipping
    pages whose chamber cannot be determined from the URL."""
    page = lxml.html.fromstring(self.get(href).text)
    page.make_links_absolute(href)

    member_links = page.xpath("//div[@class='view-content']"
                              "//a[contains(@href, 'members')]")

    if '/joint/' in href:
        chamber = 'legislature'
    elif '/senate/' in href:
        chamber = 'upper'
    elif '/house/' in href:
        chamber = 'lower'
    else:
        # Interim committees and others were causing duplicate committee
        # issues, so they are skipped.
        self.warning('Failed to identify chamber for {}; skipping'.format(href))
        return

    committee = Organization(name, chamber=chamber, classification='committee')
    for link in member_links:
        member = link.text
        pane_title = link.xpath(
            "ancestor::div/h2[@class='pane-title']/text()")[0].strip()
        role = {"Legislative Members": "member",
                "Chairman": "chair",
                "Vice Chairman": "member"}[pane_title]
        # Skip empty links and district placeholders.
        if member is None or member.startswith("District"):
            continue
        member = member.replace('Senator ', '').replace('Representative ', '')
        committee.add_member(member, role=role)

    committee.add_source(href)
    yield committee
开发者ID:sunlightlabs,项目名称:openstates,代码行数:35,代码来源:committees.py
示例12: test_committee_add_member_person
def test_committee_add_member_person():
    """add_member with a Person object links that person's _id into the
    committee's pending related memberships."""
    committee = Organization('Defense', classification='committee')
    chairman = Person('John Adams')
    committee.add_member(chairman, role='chairman')

    membership = committee._related[0]
    assert membership.person_id == chairman._id
    assert membership.organization_id == committee._id
    assert membership.role == 'chairman'
开发者ID:influence-usa,项目名称:pupa,代码行数:7,代码来源:test_people_org_scrape.py
示例13: scrape_chamber
def scrape_chamber(self, chamber, session):
    """Yield committee Organizations for one chamber from the WA
    active-committees API; empty committees are logged and skipped."""
    url = "%s/GetActiveCommittees?biennium=%s" % (self._base_url, session)
    page = lxml.etree.fromstring(self.get(url).content)

    for node in xpath(page, "//wa:Committee"):
        agency = xpath(node, "string(wa:Agency)")
        # Only emit committees belonging to the requested chamber.
        if {'House': 'lower', 'Senate': 'upper'}[agency] != chamber:
            continue

        name = xpath(node, "string(wa:Name)")
        # comm_id = xpath(node, "string(wa:Id)")
        # acronym = xpath(node, "string(wa:Acronym)")
        phone = xpath(node, "string(wa:Phone)")

        committee = Organization(name, chamber=chamber,
                                 classification='committee')
        committee.extras['phone'] = phone
        self.scrape_members(committee, agency)
        committee.add_source(url)

        if not committee._related:
            self.warning('empty committee: %s', name)
        else:
            yield committee
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:25,代码来源:committees.py
示例14: scrape
def scrape(self):
    """Yield the Temecula City Council organization followed by one
    Person per member row, each with a council membership and email."""
    urls = Urls(dict(list=legislators_url), self)

    council = Organization(
        'Temecula City Council',
        classification='legislature')
    council.add_source(urls.list.url)
    yield council

    # Skip the header row of the roster table.
    for row in urls.list.xpath('//table[2]//tr')[1:]:
        # Parse some attributes.
        name, role = row.xpath('td/p[1]//font/text()')
        image = row.xpath('td/img/@src').pop()

        # Create legislator.
        person = Person(name, image=image)

        # Add membership on council.
        membership = person.add_membership(council, role=role)

        # Add email address; the slice drops the 7-char 'mailto:' scheme.
        email, detail_url = row.xpath('td//a/@href')
        email = email[7:]
        membership.contact_details.append(
            dict(type='email', value=email, note='work'))

        # Add sources.
        person.add_source(urls.list.url)
        person.add_source(detail_url)
        yield person
开发者ID:ChaelCodes,项目名称:scrapers-us-municipal,代码行数:32,代码来源:people.py
示例15: scrape
def scrape(self, chamber=None):
    """Yield NC standing/select committees, optionally limited to one
    chamber; committees without members are logged and skipped."""
    base_url = ('http://www.ncga.state.nc.us/gascripts/Committees/'
                'Committees.asp?bPrintable=true&sAction=ViewCommitteeType&sActionDetails=')
    chamber_slugs = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                     'lower': ['House%20Standing', 'House%20Select']}

    chambers = [chamber] if chamber else ['upper', 'lower']

    for chamber in chambers:
        for ctype in chamber_slugs[chamber]:
            listing_url = base_url + ctype
            doc = lxml.html.fromstring(self.get(listing_url).text)
            doc.make_links_absolute(listing_url)

            for link in doc.xpath('//ul/li/a'):
                name = link.text
                # Skip the Committee of the Whole Senate.
                if 'Whole Senate' in name:
                    continue
                committee_url = link.get('href')

                committee = Organization(name=name, chamber=chamber,
                                         classification="committee")
                self.scrape_committee(committee, committee_url)
                committee.add_source(committee_url)

                if not committee._related:
                    self.warning('empty committee: %s', name)
                else:
                    yield committee
开发者ID:jalbertbowden,项目名称:openstates,代码行数:31,代码来源:committees.py
示例16: scrape_reps_comm
def scrape_reps_comm(self):
    """Yield House committees from the Maine committee roster page.

    Committee names come from every other <center>/<h1>/<a> on the page;
    each committee's members come from the corresponding <ul> (tracked by
    a parallel counter, since names and lists interleave in the markup).
    """
    # As of 1/27/15, the committee page has the wrong
    # session number (126th) at the top, but
    # has newly elected people, so we're rolling with it.
    url = 'http://legislature.maine.gov/house/hsecoms.htm'
    page = self.get(url).text
    root = lxml.html.fromstring(page)

    count = 0
    # Committee headings sit in odd-numbered <center> elements (1,3,...,11).
    for n in range(1, 12, 2):
        path = 'string(//body/center[%s]/h1/a)' % (n)
        comm_name = root.xpath(path)
        committee = Organization(chamber='lower', name=comm_name, classification='committee')

        # The n-th committee's member list is the count-th <ul> on the page.
        count = count + 1
        path2 = '/html/body/ul[%s]/li/a' % (count)
        for el in root.xpath(path2):
            rep = el.text
            if rep.find('(') != -1:
                mark = rep.find('(')
                # NOTE(review): the fixed offset 15 appears to strip a
                # 'Representative ' prefix (15 chars) before the '(' —
                # TODO confirm against live page markup.
                rep = rep[15: mark].strip()
            if 'chair' in rep.lower():
                role = 'chair'
                # Strip a trailing ', Chair' / ' Chair' suffix (any case).
                rep = re.sub(r'(?i)[\s,]*chair\s*$', '', rep).strip()
            else:
                role = 'member'
            committee.add_member(rep, role)

        committee.add_source(url)
        yield committee
开发者ID:cliftonmcintosh,项目名称:openstates,代码行数:33,代码来源:committees.py
示例17: scrape_committee
def scrape_committee(self, name, url, chamber):
    """Return an Organization populated from a committee members page;
    a member's role is parsed from the parenthesized suffix on the name."""
    org = Organization(name=name, chamber=chamber, classification='committee')
    org.add_source(url)

    doc = lxml.html.fromstring(self.get(url).text)
    for member in doc.xpath('//div[@id="members"]/div[@id="members"]/p/a/text()'):
        member = member.replace('Representative ', '')
        member = member.replace('Senator ', '')
        member = member.strip()

        if ' (' in member:
            member, role = member.split(' (')
            # Order matters: check the more specific labels first, since
            # 'Chair' is a substring of 'Vice-Chair' and 'Co-Chair'.
            if 'Vice-Chair' in role:
                role = 'vice-chair'
            elif 'Co-Chair' in role:
                role = 'co-chair'
            elif 'Chair' in role:
                role = 'chair'
            else:
                raise Exception('unknown role: %s' % role)
        else:
            role = 'member'

        org.add_member(member, role)

    return org
开发者ID:neelneelpurk,项目名称:openstates,代码行数:25,代码来源:committees.py
示例18: scrape_page
def scrape_page(self, link, chamber=None):
    """Yield a committee Organization built from the roster page that
    `link` points to; members lacking an explicit role become 'member'."""
    page = self.lxmlize(link.attrib['href'])

    role_map = {
        "Chair": "chair",
        "Vice Chair": "vice-chair",
        "Vice-Chair": "vice-chair",
    }

    committee = Organization(link.text,
                             chamber=chamber,
                             classification='committee')
    committee.add_source(link.attrib['href'])

    for roster_item in page.xpath('//div[@class="members"]/' +
                                  'div[@class="roster-item"]'):
        details = roster_item.xpath('.//div[@class="member-details"]')[0]

        # This page does random weird things with whitespace in names;
        # collapse all runs into single spaces.
        name = ' '.join(details.xpath('./h4')[0].text_content().strip().split())
        if not name:
            continue

        role_node = details.xpath('./span[@class="member-role"]')
        role = role_map[role_node[0].text] if role_node else 'member'
        committee.add_member(name, role=role)

    yield committee
开发者ID:sunlightlabs,项目名称:openstates,代码行数:28,代码来源:committees.py
示例19: scrape_senate_committee
def scrape_senate_committee(self, url):
    """Yield an upper-chamber committee Organization from its page;
    each member's role is inferred from the text of their list item."""
    doc = lxml.html.fromstring(self.get(url).text)

    headers = doc.xpath('(//div[@class="row"])[2]//h1')
    assert len(headers) == 1
    # Drop the trailing ' Committee...' portion of the heading.
    name = ' '.join(headers[0].xpath('./text()'))
    name = re.sub(r'\s+Committee.*$', '', name)

    committee = Organization(chamber='upper', name=name,
                             classification='committee')

    for item in doc.xpath('(//div[@class="row"])[3]/div[1]/ul[1]/li'):
        text = item.text_content()
        member = item.xpath('./a/text()')[0].replace('Representative ', '')

        # Check the most specific phrases first.
        if 'Committee Chair' in text:
            role = 'chair'
        elif 'Minority Vice' in text:
            role = 'minority vice chair'
        elif 'Vice' in text:
            role = 'majority vice chair'
        else:
            role = 'member'

        committee.add_member(member, role=role)

    committee.add_source(url)
    yield committee
开发者ID:neelneelpurk,项目名称:openstates,代码行数:27,代码来源:committees.py
示例20: scrape
def scrape(self):
    """Yield (Person, Organization) pairs from the Missouri Ethics
    Commission campaign-finance committee search.

    Searches by each vowel (plus 'y') in the candidate-last-name field,
    then walks the paginated ASP.NET postback results.
    """
    url = 'http://www.mec.mo.gov/EthicsWeb/CampaignFinance/CF11_SearchComm.aspx'
    # Every last name contains at least one of these letters, so the six
    # searches together should cover all committees (with duplicates).
    for letter in ['a', 'e', 'i', 'o', 'u', 'y']:
        print("Searching '{}'".format(letter))
        initial = self.get(url).text
        parsed = lxml.html.fromstring(initial)

        page_n = 0
        # Seed the ASP.NET form state from the initial GET.
        data = get_form_data(parsed, first_time=True)
        data['ctl00$ContentPlaceHolder$txtCandLast'] = letter

        while True:
            page_n += 1
            print("Page: {}".format(page_n))

            # NOTE(review): the PageIndex cookie is always '1'; paging
            # appears to be driven by the postback form data instead —
            # confirm whether this cookie matters at all.
            r = self.post(url, data=data, cookies=dict(PageIndex=str(1)))
            output = lxml.html.fromstring(r.text)

            rows = output.cssselect('#ctl00_ContentPlaceHolder_grvSearch tr')
            for r in rows:  # NOTE: shadows the response variable above.
                tds = r.cssselect('td')
                # Result rows have more than 3 cells; header/pager rows don't.
                if len(tds) > 3:
                    name = tds[2].text_content().strip()
                    _registrant = Person(
                        name=name,
                        source_identified=True
                    )
                    committee_name = tds[1].text_content().strip()
                    _office = Organization(
                        name=committee_name,
                        classification='Committee',
                        # parent_id=self.jurisdiction._state,
                        source_identified=True
                    )
                    _office.add_member(
                        _registrant,
                        role='committee candidate',
                        label='candidate for {n}'.format(n=_office.name),
                    )
                    yield _registrant
                    yield _office

            # Stop when the 'next page' control disappears; otherwise
            # rebuild the postback payload from the current page.
            if not output.xpath("//*[@id='ctl00_ContentPlaceHolder_grvSearch_ctl28_lbtnNextPage']"):
                print(output.xpath("//*[@id='ctl00_ContentPlaceHolder_grvSearch_ctl28_lbtnNextPage']"))
                break
            data = get_form_data(output)
开发者ID:influence-usa,项目名称:scrapers-us-state,代码行数:60,代码来源:people.py
注:本文中的pupa.scrape.Organization类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论