Python scrape.Organization Class Code Examples


This article collects typical usage examples of the pupa.scrape.Organization class in Python. If you are wondering what the Organization class is for and how it is used in practice, the selected examples below should help.



The following presents 20 code examples of the Organization class, sorted by popularity by default. Each example is an excerpt from an open-source project, with the developer, project, and source file noted after the code.
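
Before the examples, here is a minimal sketch of the construction pattern they all share: create an Organization with a name, a chamber, and a classification, record at least one source URL, and add members (or posts) before yielding it from a scraper. The sketch is illustrative only and is not taken from the examples below; it assumes pupa is installed, and the committee name, member names, and URL are invented for the demonstration.

from pupa.scrape import Organization

# Illustrative values only; a real scraper would pull these from the page it scrapes.
committee = Organization(
    name='Appropriations',
    chamber='upper',                # 'upper', 'lower', or 'legislature'
    classification='committee',
)

# Every scraped object should record where its data came from.
committee.add_source('https://example.com/committees/appropriations')

# Members can be added by name (a string) or as a Person object (see Example 12).
committee.add_member('Jane Doe', role='chair')
committee.add_member('John Roe')    # role defaults to 'member'

# A scraper would `yield committee`; as_dict() shows the payload handed to the importers.
print(committee._id, committee.as_dict()['name'])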

Example 1: test_fix_bill_id

def test_fix_bill_id():
    j = create_jurisdiction()
    j.legislative_sessions.create(name='1900', identifier='1900')

    org1 = ScrapeOrganization(name='House', classification='lower')
    bill = ScrapeBill('HB 1', '1900', 'Test Bill ID',
                      classification='bill', chamber='lower')

    oi = OrganizationImporter('jid')

    oi.import_data([org1.as_dict()])

    from pupa.settings import IMPORT_TRANSFORMERS
    IMPORT_TRANSFORMERS['bill'] = {
        'identifier': lambda x: re.sub(r'([A-Z]*)\s*0*([-\d]+)', r'\1 \2', x, 1)
    }

    bi = BillImporter('jid', oi, DumbMockImporter())
    bi.import_data([bill.as_dict()])

    ve = ScrapeVoteEvent(legislative_session='1900', motion_text='passage',
                         start_date='1900-04-02', classification='passage:bill',
                         result='fail', bill_chamber='lower', bill='HB1',
                         identifier='4',
                         bill_action='passage',
                         organization=org1._id)

    VoteEventImporter('jid', DumbMockImporter(), oi, bi).import_data([
        ve.as_dict(),
    ])

    IMPORT_TRANSFORMERS['bill'] = {}

    ve = VoteEvent.objects.get()
    assert ve.bill.identifier == 'HB 1'
Developer: opencivicdata, Project: pupa, Lines: 35, Source: test_vote_event_importer.py


Example 2: scrape_committees

    def scrape_committees(self, session):
        session_key = SESSION_KEYS[session]
        committees_response = self.api_client.get('committees', session=session_key)

        legislators = index_legislators(self, session_key)

        for committee in committees_response:
            org = Organization(
                chamber={'S': 'upper', 'H': 'lower',
                         'J': 'legislature'}[committee['HouseOfAction']],
                name=committee['CommitteeName'],
                classification='committee')
            org.add_source(
                'https://olis.leg.state.or.us/liz/{session}'
                '/Committees/{committee}/Overview'.format(session=session_key,
                                                          committee=committee['CommitteeName']))
            members_response = self.api_client.get('committee_members',
                                                   session=session_key,
                                                   committee=committee['CommitteeCode'])
            for member in members_response:
                try:
                    member_name = legislators[member['LegislatorCode']]
                except KeyError:
                    logger.warn('Legislator {} not found in session {}'.format(
                        member['LegislatorCode'], session_key))
                    member_name = member['LegislatorCode']
                org.add_member(member_name, role=member['Title'] if member['Title'] else '')

            yield org
Developer: neelneelpurk, Project: openstates, Lines: 29, Source: committees.py


Example 3: get_organizations

    def get_organizations(self):
        organization = Organization(self.name, classification=self.classification)

        leader_role = styles_of_address[self.division_id]['Leader']
        member_role = self.member_role or styles_of_address[self.division_id]['Member']

        parent = Division.get(self.division_id)
        # Don't yield posts for premiers.
        if parent._type not in ('province', 'territory'):
            # Yield posts to allow ca_on_toronto to make changes.
            post = Post(role=leader_role, label=parent.name, division_id=parent.id, organization_id=organization._id)
            yield post

        children = [child for child in parent.children() if child._type != 'place' and child._type not in self.exclude_types]

        for child in children:
            if not self.skip_null_valid_from and not child.attrs.get('validFrom') or child.attrs.get('validFrom') and (child.attrs['validFrom'] <= datetime.now().strftime('%Y-%m-%d') or child.attrs['validFrom'] == self.valid_from):
                if self.use_type_id:
                    label = child.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
                else:
                    label = child.name
                # Yield posts to allow ca_on_toronto to make changes.
                post = Post(role=member_role, label=label, division_id=child.id, organization_id=organization._id)
                yield post

        if not children and parent.attrs['posts_count']:
            for i in range(1, int(parent.attrs['posts_count'])):  # exclude Mayor
                organization.add_post(role=member_role, label='{} (seat {})'.format(parent.name, i), division_id=parent.id)

        yield organization
Developer: opencivicdata, Project: scrapers-ca, Lines: 30, Source: utils.py


Example 4: test_parent_id_resolution

def test_parent_id_resolution():
    parent = ScrapeOrganization('UN', classification='international')
    child = ScrapeOrganization('UNESCO', classification='unknown', parent_id=parent._id)
    OrganizationImporter('jurisdiction-id').import_data([parent.as_dict(), child.as_dict()])
    assert Organization.objects.count() == 2
    assert Organization.objects.get(name='UN').children.count() == 1
    assert Organization.objects.get(name='UNESCO').parent.name == 'UN'
Developer: johnfelipe, Project: pupa, Lines: 7, Source: test_organization_importer.py


Example 5: test_deduplication_other_name_exists

def test_deduplication_other_name_exists():
    create_jurisdictions()
    create_org()
    org = ScrapeOrganization('UN', classification='international')
    od = org.as_dict()
    OrganizationImporter('jid1').import_data([od])
    assert Organization.objects.all().count() == 1
Developer: opencivicdata, Project: pupa, Lines: 7, Source: test_organization_importer.py


Example 6: scrape_lower_committee

    def scrape_lower_committee(self, name, url):
        page = self.lxmlize(url)

        committee = Organization(chamber='lower', name=name,
                                 classification="committee")
        committee.add_source(url)

        seen = set()

        member_links = self.get_nodes(
            page,
            '//div[@class="mod-inner"]//a[contains(@href, "mem")]')

        for member_link in member_links:
            member_name = None
            member_role = None

            member_name = member_link.text
            if member_name is None:
                continue

            # Figure out if this person is the chair.
            if member_link == member_links[0]:
                member_role = 'chair'
            else:
                member_role = 'member'

            if member_name not in seen:
                committee.add_member(member_name, member_role)
                seen.add(member_name)

        return committee
Developer: sunlightlabs, Project: openstates, Lines: 32, Source: committees.py


Example 7: _scrape_lower_special_committees

    def _scrape_lower_special_committees(self):
        url = 'http://house.louisiana.gov/H_Cmtes/SpecialCommittees.aspx'
        page = self.lxmlize(url)

        committee_list = page.xpath('//div[@class="accordion"]')[0]

        headers = committee_list.xpath('./h3')

        for header in headers:
            committee_name_text = header.xpath('string()')
            committee_name = committee_name_text.strip()
            committee_name = self._normalize_committee_name(committee_name)

            chamber = 'legislature' if committee_name.startswith('Joint') else 'lower'

            committee = Organization(committee_name, chamber=chamber,
                                     classification='committee')
            committee.add_source(url)

            committee_memberlist = header.xpath('./following-sibling::div[@class="pane"]'
                                                '//tr[@class="linkStyle2"]')

            for row in committee_memberlist:
                member_name = row.xpath('normalize-space(string(./th[1]))')
                member_name = self._normalize_member_name(member_name)
                member_role = row.xpath('normalize-space(string(./th[2]))')
                member_role = self._normalize_member_role(member_role)

                committee.add_member(member_name, member_role)

            yield committee
Developer: neelneelpurk, Project: openstates, Lines: 31, Source: committees.py


Example 8: get_organizations

    def get_organizations(self):
        exclude_type_ids = getattr(self, 'exclude_type_ids', [])
        use_type_id = getattr(self, 'use_type_id', False)

        organization = Organization(self.name, classification=self.classification)

        parent = Division.get(self.division_id)
        if parent._type not in ('province', 'territory'):
            post = Post(role=styles_of_address[self.division_id]['Leader'], label=parent.name, division_id=parent.id, organization_id=organization._id)
            yield post

        children = [child for child in parent.children() if child._type != 'place' and child._type not in exclude_type_ids]

        for child in children:
            if child:
                if use_type_id:
                    label = child.id.rsplit('/', 1)[1].capitalize().replace(':', ' ')
                else:
                    label = child.name
                post = Post(role=styles_of_address[self.division_id]['Member'], label=label, division_id=child.id, organization_id=organization._id)
                yield post

        if not children and parent.attrs['posts_count']:
            for i in range(1, int(parent.attrs['posts_count'])):  # exclude Mayor
                organization.add_post(role=styles_of_address[self.division_id]['Member'], label='{} (seat {})'.format(parent.name, i), division_id=parent.id)

        yield organization
Developer: ppival, Project: scrapers-ca, Lines: 27, Source: utils.py


Example 9: add_committees

    def add_committees(self, legislator_page, legislator, chamber, url):
        # as of today, both chambers do committees the same way! Yay!
        rows = self.get_nodes(
            legislator_page,
            '//div[@id="ContentPlaceHolder1_TabSenator_TabCommittees"]//table/'
            'tr')

        if len(rows) == 0:
            return

        for row in rows[1:]:
            committee_name_text = self.get_node(row, './td[2]').text_content()
            committee_name = committee_name_text.strip()

            if not committee_name:
                continue

            role_text = self.get_node(row, './td[3]').text_content()
            role = role_text.strip()

            if committee_name not in self.committees:
                comm = Organization(
                    name=committee_name, chamber=chamber, classification='committee')
                comm.add_source(url)
                self.committees[committee_name] = comm

            self.committees[committee_name].add_member(
                legislator.name,
                role=role,
            )
Developer: neelneelpurk, Project: openstates, Lines: 30, Source: people.py


Example 10: scrape_approp_subcommittees

    def scrape_approp_subcommittees(self, url):
        html = self.get(url).text
        doc = lxml.html.fromstring(html)

        for strong in doc.xpath('//strong'):
            com = Organization(
                name=strong.text.strip(),
                parent_id={
                    'name': 'Appropriations',
                    'classification': 'committee',
                },
                classification='committee',
            )
            com.add_source(url)

            legislators = strong.getnext().tail.replace('Senators', '').strip()
            for leg in re.split(', | and ', legislators):
                if leg.endswith('(C)'):
                    role = 'chairman'
                    leg = leg[:-4]
                elif leg.endswith('(VC)'):
                    role = 'vice chairman'
                    leg = leg[:-5]
                elif leg.endswith('(MVC)'):
                    role = 'minority vice chairman'
                    leg = leg[:-6]
                else:
                    role = 'member'
                com.add_member(leg, role=role)

            yield com
Developer: neelneelpurk, Project: openstates, Lines: 31, Source: committees.py


Example 11: scrape_committee

    def scrape_committee(self, term, href, name):
        page = self.get(href).text
        page = lxml.html.fromstring(page)
        page.make_links_absolute(href)
        members = page.xpath("//div[@class='view-content']"
                             "//a[contains(@href, 'members')]")

        if '/joint/' in href:
            chamber = 'legislature'
        elif '/senate/' in href:
            chamber = 'upper'
        elif '/house/' in href:
            chamber = 'lower'
        else:
            # interim committees and others were causing duplicate committee issues, skipping
            self.warning('Failed to identify chamber for {}; skipping'.format(href))
            return

        cttie = Organization(name, chamber=chamber, classification='committee')
        for a in members:
            member = a.text
            role = a.xpath("ancestor::div/h2[@class='pane-title']/text()")[0].strip()
            role = {"Legislative Members": "member",
                    "Chairman": "chair",
                    "Vice Chairman": "member"}[role]

            if member is None or member.startswith("District"):
                continue

            member = member.replace('Senator ', '').replace('Representative ', '')

            cttie.add_member(member, role=role)

        cttie.add_source(href)
        yield cttie
Developer: sunlightlabs, Project: openstates, Lines: 35, Source: committees.py


Example 12: test_committee_add_member_person

def test_committee_add_member_person():
    c = Organization('Defense', classification='committee')
    p = Person('John Adams')
    c.add_member(p, role='chairman')
    assert c._related[0].person_id == p._id
    assert c._related[0].organization_id == c._id
    assert c._related[0].role == 'chairman'
Developer: influence-usa, Project: pupa, Lines: 7, Source: test_people_org_scrape.py


Example 13: scrape_chamber

    def scrape_chamber(self, chamber, session):

        url = "%s/GetActiveCommittees?biennium=%s" % (self._base_url, session)
        page = self.get(url)
        page = lxml.etree.fromstring(page.content)

        for comm in xpath(page, "//wa:Committee"):
            agency = xpath(comm, "string(wa:Agency)")
            comm_chamber = {'House': 'lower', 'Senate': 'upper'}[agency]
            if comm_chamber != chamber:
                continue

            name = xpath(comm, "string(wa:Name)")
            # comm_id = xpath(comm, "string(wa:Id)")
            # acronym = xpath(comm, "string(wa:Acronym)")
            phone = xpath(comm, "string(wa:Phone)")

            comm = Organization(name, chamber=chamber, classification='committee')
            comm.extras['phone'] = phone
            self.scrape_members(comm, agency)
            comm.add_source(url)
            if not comm._related:
                self.warning('empty committee: %s', name)
            else:
                yield comm
Developer: cliftonmcintosh, Project: openstates, Lines: 25, Source: committees.py


Example 14: scrape

    def scrape(self):
        urls = Urls(dict(list=legislators_url), self)

        council = Organization(
            'Temecula City Council',
            classification='legislature')
        council.add_source(urls.list.url)
        yield council

        for tr in urls.list.xpath('//table[2]//tr')[1:]:

            # Parse some attributes.
            name, role = tr.xpath('td/p[1]//font/text()')
            image = tr.xpath('td/img/@src').pop()

            # Create legislator.
            person = Person(name, image=image)

            # Add membership on council.
            memb = person.add_membership(council, role=role)

            # Add email address.
            email, detail_url = tr.xpath('td//a/@href')
            email = email[7:]
            memb.contact_details.append(
                dict(type='email', value=email, note='work'))

            # Add sources.
            person.add_source(urls.list.url)
            person.add_source(detail_url)

            yield person
Developer: ChaelCodes, Project: scrapers-us-municipal, Lines: 32, Source: people.py


Example 15: scrape

    def scrape(self, chamber=None):
        base_url = ('http://www.ncga.state.nc.us/gascripts/Committees/'
                    'Committees.asp?bPrintable=true&sAction=ViewCommitteeType&sActionDetails=')

        chamber_slugs = {'upper': ['Senate%20Standing', 'Senate%20Select'],
                         'lower': ['House%20Standing', 'House%20Select']}

        if chamber:
            chambers = [chamber]
        else:
            chambers = ['upper', 'lower']

        for chamber in chambers:
            for ctype in chamber_slugs[chamber]:
                data = self.get(base_url + ctype).text
                doc = lxml.html.fromstring(data)
                doc.make_links_absolute(base_url + ctype)
                for comm in doc.xpath('//ul/li/a'):
                    name = comm.text
                    # skip committee of whole Senate
                    if 'Whole Senate' in name:
                        continue
                    url = comm.get('href')
                    committee = Organization(name=name, chamber=chamber,
                                             classification="committee")
                    self.scrape_committee(committee, url)
                    committee.add_source(url)
                    if not committee._related:
                        self.warning('empty committee: %s', name)
                    else:
                        yield committee
Developer: jalbertbowden, Project: openstates, Lines: 31, Source: committees.py


Example 16: scrape_reps_comm

    def scrape_reps_comm(self):
        # As of 1/27/15, the committee page has the wrong
        # session number (126th) at the top, but
        # has newly elected people, so we're rolling with it.

        url = 'http://legislature.maine.gov/house/hsecoms.htm'
        page = self.get(url).text
        root = lxml.html.fromstring(page)

        count = 0

        for n in range(1, 12, 2):
            path = 'string(//body/center[%s]/h1/a)' % (n)
            comm_name = root.xpath(path)
            committee = Organization(chamber='lower', name=comm_name, classification='committee')
            count = count + 1

            path2 = '/html/body/ul[%s]/li/a' % (count)

            for el in root.xpath(path2):
                rep = el.text
                if rep.find('(') != -1:
                    mark = rep.find('(')
                    rep = rep[15: mark].strip()
                if 'chair' in rep.lower():
                    role = 'chair'
                    rep = re.sub(r'(?i)[\s,]*chair\s*$', '', rep).strip()
                else:
                    role = 'member'
                committee.add_member(rep, role)
            committee.add_source(url)

            yield committee
Developer: cliftonmcintosh, Project: openstates, Lines: 33, Source: committees.py


Example 17: scrape_committee

    def scrape_committee(self, name, url, chamber):
        org = Organization(name=name, chamber=chamber, classification='committee')
        org.add_source(url)
        data = self.get(url).text
        doc = lxml.html.fromstring(data)

        for leg in doc.xpath('//div[@id="members"]/div[@id="members"]/p/a/text()'):
            leg = leg.replace('Representative ', '')
            leg = leg.replace('Senator ', '')
            leg = leg.strip()
            if ' (' in leg:
                leg, role = leg.split(' (')
                if 'Vice-Chair' in role:
                    role = 'vice-chair'
                elif 'Co-Chair' in role:
                    role = 'co-chair'
                elif 'Chair' in role:
                    role = 'chair'
                else:
                    raise Exception('unknown role: %s' % role)
            else:
                role = 'member'
            org.add_member(leg, role)

        return org
Developer: neelneelpurk, Project: openstates, Lines: 25, Source: committees.py


Example 18: scrape_page

    def scrape_page(self, link, chamber=None):
        page = self.lxmlize(link.attrib['href'])
        comName = link.text
        roles = {
            "Chair": "chair",
            "Vice Chair": "vice-chair",
            "Vice-Chair": "vice-chair",
        }
        committee = Organization(comName,
                                 chamber=chamber,
                                 classification='committee')
        committee.add_source(link.attrib['href'])

        for member in page.xpath('//div[@class="members"]/' +
                                 'div[@class="roster-item"]'):
            details = member.xpath('.//div[@class="member-details"]')[0]
            person = details.xpath('./h4')[0].text_content()
            # This page does random weird things with whitepace to names
            person = ' '.join(person.strip().split())
            if not person:
                continue
            role = details.xpath('./span[@class="member-role"]')
            if role:
                role = roles[role[0].text]
            else:
                role = 'member'
            committee.add_member(person, role=role)
        yield committee
Developer: sunlightlabs, Project: openstates, Lines: 28, Source: committees.py


Example 19: scrape_senate_committee

    def scrape_senate_committee(self, url):
        html = self.get(url).text
        doc = lxml.html.fromstring(html)

        headers = doc.xpath('(//div[@class="row"])[2]//h1')
        assert len(headers) == 1
        name = ' '.join(headers[0].xpath('./text()'))
        name = re.sub(r'\s+Committee.*$', '', name)

        com = Organization(chamber='upper', name=name, classification='committee')

        for member in doc.xpath('(//div[@class="row"])[3]/div[1]/ul[1]/li'):
            text = member.text_content()
            member_name = member.xpath('./a/text()')[0].replace('Representative ', '')
            if 'Committee Chair' in text:
                role = 'chair'
            elif 'Minority Vice' in text:
                role = 'minority vice chair'
            elif 'Vice' in text:
                role = 'majority vice chair'
            else:
                role = 'member'

            com.add_member(member_name, role=role)

        com.add_source(url)
        yield com
Developer: neelneelpurk, Project: openstates, Lines: 27, Source: committees.py


Example 20: scrape

    def scrape(self):
        url = 'http://www.mec.mo.gov/EthicsWeb/CampaignFinance/CF11_SearchComm.aspx'

        for letter in ['a', 'e', 'i', 'o', 'u', 'y']:

            print("Searching '{}'".format(letter))
            initial = self.get(url).text
            parsed = lxml.html.fromstring(initial)

            page_n = 0

            data = get_form_data(parsed, first_time=True)
            data['ctl00$ContentPlaceHolder$txtCandLast'] = letter

            while True:
                page_n += 1

                print("Page: {}".format(page_n))

                r = self.post(url, data=data, cookies=dict(PageIndex=str(1)))

                output = lxml.html.fromstring(r.text)

                rows = output.cssselect('#ctl00_ContentPlaceHolder_grvSearch tr')

                for r in rows:
                    tds = r.cssselect('td')
                    if len(tds) > 3:

                        name = tds[2].text_content().strip()

                        _registrant = Person(
                            name=name,
                            source_identified=True
                        )

                        committee_name = tds[1].text_content().strip()
                        _office = Organization(
                            name=committee_name,
                            classification='Committee',
                            # parent_id=self.jurisdiction._state,
                            source_identified=True
                        )

                        _office.add_member(
                            _registrant,
                            role='committee candidate',
                            label='candidate for {n}'.format(n=_office.name),
                        )

                        yield _registrant
                        yield _office

                if not output.xpath("//*[@id='ctl00_ContentPlaceHolder_grvSearch_ctl28_lbtnNextPage']"):
                    print(output.xpath("//*[@id='ctl00_ContentPlaceHolder_grvSearch_ctl28_lbtnNextPage']"))
                    break

                data = get_form_data(output)
Developer: influence-usa, Project: scrapers-us-state, Lines: 60, Source: people.py



Note: The pupa.scrape.Organization class examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other source-code and documentation hosting platforms. The snippets are selected from open-source projects contributed by their respective authors, and copyright remains with those authors; refer to each project's license before distributing or using the code, and do not republish without permission.

