Python scrape.Event Class Code Examples


This article collects typical usage examples of the pupa.scrape.Event class in Python. If you are unsure how the Event class is used in practice, or are looking for concrete Event examples to work from, the curated class code samples below should help.



Twenty code examples of the Event class are shown below, ordered by popularity by default; a minimal orientation sketch precedes Example 1.
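
As a hedged orientation sketch (not taken from any of the projects below): construct the Event, attach at least one source, and return or yield it from a scraper. The URL and field values here are placeholders, and the exact constructor keywords vary between pupa versions, as the examples below illustrate.

import datetime

from pupa.scrape import Event


def minimal_event():
    # The fields used by nearly every example below: a display name,
    # a start date/time, and a human-readable location.
    e = Event(
        name="Committee Meeting",
        start_date=datetime.datetime(2014, 7, 4, 9, 0),  # real scrapers localize this first
        location_name="State Capitol",
    )
    # Every pupa object must carry at least one source URL.
    e.add_source("http://example.com/agenda")  # placeholder URL
    return e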

Example 1: scrape_upper

    def scrape_upper(self):
        url = "http://www.oksenate.gov/Committees/meetingnotices.htm"
        page = lxml.html.fromstring(self.get(url).text)
        page.make_links_absolute(url)

        text = page.text_content()
        _, text = text.split('MEETING NOTICES')
        re_date = r'[A-Z][a-z]+,\s+[A-Z][a-z]+ \d+, \d{4}'
        chunks = zip(re.finditer(re_date, text), re.split(re_date, text)[1:])

        for match, data in chunks:
            when = match.group()
            when = datetime.datetime.strptime(when, "%A, %B %d, %Y")

            # wrap in list() so we can index below (filter() returns an
            # iterator on Python 3)
            lines = list(filter(None, [x.strip() for x in data.splitlines()]))
            # \x96 is a Windows-1252 en dash in the source page
            time_ = re.search(r'^\s*TIME:\s+(.+?)\s+\x96', data, re.M).group(1)
            time_ = time_.replace('a.m.', 'AM').replace('p.m.', 'PM')
            time_ = time.strptime(time_, '%I:%M %p')
            when += datetime.timedelta(hours=time_.tm_hour, minutes=time_.tm_min)

            title = lines[0]

            where = re.search(r'^\s*PLACE:\s+(.+)', data, re.M).group(1)
            where = where.strip()

            event = Event(name=title,
                          start_date=self._tz.localize(when),
                          location_name=where)
            event.add_source(url)

            yield event
Developer: neelneelpurk, Project: openstates, Lines: 31, Source: events.py


Example 2: scrape

    def scrape(self):
        for page in self.eventPages(EVENTSPAGE):
            events_table = page.xpath("//table[@class='rgMasterTable']")[0]
            for events, headers, rows in self.parseDataTable(events_table) :
                print(events)
                location_string = events[u'Meeting\xa0Location']
                location_list = location_string.split('--')
                location = ', '.join(location_list[0:2])

                status_string = location_list[-1].split('Chicago, Illinois')
                if len(status_string) > 1 and status_string[1] :
                    status = status_string[1].lower()
                    if status not in ['cancelled', 'tentative', 'confirmed', 'passed'] :
                        print(status)
                        status = 'confirmed'
                else :
                    status = 'confirmed'



                when = events[u'Meeting\xa0Date']
                time_string = events[u'Meeting\xa0Time']
                event_time = datetime.datetime.strptime(time_string,
                                                        "%I:%M %p")
                # carry over the minutes as well as the hour
                when = when.replace(hour=event_time.hour,
                                    minute=event_time.minute)

                e = Event(name=events["Name"]["label"],
                          when=when,
                          location=location,
                          status=status)
                e.add_source(EVENTSPAGE)
                if events['Video'] != u'Not\xa0available' :
                    print(events['Video'])

                yield e
Developer: ChaelCodes, Project: scrapers-us-municipal, Lines: 35, Source: events.py
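
Note that this example (like Examples 10 and 20) targets an older pupa Event signature with `when=`, `location=`, and `status=` keywords, while most of the other examples use the current names `start_date=` and `location_name=`. As a hedged sketch with placeholder values, assuming the current signature still accepts `status=` as the older one did, the same construction against the newer keyword names would look like this:

import datetime

from pupa.scrape import Event

when = datetime.datetime(2015, 6, 17, 10, 0)
e = Event(
    name="Committee on Finance",          # placeholder committee name
    start_date=when,                      # `when=` in the older signature
    location_name="City Hall, Room 201",  # `location=` in the older signature
    status="confirmed",
)
e.add_source("http://example.com/events")  # placeholder URL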


Example 3: scrape_committee_events

    def scrape_committee_events(self, code, name):
        events_url = \
                'http://www.cga.ct.gov/basin/fullcalendar/commevents.php?' \
                'comm_code={}'.format(code)
        events_data = self.get(events_url).text
        events = json.loads(events_data)

        DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
        for info in events:

            if info['title'] is None:
                self.warning("Event found with no title; it will be skipped")
                continue
            elif info['title'].startswith('CANCELLED:'):
                self.info("Cancelled event found; it will be skipped: {}".
                          format(info['title']))
                continue

            when = datetime.datetime.strptime(info['start'], DATETIME_FORMAT)
            # end = datetime.datetime.strptime(info['end'], DATETIME_FORMAT)
            where = "{0} {1}".format(info['building'].strip(), info['location'].strip())
            # end_time=self._tz.localize(end),
            event = Event(start_time=self._tz.localize(when),
                          timezone=self._tz.zone,
                          location_name=where,
                          name=info['title'],
                          description=info['title'],)
            event.add_source(events_url)

            yield event
Developer: cliftonmcintosh, Project: openstates, Lines: 30, Source: events.py


Example 4: scrape_meetings

    def scrape_meetings(self, meetings, group):
        """
        Scrape and save event data from a list of meetings.

        Arguments:
        meetings -- A list of lxml elements containing event information
        group -- The type of meeting. The legislature site applies
                 different formatting to events based on which group
                 they correspond to.  `group` should be one of the
                 following strings: 'house', 'senate', or 'commission'.

        """
        for meeting in meetings:
            when = self.get_date(meeting)
            description = self.get_description(meeting)
            location = self.get_location(meeting)

            if when and description and location:
                event = Event(name=description, start_date=when.replace(tzinfo=self.tz),
                              description=description,
                              location_name=location)
                agenda = self.get_agenda(meeting)
                if agenda:
                    event.add_agenda_item(agenda)
                # NOTE: `url` is not defined in this method; it must come
                # from the enclosing scraper's scope for this to run
                event.add_source(url)
                yield event
Developer: sunlightlabs, Project: openstates, Lines: 26, Source: events.py


Example 5: event_obj

def event_obj():
    e = Event(
        name="get-together",
        start_date=datetime.datetime.utcnow().isoformat().split('.')[0] + 'Z',
        location_name="Joe's Place",
    )
    e.add_source(url='http://example.com/foobar')
    return e
Developer: opencivicdata, Project: pupa, Lines: 8, Source: test_event_scrape.py


Example 6: ge

def ge():
    event = ScrapeEvent(
        name="America's Birthday",
        start_time="2014-07-04T05:00Z",
        location_name="America",
        timezone="America/New_York",
        all_day=True)
    event.add_person("George Washington")
    return event
Developer: anukat2015, Project: pupa, Lines: 9, Source: test_event_importer.py


Example 7: event_obj

def event_obj():
    e = Event(
        name="get-together",
        start_time=datetime.datetime.utcnow(),
        location_name="Joe's Place",
        timezone="America/New_York",
    )
    e.add_source(url='foobar')
    return e
Developer: anukat2015, Project: pupa, Lines: 9, Source: test_event_scrape.py


Example 8: scrape

    def scrape(self, chamber=None):
        URL = 'http://utahlegislature.granicus.com/ViewPublisherRSS.php?view_id=2&mode=agendas'
        doc = self.lxmlize(URL)
        events = doc.xpath('//item')

        for info in events:
            title_and_date = info.xpath('title/text()')[0].split(" - ")
            title = title_and_date[0]
            when = title_and_date[-1]
            # if not when.endswith(session[ :len("20XX")]):
            #    continue

            event = Event(name=title,
                          start_date=self._tz.localize(datetime.datetime.strptime(when,
                                                                                  '%b %d, %Y')),
                          location_name='State Capitol'
                          )
            event.add_source(URL)

            url = re.search(r'(http://.*?)\s', info.text_content()).group(1)
            try:
                doc = self.lxmlize(url)
            except HTTPError:
                self.logger.warning("Page missing, skipping")
                continue
            event.add_source(url)

            committee = doc.xpath('//a[text()="View committee page"]/@href')
            if committee:
                committee_doc = self.lxmlize(committee[0])
                committee_name = committee_doc.xpath(
                        '//h3[@class="heading committee"]/text()')[0].strip()
                event.add_participant(committee_name, type='committee',
                                      note='host')

            documents = doc.xpath('.//td')
            for document in documents:
                onclick = document.xpath('@onclick')
                if not onclick:
                    continue  # not every cell carries a document link
                url = re.search(r'(http://.*?pdf)', onclick[0])
                if url is None:
                    continue
                url = url.group(1)
                event.add_document(
                        note=document.xpath('text()')[0],
                        url=url,
                        media_type='application/pdf'
                        )
                bills = document.xpath('@onclick')
                for bill in bills:
                    if "bills/static" in bill:
                        bill_name = bill.split("/")[-1].split(".")[0]
                        item = event.add_agenda_item('Bill up for discussion')
                        item.add_bill(bill_name)
            yield event
Developer: neelneelpurk, Project: openstates, Lines: 53, Source: events.py


Example 9: scrape

    def scrape(self):
        page = self.lxmlize(calurl)
        events = page.xpath("//table[@class='agenda-body']//tr")[1:]

        for event in events:
            comit_url = event.xpath(
                ".//a[contains(@href, '/Pages/comm-info.aspx?c=')]")

            if len(comit_url) != 1:
                raise Exception("expected exactly one committee link per row")

            comit_url = comit_url[0]
            who = self.scrape_participants(comit_url.attrib['href'])

            tds = event.xpath("./*")
            date = tds[0].text_content().strip()
            cttie = tds[1].text_content().strip()
            _chamber, cttie = [x.strip() for x in cttie.split(" - ", 1)]
            info = tds[2]
            name = info.xpath("./a[contains(@href, 'raw')]")[0]
            notice = name.attrib['href']
            name = name.text
            time, where = info.xpath("./i/text()")
            what = tds[3].text_content()
            what = what.replace("Items: ", "")
            if "(None)" in what:
                continue
            what = [x.strip() for x in what.split(";")]

            when = ", ".join([date, str(dt.datetime.now().year), time])
            when = dt.datetime.strptime(when, "%a %b %d, %Y, %I:%M %p")

            event = Event(
                name=name,
                location_name=where,
                start_date=self._tz.localize(when),
            )

            event.add_source(calurl)

            event.add_committee(cttie, note='host')

            event.add_document("notice", notice, media_type='application/pdf')

            for entry in what:
                item = event.add_agenda_item(entry)
                if entry.startswith('AB') or entry.startswith('SB'):
                    item.add_bill(entry)

            for thing in who:
                event.add_person(thing['name'])

            yield event
Developer: sunlightlabs, Project: openstates, Lines: 53, Source: events.py


Example 10: scrape_event_page

    def scrape_event_page(self, event):
        url = event.attrib['href']
        page = self.lxmlize(url)
        title = page.xpath("//h2[@class='evlist_header']")
        title = title[0].text.strip() if title else None
        if title is None:
            return
        if "CANCELED" in title:
            return

        info = page.xpath("//div[@style='position:relative;margin-right:40px;']")[0]
        blocks = info.xpath(".//div")
        ret = {}
        for block in blocks:
            els = block.xpath("./*")
            if not els:
                continue
            le = els[0]

            if le.tag != 'label':
                continue

            label, div = els

            ltex = label.text_content().strip()
            dtex = div.text_content().strip()
            ret[ltex] = dtex

        date, start, end = (x.strip() for x in ret['When:'].split("\n"))
        start = re.sub("^@", "", start).strip()
        end = end.replace("-", "").strip()

        replace = [
            ('Apr', 'April'),
        ]

        skip = ["Occurs every"]

        for k, v in replace:
            date = date.replace(k, v).strip()

        if any(x in end for x in skip):
            return

        start = "%s %s" % (date, start)
        end = "%s %s" % (date, end)
        start, end = (dt.datetime.strptime(x, "%B %d, %Y %I:%M %p") for x in (start, end))

        event = Event(name=title, location=ret['Where:'], when=start, end=end)
        event.add_source(url)
        yield event
Developer: ChaelCodes, Project: scrapers-us-municipal, Lines: 52, Source: events.py


Example 11: scrape_chamber

    def scrape_chamber(self, chamber):
        url = utils.urls['events'][chamber]
        page = self.get(url).text
        page = lxml.html.fromstring(page)
        page.make_links_absolute(url)

        for table in page.xpath('//table[@class="CMS-MeetingDetail-CurrMeeting"]'):
            date_string = table.xpath('ancestor::div[@class="CMS-MeetingDetail"]/div/a/@name')[0]
            for row in table.xpath('tr'):
                time_string = row.xpath('td[@class="CMS-MeetingDetail-Time"]/text()')[0].strip()
                description = row.xpath(
                    'td[@class="CMS-MeetingDetail-Agenda"]/div/div'
                )[-1].text_content().strip()
                location = row.xpath(
                    'td[@class="CMS-MeetingDetail-Location"]'
                )[0].text_content().strip()
                committees = row.xpath('.//div[@class="CMS-MeetingDetail-Agenda-CommitteeName"]/a')
                bills = row.xpath('.//a[contains(@href, "billinfo")]')

                try:
                    start_time = datetime.datetime.strptime(
                        '{} {}'.format(date_string, time_string),
                        '%m/%d/%Y %I:%M %p',
                    )
                except ValueError:
                    break

                event = Event(
                    name=description,
                    start_time=self._tz.localize(start_time),
                    location_name=location,
                    timezone=self._tz.zone,
                )
                event.add_source(url)

                if bills or committees:
                    item = event.add_agenda_item(description)
                    for bill in bills:
                        parsed = urllib.parse.urlparse(bill.get('href'))
                        qs = urllib.parse.parse_qs(parsed.query)
                        # parse_qs maps each key to a list, so take the first
                        # value before formatting the bill identifier
                        item.add_bill('{}{} {}'.format(
                            qs['body'][0], qs['type'][0], qs['bn'][0]))
                    for committee in committees:
                        parsed = urllib.parse.urlparse(committee.get('href'))
                        qs = urllib.parse.parse_qs(parsed.query)
                        item.add_committee(
                            re.sub(r' \([S|H]\)$', '', committee.text),
                            id=qs.get('Code', [None])[0],
                        )

                yield event
Developer: cliftonmcintosh, Project: openstates, Lines: 50, Source: events.py


Example 12: categorize_data

    def categorize_data(self, csv_data):
        return_objs = []
        Contribution = namedtuple('Contribution', self.csv_header_row.replace(' ', '_'))
        for line in csv_data.split('\n'):  # explicitly splitting on newlines because otherwise this fails in the single-line case
            if not line:
                continue

            # cur_obj will be the person or organization that made the contribution
            cur_obj = None
            contribution = Contribution(*line.split(','))
            
            if contribution.Contributor_Type in self.business_contribution_types:
                cur_obj = Organization(contribution.Contributor_Name)
            elif contribution.Contributor_Type in self.individual_contribution_types:
                cur_obj = Person(contribution.Contributor_Name)
            elif contribution.Contributor_Type == 'Unknown/Anonymous':
                if contribution.Contributor_Name: #ignoring un-named contributors
                    #these look like catch-all business contributions
                    cur_obj = Organization(contribution.Contributor_Name)
            if cur_obj:
                # we don't set cur_obj in the event that there was an
                # anonymous/unknown contribution without a Contributor_Name,
                # so we need to check that it exists before adding to it
                cur_obj.add_source(url=self.search_url)
                cur_obj.source_identified = True
                if contribution.Contributor_Address:
                    cur_obj.add_contact_detail(type='address', value=contribution.Contributor_Address)
                if contribution.Employer_Name:
                    cur_obj.extras['Employer'] = contribution.Employer_Name
                if contribution.Employer_Occupation:
                    cur_obj.extras['Occupation'] = contribution.Employer_Occupation
                
                # recipient_obj is the organization that received the contribution
                recipient_obj = Organization(contribution.Receiving_Committee)
                recipient_obj.extras['Office'] = contribution.Office
                recipient_obj.extras['Filing Period'] = contribution.Filing_Period
                recipient_obj.extras['Fundtype'] = contribution.Fundtype

                # the transaction is the event linking the donor and recipient;
                # 'EST' and 'Maryland' because this scraper covers MD
                transaction = Event('Contribution', contribution.Contribution_Date, 'EST', 'Maryland')
                transaction.extras['Contribution Amount'] = contribution.Contribution_Amount
                transaction.extras['Contribution Type'] = contribution.Contribution_Type
                transaction.add_source(url=self.search_url)
                #transaction.source_identified = True
                transaction.participants.append(cur_obj.as_dict())
                transaction.participants.append(recipient_obj.as_dict())
                yield (cur_obj, recipient_obj, transaction)
            else:
                yield []
Developer: AshleyTemple, Project: scrapers-us-state, Lines: 49, Source: contributions.py
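
This example (and Example 15 below) splits each CSV line on a bare comma, which silently mis-parses any quoted field that itself contains a comma. As a hedged alternative sketch, the standard csv module handles quoting; the namedtuple pattern from the example is kept, and the helper name here is an assumption for illustration:

import csv
import io
from collections import namedtuple


def iter_contributions(csv_data, header_row):
    """Yield one namedtuple per CSV record, honoring quoted fields."""
    Contribution = namedtuple('Contribution', header_row.replace(' ', '_'))
    # csv.reader copes with quoted fields and embedded commas,
    # unlike a plain str.split(',')
    for row in csv.reader(io.StringIO(csv_data)):
        if not row:
            continue  # skip blank lines
        yield Contribution(*row)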


Example 13: scrape

    def scrape(self, session=None, chamber=None):
        if not session:
            session = self.latest_session()
            self.info('no session specified, using %s', session)

        url = "ftp://www.arkleg.state.ar.us/dfadooas/ScheduledMeetings.txt"
        page = self.get(url)
        page = csv.reader(StringIO(page.text), delimiter='|')

        for row in page:
            # Deal with embedded newline characters, which cause fake new rows
            LINE_LENGTH = 11
            while len(row) < LINE_LENGTH:
                row += next(page)

            desc = row[7].strip()

            match = re.match(r'^(.*)- (HOUSE|SENATE)$', desc)
            if match:

                comm = match.group(1).strip()
                comm = re.sub(r'\s+', ' ', comm)
                location = row[5].strip() or 'Unknown'
                when = datetime.datetime.strptime(row[2], '%Y-%m-%d %H:%M:%S')
                when = self._tz.localize(when)
                # Only assign events to a session if they are in the same year
                # Given that session metadata have some overlap and
                # missing end dates, this is the best option available
                session_year = int(session[:4])
                if session_year != when.year:
                    continue

                description = "%s MEETING" % comm
                event = Event(
                        name=description,
                        start_time=when,
                        location_name=location,
                        description=description,
                        timezone=self._tz.zone
                )
                event.add_source(url)

                event.add_participant(comm, type='committee', note='host')
                # time = row[3].strip()
                # if time in TIMECODES:
                #     event['notes'] = TIMECODES[time]

                yield event
Developer: cliftonmcintosh, Project: openstates, Lines: 48, Source: events.py


Example 14: scrape_event_page

    def scrape_event_page(self, session, chamber, url, datetime):
        # NOTE: the `datetime` parameter shadows the datetime module inside
        # this method
        page = self.lxmlize(url)
        info = page.xpath("//p")
        metainfo = {}
        plaintext = ""
        for p in info:
            content = re.sub(r"\s+", " ", p.text_content())
            plaintext += content + "\n"
            if ":" in content:
                key, val = content.split(":", 1)
                metainfo[key.strip()] = val.strip()
        committee = metainfo['COMMITTEE']
        where = metainfo['PLACE']
        if "CHAIR" in where:
            where, chair = where.split("CHAIR:")
            metainfo['PLACE'] = where.strip()
            metainfo['CHAIR'] = chair.strip()

        chair = None
        if "CHAIR" in metainfo:
            chair = metainfo['CHAIR']

        plaintext = re.sub(r"\s+", " ", plaintext).strip()
        regexp = r"(S|J|H)(B|M|R) (\d+)"
        bills = re.findall(regexp, plaintext)

        event = Event(
            name=committee,
            start_date=self._tz.localize(datetime),
            location_name=where
        )

        event.add_source(url)
        event.add_participant(committee, type='committee', note='host')
        if chair is not None:
            event.add_participant(chair, type='legislator', note='chair')

        for bill in bills:
            chamber, type, number = bill
            bill_id = "%s%s %s" % (chamber, type, number)
            item = event.add_agenda_item('Bill up for discussion')
            item.add_bill(bill_id)

        event.add_agenda_item(plaintext)

        yield event
Developer: neelneelpurk, Project: openstates, Lines: 46, Source: events.py


Example 15: categorize_data

    def categorize_data(self, csv_data):
        #Is there a better place to define this?
        return_objs = []
        Contribution = namedtuple('Contribution', self.csv_header_row.replace(' ', '_'))
        for line in csv_data.split('\n'):  # explicitly splitting on newlines because otherwise this fails in the single-line case
            if not line:
                continue
            cur_obj = None
            try:
                contribution = Contribution(*line.split(','))
            except Exception:
                # debugging hook left by the original author; drops into pdb
                # when a row doesn't match the expected column count
                import pdb; pdb.set_trace()
            if contribution.Contributor_Type in self.business_contribution_types:
                cur_obj = Organization(contribution.Contributor_Name)
            elif contribution.Contributor_Type in self.individual_contribution_types:
                cur_obj = Person(contribution.Contributor_Name)
            elif contribution.Contributor_Type == 'Unknown/Anonymous':
                if contribution.Contributor_Name: #ignoring un-named contributors
                    #these look like catch-all business contributions
                    cur_obj = Organization(contribution.Contributor_Name)
            if cur_obj: 
                cur_obj.add_source(url=self.search_url)
                cur_obj.source_identified = True
                if contribution.Contributor_Address:
                    cur_obj.add_contact_detail(type='address', value=contribution.Contributor_Address)
                if contribution.Employer_Name:
                    cur_obj.extras['Employer'] = contribution.Employer_Name
                if contribution.Employer_Occupation:
                    cur_obj.extras['Occupation'] = contribution.Employer_Occupation
                
                recipient_obj = Organization(contribution.Receiving_Committee)
                recipient_obj.extras['Office'] = contribution.Office
                recipient_obj.extras['Filing Period'] = contribution.Filing_Period
                recipient_obj.extras['Fundtype'] = contribution.Fundtype

                # 'EST' and 'Maryland' because this scraper covers MD
                transaction = Event('Contribution', contribution.Contribution_Date, 'EST', 'Maryland')
                transaction.extras['Contribution Amount'] = contribution.Contribution_Amount
                transaction.extras['Contribution Type'] = contribution.Contribution_Type
                transaction.add_source(url=self.search_url)
                #transaction.source_identified = True
                transaction.participants.append(cur_obj.as_dict())
                transaction.participants.append(recipient_obj.as_dict())
                yield (cur_obj, recipient_obj, transaction)
            else:
                yield [] 
Developer: entropomorphic, Project: scrapers-us-state, Lines: 45, Source: contributions.py


Example 16: scrape_events

    def scrape_events(self, chamber, event_id):
        url = '%s%s' % (self.upper_url, event_id)
        html = self.get(url).text
        doc = lxml.html.fromstring(html)
        doc.make_links_absolute(url)
        rows = doc.xpath("//div[@id='WebPartWPQ2']")
        # some ids are empty
        if len(rows):
            table_data = rows[0].find('table')[1]

            for link in table_data.iterchildren('td'):
                td = link.xpath('//td[@class="ms-formbody"]')

                description = td[18].text
                when = td[19].text
                where = td[25].text
                # type = td[27].text
                meeting_lead = td[28].text

                # %I (12-hour) so the %p AM/PM marker is honored; %H would
                # silently ignore it
                when = datetime.datetime.strptime(when, "%m/%d/%Y  %I:%M %p")
                when = self._tz.localize(when)

                if where is None or where == "":
                    where = 'State House'
                event = Event(name=description,
                              start_date=when,
                              location_name=where)
                if td[20].text is None:
                    # wrap in a list so the loop below iterates over names,
                    # not over the characters of a single string
                    participants = [meeting_lead]
                else:
                    participants = td[20].text.split(';')
                if participants:
                    for participant in participants:
                        name = participant.strip().replace('HON.', '', 1)
                        if name != "":
                            event.add_participant(name, type='committee',
                                                  note='host')

                event.add_source(url)
                yield event
        else:
            # hack so we dont fail on the first id numbers where there are some gaps
            # between the numbers that work and not.
            if event_id > 1700:
                raise Exception("Parsing is done we are on future ids that are not used yet.")
Developer: neelneelpurk, Project: openstates, Lines: 45, Source: events.py


Example 17: scrape_house_weekly_schedule

    def scrape_house_weekly_schedule(self):
        url = "http://house.louisiana.gov/H_Sched/Hse_MeetingSchedule.aspx"
        page = self.lxmlize(url)

        meeting_rows = page.xpath('//table[@id = "table229"]/tr')

        valid_meetings = [row for row in meeting_rows if row.xpath(
            './td[1]')[0].text_content().replace(u'\xa0', '') and row.xpath(
            './td/a/img[contains(@src, "PDF-AGENDA.png")]') and 'Not Meeting' not in row.xpath(
            './td[2]')[0].text_content()]

        for meeting in valid_meetings:
            try:
                guid = meeting.xpath('./td/a[descendant::img[contains(@src,'
                                     '"PDF-AGENDA.png")]]/@href')[0]
                # self.logger.debug(guid)
                self.warning("logger.debug" + guid)
            except KeyError:
                continue  # Sometimes we have a dead link. This is only on
                # dead entries.

            committee_name = meeting.xpath('./td[1]/text()')[0].strip()
            meeting_string = meeting.xpath('./td[2]')[0].text_content()

            if "@" in meeting_string:
                continue  # Contains no time data.
            date, time, location = ([s.strip() for s in meeting_string.split(
                ',') if s] + [None]*3)[:3]

            # check for time in date because of missing comma
            time_srch = re.search(r'\d{2}:\d{2} (AM|PM)', date)
            if time_srch:
                location = time
                time = time_srch.group()
                date = date.replace(time, '')

            # self.logger.debug(location)
            self.warning("logger.debug" + location)

            year = datetime.datetime.now().year
            datetime_string = ' '.join((date, str(year), time))
            when = datetime.datetime.strptime(datetime_string, '%b %d %Y %I:%M %p')
            when = self._tz.localize(when)

            description = 'Committee Meeting: {}'.format(committee_name)
            # self.logger.debug(description)
            self.warning("logger.debug" + description)

            event = Event(name=description,
                          start_date=when,  # `when` was already localized above
                          location_name=location)
            event.add_source(url)
            event.add_participant(committee_name, type='committee', note='host')
            event.add_document(note='Agenda', url=guid, text='agenda',
                               media_type='application/pdf')

            yield event
Developer: sunlightlabs, Project: openstates, Lines: 57, Source: events.py
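
The fix above (the original code localized `when` twice) points at a pattern that recurs throughout these scrapers: a pytz timezone must be attached with localize() exactly once, to a naive datetime. A hedged sketch of the rule, with an illustrative zone:

import datetime

import pytz

tz = pytz.timezone("America/Chicago")
naive = datetime.datetime(2017, 3, 1, 14, 30)

aware = tz.localize(naive)     # correct: attach the zone once
# tz.localize(aware)           # raises ValueError: datetime is already aware
# naive.replace(tzinfo=tz)     # wrong for pytz zones: picks an LMT offset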


Example 18: scrape

    def scrape(self):
        local_timezone = pytz.timezone("US/Eastern")
        base_calendar_url = "http://www.miamidade.gov/cob/county-commission-calendar.asp"
        #things get messy more than a few months out
        #so we're just pulling 3 months. If we want three
        #more, they are called "nxx", "nxy" and "nxz"
        months = ["cur","nex","nxw"]
        for m in months:
            doc = self.lxmlize(base_calendar_url + "?next={}".format(m))
            events = doc.xpath("//table[contains(@style,'dotted #ccc')]")
            for event in events:
                rows = event.xpath(".//tr")
                # initialize so the checks below don't raise NameError when a
                # heading is missing from the table
                title = link = when = where = description = None
                for row in rows:
                    heading, data = row.xpath(".//td")
                    h = heading.text_content().lower().replace(":","").strip()
                    if h == "event":
                        title = data.text_content()
                        link = data.xpath(".//a")[0].attrib["href"]
                    elif h == "event date":
                        # %I (12-hour) so %p is honored; %H ignores AM/PM
                        when = datetime.strptime(data.text, '%m/%d/%y %I:%M%p')
                        when = local_timezone.localize(when)
                    elif h == "location":
                        where = data.text
                    elif h == "description":
                        description = data.text

                if not description:
                    description = ""

                status = "confirmed"
                if "cancelled" in title.lower():
                    status = "cancelled"

                e = Event(name=title,
                            start_time=when,
                            timezone="US/Eastern",
                            location_name=where,
                            description=description,
                            status=status)
                
                e.add_source(link)
                yield e
Developer: Code-for-Miami, Project: scrapers-us-municipal, Lines: 42, Source: events.py


Example 19: scrape_upper_events

    def scrape_upper_events(self):
        url = "https://www.flsenate.gov/Tracker/RSS/DailyCalendar"
        page = self.get(url).text
        feed = feedparser.parse(page)
        for entry in feed['entries']:
            # The feed breaks the RSS standard by making the pubdate the
            # actual event's date, not the RSS item publish date
            when = datetime.datetime(*entry['published_parsed'][:6])
            when = pytz.utc.localize(when)

            desc = entry['summary'].split(' - ')[0]
            location = entry['summary'].split(' - ')[1]

            event = Event(name=desc,
                          start_date=when,
                          description=desc,
                          location_name=location)

            event.add_source(entry['link'])
            yield event
Developer: neelneelpurk, Project: openstates, Lines: 20, Source: events.py


Example 20: scrape

    def scrape(self):
        start = dt.datetime.utcnow()
        start = start - dt.timedelta(days=10)
        end = start + dt.timedelta(days=30)

        url = URL.format(**{"from": start.strftime("%Y/%m/%d"), "til": end.strftime("%Y/%m/%d")})

        page = self.lxmlize(url)
        events = page.xpath("//ul[contains(@class, 'committee-events')]//li")

        for event in events:
            string = event.text_content()

            po = CLICK_INFO.match(event.xpath(".//span")[0].attrib["onclick"])
            if po is None:
                continue

            poid = po.groupdict()["info_id"]  # used to fetch more event details below

            popage = self.popOverUrl(poid)
            when = dt.datetime.strptime(popage.xpath("//strong")[0].text, "%B %d, %Y @ %I:%M %p")
            who = popage.xpath("//h1")[0].text
            related = []

            for item in popage.xpath("//div"):
                t = item.text
                if t is None:
                    continue

                t = t.strip()
                for related_entity in ORD_INFO.findall(t):
                    related.append({"ord_no": related_entity, "what": t})

            e = Event(name=who, when=when, location="unknown")
            e.add_source(url)

            for o in related:
                i = e.add_agenda_item(o["what"])
                i.add_bill(o["ord_no"], note="consideration")

            yield e
Developer: dtpeters, Project: scrapers-us-municipal, Lines: 41, Source: events.py



Note: The pupa.scrape.Event class examples in this article were compiled by 纯净天空 from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and redistribution or use should follow each project's license. Please do not reproduce without permission.

