This article collects typical usage examples of the Python pupa.scrape.Event class. If you are wondering what the Event class is for, how to use it, or what real-world usage looks like, the curated class examples below should help.
The following 20 code examples of the Event class are shown, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
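Before working through the examples, here is a minimal sketch of the pattern most of them share: construct an Event with a name, a timezone-aware start date, and a location name, then attach sources, participants, and agenda items, and yield it from a scraper method. This sketch is distilled from the examples below rather than from the pupa documentation; the pytz-based timezone handling, the placeholder values, and the example URL are illustrative assumptions.

import datetime

import pytz
from pupa.scrape import Event

def scrape():
    # Timezone and values are placeholders; real scrapers derive them from the page.
    tz = pytz.timezone('America/Chicago')
    when = tz.localize(datetime.datetime(2017, 6, 1, 9, 30))
    event = Event(name='Committee Hearing',
                  start_date=when,
                  location_name='State Capitol, Room 100')
    event.add_source('http://example.com/calendar')  # provenance URL for the scraped data
    event.add_participant('Appropriations', type='committee', note='host')
    item = event.add_agenda_item('Bill up for discussion')
    item.add_bill('HB 101')
    yield event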
Example 1: scrape_upper
def scrape_upper(self):
    url = "http://www.oksenate.gov/Committees/meetingnotices.htm"
    page = lxml.html.fromstring(self.get(url).text)
    page.make_links_absolute(url)

    text = page.text_content()
    _, text = text.split('MEETING NOTICES')
    re_date = r'[A-Z][a-z]+,\s+[A-Z][a-z]+ \d+, \d{4}'
    chunks = zip(re.finditer(re_date, text), re.split(re_date, text)[1:])

    for match, data in chunks:
        when = match.group()
        when = datetime.datetime.strptime(when, "%A, %B %d, %Y")
        # materialize the filter so lines[0] below works under Python 3
        lines = list(filter(None, [x.strip() for x in data.splitlines()]))

        time_ = re.search(r'^\s*TIME:\s+(.+?)\s+\x96', data, re.M).group(1)
        time_ = time_.replace('a.m.', 'AM').replace('p.m.', 'PM')
        time_ = time.strptime(time_, '%I:%M %p')
        when += datetime.timedelta(hours=time_.tm_hour, minutes=time_.tm_min)

        title = lines[0]

        where = re.search(r'^\s*PLACE:\s+(.+)', data, re.M).group(1)
        where = where.strip()

        event = Event(name=title,
                      start_date=self._tz.localize(when),
                      location_name=where)
        event.add_source(url)
        yield event
Developer: neelneelpurk | Project: openstates | Lines: 31 | Source file: events.py
Example 2: scrape
def scrape(self):
    for page in self.eventPages(EVENTSPAGE):
        events_table = page.xpath("//table[@class='rgMasterTable']")[0]
        for events, headers, rows in self.parseDataTable(events_table):
            print(events)
            location_string = events[u'Meeting\xa0Location']
            location_list = location_string.split('--')
            location = ', '.join(location_list[0:2])

            status_string = location_list[-1].split('Chicago, Illinois')
            if len(status_string) > 1 and status_string[1]:
                status = status_string[1].lower()
                if status not in ['cancelled', 'tentative', 'confirmed', 'passed']:
                    print(status)
                    status = 'confirmed'
            else:
                status = 'confirmed'

            when = events[u'Meeting\xa0Date']
            time_string = events[u'Meeting\xa0Time']
            event_time = datetime.datetime.strptime(time_string,
                                                    "%I:%M %p")
            when = when.replace(hour=event_time.hour)

            e = Event(name=events["Name"]["label"],
                      when=when,
                      location=location,
                      status=status)
            e.add_source(EVENTSPAGE)

            if events['Video'] != u'Not\xa0available':
                print(events['Video'])

            yield e
Developer: ChaelCodes | Project: scrapers-us-municipal | Lines: 35 | Source file: events.py
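Note that this municipal scraper (like Examples 10 and 20) targets an older Event signature that accepted when=, location=, and status= keyword arguments. Under the newer signature seen in Example 1, the same construction would look roughly like the sketch below; this is an inferred equivalent, not code from the project itself.

e = Event(name=events["Name"]["label"],
          start_date=when,          # formerly `when=`
          location_name=location,   # formerly `location=`
          status=status)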
Example 3: scrape_committee_events
def scrape_committee_events(self, code, name):
    events_url = \
        'http://www.cga.ct.gov/basin/fullcalendar/commevents.php?' \
        'comm_code={}'.format(code)
    events_data = self.get(events_url).text
    events = json.loads(events_data)

    DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%SZ'
    for info in events:
        if info['title'] is None:
            self.warning("Event found with no title; it will be skipped")
            continue
        elif info['title'].startswith('CANCELLED:'):
            self.info("Cancelled event found; it will be skipped: {}".
                      format(info['title']))
            continue

        when = datetime.datetime.strptime(info['start'], DATETIME_FORMAT)
        # end = datetime.datetime.strptime(info['end'], DATETIME_FORMAT)
        where = "{0} {1}".format(info['building'].strip(), info['location'].strip())

        # end_time=self._tz.localize(end),
        event = Event(start_time=self._tz.localize(when),
                      timezone=self._tz.zone,
                      location_name=where,
                      name=info['title'],
                      description=info['title'],)
        event.add_source(events_url)
        yield event
Developer: cliftonmcintosh | Project: openstates | Lines: 30 | Source file: events.py
Example 4: scrape_meetings
def scrape_meetings(self, meetings, group):
    """
    Scrape and save event data from a list of meetings.

    Arguments:
    meetings -- A list of lxml elements containing event information
    group -- The type of meeting. The legislature site applies
             different formatting to events based on which group
             they correspond to. `group` should be one of the
             following strings: 'house', 'senate', or 'commission'.
    """
    for meeting in meetings:
        when = self.get_date(meeting)
        description = self.get_description(meeting)
        location = self.get_location(meeting)

        if when and description and location:
            event = Event(name=description, start_date=when.replace(tzinfo=self.tz),
                          description=description,
                          location_name=location)
            agenda = self.get_agenda(meeting)
            if agenda:
                event.add_agenda_item(agenda)
            # note: `url` is not defined in this excerpt; it presumably comes
            # from the enclosing module in the original source
            event.add_source(url)
            yield event
Developer: sunlightlabs | Project: openstates | Lines: 26 | Source file: events.py
Example 5: event_obj
def event_obj():
    e = Event(
        name="get-together",
        start_date=datetime.datetime.utcnow().isoformat().split('.')[0] + 'Z',
        location_name="Joe's Place",
    )
    e.add_source(url='http://example.com/foobar')
    return e
Developer: opencivicdata | Project: pupa | Lines: 8 | Source file: test_event_scrape.py
Example 6: ge
def ge():
    event = ScrapeEvent(
        name="America's Birthday",
        start_time="2014-07-04T05:00Z",
        location_name="America",
        timezone="America/New_York",
        all_day=True)
    event.add_person("George Washington")
    return event
Developer: anukat2015 | Project: pupa | Lines: 9 | Source file: test_event_importer.py
Example 7: event_obj
def event_obj():
    e = Event(
        name="get-together",
        start_time=datetime.datetime.utcnow(),
        location_name="Joe's Place",
        timezone="America/New_York",
    )
    e.add_source(url='foobar')
    return e
Developer: anukat2015 | Project: pupa | Lines: 9 | Source file: test_event_scrape.py
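Examples 5 and 7 are parallel test fixtures that show the two ways a start time can be supplied: an ISO-8601 UTC string passed as start_date, or a naive datetime paired with an explicit timezone. A side-by-side sketch (the literal date is invented for illustration):

import datetime
from pupa.scrape import Event

# ISO-8601 string with a UTC 'Z' suffix, as in Example 5
e1 = Event(name="get-together",
           start_date="2014-07-04T05:00:00Z",
           location_name="Joe's Place")

# naive datetime plus an explicit timezone, as in Example 7
e2 = Event(name="get-together",
           start_time=datetime.datetime(2014, 7, 4, 5, 0),
           timezone="America/New_York",
           location_name="Joe's Place")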
Example 8: scrape
def scrape(self, chamber=None):
    URL = 'http://utahlegislature.granicus.com/ViewPublisherRSS.php?view_id=2&mode=agendas'
    doc = self.lxmlize(URL)
    events = doc.xpath('//item')

    for info in events:
        title_and_date = info.xpath('title/text()')[0].split(" - ")
        title = title_and_date[0]
        when = title_and_date[-1]
        # if not when.endswith(session[ :len("20XX")]):
        #     continue

        event = Event(name=title,
                      start_date=self._tz.localize(datetime.datetime.strptime(when,
                                                                              '%b %d, %Y')),
                      location_name='State Capitol'
                      )
        event.add_source(URL)

        url = re.search(r'(http://.*?)\s', info.text_content()).group(1)
        try:
            doc = self.lxmlize(url)
        except HTTPError:
            self.logger.warning("Page missing, skipping")
            continue
        event.add_source(url)

        committee = doc.xpath('//a[text()="View committee page"]/@href')
        if committee:
            committee_doc = self.lxmlize(committee[0])
            committee_name = committee_doc.xpath(
                '//h3[@class="heading committee"]/text()')[0].strip()
            event.add_participant(committee_name, type='committee',
                                  note='host')

        documents = doc.xpath('.//td')
        for document in documents:
            url = re.search(r'(http://.*?pdf)', document.xpath('@onclick')[0])
            if url is None:
                continue
            url = url.group(1)
            event.add_document(
                note=document.xpath('text()')[0],
                url=url,
                media_type='application/pdf'
            )

            bills = document.xpath('@onclick')
            for bill in bills:
                if "bills/static" in bill:
                    bill_name = bill.split("/")[-1].split(".")[0]
                    item = event.add_agenda_item('Bill up for discussion')
                    item.add_bill(bill_name)

        yield event
Developer: neelneelpurk | Project: openstates | Lines: 53 | Source file: events.py
Example 9: scrape
def scrape(self):
    page = self.lxmlize(calurl)
    events = page.xpath("//table[@class='agenda-body']//tr")[1:]

    for event in events:
        comit_url = event.xpath(
            ".//a[contains(@href, '/Pages/comm-info.aspx?c=')]")
        if len(comit_url) != 1:
            raise Exception

        comit_url = comit_url[0]
        who = self.scrape_participants(comit_url.attrib['href'])

        tds = event.xpath("./*")
        date = tds[0].text_content().strip()
        cttie = tds[1].text_content().strip()
        _chamber, cttie = [x.strip() for x in cttie.split(" - ", 1)]
        info = tds[2]
        name = info.xpath("./a[contains(@href, 'raw')]")[0]
        notice = name.attrib['href']
        name = name.text
        time, where = info.xpath("./i/text()")
        what = tds[3].text_content()
        what = what.replace("Items: ", "")
        if "(None)" in what:
            continue
        what = [x.strip() for x in what.split(";")]

        when = ", ".join([date, str(dt.datetime.now().year), time])
        when = dt.datetime.strptime(when, "%a %b %d, %Y, %I:%M %p")

        event = Event(
            name=name,
            location_name=where,
            start_date=self._tz.localize(when),
        )
        event.add_source(calurl)

        event.add_committee(cttie, note='host')
        event.add_document("notice", notice, media_type='application/pdf')

        for entry in what:
            item = event.add_agenda_item(entry)
            if entry.startswith('AB') or entry.startswith('SB'):
                item.add_bill(entry)

        for thing in who:
            event.add_person(thing['name'])

        yield event
Developer: sunlightlabs | Project: openstates | Lines: 53 | Source file: events.py
Example 10: scrape_event_page
def scrape_event_page(self, event):
    url = event.attrib['href']
    page = self.lxmlize(url)
    title = page.xpath("//h2[@class='evlist_header']")
    title = title[0].text.strip() if title else None
    if title is None:
        return
    if "CANCELED" in title:
        return

    info = page.xpath("//div[@style='position:relative;margin-right:40px;']")[0]
    blocks = info.xpath(".//div")
    ret = {}
    for block in blocks:
        els = block.xpath("./*")
        if not els:
            continue
        le = els[0]
        if le.tag != 'label':
            continue

        label, div = els
        ltex = label.text_content().strip()
        dtex = div.text_content().strip()
        ret[ltex] = dtex

    when = dt.datetime.utcnow()
    date, start, end = (x.strip() for x in ret['When:'].split("\n"))
    start = re.sub("^@", "", start).strip()
    end = end.replace("-", "").strip()

    replace = [
        ('Apr', 'April'),
    ]
    skip = ["Occurs every"]

    for k, v in replace:
        date = date.replace(k, v).strip()

    if True in (x in end for x in skip):
        return

    start = "%s %s" % (date, start)
    end = "%s %s" % (date, end)
    start, end = (dt.datetime.strptime(x, "%B %d, %Y %I:%M %p") for x in (start, end))

    event = Event(name=title, location=ret['Where:'], when=start, end=end)
    event.add_source(url)
    yield event
Developer: ChaelCodes | Project: scrapers-us-municipal | Lines: 52 | Source file: events.py
Example 11: scrape_chamber
def scrape_chamber(self, chamber):
    url = utils.urls['events'][chamber]
    page = self.get(url).text
    page = lxml.html.fromstring(page)
    page.make_links_absolute(url)

    for table in page.xpath('//table[@class="CMS-MeetingDetail-CurrMeeting"]'):
        date_string = table.xpath('ancestor::div[@class="CMS-MeetingDetail"]/div/a/@name')[0]
        for row in table.xpath('tr'):
            time_string = row.xpath('td[@class="CMS-MeetingDetail-Time"]/text()')[0].strip()
            description = row.xpath(
                'td[@class="CMS-MeetingDetail-Agenda"]/div/div'
            )[-1].text_content().strip()
            location = row.xpath(
                'td[@class="CMS-MeetingDetail-Location"]'
            )[0].text_content().strip()
            committees = row.xpath('.//div[@class="CMS-MeetingDetail-Agenda-CommitteeName"]/a')
            bills = row.xpath('.//a[contains(@href, "billinfo")]')

            try:
                start_time = datetime.datetime.strptime(
                    '{} {}'.format(date_string, time_string),
                    '%m/%d/%Y %I:%M %p',
                )
            except ValueError:
                break

            event = Event(
                name=description,
                start_time=self._tz.localize(start_time),
                location_name=location,
                timezone=self._tz.zone,
            )
            event.add_source(url)

            if bills or committees:
                item = event.add_agenda_item(description)
                for bill in bills:
                    parsed = urllib.parse.urlparse(bill.get('href'))
                    qs = urllib.parse.parse_qs(parsed.query)
                    item.add_bill('{}{} {}'.format(qs['body'], qs['type'], qs['bn']))
                for committee in committees:
                    parsed = urllib.parse.urlparse(committee.get('href'))
                    qs = urllib.parse.parse_qs(parsed.query)
                    item.add_committee(
                        re.sub(r' \([S|H]\)$', '', committee.text),
                        id=qs.get('Code'),
                    )

            yield event
Developer: cliftonmcintosh | Project: openstates | Lines: 50 | Source file: events.py
Example 12: categorize_data
def categorize_data(self, csv_data):
    return_objs = []
    Contribution = namedtuple('Contribution', self.csv_header_row.replace(' ', '_'))
    for line in csv_data.split('\n'):  # explicitly splitting on '\n'; otherwise parsing fails on single-line input
        if not line:
            continue
        # cur_obj will be the person or organization that made the contribution
        cur_obj = None
        contribution = Contribution(*line.split(','))

        if contribution.Contributor_Type in self.business_contribution_types:
            cur_obj = Organization(contribution.Contributor_Name)
        elif contribution.Contributor_Type in self.individual_contribution_types:
            cur_obj = Person(contribution.Contributor_Name)
        elif contribution.Contributor_Type == 'Unknown/Anonymous':
            if contribution.Contributor_Name:  # ignoring unnamed contributors
                # these look like catch-all business contributions
                cur_obj = Organization(contribution.Contributor_Name)

        if cur_obj:
            # we don't set cur_obj in the event that there was an
            # anonymous/unknown contribution without a Contributor_Name,
            # so we need to check that it exists before adding to it
            cur_obj.add_source(url=self.search_url)
            cur_obj.source_identified = True
            if contribution.Contributor_Address:
                cur_obj.add_contact_detail(type='address', value=contribution.Contributor_Address)
            if contribution.Employer_Name:
                cur_obj.extras['Employer'] = contribution.Employer_Name
            if contribution.Employer_Occupation:
                cur_obj.extras['Occupation'] = contribution.Employer_Occupation

            # recipiant_obj is the organization that received the contribution
            recipiant_obj = Organization(contribution.Receiving_Committee)
            recipiant_obj.extras['Office'] = contribution.Office
            recipiant_obj.extras['Filing Period'] = contribution.Filing_Period
            recipiant_obj.extras['Fundtype'] = contribution.Fundtype

            # transaction is the event linking the donor and recipient
            transaction = Event('Contribution', contribution.Contribution_Date, 'EST', 'Maryland')  # EST and Maryland b/c MD
            transaction.extras['Contribution Amount'] = contribution.Contribution_Amount
            transaction.extras['Contribution Type'] = contribution.Contribution_Type
            transaction.add_source(url=self.search_url)
            # transaction.source_identified = True
            transaction.participants.append(cur_obj.as_dict())
            transaction.participants.append(recipiant_obj.as_dict())
            yield (cur_obj, recipiant_obj, transaction)
        else:
            yield []
Developer: AshleyTemple | Project: scrapers-us-state | Lines: 49 | Source file: contributions.py
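The positional Event call in this example appears to line up with the keyword form used elsewhere in this collection, i.e. roughly (name, start_time, timezone, location_name). Under that assumption, a keyword-argument equivalent of the transaction line would be:

transaction = Event(name='Contribution',
                    start_time=contribution.Contribution_Date,
                    timezone='EST',
                    location_name='Maryland')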
Example 13: scrape
def scrape(self, session=None, chamber=None):
    if not session:
        session = self.latest_session()
        self.info('no session specified, using %s', session)

    url = "ftp://www.arkleg.state.ar.us/dfadooas/ScheduledMeetings.txt"
    page = self.get(url)
    page = csv.reader(StringIO(page.text), delimiter='|')

    for row in page:
        # Deal with embedded newline characters, which cause fake new rows
        LINE_LENGTH = 11
        while len(row) < LINE_LENGTH:
            row += next(page)

        desc = row[7].strip()

        match = re.match(r'^(.*)- (HOUSE|SENATE)$', desc)
        if match:
            comm = match.group(1).strip()
            comm = re.sub(r'\s+', ' ', comm)

            location = row[5].strip() or 'Unknown'
            when = datetime.datetime.strptime(row[2], '%Y-%m-%d %H:%M:%S')
            when = self._tz.localize(when)

            # Only assign events to a session if they are in the same year
            # Given that session metadata have some overlap and
            # missing end dates, this is the best option available
            session_year = int(session[:4])
            if session_year != when.year:
                continue

            description = "%s MEETING" % comm
            event = Event(
                name=description,
                start_time=when,
                location_name=location,
                description=description,
                timezone=self._tz.zone
            )
            event.add_source(url)

            event.add_participant(comm, type='committee', note='host')

            # time = row[3].strip()
            # if time in TIMECODES:
            #     event['notes'] = TIMECODES[time]

            yield event
Developer: cliftonmcintosh | Project: openstates | Lines: 48 | Source file: events.py
Example 14: scrape_event_page
# note: the `datetime` parameter below shadows the stdlib module name
def scrape_event_page(self, session, chamber, url, datetime):
    page = self.lxmlize(url)
    info = page.xpath("//p")
    metainfo = {}
    plaintext = ""
    for p in info:
        content = re.sub(r"\s+", " ", p.text_content())
        plaintext += content + "\n"
        if ":" in content:
            key, val = content.split(":", 1)
            metainfo[key.strip()] = val.strip()

    committee = metainfo['COMMITTEE']
    where = metainfo['PLACE']
    if "CHAIR" in where:
        where, chair = where.split("CHAIR:")
        metainfo['PLACE'] = where.strip()
        metainfo['CHAIR'] = chair.strip()

    chair = None
    if "CHAIR" in metainfo:
        chair = metainfo['CHAIR']

    plaintext = re.sub(r"\s+", " ", plaintext).strip()
    regexp = r"(S|J|H)(B|M|R) (\d+)"
    bills = re.findall(regexp, plaintext)

    event = Event(
        name=committee,
        start_date=self._tz.localize(datetime),
        location_name=where
    )
    event.add_source(url)
    event.add_participant(committee, type='committee', note='host')

    if chair is not None:
        event.add_participant(chair, type='legislator', note='chair')

    for bill in bills:
        chamber, type, number = bill
        bill_id = "%s%s %s" % (chamber, type, number)
        item = event.add_agenda_item('Bill up for discussion')
        item.add_bill(bill_id)

    event.add_agenda_item(plaintext)

    yield event
Developer: neelneelpurk | Project: openstates | Lines: 46 | Source file: events.py
Example 15: categorize_data
def categorize_data(self, csv_data):
    # Is there a better place to define this?
    return_objs = []
    Contribution = namedtuple('Contribution', self.csv_header_row.replace(' ', '_'))
    for line in csv_data.split('\n'):  # explicitly splitting on '\n'; otherwise parsing fails on single-line input
        if not line:
            continue
        cur_obj = None
        try:
            contribution = Contribution(*line.split(','))
        except Exception as e:
            import pdb; pdb.set_trace()  # debugging hook left in by the original author

        if contribution.Contributor_Type in self.business_contribution_types:
            cur_obj = Organization(contribution.Contributor_Name)
        elif contribution.Contributor_Type in self.individual_contribution_types:
            cur_obj = Person(contribution.Contributor_Name)
        elif contribution.Contributor_Type == 'Unknown/Anonymous':
            if contribution.Contributor_Name:  # ignoring unnamed contributors
                # these look like catch-all business contributions
                cur_obj = Organization(contribution.Contributor_Name)

        if cur_obj:
            cur_obj.add_source(url=self.search_url)
            cur_obj.source_identified = True
            if contribution.Contributor_Address:
                cur_obj.add_contact_detail(type='address', value=contribution.Contributor_Address)
            if contribution.Employer_Name:
                cur_obj.extras['Employer'] = contribution.Employer_Name
            if contribution.Employer_Occupation:
                cur_obj.extras['Occupation'] = contribution.Employer_Occupation

            recipiant_obj = Organization(contribution.Receiving_Committee)
            recipiant_obj.extras['Office'] = contribution.Office
            recipiant_obj.extras['Filing Period'] = contribution.Filing_Period
            recipiant_obj.extras['Fundtype'] = contribution.Fundtype

            transaction = Event('Contribution', contribution.Contribution_Date, 'EST', 'Maryland')  # EST and Maryland b/c MD
            transaction.extras['Contribution Amount'] = contribution.Contribution_Amount
            transaction.extras['Contribution Type'] = contribution.Contribution_Type
            transaction.add_source(url=self.search_url)
            # transaction.source_identified = True
            transaction.participants.append(cur_obj.as_dict())
            transaction.participants.append(recipiant_obj.as_dict())
            yield (cur_obj, recipiant_obj, transaction)
        else:
            yield []
Developer: entropomorphic | Project: scrapers-us-state | Lines: 45 | Source file: contributions.py
Example 16: scrape_events
def scrape_events(self, chamber, event_id):
    url = '%s%s' % (self.upper_url, event_id)
    html = self.get(url).text
    doc = lxml.html.fromstring(html)
    doc.make_links_absolute(url)
    rows = doc.xpath("//div[@id='WebPartWPQ2']")
    # some ids are empty
    if len(rows):
        table_data = rows[0].find('table')[1]

        for link in table_data.iterchildren('td'):
            td = link.xpath('//td[@class="ms-formbody"]')

            description = td[18].text
            when = td[19].text
            where = td[25].text
            # type = td[27].text
            meeting_lead = td[28].text

            when = datetime.datetime.strptime(when, "%m/%d/%Y %H:%M %p")
            when = self._tz.localize(when)
            if where is None or where == "":
                where = 'State House'
            event = Event(name=description,
                          start_date=when,
                          location_name=where)
            if td[20].text is None:
                # wrapped in a list; the original assigned the bare string,
                # which would make the loop below iterate character by character
                participants = [meeting_lead]
            else:
                participants = td[20].text.split(';')
            if participants:
                for participant in participants:
                    name = participant.strip().replace('HON.', '', 1)
                    if name != "":
                        event.add_participant(name, type='committee',
                                              note='host')
            event.add_source(url)
            yield event
    else:
        # hack so we dont fail on the first id numbers where there are some gaps
        # between the numbers that work and not.
        if event_id > 1700:
            raise Exception("Parsing is done we are on future ids that are not used yet.")
Developer: neelneelpurk | Project: openstates | Lines: 45 | Source file: events.py
Example 17: scrape_house_weekly_schedule
def scrape_house_weekly_schedule(self):
    url = "http://house.louisiana.gov/H_Sched/Hse_MeetingSchedule.aspx"
    page = self.lxmlize(url)

    meeting_rows = page.xpath('//table[@id = "table229"]/tr')

    valid_meetings = [row for row in meeting_rows if row.xpath(
        './td[1]')[0].text_content().replace(u'\xa0', '') and row.xpath(
        './td/a/img[contains(@src, "PDF-AGENDA.png")]') and 'Not Meeting' not in row.xpath(
        './td[2]')[0].text_content()]

    for meeting in valid_meetings:
        try:
            guid = meeting.xpath('./td/a[descendant::img[contains(@src,'
                                 '"PDF-AGENDA.png")]]/@href')[0]
            # self.logger.debug(guid)
            self.warning("logger.debug" + guid)
        except KeyError:
            continue  # Sometimes we have a dead link. This is only on
            # dead entries.

        committee_name = meeting.xpath('./td[1]/text()')[0].strip()
        meeting_string = meeting.xpath('./td[2]')[0].text_content()

        if "@" in meeting_string:
            continue  # Contains no time data.

        date, time, location = ([s.strip() for s in meeting_string.split(
            ',') if s] + [None]*3)[:3]

        # check for time in date because of missing comma
        time_srch = re.search(r'\d{2}:\d{2} (AM|PM)', date)
        if time_srch:
            location = time
            time = time_srch.group()
            date = date.replace(time, '')

        # self.logger.debug(location)
        self.warning("logger.debug" + location)

        year = datetime.datetime.now().year
        datetime_string = ' '.join((date, str(year), time))
        when = datetime.datetime.strptime(datetime_string, '%b %d %Y %I:%M %p')
        when = self._tz.localize(when)

        description = 'Committee Meeting: {}'.format(committee_name)
        # self.logger.debug(description)
        self.warning("logger.debug" + description)

        event = Event(name=description,
                      # `when` is already localized above; localizing it a
                      # second time (as the original did) would raise ValueError
                      start_date=when,
                      location_name=location)
        event.add_source(url)
        event.add_participant(committee_name, type='committee', note='host')
        event.add_document(note='Agenda', url=guid, text='agenda',
                           media_type='application/pdf')
        yield event
Developer: sunlightlabs | Project: openstates | Lines: 57 | Source file: events.py
Example 18: scrape
def scrape(self):
    local_timezone = pytz.timezone("US/Eastern")
    base_calendar_url = "http://www.miamidade.gov/cob/county-commission-calendar.asp"
    # things get messy more than a few months out
    # so we're just pulling 3 months. If we want three
    # more, they are called "nxx", "nxy" and "nxz"
    months = ["cur", "nex", "nxw"]

    for m in months:
        doc = self.lxmlize(base_calendar_url + "?next={}".format(m))
        events = doc.xpath("//table[contains(@style,'dotted #ccc')]")
        for event in events:
            rows = event.xpath(".//tr")
            for row in rows:
                heading, data = row.xpath(".//td")
                h = heading.text_content().lower().replace(":", "").strip()
                if h == "event":
                    title = data.text_content()
                    link = data.xpath(".//a")[0].attrib["href"]
                elif h == "event date":
                    when = datetime.strptime(data.text, '%m/%d/%y %H:%M%p')
                    when = local_timezone.localize(when)
                elif h == "location":
                    where = data.text
                elif h == "description":
                    description = data.text

            if not description:
                description = ""

            status = "confirmed"
            if "cancelled" in title.lower():
                status = "cancelled"

            e = Event(name=title,
                      start_time=when,
                      timezone="US/Eastern",
                      location_name=where,
                      description=description,
                      status=status)
            e.add_source(link)
            yield e
Developer: Code-for-Miami | Project: scrapers-us-municipal | Lines: 42 | Source file: events.py
Example 19: scrape_upper_events
def scrape_upper_events(self):
    url = "https://www.flsenate.gov/Tracker/RSS/DailyCalendar"
    page = self.get(url).text
    feed = feedparser.parse(page)

    for entry in feed['entries']:
        # The feed breaks the RSS standard by making the pubdate the
        # actual event's date, not the RSS item publish date
        when = datetime.datetime(*entry['published_parsed'][:6])
        when = pytz.utc.localize(when)

        desc = entry['summary'].split(' - ')[0]
        location = entry['summary'].split(' - ')[1]

        event = Event(name=desc,
                      start_date=when,
                      description=desc,
                      location_name=location)
        event.add_source(entry['link'])
        yield event
Developer: neelneelpurk | Project: openstates | Lines: 20 | Source file: events.py
Example 20: scrape
def scrape(self):
    start = dt.datetime.utcnow()
    start = start - dt.timedelta(days=10)
    end = start + dt.timedelta(days=30)

    url = URL.format(**{"from": start.strftime("%Y/%m/%d"), "til": end.strftime("%Y/%m/%d")})

    page = self.lxmlize(url)
    events = page.xpath("//ul[contains(@class, 'committee-events')]//li")

    for event in events:
        string = event.text_content()

        po = CLICK_INFO.match(event.xpath(".//span")[0].attrib["onclick"])
        if po is None:
            continue

        poid = po.groupdict()["info_id"]  # This is used to get more deetz on
        popage = self.popOverUrl(poid)

        when = dt.datetime.strptime(popage.xpath("//strong")[0].text, "%B %d, %Y @ %I:%M %p")
        who = popage.xpath("//h1")[0].text

        related = []
        for item in popage.xpath("//div"):
            t = item.text
            if t is None:
                continue
            t = t.strip()
            for related_entity in ORD_INFO.findall(t):
                related.append({"ord_no": related_entity, "what": t})

        e = Event(name=who, when=when, location="unknown")
        e.add_source(url)

        for o in related:
            i = e.add_agenda_item(o["what"])
            i.add_bill(o["ord_no"], note="consideration")

        yield e
Developer: dtpeters | Project: scrapers-us-municipal | Lines: 41 | Source file: events.py
Note: The pupa.scrape.Event class examples in this article were compiled by 纯净天空 from source-code and documentation platforms such as GitHub and MSDocs. The code snippets are selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and distribution and use are subject to each project's License. Do not reproduce without permission.