本文整理汇总了Python中utils.feedparser.parse函数的典型用法代码示例。如果您正苦于以下问题:Python parse函数的具体用法?Python parse怎么用?Python parse使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了parse函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: downloadSource
def downloadSource(self, source):
    """Fetch ``source.url`` with feedparser and turn its entries into articles.

    The ETag stored on ``source`` (possibly ``None``) is sent with the
    request.  When the response status is 200, one ``Article`` is built per
    feed entry.  The articles are written with ``self.bulkEntityInsertion``
    and ``source`` is saved only if the response carried an ETag, which is
    then copied onto ``source.etag``.

    Args:
        source: object exposing ``url``, ``etag``, ``name``, ``key`` and
            ``put()`` (presumably an ndb model -- confirm against caller).
    """
    # feedparser's ``etag`` keyword defaults to None, so a single call covers
    # both "ETag known" and "no ETag yet".
    d = feedparser.parse(source.url, etag=source.etag)
    save_in_db = False
    articles = []
    # NOTE(review): the original indentation was lost; the entry loop is
    # assumed to sit inside the ``status == 200`` branch.
    if d.get('status') == 200:
        if 'etag' in d:
            save_in_db = True
            source.etag = d.etag
        for entry in d.entries:
            # Entries are not guaranteed to carry a parseable date; store
            # None instead of aborting the whole download.
            published_parsed = entry.get('published_parsed')
            if published_parsed:
                published = datetime(*published_parsed[:6])
            else:
                published = None
            articles.append(Article(
                parent=ndb.Key('Article', source.name),
                published=published,
                source=source.key,
                title=entry.get('title'),
                description=entry.get('description'),
                summary=entry.get('summary'),
                link=entry.get('link')))
    if save_in_db:
        self.bulkEntityInsertion(articles)
        source.put()
    # Call form prints the same text on Python 2 and is valid on Python 3.
    print(articles)
开发者ID:cirocosta,项目名称:ciro-costa,代码行数:34,代码来源:parser.py
示例2: fetch
def fetch(self):
    """
    Uses feedparser to download the feed. Will be parsed later.

    The etag / last-modified validators are dropped when the fetch is
    forced, on roughly 1% of calls chosen at random, or when the feed has
    never been fetched or is not known good.  A parsed feed supplied through
    ``self.options['fpf']`` is stored on ``self.fpf`` and returned at once as
    ``(FEED_OK, self.fpf)``.  Otherwise the parsed result is stored on
    ``self.fpf``.
    """
    start = time.time()  # not read in the visible part of this method
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
        identity,
        self.feed.title[:30],
        self.feed.id,
        datetime.datetime.now() - self.feed.last_update)
    logging.debug(log_msg)

    etag = self.feed.etag
    modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
    address = self.feed.feed_address

    if (self.options.get('force') or random.random() <= .01):
        modified = None
        etag = None
        address = cache_bust_url(address)
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
            self.feed.title[:30], address))
    elif (not self.feed.fetched_once or not self.feed.known_good):
        modified = None
        etag = None

    USER_AGENT = 'NewsBlur Feed Fetcher - %s subscriber%s - %s (Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.2.3 (KHTML, like Gecko) Version/5.2)' % (
        self.feed.num_subscribers,
        's' if self.feed.num_subscribers != 1 else '',
        settings.NEWSBLUR_URL
    )

    if self.options.get('feed_xml'):
        logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
            self.feed.title[:30], len(self.options.get('feed_xml'))))

    if self.options.get('fpf'):
        self.fpf = self.options.get('fpf')
        logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
            self.feed.title[:30]))
        return FEED_OK, self.fpf

    try:
        self.fpf = feedparser.parse(address,
                                    agent=USER_AGENT,
                                    etag=etag,
                                    modified=modified)
    except (TypeError, ValueError) as e:
        logging.debug(u' ***> [%-30s] ~FR%s, turning off microformats.' %
                      (self.feed.title[:30], e))
        # PARSE_MICROFORMATS is module-global state: restore it even when
        # the retry itself raises, so later fetches are unaffected.
        feedparser.PARSE_MICROFORMATS = False
        try:
            self.fpf = feedparser.parse(address,
                                        agent=USER_AGENT,
                                        etag=etag,
                                        modified=modified)
        finally:
            feedparser.PARSE_MICROFORMATS = True
开发者ID:0077cc,项目名称:NewsBlur,代码行数:55,代码来源:feed_fetcher.py
示例3: fetch
def fetch(self):
    """Download and parse the feed.

    Returns:
        ``(FEED_OK, parsed_feed)``; the parsed feed is also kept on
        ``self.fpf``.
    """
    # Default timeout (seconds) for sockets created from here on.
    socket.setdefaulttimeout(30)

    worker_id = self.get_identity()
    logging.debug(u'%2s ---> [%-30s] Fetching feed (%d)' % (
        worker_id, unicode(self.feed)[:30], self.feed.id))

    # A disabled variant used to return FEED_SAME here when the feed's
    # next_scheduled_update was still in the future and no force was asked.
    self.feed.set_next_scheduled_update()

    etag = self.feed.etag
    if self.feed.last_modified:
        modified = self.feed.last_modified.utctimetuple()[:7]
    else:
        modified = None
    if self.options.get('force'):
        # Forced fetch: send neither validator.
        modified = None
        etag = None

    parsed = feedparser.parse(self.feed.feed_address,
                              agent=USER_AGENT,
                              etag=etag,
                              modified=modified)
    self.fpf = parsed
    return FEED_OK, self.fpf
开发者ID:dkeskar,项目名称:NewsBlur,代码行数:34,代码来源:feed_fetcher.py
示例4: _blog_fetch_callback
def _blog_fetch_callback(self, rpc):
    """Render ``moderate.html`` from the blog feed delivered by ``rpc``.

    Each feed entry becomes a ``(blog_post, signature_time)`` pair.  The time
    is formatted as ``MM/DD/YYYY HH:MM`` when the post's signature has a
    truthy recorded time; otherwise the raw lookup result is kept.
    Lower-cased tags of every post are merged into the stored ``Tags``
    entity before rendering.
    """
    parsed = feedparser.parse(StringIO(rpc.get_result().content))

    sigs = Signatures.get_single()
    time_by_hash = dict(zip(sigs.hashes, sigs.times))

    posts = []
    tags_entity = Tags.get_single()
    known_tags = set(tags_entity.available)

    # NOTE(review): original indentation was lost; the append and the tag
    # collection are assumed to run for every entry, signed or not.
    for item in parsed['entries']:
        post = BlogPost.blog_post_from_feed_entry(item)
        stamp = time_by_hash.get(post.signature)
        if stamp:
            stamp = datetime.datetime.fromtimestamp(stamp).strftime('%m/%d/%Y %H:%M')
        posts.append((post, stamp))
        known_tags.update(label.lower() for label in post.tags)

    context = {"posts": posts}
    tags_entity.available = list(known_tags)
    tags_entity.save()

    page = jinja_environment.get_template('moderate.html')
    self.response.out.write(page.render(context))
开发者ID:darvin,项目名称:moderated-feed-gae-server,代码行数:28,代码来源:admin.py
示例5: fetch
def fetch(self):
    """
    Download the feed through feedparser; the result is processed later.

    Returns ``(FEED_OK, parsed_feed)`` and keeps the parsed feed on
    ``self.fpf``.
    """
    worker_id = self.get_identity()
    logging.debug(u'%2s ---> [%-30s] Fetching feed (%d)' % (
        worker_id, unicode(self.feed)[:30], self.feed.id))

    self.feed.set_next_scheduled_update()

    etag = self.feed.etag
    if self.feed.last_modified:
        modified = self.feed.last_modified.utctimetuple()[:7]
    else:
        modified = None
    # Forced and first-time fetches send neither validator.
    if self.options.get('force') or not self.feed.fetched_once:
        modified = None
        etag = None

    user_agent = 'NewsBlur Feed Fetcher (%s subscriber%s) - %s' % (
        self.feed.num_subscribers,
        's' if self.feed.num_subscribers != 1 else '',
        URL,
    )

    parsed = feedparser.parse(self.feed.feed_address,
                              agent=user_agent,
                              etag=etag,
                              modified=modified)
    self.fpf = parsed
    return FEED_OK, self.fpf
开发者ID:rkabir,项目名称:NewsBlur,代码行数:29,代码来源:feed_fetcher.py
示例6: add_feed
def add_feed(self, xml):
    """Parse ``xml`` with feedparser and record its items.

    ``self.articles`` grows by the number of parsed items.  Each item is
    stored in ``self.items`` as ``link -> (title, summary)`` while
    ``self.u_mut`` is held, after which ``self.u_sem`` is released.

    Raises:
        KeyError: if an item lacks ``link``, ``title`` or ``summary``.
    """
    feeds = feedparser.parse(xml)
    self.articles += len(feeds['items'])
    for f in feeds['items']:
        self.u_mut.acquire()
        try:
            self.items[f['link']] = (f['title'], f['summary'])
        finally:
            # Always hand the mutex back: an item with a missing key must
            # not leave it held and block every other user of the lock.
            self.u_mut.release()
        # NOTE(review): original indentation was lost; assumed one semaphore
        # release per stored item -- confirm against the consumer side.
        self.u_sem.release()
开发者ID:enry86,项目名称:NewSearch,代码行数:8,代码来源:feedman.py
示例7: fetch
def fetch(self):
    """
    Uses feedparser to download the feed. Will be parsed later.

    The etag / last-modified validators are dropped when the fetch is
    forced, on roughly 1% of calls chosen at random, or when the feed has
    never been fetched or is not known good.  If the first parse raises, the
    feed is parsed again without those validators.  The result is stored on
    ``self.fpf``.
    """
    start = time.time()  # not read in the visible part of this method
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY)' % (identity,
                                                                self.feed.title[:30],
                                                                self.feed.id)
    logging.debug(log_msg)

    etag = self.feed.etag
    modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
    address = self.feed.feed_address

    # Forced, or picked at random (about 1% of calls): drop both validators
    # and fetch a cache-busted address.
    if (self.options.get('force') or random.random() <= .01):
        modified = None
        etag = None
        address = cache_bust_url(address)
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
            self.feed.title[:30], address))
    # Never fetched before, or not known good: drop both validators.
    elif (not self.feed.fetched_once or not self.feed.known_good):
        modified = None
        etag = None

    USER_AGENT = ('NewsBlur Feed Fetcher - %s '
                  '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                  'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                  'Safari/534.48.3)' % (
                      self.feed.permalink,
                  ))

    try:
        self.fpf = feedparser.parse(address,
                                    agent=USER_AGENT,
                                    etag=etag,
                                    modified=modified)
    # ``except ... as`` is accepted from Python 2.6 on and required on 3.
    except (TypeError, ValueError, KeyError) as e:
        logging.debug(u' ***> [%-30s] ~FR%s, turning off headers.' %
                      (self.feed.title[:30], e))
        self.fpf = feedparser.parse(address, agent=USER_AGENT)
开发者ID:echobfy,项目名称:rssEngine,代码行数:44,代码来源:feed_fetcher.py
示例8: _blog_fetch_callback
def _blog_fetch_callback(self, rpc):
    """Render ``main.html`` with the feed posts whose signature is known.

    The feed body comes from ``rpc.get_result().content``; only posts whose
    ``signature`` is contained in ``Signatures.signatures()`` are passed to
    the template, in feed order.
    """
    parsed = feedparser.parse(StringIO(rpc.get_result().content))
    approved = Signatures.signatures()

    candidates = (BlogPost.blog_post_from_feed_entry(item)
                  for item in parsed['entries'])
    posts = [post for post in candidates if post.signature in approved]

    context = {"posts": posts, "DEBUG": DEBUG}
    page = jinja_environment.get_template('main.html')
    self.response.out.write(page.render(context))
开发者ID:darvin,项目名称:moderated-feed-gae-server,代码行数:15,代码来源:main.py
示例9: fetch
def fetch(self):
    """
    Uses feedparser to download the feed. Will be parsed later.

    The etag / last-modified validators are dropped when the fetch is forced
    or the feed has never been fetched or is not known good.  A parsed feed
    supplied through ``self.options['fpf']`` is used instead of downloading.

    Returns ``(FEED_OK, parsed_feed)``; the parsed feed is also kept on
    ``self.fpf``.
    """
    start = time.time()
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
        identity,
        unicode(self.feed)[:30],
        self.feed.id,
        datetime.datetime.now() - self.feed.last_update)
    logging.debug(log_msg)

    etag = self.feed.etag
    modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None

    if self.options.get('force') or not self.feed.fetched_once or not self.feed.known_good:
        modified = None
        etag = None

    USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 Safari/534.48.3 (NewsBlur Feed Fetcher - %s subscriber%s - %s)' % (
        self.feed.num_subscribers,
        's' if self.feed.num_subscribers != 1 else '',
        settings.NEWSBLUR_URL
    )

    if self.options.get('feed_xml'):
        logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
            unicode(self.feed)[:30], len(self.options.get('feed_xml'))))

    if self.options.get('fpf'):
        self.fpf = self.options.get('fpf')
        logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
            unicode(self.feed)[:30]))
    else:
        self.fpf = feedparser.parse(self.feed.feed_address,
                                    agent=USER_AGENT,
                                    etag=etag,
                                    modified=modified)

    # Every other option is read with .get(); do the same here so a caller
    # that never set 'verbose' gets no timing line instead of a KeyError.
    if self.options.get('verbose') and getattr(self.fpf, 'status', None) == 200:
        logging.debug(u' ---> [%-30s] ~FBTIME: feed fetch in ~FM%.4ss' % (
            unicode(self.feed)[:30], time.time() - start))

    return FEED_OK, self.fpf
开发者ID:ananthrk,项目名称:NewsBlur,代码行数:42,代码来源:feed_fetcher.py
示例10: fetch
def fetch(self):
    """
    Download the feed through feedparser; the result is processed later.

    Returns ``(FEED_OK, parsed_feed)`` and keeps the parsed feed on
    ``self.fpf``.
    """
    # Default timeout (seconds) for sockets created from here on.
    socket.setdefaulttimeout(30)

    worker_id = self.get_identity()
    logging.debug(
        u"%2s ---> [%-30s] Fetching feed (%d)" % (worker_id, unicode(self.feed)[:30], self.feed.id)
    )
    self.feed.set_next_scheduled_update()

    etag = self.feed.etag
    if self.feed.last_modified:
        modified = self.feed.last_modified.utctimetuple()[:7]
    else:
        modified = None
    # Forced and first-time fetches send neither validator.
    if self.options.get("force") or not self.feed.fetched_once:
        modified = None
        etag = None

    parsed = feedparser.parse(
        self.feed.feed_address,
        agent=USER_AGENT,
        etag=etag,
        modified=modified,
    )
    self.fpf = parsed
    return FEED_OK, self.fpf
开发者ID:tosh,项目名称:NewsBlur,代码行数:20,代码来源:feed_fetcher.py
示例11: fetch
def fetch(self):
    """
    Download the feed through feedparser; the result is processed later.

    A parsed feed supplied through ``self.options["fpf"]`` is used instead of
    downloading.  Returns ``(FEED_OK, parsed_feed)`` and keeps the parsed
    feed on ``self.fpf``.
    """
    started_at = time.time()
    worker_id = self.get_identity()
    logging.debug(
        u"%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s"
        % (
            worker_id,
            self.feed.title[:30],
            self.feed.id,
            datetime.datetime.now() - self.feed.last_update,
        )
    )

    etag = self.feed.etag
    if self.feed.last_modified:
        modified = self.feed.last_modified.utctimetuple()[:7]
    else:
        modified = None
    # Forced, never-fetched and not-known-good feeds send neither validator.
    if self.options.get("force") or not self.feed.fetched_once or not self.feed.known_good:
        modified = None
        etag = None

    user_agent = (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/536.2.3 (KHTML, like Gecko) Version/5.2 (NewsBlur Feed Fetcher - %s subscriber%s - %s)"
        % (
            self.feed.num_subscribers,
            "s" if self.feed.num_subscribers != 1 else "",
            settings.NEWSBLUR_URL,
        )
    )

    if self.options.get("feed_xml"):
        logging.debug(
            u" ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s"
            % (self.feed.title[:30], len(self.options.get("feed_xml")))
        )

    if not self.options.get("fpf"):
        self.fpf = feedparser.parse(
            self.feed.feed_address, agent=user_agent, etag=etag, modified=modified
        )
    else:
        self.fpf = self.options.get("fpf")
        logging.debug(
            u" ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping." % (self.feed.title[:30])
        )

    logging.debug(
        u" ---> [%-30s] ~FYFeed fetch in ~FM%.4ss" % (self.feed.title[:30], time.time() - started_at)
    )
    return FEED_OK, self.fpf
开发者ID:zhudonghe,项目名称:NewsBlur,代码行数:39,代码来源:feed_fetcher.py
示例12: parse
def parse(self):
    """Parse the feed at ``self.url`` and prepare its output directories.

    The parsed feed is stored on ``self.feed``.  ``self.ffname`` is derived
    from the feed title and ``self.book_dir`` is set to
    ``<data_dir><ffname>/``.  That directory and its ``images/`` and
    ``articles/`` sub-directories are created when missing.

    Returns:
        ``self``.

    Raises:
        The feed's ``bozo_exception`` when feedparser reports ``bozo == 1``.
    """
    logging.info('Parse feed: %s' % self.url)
    referrer = "https://www.google.com/reader/view/"
    self.feed = feedparser.parse(self.url, agent=self.user_agent, referrer=referrer)
    if self.feed.bozo == 1:
        raise self.feed.bozo_exception

    self.ffname = ascii_filename(self.feed.feed.title)
    # An existing directory is reused as-is (a disabled variant used to pick
    # a fresh "name(N)" directory instead).
    self.book_dir = '%s%s' % (self.data_dir, self.ffname) + '/'

    for directory in (self.book_dir,
                      self.book_dir + 'images/',
                      self.book_dir + 'articles/'):
        if not os.path.isdir(directory):
            # 0o777 is the octal spelling accepted by Python 2.6+ and 3.
            os.mkdir(directory, 0o777)
    return self
开发者ID:KyoZhang,项目名称:feed2mobi,代码行数:36,代码来源:feed2mobi.py
示例13: _run
def _run(self):
    """Parse ``self.url`` via ``_feed.parse`` using the stored etag and modified values."""
    parsed = _feed.parse(
        self.url,
        etag=self.etag,
        modified=self.modified,
    )
    return parsed
开发者ID:CMGS,项目名称:Collapsar,代码行数:2,代码来源:crawler.py
示例14: except
if not self.fpf or self.options.get('force_fp', False):
try:
self.fpf = feedparser.parse(address,
agent=self.feed.user_agent,
etag=etag,
modified=modified)
except (TypeError, ValueError, KeyError, EOFError, MemoryError), e:
logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
(self.feed.log_title[:30], e))
pass
if not self.fpf:
try:
logging.debug(u' ***> [%-30s] ~FRTurning off headers...' %
(self.feed.log_title[:30]))
self.fpf = feedparser.parse(address, agent=self.feed.user_agent)
except (TypeError, ValueError, KeyError, EOFError, MemoryError), e:
logging.debug(u' ***> [%-30s] ~FRFetch failed: %s.' %
(self.feed.log_title[:30], e))
return FEED_ERRHTTP, None
logging.debug(u' ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' % (
self.feed.log_title[:30], time.time() - start))
return FEED_OK, self.fpf
def get_identity(self):
identity = "X"
current_process = multiprocessing.current_process()
if current_process._identity:
开发者ID:dosiecki,项目名称:NewsBlur,代码行数:31,代码来源:feed_fetcher.py
示例15: update
def update(self):
    """
    Update

    Uses FeedParser to grab and parse feed.

    Does nothing when ``self.enabled`` is ``False``.  Otherwise the feed at
    ``self.url`` is fetched with the stored etag / last-modified values and
    an ``Entry`` is created for each feed entry until one is reached that is
    not newer than ``self.last_modified``.  The outcome is recorded on
    ``self`` and ``self.save()`` is called.
    """
    if self.enabled is False:
        return

    if self.last_modified:
        modified = self.last_modified.timetuple()
    else:
        modified = None

    data = feedparser.parse(self.url, etag=self.etag,
                            modified=modified, agent=USER_AGENT)

    # NOTE(review): the original indentation was lost; the nesting below is
    # the most plausible reading -- confirm against the upstream file.
    if data.bozo != 1:
        if data.status != 304:
            # reverse() + pop() walks the entries in their original order
            # while emptying the list.
            data.entries.reverse()
            while data.entries:
                dentry = data.entries.pop()
                if 'published' in dentry:
                    created_on = datetime.datetime(*dentry.published_parsed[0:6])
                else:
                    created_on = datetime.datetime(*dentry.updated_parsed[0:6])
                if self.last_modified is not None and (created_on <= self.last_modified):
                    break

                if 'title' in dentry:
                    text = dentry.title
                elif 'summary' in dentry:
                    text = dentry.summary
                else:
                    # The original discarded this value, leaving ``text``
                    # unbound (or stale from the previous entry).
                    # NOTE(review): feedparser documents ``content`` as a
                    # list of dicts -- confirm the helpers below accept it.
                    text = dentry.get('content', 'None')

                if self.auto_link:
                    text = self._auto_link(text)
                if self.feed_type == 't':
                    text = self._twitter_parser(text)

                Entry.objects.create(
                    source=self,
                    url=dentry.link,
                    text=text,
                    created_on=created_on
                )

            if 'etag' in data:
                self.etag = data.etag
            if 'modified' in data:
                self.last_modified = datetime.datetime(*data.modified[0:6])
            else:
                try:
                    latest = Entry.objects.filter(source=self).latest('created_on')
                    self.last_modified = latest.created_on
                except ObjectDoesNotExist:
                    pass
        self.last_update_successful = True
        self.last_status_code = data.status
    else:
        self.last_update_successful = False
        # pprint() prints and returns None, so the message was never stored;
        # keep a textual form of the exception instead.
        self.error_message = repr(data.bozo_exception)
    self.save()
开发者ID:jobscry,项目名称:vz-stream,代码行数:69,代码来源:models.py
示例16: fetch
def fetch(self):
"""
Uses requests to download the feed, parsing it in feedparser. Will be storified later.
"""
start = time.time()
identity = self.get_identity()
log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (identity,
self.feed.title[:30],
self.feed.id,
datetime.datetime.now() - self.feed.last_update)
logging.debug(log_msg)
etag=self.feed.etag
modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
address = self.feed.feed_address
if (self.options.get('force') or random.random() <= .01):
self.options['force'] = True
modified = None
etag = None
address = qurl(address, add={"_": random.randint(0, 10000)})
logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
self.feed.title[:30], address))
elif (not self.feed.fetched_once or not self.feed.known_good):
modified = None
etag = None
USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
'(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
'Safari/534.48.3)' % (
self.feed.num_subscribers,
's' if self.feed.num_subscribers != 1 else '',
self.feed.permalink,
))
if self.options.get('feed_xml'):
logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
self.feed.title[:30], len(self.options.get('feed_xml'))))
if self.options.get('fpf'):
self.fpf = self.options.get('fpf')
logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
self.feed.title[:30]))
return FEED_OK, self.fpf
if 'youtube.com' in address:
try:
youtube_feed = self.fetch_youtube(address)
except (requests.adapters.ConnectionError):
youtube_feed = None
if not youtube_feed:
logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
(self.feed.title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(youtube_feed)
elif re.match('(https?)?://twitter.com/\w+/?$', qurl(address, remove=['_'])):
# try:
twitter_feed = self.fetch_twitter(address)
# except Exception, e:
# logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s: %e' %
# (self.feed.title[:30], address, e))
# twitter_feed = None
if not twitter_feed:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s' %
(self.feed.title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(twitter_feed)
if not self.fpf:
try:
headers = {
'User-Agent': USER_AGENT,
'Accept-encoding': 'gzip, deflate',
'A-IM': 'feed',
}
if etag:
headers['If-None-Match'] = etag
if modified:
# format into an RFC 1123-compliant timestamp. We can't use
# time.strftime() since the %a and %b directives can be affected
# by the current locale, but RFC 2616 states that dates must be
# in English.
short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
modified_header = '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])
headers['If-Modified-Since'] = modified_header
raw_feed = requests.get(address, headers=headers)
if raw_feed.content:
response_headers = raw_feed.headers
response_headers['Content-Location'] = raw_feed.url
self.fpf = feedparser.parse(smart_unicode(raw_feed.content),
response_headers=response_headers)
except Exception, e:
logging.debug(" ---> [%-30s] ~FRFeed failed to fetch with request, trying feedparser: %s" % (self.feed.title[:30], unicode(e)[:100]))
if not self.fpf:
try:
self.fpf = feedparser.parse(address,
agent=USER_AGENT,
etag=etag,
#.........这里部分代码省略.........
开发者ID:wizos,项目名称:NewsBlur,代码行数:101,代码来源:feed_fetcher.py
示例17: except
if not self.fpf:
try:
self.fpf = feedparser.parse(address,
agent=USER_AGENT,
etag=etag,
modified=modified)
except (TypeError, ValueError, KeyError, EOFError), e:
logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
(self.feed.title[:30], e))
pass
if not self.fpf:
try:
logging.debug(u' ***> [%-30s] ~FRTurning off headers...' %
(self.feed.title[:30]))
self.fpf = feedparser.parse(address, agent=USER_AGENT)
except (TypeError, ValueError, KeyError, EOFError), e:
logging.debug(u' ***> [%-30s] ~FRFetch failed: %s.' %
(self.feed.title[:30], e))
return FEED_ERRHTTP, None
logging.debug(u' ---> [%-30s] ~FYFeed fetch in ~FM%.4ss' % (
self.feed.title[:30], time.time() - start))
return FEED_OK, self.fpf
def get_identity(self):
identity = "X"
current_process = multiprocessing.current_process()
if current_process._identity:
开发者ID:wizos,项目名称:NewsBlur,代码行数:31,代码来源:feed_fetcher.py
示例18: fetch
def fetch(self):
"""
Uses requests to download the feed, parsing it in feedparser. Will be storified later.
"""
start = time.time()
identity = self.get_identity()
log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (identity,
self.feed.log_title[:30],
self.feed.id,
datetime.datetime.now() - self.feed.last_update)
logging.debug(log_msg)
etag = self.feed.etag
modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
address = self.feed.feed_address
if (self.options.get('force') or random.random() <= .01):
self.options['force'] = True
modified = None
etag = None
address = qurl(address, add={"_": random.randint(0, 10000)})
logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
self.feed.log_title[:30], address))
elif (not self.feed.fetched_once or not self.feed.known_good):
modified = None
etag = None
if self.options.get('feed_xml'):
logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
self.feed.log_title[:30], len(self.options.get('feed_xml'))))
if self.options.get('fpf'):
self.fpf = self.options.get('fpf')
logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
self.feed.log_title[:30]))
return FEED_OK, self.fpf
if 'youtube.com' in address:
try:
youtube_feed = self.fetch_youtube(address)
except (requests.adapters.ConnectionError):
youtube_feed = None
if not youtube_feed:
logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(youtube_feed)
elif re.match(r'(https?)?://twitter.com/\w+/?$', qurl(address, remove=['_'])):
twitter_feed = self.fetch_twitter(address)
if not twitter_feed:
logging.debug(u' ***> [%-30s] ~FRTwitter fetch failed: %s' %
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(twitter_feed)
elif re.match(r'(.*?)facebook.com/\w+/?$', qurl(address, remove=['_'])):
facebook_feed = self.fetch_facebook()
if not facebook_feed:
logging.debug(u' ***> [%-30s] ~FRFacebook fetch failed: %s' %
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(facebook_feed)
if not self.fpf:
try:
headers = self.feed.fetch_headers()
if etag:
headers['If-None-Match'] = etag
if modified:
# format into an RFC 1123-compliant timestamp. We can't use
# time.strftime() since the %a and %b directives can be affected
# by the current locale, but RFC 2616 states that dates must be
# in English.
short_weekdays = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
modified_header = '%s, %02d %s %04d %02d:%02d:%02d GMT' % (short_weekdays[modified[6]], modified[2], months[modified[1] - 1], modified[0], modified[3], modified[4], modified[5])
headers['If-Modified-Since'] = modified_header
if etag or modified:
headers['A-IM'] = 'feed'
raw_feed = requests.get(address, headers=headers)
if raw_feed.status_code >= 400:
logging.debug(" ***> [%-30s] ~FRFeed fetch was %s status code, trying fake user agent: %s" % (self.feed.log_title[:30], raw_feed.status_code, raw_feed.headers))
raw_feed = requests.get(self.feed.feed_address, headers=self.feed.fetch_headers(fake=True))
if raw_feed.content and 'application/json' in raw_feed.headers.get('Content-Type', ""):
# JSON Feed
json_feed = self.fetch_json_feed(address, raw_feed)
if not json_feed:
logging.debug(u' ***> [%-30s] ~FRJSON fetch failed: %s' %
(self.feed.log_title[:30], address))
return FEED_ERRHTTP, None
self.fpf = feedparser.parse(json_feed)
elif raw_feed.content and raw_feed.status_code < 400:
response_headers = raw_feed.headers
response_headers['Content-Location'] = raw_feed.url
self.raw_feed = smart_unicode(raw_feed.content)
self.fpf = feedparser.parse(self.raw_feed,
response_headers=response_headers)
if self.options.get('debug', False):
logging.debug(" ---> [%-30s] ~FBFeed fetch status %s: %s length / %s" % (self.feed.log_title[:30], raw_feed.status_code, len(smart_unicode(raw_feed.content)), raw_feed.headers))
except Exception, e:
#.........这里部分代码省略.........
开发者ID:dosiecki,项目名称:NewsBlur,代码行数:101,代码来源:feed_fetcher.py
示例19: fetch
def fetch(self):
    """
    Uses feedparser to download the feed. Will be parsed later.

    The etag / last-modified validators are dropped when the fetch is
    forced, on roughly 1% of calls chosen at random, or when the feed has
    never been fetched or is not known good.  A parsed feed supplied through
    ``self.options['fpf']`` is returned at once as ``(FEED_OK, self.fpf)``.
    Addresses containing ``youtube.com`` go through ``self.fetch_youtube``;
    a failed YouTube fetch returns ``(FEED_ERRHTTP, None)``.  Otherwise the
    parsed result is stored on ``self.fpf``.
    """
    start = time.time()  # not read in the visible part of this method
    identity = self.get_identity()
    log_msg = u'%2s ---> [%-30s] ~FYFetching feed (~FB%d~FY), last update: %s' % (
        identity,
        self.feed.title[:30],
        self.feed.id,
        datetime.datetime.now() - self.feed.last_update)
    logging.debug(log_msg)

    etag = self.feed.etag
    modified = self.feed.last_modified.utctimetuple()[:7] if self.feed.last_modified else None
    address = self.feed.feed_address

    if (self.options.get('force') or random.random() <= .01):
        modified = None
        etag = None
        address = cache_bust_url(address)
        logging.debug(u' ---> [%-30s] ~FBForcing fetch: %s' % (
            self.feed.title[:30], address))
    elif (not self.feed.fetched_once or not self.feed.known_good):
        modified = None
        etag = None

    USER_AGENT = ('NewsBlur Feed Fetcher - %s subscriber%s - %s '
                  '(Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_1) '
                  'AppleWebKit/534.48.3 (KHTML, like Gecko) Version/5.1 '
                  'Safari/534.48.3)' % (
                      self.feed.num_subscribers,
                      's' if self.feed.num_subscribers != 1 else '',
                      self.feed.permalink,
                  ))

    if self.options.get('feed_xml'):
        logging.debug(u' ---> [%-30s] ~FM~BKFeed has been fat pinged. Ignoring fat: %s' % (
            self.feed.title[:30], len(self.options.get('feed_xml'))))

    if self.options.get('fpf'):
        self.fpf = self.options.get('fpf')
        logging.debug(u' ---> [%-30s] ~FM~BKFeed fetched in real-time with fat ping.' % (
            self.feed.title[:30]))
        return FEED_OK, self.fpf

    if 'youtube.com' in address:
        try:
            youtube_feed = self.fetch_youtube(address)
        except (requests.adapters.ConnectionError):
            youtube_feed = None
        if not youtube_feed:
            logging.debug(u' ***> [%-30s] ~FRYouTube fetch failed: %s.' %
                          (self.feed.title[:30], address))
            return FEED_ERRHTTP, None
        self.fpf = feedparser.parse(youtube_feed)

    if not self.fpf:
        try:
            self.fpf = feedparser.parse(address,
                                        agent=USER_AGENT,
                                        etag=etag,
                                        modified=modified)
        # ``except ... as`` is accepted from Python 2.6 on and required on 3.
        except (TypeError, ValueError, KeyError, EOFError) as e:
            # Best effort: the failure is only logged and self.fpf is left
            # as it was.
            logging.debug(u' ***> [%-30s] ~FRFeed fetch error: %s' %
                          (self.feed.title[:30], e))
开发者ID:swplzj,项目名称:NewsBlur,代码行数:65,代码来源:feed_fetcher.py
示例20: feedParsedItems
def feedParsedItems(request, feed_id):
feed = Feed.objects.get(id=feed_id)
items = feedparser.parse(feed.feed_address).entries
#print feed.feed_address
#print
|
请发表评论