本文整理汇总了Python中newsman.config.settings.logger.error函数的典型用法代码示例。如果您正苦于以下问题：Python error函数的具体用法？Python error怎么用？Python error使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了error函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: clean
def clean():
    """
    Remove expired items from every document collection in the database.

    An item is expired when its 'updated' POSIX timestamp is older than the
    current UTC time minus DATABASE_REMOVAL_DAYS days. For each expired item
    clean_memory.clean_by_item and clean_disk.clean_by_item are called before
    the record itself is removed from its collection.

    Returns:
        True when collection names were found and processed, False when no
        collection names were found or when any exception was raised (the
        exception is logged, not propagated).
    """
    logger.info('... cleaning database ...')
    try:
        document_names = _find_document_names()
        if not document_names:
            logger.error('Cannot find documents')
            return False

        # The cut-off does not depend on the collection being cleaned, so it
        # is computed once up front instead of once per collection.
        current_utc_time_posix = calendar.timegm(time.gmtime())
        deadline_datetime = datetime.utcfromtimestamp(
            current_utc_time_posix) - timedelta(days=DATABASE_REMOVAL_DAYS)
        deadline_posix = calendar.timegm(deadline_datetime.timetuple())
        expired_query = {'updated': {'$lt': deadline_posix}}

        for document_name in document_names:
            document = Collection(db, document_name)
            for removal_candidate in document.find(expired_query):
                # drop whatever is kept in memory for this item
                clean_memory.clean_by_item(str(removal_candidate['_id']))
                # remove corresponding files on disk
                clean_disk.clean_by_item(removal_candidate)
                # finally remove the record itself
                document.remove({'_id': removal_candidate['_id']})
        return True
    except Exception as k:
        logger.error(str(k))
        return False
开发者ID:chengdujin,项目名称:newsman,代码行数:34,代码来源:clean_database.py
示例2: __init__
def __init__(self, url, language):
    """
    Download `url` and extract its title, short title, article and images.

    The page is fetched with transcoder.prepare_link, cleaned with the
    'replace_brs' and 'replace_fonts' patterns from self.regexps, parsed
    with BeautifulSoup, stripped of script/style/link elements and then
    handed to the _get_* helpers.

    Args:
        url: address of the page to process.
        language: language code stored on the instance for the helpers.

    Any exception is logged and swallowed, so after a failure only the
    attributes assigned before the failing step exist on the instance.
    """
    try:
        self.candidates = {}
        self.url = url
        self.language = language
        self.data = transcoder.prepare_link(self.url)
        self.data = self.regexps['replace_brs'].sub(
            "</p><p>", str(self.data))
        # Raw string: \g<1> is a regex group reference, not a string escape,
        # and "\g" in a normal literal is an invalid escape sequence.
        self.data = self.regexps['replace_fonts'].sub(
            r"<\g<1>span>", str(self.data))
        self.html = BeautifulSoup(self.data.decode('utf-8', 'ignore'))
        self.article_image = None
        self._get_specific_image()
        self._remove_script()
        self._remove_style()
        self._remove_link()
        self.title = self._get_title()
        self.short_title = self._get_short_title()
        self.content = self._get_article()
        self.images = self._get_images()
    except Exception as k:
        logger.error(str(k))
开发者ID:chengdujin,项目名称:newsman,代码行数:28,代码来源:simplr.py
示例3: _extract
def _extract(data=None, referer=None):
    """
    Extract title, sanitized text content and images from UCK output.

    Args:
        data: mapping returned by the UCK server; 'STRUCT_PAGE_TYPE' and
            'content' are read, 'title' is optional.
        referer: passed through to _sanitize and _collect_images.

    Returns:
        (title, content, images). All three are None when `data` is empty,
        when STRUCT_PAGE_TYPE is 0, or when anything goes wrong (including
        a missing or non-numeric STRUCT_PAGE_TYPE).
    """
    if not data:
        logger.error('Received no data from UCK server.')
        return None, None, None
    try:
        # Parsed inside the try block so a missing or malformed status field
        # yields the regular failure tuple instead of an uncaught exception.
        successful = int(data['STRUCT_PAGE_TYPE'])
        if successful == 0:
            logger.info('Cannot interpret the page! status != 1')
            return None, None, None
        # content
        content = data['content'].replace("\\", "")
        content = _sanitize(content, referer)
        # images
        images, data = _collect_images(data, referer)
        images = images if images else None
        # title
        title = data['title'] if 'title' in data else None
        return title, content, images
    except Exception as k:
        logger.error(str(k))
        return None, None, None
开发者ID:chengdujin,项目名称:newsman,代码行数:31,代码来源:baidu_uck.py
示例4: _is_valid
def _is_valid(content, language):
    """
    Check whether `content` is long enough to be used for `language`.

    Languages starting with 'zh' and 'ja' are measured in characters
    against PARAGRAPH_CRITERIA_KANJI; languages starting with 'th' are
    measured in whitespace-separated tokens against PARAGRAPH_CRITERIA_THAI;
    everything else is measured in whitespace-separated tokens against
    PARAGRAPH_CRITERIA_LATIN.

    Returns:
        True when the length reaches the threshold, False otherwise, and
        False when an argument is empty or an exception is raised.
    """
    if not content or not language:
        logger.error('Method malformed!')
        return False
    try:
        # Only byte strings need decoding. `bytes` is an alias of `str` on
        # Python 2, so this keeps the old behaviour there while no longer
        # calling .decode on text strings under Python 3.
        if isinstance(content, bytes):
            # chardet may report no encoding at all; fall back to UTF-8
            encoding = chardet.detect(content)['encoding'] or 'utf-8'
            content = content.decode(encoding, 'ignore')

        if language.startswith('zh') or language == 'ja':
            length, minimum = len(content), PARAGRAPH_CRITERIA_KANJI
        elif language.startswith('th'):
            length, minimum = len(content.split()), PARAGRAPH_CRITERIA_THAI
        else:
            length, minimum = len(content.split()), PARAGRAPH_CRITERIA_LATIN
        return length >= minimum
    except Exception as k:
        logger.error(str(k))
        return False
开发者ID:chengdujin,项目名称:newsman,代码行数:31,代码来源:summarizer.py
示例5: _collect_images
def _collect_images(data=None, referer=None):
    """
    Find all possible images for a transcoded page.

    Images are gathered from two places:
    1. the entries of data['image_list'] that carry a non-empty 'src'
    2. the images found in data['content'] (which is replaced by the
       content returned from illustrator.find_images)

    Args:
        data: mapping with a 'content' entry and an optional 'image_list'.
        referer: passed through to the illustrator helpers.

    Returns:
        A (images, data) pair in every case, so callers can always unpack
        two values. `images` is the de-duplicated image list, or None when
        nothing was found, `data` was empty, or an error occurred.
    """
    if not data:
        return None, data
    try:
        images = []
        # first try to find images in image_list
        for image in data.get('image_list') or []:
            if 'src' in image and image['src']:
                image_normalized = illustrator.find_image(
                    image['src'].strip(), referer)
                if image_normalized:
                    images.append(image_normalized)
        # then try to find images in the content
        images_from_content, data['content'] = illustrator.find_images(
            data['content'], referer)
        if images_from_content:
            images.extend(images_from_content)
        # remove duplicated ones
        images = illustrator.dedup_images(images) if images else None
        return images, data
    except Exception as k:
        logger.error(str(k))
        # keep the two-value shape on failure as well
        return None, data
开发者ID:chengdujin,项目名称:newsman,代码行数:32,代码来源:baidu_uck.py
示例6: _get_shorter_text
def _get_shorter_text(content, language, limit):
    """
    Shorten `content` to at most `limit` characters, keeping whole units.

    The text is split into units ('zh'/'ja': regular-expression sentence
    tokenizer, 'th': whitespace-separated tokens, otherwise
    nltk.sent_tokenize) and units are appended, separated by a space, as
    long as the accumulated length stays within `limit`. A unit that does
    not fit is skipped; later, shorter units may still be appended.

    Returns:
        The shortened text, or None when an argument is empty or an
        exception is raised.
    """
    if not content or not language:
        logger.error('Method malformed!')
        return None
    try:
        # Data should be processed as text; `bytes` is `str` on Python 2,
        # so byte strings are decoded there exactly as before.
        if isinstance(content, bytes):
            encoding = chardet.detect(content)['encoding'] or 'utf-8'
            content = content.decode(encoding, 'ignore')

        # Break text into units. This must be a single if/elif/else chain:
        # with a separate `if` for 'th', the 'zh'/'ja' result was always
        # overwritten by the final else branch.
        if language == 'zh' or language == 'ja':
            jp_sent_tokenizer = nltk.RegexpTokenizer('[^!?.!?。.]*[!?.!?。]*')
            sentences = jp_sent_tokenizer.tokenize(content)
        elif language == 'th':
            sentences = content.split()
        else:  # latin-based languages and arabic
            sentences = nltk.sent_tokenize(content)

        enough_sentences = u""
        for sentence in sentences:
            # sentence is text, so len() counts characters (also for CJK)
            sentence = sentence.strip()
            if sentence:
                if len(enough_sentences) + len(sentence) + 1 <= limit:
                    enough_sentences = "%s %s" % (enough_sentences, sentence)
        return str(enough_sentences.strip())
    except Exception as k:
        logger.error(str(k))
        return None
开发者ID:chengdujin,项目名称:newsman,代码行数:35,代码来源:summarizer.py
示例7: extract
def extract(language, title, content, summary, link, feed, category):
    """
    Produce a summary for an article, trying three sources in order.

    1. PyTeaser, for the languages 'en', 'ja', 'pt' and 'th'
    2. the summary delivered by the feed, shortened by _get_summary
    3. the first paragraph of the content, via _get_first_paragraph

    Returns:
        The first non-empty summary found (possibly an empty value when
        every source fails), or None when content, title or language is
        missing or an exception is raised.
    """
    if not (content and title and language):
        logger.error('No data is found!')
        return None
    try:
        result_summary = ""

        # content is known to be non-empty here, only the language matters
        if language in ('en', 'ja', 'pt', 'th'):
            teaser = PyTeaser(language, title, content, link, feed, category)
            result_summary = teaser.summarize()

        # fall back to the feed-provided summary, limited in length
        if not result_summary:
            if summary:
                result_summary = _get_summary(summary, language)

        # last resort: first paragraph of the transcoded content
        if not result_summary:
            result_summary = _get_first_paragraph(content, language)

        return result_summary
    except Exception as k:
        logger.error(str(k))
        return None
开发者ID:chengdujin,项目名称:newsman,代码行数:33,代码来源:summarizer.py
示例8: _get_summary
def _get_summary(content, language):
    """
    Return the first readable, shortened paragraph of an HTML summary.

    The HTML is converted to plain text with html2text (links, images and
    emphasis ignored, no line wrapping), split on blank lines, and the
    first paragraph accepted by _is_valid is cut down with
    _get_shorter_text using SUMMARY_LENGTH_LIMIT.

    Returns:
        The shortened paragraph, or None when no paragraph qualifies, an
        argument is empty, or an exception is raised.
    """
    if not (content and language):
        logger.error('Method malformed!')
        return None
    try:
        # strip off html code
        converter = html2text.HTML2Text()
        converter.ignore_links = True
        converter.ignore_images = True
        converter.ignore_emphasis = True
        converter.body_width = 0

        plain_text = converter.handle(content).strip().strip('#').strip()
        for paragraph in plain_text.split("\n\n"):
            if not paragraph.strip():
                continue
            if not _is_valid(paragraph, language):
                continue
            summary = _get_shorter_text(
                paragraph, language, SUMMARY_LENGTH_LIMIT)
            if summary:
                return summary
    except Exception as k:
        logger.error(str(k))
    return None
开发者ID:chengdujin,项目名称:newsman,代码行数:28,代码来源:summarizer.py
示例9: convert
def convert(link):
    """
    Send `link` to the UCK API and reformat the returned content.

    Returns:
        (title, transcoded_content, images) as produced by _extract, or
        (None, None, None) when `link` is empty, the server returns
        nothing, the payload cannot be evaluated, or an exception is
        raised.
    """
    if not link:
        logger.error('Cannot transcode nothing!')
        return None, None, None
    # send link to uck server and get data back
    try:
        raw_data = _transcode(link)
        if not raw_data:
            logger.info('Cannot read anything from UCK server')
            return None, None, None

        # SECURITY NOTE(review): eval() executes whatever the remote server
        # sent. Kept because the payload format is not visible here; if it
        # is plain JSON or a Python literal, json.loads / ast.literal_eval
        # should replace it.
        # The payload is evaluated once and the result reused, instead of
        # evaluating it a second time for _extract.
        try:
            parsed_data = eval(raw_data)
        except Exception:
            logger.info('Invalid syntax found for UCK output')
            return None, None, None

        # text is sanitized, images are found from image_list
        title, transcoded, images = _extract(parsed_data, link)
        return title, transcoded, images
    except Exception as k:
        logger.error('%s for %s' % (str(k), str(link)))
        return None, None, None
开发者ID:chengdujin,项目名称:newsman,代码行数:28,代码来源:baidu_uck.py
示例10: convert
def convert(link):
    """
    Transcode a web page with the readability-style Document class.

    Returns:
        (short_title, content, images) for the page behind `link`, or
        (None, None, None) when `link` is empty, the page cannot be
        fetched, the document is not recognised, or an exception is raised.
    """
    failure = (None, None, None)
    if not link:
        logger.error('Cannot transcode nothing!')
        return failure
    try:
        data = transcoder.prepare_link(link)
        if not data:
            logger.info('Cannot parse %s correctly' % link)
            return failure

        article = Document(data)
        if not article:
            logger.info('Burify cannot recognize the data')
            return failure

        summary_html = article.summary(html_partial=False)
        images, content = _collect_images(summary_html, link)
        return article.short_title(), content, images
    except Exception as k:
        logger.error('%s for %s' % (str(k), str(link)))
        return failure
开发者ID:chengdujin,项目名称:newsman,代码行数:26,代码来源:burify.py
示例11: dedup_images
def dedup_images(images=None):
    """
    Remove images that share a URL, keeping the first occurrence of each.

    Args:
        images: list of image dicts, e.g.
            [{'url': xxx, 'width': yyy, 'height': zzz}, ...]

    Returns:
        A new list with duplicates removed in original order, or None when
        `images` is empty or an entry cannot be processed (for example a
        missing 'url' key).
    """
    if not images:
        logger.error('Image list is found VOID!')
        return None
    try:
        # Built eagerly: a lazy filter() (Python 3) would run outside this
        # try block and would not hand a list back to callers. A set makes
        # the "already seen" check constant-time.
        seen_urls = set()
        unique_images = []
        for image in images:
            url = image['url']
            if url not in seen_urls:
                seen_urls.add(url)
                unique_images.append(image)
        return unique_images
    except Exception as k:
        logger.info('Problem:[%s]\nSource:[%s]' % (str(k), str(images)))
        return None
开发者ID:chengdujin,项目名称:newsman,代码行数:27,代码来源:illustrator.py
示例12: _check_image
def _check_image(self, image_url=None, image_html=None):
    """
    Return a usable URL for an image, falling back to a local copy.

    The image is requested without custom HEADERS. When that request fails,
    answers with an HTTP error status, or the URL belongs to
    'posttoday.com/media/content', the image is stored through
    self._download_copy and the URL of that copy is returned instead (or
    the original URL when no copy could be made).

    Raises:
        Exception: when image_url or image_html is empty.
    """
    if not image_url:
        logger.error('Image URL is found VOID!')
        raise Exception('Image URL is found VOID!')
    if not image_html:
        logger.error('Image content is found VOID!')
        raise Exception('Image content is found VOID!')
    try:
        response = requests.get(image_url, timeout=UCK_TIMEOUT)
        # every 4xx/5xx status is an error, including 400 itself
        if response.status_code >= 400 or 'posttoday.com/media/content' in \
                image_url:
            raise Exception(
                'Without HEADERS [%s] cannot be reached!' % str(image_url))
    except Exception as k:
        logger.info('Problem:[%s] Source:[%s]' % (str(k), str(image_url)))
        # replace original image_url with downloaded local copy
        image_url_new = self._download_copy(image_url, image_html)
        return image_url_new if image_url_new else image_url
    # Image is accessible with/without HEADERS
    return image_url
开发者ID:chengdujin,项目名称:newsman,代码行数:27,代码来源:illustrator.py
示例13: _get_actual_link
def _get_actual_link(prefix=None, link=None):
    """
    Find the actual news link hidden inside the page behind `link`.

    The page is downloaded and parsed, the wrapper element described by
    HIDDEN_LINKS[prefix] (a tag name / attributes pair) is located, and the
    href of its first <a> is appended to `prefix`.

    Returns:
        The actual link as a string, or None when an argument is empty,
        the wrapper element is not found, or an exception is raised.
    """
    if not prefix or not link:
        logger.error(
            'Method malformed! Prefix:[%s], Link:[%s]' % (prefix, link))
        # nothing sensible can be fetched, so stop here
        return None
    try:
        raw_data = urllib2.urlopen(link)
        try:
            data = raw_data.readlines()
        finally:
            # always release the connection, even if reading fails
            raw_data.close()
        # str() is critical
        soup = BeautifulStoneSoup(str(data))
        html_tag, html_attrs = HIDDEN_LINKS[prefix]
        html_wrapper = soup.find(name=html_tag, attrs=html_attrs)
        if not html_wrapper:
            return None
        actual_suffix = html_wrapper.find('a')['href']
        return str('%s%s' % (prefix, actual_suffix))
    except Exception as k:
        logger.error('Cannot open %s' % k)
        return None
开发者ID:chengdujin,项目名称:newsman,代码行数:25,代码来源:rss_parser.py
示例14: save
def save(feed_info=None):
    """
    Register a feed in the FEED_REGISTRAR collection unless already known.

    A feed is identified by its 'feed_link' and 'language'. A new feed gets
    'updated_times' set to 0 and 'latest_update' set to None (both written
    into `feed_info` itself) before it is saved.

    Returns:
        The id of the new or already existing record as a string, or None
        when `feed_info` is empty or an exception is raised.
    """
    if not feed_info:
        logger.error("Method malformed!")
        return None
    try:
        # if the collection does not exist, it will be created
        col = Collection(db, FEED_REGISTRAR)
        query = {
            'feed_link': feed_info['feed_link'],
            'language': feed_info['language'],
        }
        existing = col.find_one(query)
        if existing:
            return str(existing['_id'])

        # first time this feed is seen: initialise its bookkeeping fields
        feed_info['updated_times'] = 0
        feed_info['latest_update'] = None
        return str(col.save(feed_info))
    except Exception as k:
        logger.error(str(k))
        return None
开发者ID:chengdujin,项目名称:newsman,代码行数:26,代码来源:database.py
示例15: find_biggest_image
def find_biggest_image(images=None):
    """
    Pick the image with the highest resolution from a list of images.

    Only images whose width * height exceeds
    MIN_IMAGE_SIZE[0] * MIN_IMAGE_SIZE[1] are considered; images without
    'width'/'height' or below that size are logged and skipped.

    Returns:
        The biggest qualifying image, or None when the list is empty, no
        image qualifies, or an exception is raised.
    """
    if not images:
        logger.error('Image list is found VOID!')
        return None
    try:
        min_resolution = MIN_IMAGE_SIZE[0] * MIN_IMAGE_SIZE[1]
        biggest, resolution_max = None, min_resolution
        for image in images:
            if not ('width' in image and 'height' in image):
                logger.info('Height and width not found! %s' % str(image))
                continue
            resolution_image = int(image['width']) * int(image['height'])
            if resolution_image <= min_resolution:
                logger.info('Image [%s] is not big enough!' %
                            str(image['url']))
            elif resolution_image > resolution_max:
                biggest = image
                resolution_max = resolution_image
        return biggest
    except Exception as k:
        logger.error('Problem:[%s]\nSource:[%s]' % (str(k), str(images)))
        return None
开发者ID:chengdujin,项目名称:newsman,代码行数:27,代码来源:illustrator.py
示例16: _find_redirected_link
def _find_redirected_link(url=None):
    """
    Follow redirections until the URL stops changing.

    The URL is requested repeatedly, each time continuing with the URL
    reported by the response, for at most 10 requests.

    Returns:
        The first URL whose response reports the same URL back, or None
        when `url` is empty or no stable URL is reached within the limit.
    """
    if not url:
        logger.error('Link [%s] is not valid!' % url)
        return None

    current = url
    for _ in range(10):
        resolved = requests.get(current).url
        if resolved == current:
            # the address no longer changes, so this is the real link
            return resolved
        current = resolved
    return None
开发者ID:chengdujin,项目名称:newsman,代码行数:26,代码来源:rss_parser.py
示例17: _clean_style
def _clean_style(self, e):
    """
    Delete the 'class', 'id' and 'style' attributes of every element
    returned by e.findAll(True). Errors are logged and swallowed.
    """
    try:
        for elem in e.findAll(True):
            for attribute in ('class', 'id', 'style'):
                del elem[attribute]
    except Exception as k:
        logger.error(str(k))
开发者ID:chengdujin,项目名称:newsman,代码行数:8,代码来源:simplr.py
示例18: __init__
def __init__(self, image_url=None, referer=None):
    """
    Analyze an image URL and record its resolved URL, content and size.

    Args:
        image_url: address of the image; must not be empty.
        referer: passed through to self._analyze.

    Raises:
        Exception: when image_url is empty.
    """
    if not image_url:
        message = 'Method malformed!'
        logger.error(message)
        raise Exception(message)

    analyzed = self._analyze(image_url, referer)
    self._image_url, self._image_html = analyzed
    self._image_size = self._calculate_size(self._image_html)
    self._clean_data()
开发者ID:chengdujin,项目名称:newsman,代码行数:9,代码来源:illustrator.py
示例19: run
def run(self):
    """
    Run the configured transcoder and store its output in self.result.

    The 'simplr' transcoder is called with (url, language), every other
    transcoder with (url) only. When the transcoder raises, the error is
    logged and self.result is set to (None, None, None), so self.result is
    always assigned once run() finishes.
    """
    try:
        # NOTE(review): eval() turns the transcoder name into the object of
        # that name. Safe only while self.transcoder is set by trusted code;
        # an explicit name -> module mapping would be more robust.
        transcoder_module = eval(self.transcoder)
        if self.transcoder == 'simplr':
            self.result = transcoder_module.convert(
                self.url, self.language)
        else:
            self.result = transcoder_module.convert(self.url)
    except Exception as k:
        logger.error(str(k))
        self.result = None, None, None
开发者ID:chengdujin,项目名称:newsman,代码行数:10,代码来源:transcoder.py
示例20: _get_title
def _get_title(self):
    """
    Return the text of the page's <title> element.

    Returns:
        The title text, or '' when the page has no <title> element or the
        lookup fails. Failures are logged instead of being silently
        discarded.
    """
    title = ''
    try:
        title_tag = self.html.find('title')
        if title_tag is not None:
            title = title_tag.text
    except Exception as k:
        # Best effort: keep returning '' as before, but leave a trace and
        # no longer intercept KeyboardInterrupt / SystemExit.
        logger.error(str(k))
    return title
开发者ID:chengdujin,项目名称:newsman,代码行数:11,代码来源:simplr.py
注：本文中的newsman.config.settings.logger.error函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论