本文整理汇总了 Python 中 module.utils.html_unescape 函数的典型用法代码示例。如果您正苦于以下问题：Python html_unescape 函数的具体用法？Python html_unescape 怎么用？Python html_unescape 使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了html_unescape函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。
示例1: handleWebLinks
def handleWebLinks(self):
    """Collect the target URLs hidden behind the page's "Web link" forms.

    Scans ``self.html`` for forms that post to
    ``http://linkcrypt.ws/out.html``, submits every form's ``file`` value
    through ``self.load`` and cuts the redirect target out of the
    ``window.location = "..."`` assignment found in the response.

    :return: list of HTML-unescaped links in page order; links that could
             not be extracted are skipped and reported via ``self.logDebug``
    """
    self.logDebug("Search for Web links ")

    package_links = []
    pattern = r'<form action="http://linkcrypt.ws/out.html"[^>]*?>.*?<input[^>]*?value="([^"]*?)"[^>]*?name="file"'
    ids = re.findall(pattern, self.html, re.I | re.S)

    self.logDebug("Decrypting %d Web links" % len(ids))

    for idx, weblink_id in enumerate(ids):
        try:
            self.logDebug("Decrypting Web link %d, %s" % (idx + 1, weblink_id))

            res = self.load("http://linkcrypt.ws/out.html", post={'file': weblink_id})

            marker_pos = res.find("window.location =")
            # 19 = length of the marker (17) plus the space and the opening
            # quote that follow it, i.e. the first character of the URL.
            indexs = marker_pos + 19
            indexe = res.find('"', indexs)

            # str.find() signals "not found" with -1; slicing with that value
            # would silently produce a garbage link instead of failing.
            if marker_pos < 0 or indexe < 0:
                self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, "redirect target not found"))
                continue

            link2 = res[indexs:indexe]
            self.logDebug(link2)

            package_links.append(html_unescape(link2))

        except Exception as detail:
            # Best effort: one broken link must not abort the remaining ones.
            self.logDebug("Error decrypting Web link %s, %s" % (weblink_id, detail))

    # Hand the collected links back to the caller; without this the list
    # built above would be discarded.
    return package_links
开发者ID:kurtiss,项目名称:htpc,代码行数:27,代码来源:LinkCryptWs.py
示例2: downloadFile
def downloadFile(self, pyfile):
    """Follow redirects for ``pyfile.url``, derive a file name and download.

    At most five header-only requests are made, following each ``location``
    header. The name defaults to the last path segment of the final URL and
    is replaced by the ``content-disposition`` filename when one is parsed.

    :param pyfile: file object; ``url`` is read and ``name`` is written
    :raises BadHeader: when a header reports status code 404
    """
    target = pyfile.url

    for _ in range(5):
        header = self.load(target, just_header=True)

        # self.load does not raise a BadHeader on 404 responses, do it here
        if 'code' in header and header['code'] == 404:
            raise BadHeader(404)

        if 'location' not in header:
            break

        redirect = header['location']
        self.logDebug("Location: " + redirect)
        target = unquote(redirect)

    last_segment = urlparse(target).path.split("/")[-1]
    filename = html_unescape(unquote(last_segment))

    if 'content-disposition' in header:
        disposition = header['content-disposition']
        self.logDebug("Content-Disposition: " + disposition)
        m = search("filename(?P<type>=|\*=(?P<enc>.+)'')(?P<name>.*)", disposition)
        if m:
            disp = m.groupdict()
            self.logDebug(disp)
            # No explicit charset in the header: fall back to UTF-8.
            if not disp['enc']:
                disp['enc'] = 'utf-8'
            cleaned = remove_chars(disp['name'], "\"';").strip()
            filename = unicode(unquote(cleaned), disp['enc'])

    # An empty name (e.g. URL ending in "/") falls back to the URL itself.
    pyfile.name = filename if filename else target
    self.logDebug("Filename: %s" % pyfile.name)
    self.download(target, disposition=True)
开发者ID:DasLampe,项目名称:pyload,代码行数:34,代码来源:BasePlugin.py
示例3: handleCaptcha
def handleCaptcha(self, inputs):
    """Detect which captcha the page uses, solve it and fill ``inputs``.

    The page in ``self.html`` is probed with four patterns in a fixed order;
    the first one that matches decides the captcha type.

    :param inputs: dict of form fields; the captcha answer fields are added
    :return: 1 (ReCaptcha), 2 (image captcha), 3 (digits positioned via
             ``padding-left``), 4 (SolveMedia) or 0 when nothing matched
    """
    recaptcha_match = re.search(self.RECAPTCHA_URL_PATTERN, self.html)
    if recaptcha_match:
        recaptcha_key = unquote(recaptcha_match.group(1))
        self.logDebug("RECAPTCHA KEY: %s" % recaptcha_key)
        challenge, response = ReCaptcha(self).challenge(recaptcha_key)
        inputs["recaptcha_challenge_field"] = challenge
        inputs["recaptcha_response_field"] = response
        return 1

    image_match = re.search(self.CAPTCHA_URL_PATTERN, self.html)
    if image_match:
        inputs["code"] = self.decryptCaptcha(image_match.group(1))
        return 2

    div_match = re.search(self.CAPTCHA_DIV_PATTERN, self.html, re.S)
    if div_match:
        captcha_div = div_match.group(1)
        self.logDebug(captcha_div)
        numerals = re.findall(
            r"<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>", html_unescape(captcha_div)
        )
        # Each hit is (padding-left, digit); sorting by the numeric padding
        # puts the digits into the order in which they are displayed.
        by_offset = sorted(numerals, key=lambda pair: int(pair[0]))
        inputs["code"] = "".join([digit for _, digit in by_offset])
        self.logDebug("CAPTCHA", inputs["code"], numerals)
        return 3

    media_match = re.search(self.SOLVEMEDIA_PATTERN, self.html)
    if media_match:
        challenge, response = SolveMedia(self).challenge(media_match.group(1))
        inputs["adcopy_challenge"] = challenge
        inputs["adcopy_response"] = response
        return 4

    return 0
开发者ID:japhigu,项目名称:pyload,代码行数:33,代码来源:XFileSharingPro.py
示例4: process
def process(self, pyfile):
    """Entry point: fetch the file page and start the matching handler.

    URLs that do not match ``self.__pattern__`` are passed to
    ``handleOverriden`` for premium accounts and rejected otherwise.
    Matching URLs are loaded, a direct download link is looked up and the
    download is started directly, via the premium handler or via the free
    handler, in that order of preference.

    :param pyfile: file object; ``url`` is read, ``name`` may be written
    """
    self.prepare()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        return

    try:
        self.html = self.load(pyfile.url, cookies=False, decode=True)
        self.file_info = self.getFileInfo()
    except PluginParseError:
        self.file_info = None

    self.location = self.getDirectDownloadLink()

    if not self.file_info:
        # No parsed info: name the file after the last URL path segment.
        source_url = self.location or pyfile.url
        tail = urlparse(source_url).path.split("/")[-1]
        pyfile.name = html_unescape(unquote(tail))

    if self.location:
        self.startDownload(self.location)
    elif self.premium:
        self.handlePremium()
    else:
        self.handleFree()
开发者ID:4Christopher,项目名称:pyload,代码行数:26,代码来源:XFileSharingPro.py
示例5: handlePremium
def handlePremium(self):
    """Extract the premium download URL from ``self.html`` and download it.

    ``self.parseError`` is called when ``PREMIUM_URL_PATTERN`` does not
    match, and ``self.resetAccount`` when the extracted URL does not start
    with ``http://``.
    """
    match = re.search(self.PREMIUM_URL_PATTERN, self.html)
    if match is None:
        self.parseError("Premium URL")

    premium_url = html_unescape(match.group(1))
    self.logDebug("Premium URL: " + premium_url)

    if not premium_url.startswith("http://"):
        self.resetAccount()

    self.download(premium_url)
开发者ID:beefone,项目名称:pyload,代码行数:7,代码来源:CoolshareCz.py
示例6: process
def process(self, pyfile):
    """Entry point: fetch the file page and start the matching handler.

    URLs that do not match ``self.__pattern__`` are passed to
    ``handleOverriden`` for premium accounts and rejected otherwise.
    Matching URLs are fetched with ``getURL``, a direct download link is
    looked up and the download is started directly, via the premium handler
    or via the free handler, in that order of preference.

    :param pyfile: file object; ``url`` is read, ``name`` may be written
    """
    self.prepare()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        return

    try:
        # Due to a 0.4.9 core bug self.load would use cookies even if
        # cookies=False. Workaround using getURL to avoid cookies.
        # Can be reverted in 0.5 as the cookies bug has been fixed.
        self.html = getURL(pyfile.url, decode=True)
        self.file_info = self.getFileInfo()
    except PluginParseError:
        self.file_info = None

    self.location = self.getDirectDownloadLink()

    if not self.file_info:
        # No parsed info: name the file after the last URL path segment.
        source_url = self.location or pyfile.url
        tail = urlparse(source_url).path.split("/")[-1]
        pyfile.name = html_unescape(unquote(tail))

    if self.location:
        self.startDownload(self.location)
    elif self.premium:
        self.handlePremium()
    else:
        self.handleFree()
开发者ID:wangjun,项目名称:pyload,代码行数:31,代码来源:XFileSharingPro.py
示例7: getFileInfo
def getFileInfo(self):
    """Parse name, size and status of the file and store them on the pyfile.

    Status 1 triggers ``self.offline()``; any status other than 1 or 2
    triggers ``self.parseError``. A missing name falls back to the last path
    segment of the URL; a missing size is only logged.

    :return: ``self.file_info``
    """
    self.logDebug("URL: %s" % self.pyfile.url)

    if hasattr(self, "TEMP_OFFLINE_PATTERN") and re.search(self.TEMP_OFFLINE_PATTERN, self.html):
        self.tempOffline()

    parsed = parseFileInfo(self)[:3]
    name, size, status = parsed

    if status == 1:
        self.offline()
    elif status != 2:
        self.logDebug(self.file_info)
        self.parseError('File info')

    if not name:
        name = html_unescape(urlparse(self.pyfile.url).path.split("/")[-1])
    self.pyfile.name = name

    if size:
        self.pyfile.size = size
    else:
        self.logError("File size not parsed")

    self.logDebug("FILE NAME: %s FILE SIZE: %s" % (self.pyfile.name, self.pyfile.size))
    return self.file_info
开发者ID:Dmanugm,项目名称:pyload,代码行数:25,代码来源:SimpleHoster.py
示例8: handleCaptcha
def handleCaptcha(self, inputs):
    """Solve the positioned-digit captcha in ``self.html``.

    The digits are ``<span>`` elements placed with ``padding-left``; sorting
    them by that offset gives the code, which is stored in ``inputs['code']``.

    :param inputs: dict of form fields; ``'code'`` is written
    :return: always 3
    """
    div_match = re.search(r'<b>Enter code.*?<div.*?>(.*?)</div>', self.html, re.S)
    captcha_div = div_match.group(1)
    self.logDebug(captcha_div)

    numerals = re.findall('<span.*?padding-left\s*:\s*(\d+).*?>(\d)</span>', html_unescape(captcha_div))

    # Each hit is (padding-left, digit); order the digits by numeric offset.
    by_offset = sorted(numerals, key=lambda pair: int(pair[0]))
    inputs['code'] = "".join([digit for _, digit in by_offset])

    self.logDebug("CAPTCHA", inputs['code'], numerals)
    return 3
开发者ID:4Christopher,项目名称:pyload,代码行数:7,代码来源:RarefileNet.py
示例9: proceed
def proceed(self, url, location):
    """Resolve every download link on the parent page and collect the targets.

    The ``url`` argument is replaced by ``self.parent.url`` right away and
    ``location`` is not used. For each link id found on the page, the
    ``http://1kh.de/l/<id>`` page is loaded and its iframe ``src`` is
    appended, HTML-unescaped, to ``self.urls``.
    """
    url = self.parent.url
    self.html = self.load(url)

    link_ids = re.findall(r"<a id=\"DownloadLink_(\d*)\" href=\"http://1kh.de/", self.html)
    for link_id in link_ids:
        frame_page = self.load("http://1kh.de/l/" + link_id)
        frame_match = re.search("width=\"100%\" src=\"(.*)\"></iframe>", frame_page)
        self.urls.append(html_unescape(frame_match.group(1)))
开发者ID:earthGavinLee,项目名称:pyload,代码行数:8,代码来源:OneKhDe.py
示例10: get_file_name
def get_file_name(self):
    """Return the HTML-unescaped file name.

    ``self.api["name"]`` is preferred; when that key is missing the name is
    the last path segment of the download link found in ``self.html[1]``.
    """
    try:
        raw_name = self.api["name"]
    except KeyError:
        file_name_pattern = 'id="downloadlink"><a href="(.*)" onclick="'
        link = re.search(file_name_pattern, self.html[1]).group(1)
        raw_name = link.split("/")[-1]

    return html_unescape(raw_name)
开发者ID:beefone,项目名称:pyload,代码行数:8,代码来源:MegauploadCom.py
示例11: load
def load(self, url, get={}, post={}, ref=True, cookies=True, just_header=False, decode=True, multipart=False, req=None):
    """
    Load content at url and returns it

    :param url: address to request; normalised with ``fixurl`` before use
    :param get: forwarded unchanged to ``req.load``
    :param post: forwarded unchanged to ``req.load``
    :param ref: forwarded unchanged to ``req.load``
    :param cookies: forwarded to ``req.load`` as ``bool(cookies)``
    :param just_header: If True only the header will be retrieved and returned as dict
    :param decode: Whether to decode the output according to http header, should be True in most cases.
                   Any truthy value also runs the result through ``html_unescape``;
                   a string value additionally triggers a second decode step (see note below)
    :param multipart: forwarded unchanged to ``req.load``
    :param req: request object to use; when None, ``self.req`` or a new request from the request factory is used
    :return: Loaded content
    """
    # NOTE(review): ``get={}`` / ``post={}`` are mutable defaults shared between
    # calls; nothing visible here mutates them, but confirm ``req.load`` does not.

    # Honour an abort requested on the file before doing any network work.
    if hasattr(self, 'pyfile') and self.pyfile.abort:
        self.abort()

    url = fixurl(url)

    if not url or not isinstance(url, basestring):
        self.fail(_("No url given"))

    if self.pyload.debug:
        # Logs every local except ``self`` and ``url``. The output is derived
        # from locals(), so renaming or adding locals above this point changes
        # what is logged.
        self.log_debug("LOAD URL " + url,
                       *["%s=%s" % (key, val) for key, val in locals().items() if key not in ("self", "url")])

    if req is None:
        req = self.req or self.pyload.requestFactory.getRequest(self.__name__)

    #@TODO: Move to network in 0.4.10
    if hasattr(self, 'COOKIES') and isinstance(self.COOKIES, list):
        set_cookies(req.cj, self.COOKIES)

    # Only a literal ``decode=True`` is passed on as True; a string value
    # reaches req.load as False.
    res = req.load(url, get, post, ref, bool(cookies), just_header, multipart, decode is True)  #@TODO: Fix network multipart in 0.4.10

    #@TODO: Move to network in 0.4.10
    if decode:
        res = html_unescape(res)

    #@TODO: Move to network in 0.4.10
    if isinstance(decode, basestring):
        # NOTE(review): inside this function ``decode`` is the parameter, which
        # is a string on this path, so this line calls a string and would
        # raise TypeError. Presumably a module-level ``decode`` helper was
        # intended - confirm and fix.
        res = decode(res, decode)

    if self.pyload.debug:
        # Dump the response to tmp/<plugin name>/<caller>_line<lineno>.dump.html,
        # named after the calling function and line number.
        frame = inspect.currentframe()
        framefile = fs_join("tmp", self.__name__, "%s_line%s.dump.html" % (frame.f_back.f_code.co_name, frame.f_back.f_lineno))
        try:
            if not exists(os.path.join("tmp", self.__name__)):
                os.makedirs(os.path.join("tmp", self.__name__))

            with open(framefile, "wb") as f:
                del frame  #: Delete the frame or it wont be cleaned
                f.write(encode(res))
        except IOError, e:
            self.log_error(e)

    # NOTE(review): no ``return`` is visible in this listing although the
    # docstring promises one; the excerpt appears to be cut off here.
开发者ID:earthGavinLee,项目名称:pyload,代码行数:55,代码来源:Plugin.py
示例12: getPackageNameAndFolder
def getPackageNameAndFolder(self):
    """Return the ``(name, folder)`` pair to use for the package.

    When the plugin defines ``TITLE_PATTERN`` and it matches ``self.html``,
    both values are the stripped, HTML-unescaped ``title`` group. Otherwise
    the name and folder of the pyfile's current package are returned.
    """
    title_match = None
    if hasattr(self, 'TITLE_PATTERN'):
        title_match = re.search(self.TITLE_PATTERN, self.html)

    if title_match:
        title = html_unescape(title_match.group('title').strip())
        name, folder = title, title
        self.logDebug("Found name [%s] and folder [%s] in package info" % (name, folder))
        return name, folder

    name = self.pyfile.package().name
    folder = self.pyfile.package().folder
    self.logDebug("Package info not found, defaulting to pyfile name [%s] and folder [%s]" % (name, folder))
    return name, folder
开发者ID:3DMeny,项目名称:pyload,代码行数:12,代码来源:SimpleCrypter.py
示例13: handle_free
def handle_free(self, pyfile):
    """Request the free download link and store it in ``self.link``.

    The file id and the request verification token are read from
    ``self.html``, posted to the license/download endpoint, and the link is
    extracted from the response (which replaces ``self.html``).

    :param pyfile: not used by this method
    """
    fileid = re.search(self.FILEID_PATTERN, self.html).group(1)
    self.log_debug("FileID: " + fileid)

    token = re.search(self.TOKEN_PATTERN, self.html).group(1)
    self.log_debug("Token: " + token)

    form = {'fileId': fileid,
            '__RequestVerificationToken': token}
    self.html = self.load("http://lolabits.es/action/License/Download",
                          post=form,
                          decode="unicode-escape")

    link_match = re.search(self.LINK_PATTERN, self.html)
    self.link = html_unescape(link_match.group(1))
开发者ID:earthGavinLee,项目名称:pyload,代码行数:13,代码来源:LolabitsEs.py
示例14: _translateAPIFileInfo
def _translateAPIFileInfo(apiFileId, apiFileDataMap, apiHosterMap):
    """Translate raw API data into a file-info dict (best effort).

    The fields are filled in the order ``status``, ``name``, ``size``,
    ``hoster``. The first lookup or conversion that fails ends the
    translation and whatever was gathered up to that point is returned.

    :param apiFileId: key into ``apiFileDataMap`` whose value selects the status
    :param apiFileDataMap: mapping read for ``apiFileId``, ``'n'``, ``'s'`` and ``'d'``
    :param apiHosterMap: mapping from the ``'d'`` value to a hoster
    :return: dict with any of the keys ``status``, ``name``, ``size``, ``hoster``
    """
    # Translate
    fileInfo = {}
    try:
        fileInfo['status'] = MegauploadCom.API_STATUS_MAPPING[apiFileDataMap[apiFileId]]
        fileInfo['name'] = html_unescape(apiFileDataMap['n'])
        fileInfo['size'] = int(apiFileDataMap['s'])
        fileInfo['hoster'] = apiHosterMap[apiFileDataMap['d']]
    except Exception:
        # Deliberately best effort: incomplete API data yields a partial
        # result. ``Exception`` (instead of a bare ``except``) lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return fileInfo
开发者ID:beefone,项目名称:pyload,代码行数:13,代码来源:MegauploadCom.py
示例15: getInfo
def getInfo(urls):
    """Yield status tuples for ``urls``, one list per batch of up to 80 URLs.

    Entries reported as "online" become ``(unescaped data[2], data[1], 2,
    data[4])``, entries reported as "offline" become ``(data[4], 0, 1,
    data[4])``; entries with any other state are left out.
    """
    for batch in chunks(urls, 80):
        api = getAPIData(batch)

        result = []
        for data in api.itervalues():
            state = data[0]
            if state == "online":
                entry = (html_unescape(data[2]), data[1], 2, data[4])
            elif state == "offline":
                entry = (data[4], 0, 1, data[4])
            else:
                continue
            result.append(entry)

        yield result
开发者ID:masterwaster,项目名称:pyload,代码行数:14,代码来源:UploadedTo.py
示例16: parseFileInfo
def parseFileInfo(self, url = '', html = ''):
    """Extract name, size, status and file id from a file page.

    :param url: used as the name when no file info is found
    :param html: page source; falls back to ``self.html`` when empty
    :return: ``(name, size, status, fileid)`` where status is 1 when the
             offline pattern matches, 2 when file info was parsed and 3
             otherwise
    """
    if not html and hasattr(self, "html"):
        html = self.html

    # Defaults describe the "nothing recognised" outcome.
    name = url
    size = 0
    status = 3
    fileid = None

    if re.search(self.FILE_OFFLINE_PATTERN, html):
        # File offline
        return name, size, 1, fileid

    info = re.search(self.FILE_INFO_PATTERN, html)
    if info:
        name = html_unescape(info.group('N'))
        fileid = info.group('ID')
        size = parseFileSize(info.group('S'))
        status = 2

    return name, size, status, fileid
开发者ID:masterwaster,项目名称:pyload,代码行数:15,代码来源:UploadedTo.py
示例17: process
def process(self, pyfile):
    """Entry point: fetch the file page and start the matching handler.

    ``HOSTER_NAME`` and ``DIRECT_LINK_PATTERN`` are derived from the URL
    pattern when the plugin does not define them. URLs that do not match
    ``self.__pattern__`` are passed to ``handleOverriden`` for premium
    accounts and rejected otherwise. For matching URLs the page is loaded a
    second time with the FOLLOWLOCATION option set to 0 so that a
    ``Location`` header can be read from ``self.req.http.header``; when that
    header matches ``DIRECT_LINK_PATTERN`` the download starts directly,
    otherwise the premium or free handler is used.

    :param pyfile: file object; ``url`` is read, ``name`` may be written
    """
    if not hasattr(self, "HOSTER_NAME"):
        self.HOSTER_NAME = re.search(self.__pattern__, self.pyfile.url).group(1)
    if not hasattr(self, "DIRECT_LINK_PATTERN"):
        self.DIRECT_LINK_PATTERN = (
            r'(http://(\w+\.%s|\d+\.\d+\.\d+\.\d+)(:\d+/d/|/files/\d+/\w+/)[^"\'<]+)' % self.HOSTER_NAME
        )

    self.captcha = self.errmsg = None
    self.passwords = self.getPassword().splitlines()

    if not re.match(self.__pattern__, self.pyfile.url):
        if self.premium:
            self.handleOverriden()
        else:
            self.fail("Only premium users can download from other hosters with %s" % self.HOSTER_NAME)
        return

    try:
        self.html = self.load(pyfile.url, cookies=False, decode=True)
        self.file_info = self.getFileInfo()
    except PluginParseError:
        self.file_info = None

    self.req.http.lastURL = self.pyfile.url

    self.req.http.c.setopt(FOLLOWLOCATION, 0)
    try:
        self.html = self.load(self.pyfile.url, cookies=True, decode=True)
        self.header = self.req.http.header
    finally:
        # Always switch the option back on, even when the load fails, so the
        # request handle is not left in the temporary state for later use.
        self.req.http.c.setopt(FOLLOWLOCATION, 1)

    self.location = None
    found = re.search(r"Location\s*:\s*(.*)", self.header, re.I)
    if found and re.match(self.DIRECT_LINK_PATTERN, found.group(1)):
        self.location = found.group(1).strip()

    if not self.file_info:
        # No parsed info: name the file after the last URL path segment.
        pyfile.name = html_unescape(
            unquote(urlparse(self.location if self.location else pyfile.url).path.split("/")[-1])
        )

    if self.location:
        self.startDownload(self.location)
    elif self.premium:
        self.handlePremium()
    else:
        self.handleFree()
开发者ID:tetratec,项目名称:shareacc,代码行数:46,代码来源:XFileSharingPro.py
示例18: handleShow
def handleShow(self, url):
    """Collect all links of a show page into one package.

    With the ``changeNameSJ`` option set to ``"Show"`` the package is named
    after the page's ``<h2><a>`` title (text before ``" –"``) and the links
    are taken as they are. Otherwise the current package name is kept and
    ``"#hasName"`` is appended to every link.

    :param url: address of the show page, fetched through ``self.getSJSrc``
    """
    src = self.getSJSrc(url)
    soup = BeautifulSoup(src)

    packageName = self.pyfile.package().name
    if self.config.get("changeNameSJ") == "Show":
        title = html_unescape(soup.find("h2").find("a").string.split(" –")[0])
        if title:
            packageName = title

    nav = soup.find("div", attrs={"id": "scb"})

    package_links = []
    for anchor in nav.findAll("a"):
        if self.config.get("changeNameSJ") == "Show":
            link = anchor["href"]
        else:
            link = anchor["href"] + "#hasName"
        package_links.append(link)

    if self.config.get("changeNameSJ") == "Show":
        package = (packageName, package_links, packageName)
    else:
        package = (self.pyfile.package().name, package_links, self.pyfile.package().name)
    self.packages.append(package)
开发者ID:J-Ha,项目名称:pyload-stuff,代码行数:21,代码来源:SerienjunkiesOrg.py
示例19: process
def process(self, pyfile):
    """Entry point: look up the file's status and start the download.

    The API is asked first. Without an API response the file page is parsed
    instead; an ``'Access denied'`` response fails the download; any other
    response is looked up by ``self.fileID``. Afterwards the premium or the
    free handler is started.

    :param pyfile: file object; ``url`` is read, ``name``/``size`` are written
    """
    self.req.cj.setCookie("uploaded.net", "lang", "en")  # doesn't work anymore
    self.load("http://uploaded.net/language/en")

    api = getAPIData([pyfile.url])

    # TODO: fallback to parse from site, because api sometimes delivers wrong status codes

    if not api:
        self.logWarning("No response for API call")

        raw_page = self.load(pyfile.url, decode = False)
        self.html = unicode(raw_page, 'iso-8859-1')
        name, size, status, self.fileID = parseFileInfo(self)
        self.logDebug(name, size, status, self.fileID)

        if status == 1:
            self.offline()
        elif status == 2:
            pyfile.name = name
            pyfile.size = size
        else:
            self.fail('Parse error - file info')

    elif api == 'Access denied':
        self.fail(_("API key invalid"))

    else:
        if self.fileID not in api:
            self.offline()

        self.data = api[self.fileID]
        if self.data[0] != "online":
            self.offline()

        pyfile.name = html_unescape(self.data[2])

    # self.pyfile.name = self.get_file_name()

    if self.premium:
        self.handlePremium()
    else:
        self.handleFree()
开发者ID:beefone,项目名称:pyload,代码行数:39,代码来源:UploadedTo.py
示例20: process
def process(self, pyfile):
    """Pick a stream for the video page at ``pyfile.url`` and download it.

    Loads the page, selects the stream whose itag best matches the
    configured format/quality, builds ``pyfile.name`` from the page's
    ``<meta name="title">`` tag and downloads the stream URL. When
    ``which("ffmpeg")`` returns a truthy value and the URL contains a
    ``t=<m>m<s>s`` offset, ffmpeg is afterwards run on the downloaded file
    with ``-ss`` set to that offset, copying the audio and video codecs.

    :param pyfile: file object; ``url`` is read and ``name`` is written
    """
    html = self.load(pyfile.url, decode=True)

    if re.search(r'<div id="player-unavailable" class="\s*player-width player-height\s*">', html):
        self.offline()

    if "We have been receiving a large volume of requests from your network." in html:
        self.tempOffline()

    # Get config: map the configured quality name to an itag number; a
    # different table is used when the "3d" option is set.
    use3d = self.getConfig("3d")
    if use3d:
        quality = {"sd": 82, "hd": 84, "fullhd": 85, "240p": 83, "360p": 82,
                   "480p": 82, "720p": 84, "1080p": 85, "3072p": 85}
    else:
        quality = {"sd": 18, "hd": 22, "fullhd": 37, "240p": 5, "360p": 18,
                   "480p": 35, "720p": 22, "1080p": 37, "3072p": 38}
    # An explicitly configured "fmt" wins over the quality name, unless it is
    # not a key of self.formats.
    desired_fmt = self.getConfig("fmt")
    if desired_fmt and desired_fmt not in self.formats:
        self.logWarning("FMT %d unknown - using default." % desired_fmt)
        desired_fmt = 0
    if not desired_fmt:
        desired_fmt = quality.get(self.getConfig("quality"), 18)

    # Parse available streams: the map is a comma-separated list of streams,
    # each a list of key=value fields.
    streams = re.search(r'"url_encoded_fmt_stream_map": "(.*?)",', html).group(1)
    # NOTE(review): in a Python 2 byte-string literal '\u0026' is the
    # six-character escape text rather than '&' - confirm which interpreter
    # this file targets before touching this literal.
    streams = [x.split('\u0026') for x in streams.split(',')]
    streams = [dict((y.split('=', 1)) for y in x) for x in streams]
    # From here on each stream is an (itag, url) pair.
    streams = [(int(x['itag']), unquote(x['url'])) for x in streams]
    #self.logDebug("Found links: %s" % streams)
    self.logDebug("AVAILABLE STREAMS: %s" % [x[0] for x in streams])

    # Build dictionary of supported itags (3D/2D).
    # self.formats[itag][0] doubles as the name of the config option that
    # enables the format (here) and as the file suffix (further down).
    allowed = lambda x: self.getConfig(self.formats[x][0])
    streams = [x for x in streams if x[0] in self.formats and allowed(x[0])]
    if not streams:
        self.fail("No available stream meets your preferences")
    # Prefer streams whose formats[...][4] entry equals the 3d setting; when
    # none qualify, fall back to every allowed stream.
    fmt_dict = dict([x for x in streams if self.formats[x[0]][4] == use3d] or streams)

    self.logDebug("DESIRED STREAM: ITAG:%d (%s) %sfound, %sallowed" %
                  (desired_fmt, "%s %dx%d Q:%d 3D:%s" % self.formats[desired_fmt],
                   "" if desired_fmt in fmt_dict else "NOT ", "" if allowed(desired_fmt) else "NOT "))

    # Return fmt nearest to quality index
    if desired_fmt in fmt_dict and allowed(desired_fmt):
        fmt = desired_fmt
    else:
        sel = lambda x: self.formats[x][3]  # select quality index
        comp = lambda x, y: abs(sel(x) - sel(y))

        self.logDebug("Choosing nearest fmt: %s" % [(x, allowed(x), comp(x, desired_fmt)) for x in fmt_dict.keys()])
        # Pairwise fold: the left candidate is kept only when it is at least
        # as close to the desired quality index as the right one AND has the
        # higher quality index; otherwise the right candidate wins.
        fmt = reduce(lambda x, y: x if comp(x, desired_fmt) <= comp(y, desired_fmt) and
                     sel(x) > sel(y) else y, fmt_dict.keys())

    self.logDebug("Chosen fmt: %s" % fmt)
    url = fmt_dict[fmt]
    self.logDebug("URL: %s" % url)

    # Set file name
    file_suffix = self.formats[fmt][0] if fmt in self.formats else ".flv"
    file_name_pattern = '<meta name="title" content="(.+?)">'
    name = re.search(file_name_pattern, html).group(1).replace("/", "")

    # Cleaning invalid characters from the file name
    name = name.encode('ascii', 'replace')

    pyfile.name = html_unescape(name)

    # Optional start offset taken from the URL; group 2 holds the minutes
    # (may be missing), group 3 the seconds.
    time = re.search(r"t=((\d+)m)?(\d+)s", pyfile.url)
    ffmpeg = which("ffmpeg")
    if ffmpeg and time:
        m, s = time.groups()[1:]
        if not m:
            m = "0"

        pyfile.name += " (starting at %s:%s)" % (m, s)
    pyfile.name += file_suffix

    filename = self.download(url)

    if ffmpeg and time:
        # Move the download aside and let ffmpeg write the result, starting
        # at the offset, under the original file name; then drop the input.
        inputfile = filename + "_"
        os.rename(filename, inputfile)

        subprocess.call([
            ffmpeg,
            "-ss", "00:%s:%s" % (m, s),
            "-i", inputfile,
            "-vcodec", "copy",
            "-acodec", "copy",
            filename])
        os.remove(inputfile)
开发者ID:ASCIIteapot,项目名称:pyload,代码行数:92,代码来源:YoutubeCom.py
注：本文中的 module.utils.html_unescape 函数示例由纯净天空整理自 Github/MSDocs 等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的 License；未经允许，请勿转载。
请发表评论