
Python grabber.URLGrabber Class Code Examples


This article collects typical usage examples of the Python class urlgrabber.grabber.URLGrabber. If you are unsure what the URLGrabber class does or how to use it, the curated examples here should help.



Twenty code examples of the URLGrabber class are shown below, ordered by popularity.
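
Before the examples, here is a minimal sketch of the two entry points nearly all of them use: urlgrab(), which downloads to a local file, and urlread(), which returns the response body as a string. The URL, filename, and option values below are placeholders, not taken from any example:

from urlgrabber.grabber import URLGrabber, URLGrabError

# options set on the instance become defaults for every transfer:
# reget='simple' resumes partial downloads, retry sets the attempt
# budget, throttle=0 disables bandwidth limiting
g = URLGrabber(reget='simple', retry=3, throttle=0)

try:
    # urlgrab() writes the body to a local file and returns its path
    path = g.urlgrab('http://example.com/pkg.tar.gz', filename='pkg.tar.gz')
    # urlread() returns the body as a string
    index = g.urlread('http://example.com/index.html')
except URLGrabError as e:
    print 'transfer failed: %s' % e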

Example 1: downloadFile

def downloadFile(url, filename, subdir):
    BongEnvironment.logger.info("starting download of {!s} to {!s}/{!s}".format(url, subdir, filename))
    maxBytesPerSecond=0        #  2**19   ==> 0.5 MByte/s 
                               #  0       ==> not restricted
    grabber = URLGrabber( progress_obj=None
                        , throttle=maxBytesPerSecond        
                        , reget='simple'
                        , retry=5
                        , retrycodes=[-1,4,5,6,7,12,14]
                        , timeout=30
                        , user_agent='bong download manager/1.0'
                        )
    
    statinfo = os.stat(BongEnvironment.settings['recdir'])
    
    targetdir = os.path.join(BongEnvironment.settings['recdir'], subdir)
    if not os.path.isdir(targetdir):
        os.mkdir(targetdir)
        if os.name == 'posix':
            os.chmod(targetdir, 0777)
            os.chown(targetdir, statinfo.st_uid, statinfo.st_gid)

    targetfile = os.path.join(targetdir, filename)
    
    t1 = time.time()
    try:
        local_filename = grabber.urlgrab(url, targetfile)
    except URLGrabError, e:
        BongEnvironment.logger.warning('exception {!s} trying to download {!s} to {!s}'.format(e, url, targetfile))
        return False
Developer: ambo4, Project: BONG.TV-Download-Manager, Lines: 30, Source: BongDownload.py


Example 2: update_categories

def update_categories(username, subscriptions):
    g = URLGrabber()
    folder = BASE + '/' + username
    if not os.path.exists(folder):
        os.mkdir(folder)

    cats = get_categories(username)
    visited = set()

    for sub in subscriptions:
        if sub.name in visited:
            continue
        elif sub.name in cats:
            del cats[sub.name]
            visited.add(sub.name)
            continue
        else:
            print 'Downloading thumbnail for %s/%s'%(sub.name, sub.dname)
            ft = sub.thumbnail[-3:]
            nf = '%s/%s%s%s.%s'%(folder, sub.name, SPLITTER, sub.dname, ft)
            g.urlgrab(sub.thumbnail, filename=nf)

    for sub in cats:
        print 'Removing thumbnail for %s'%sub
        if cats[sub] is None:
            old_fn = '%s/%s*'%(folder, sub)
        else:
            old_fn = '%s/%s/%s*'%(folder, cats[sub], sub)
        for fl in glob.glob(old_fn):
            print '\t', fl
            os.remove(fl)
Developer: DLu, Project: probablyscripts, Lines: 31, Source: classes.py


Example 3: _retrievePublicKey

    def _retrievePublicKey(self, keyurl, repo=None):
        """
        Retrieve a key file
        @param keyurl: url to the key to retrieve
        Returns a list of dicts with all the keyinfo
        """
        key_installed = False

        # Go get the GPG key from the given URL
        try:
            url = yum.misc.to_utf8(keyurl)
            if repo is None:
                rawkey = urlgrabber.urlread(url, limit=9999)
            else:
                #  If we have a repo. use the proxy etc. configuration for it.
                # In theory we have a global proxy config. too, but meh...
                # external callers should just update.
                ug = URLGrabber(bandwidth = repo.bandwidth,
                                retry = repo.retries,
                                throttle = repo.throttle,
                                progress_obj = repo.callback,
                                proxies=repo.proxy_dict)
                ug.opts.user_agent = default_grabber.opts.user_agent
                rawkey = ug.urlread(url, text=repo.id + "/gpgkey")

        except urlgrabber.grabber.URLGrabError, e:
            raise ChannelException('GPG key retrieval failed: ' +
                                    yum.i18n.to_unicode(str(e)))
Developer: m47ik, Project: uyuni, Lines: 28, Source: yum_src.py
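
This example also uses the module-level convenience API: urlgrabber.urlread() (the no-repo branch above) goes through the shared default_grabber instance, so options set on default_grabber.opts apply to every such call. A short sketch of that relationship, with a placeholder URL and user agent string:

import urlgrabber
from urlgrabber.grabber import default_grabber

# options on the shared default grabber affect the module-level helpers
default_grabber.opts.user_agent = 'my-tool/1.0'   # placeholder value

# module-level urlread() uses default_grabber under the hood; limit
# caps how many bytes are read, as in the key retrieval above
rawkey = urlgrabber.urlread('http://example.com/gpgkey', limit=9999)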


Example 4: WebGrabber

class WebGrabber(Singleton):
    g = None
    
    def __init__(self, config={}):
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
            self.gotLibUrlGrabber = True   # record that the import succeeded
        except ImportError:
            writeError('This script is better with URLGrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')

        if not self.gotLibUrlGrabber:
            return
        if config.has_key('proxy'):
            writeInfo("URLGrabber with proxy: %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber without proxy")
            self.g = URLGrabber()

    def getWebFile(self, url, dest):
        if not self.gotLibUrlGrabber:
            # fallback: plain urllib when urlgrabber is unavailable
            import urllib
            fd = open(dest, "wb")
            fd.write(urllib.urlopen(url).read())
            fd.close()
        else:
            self.g.urlgrab(url, filename=dest)
Developer: naparuba, Project: pyndsgest, Lines: 30, Source: webGrabber.py


Example 5: Fetcher

	class Fetcher(object):
		def __init__(self, remote):
			self.remote = remote
			self.g = URLGrabber(prefix=self.remote)

		def fetch_to_file(self, src, dest):
			tmp = dest + '.part'
			try:
				self.g.urlgrab(src, filename=tmp, copy_local=1, user_agent='lsd-fetch/1.0')
			except URLGrabError as e:
				raise IOError(str(e))
			os.rename(tmp, dest)

		def fetch(self, src='/'):
			try:
				contents = self.g.urlread(src).strip()
			except URLGrabError as e:
				raise IOError(str(e))
			return contents

		def listdir(self, dir='/'):
			lfn = os.path.join(dir, '.listing')

			contents = self.fetch(lfn)

			return [ s.strip() for s in contents.split() if s.strip() != '' ]

		# Pickling support -- only pickle the remote URL
		def __getstate__(self):
			return self.remote
		def __setstate__(self, remote):
			self.__init__(remote)
Developer: banados, Project: lsd, Lines: 32, Source: fetcher.py


Example 6: test_make_callback

    def test_make_callback(self):
        """grabber.URLGrabber._make_callback() tests"""
        def cb(e): pass
        tup_cb = (cb, ('stuff'), {'some': 'dict'})
        g = URLGrabber()
        self.assertEquals(g._make_callback(cb),     (cb, (), {}))
        self.assertEquals(g._make_callback(tup_cb), tup_cb)
Developer: pombredanne, Project: urlgrabber-1, Lines: 7, Source: test_grabber.py
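
The normalization tested here is what lets callback options accept either a bare callable or a (callable, args, kwargs) tuple. A hedged sketch of both forms using the failure_callback option (the handler below is hypothetical; per the urlgrabber documentation, the callback receives an object describing the failed attempt):

from urlgrabber.grabber import URLGrabber

def on_failure(cb_obj, *args, **kwargs):
    # cb_obj.exception and cb_obj.url describe the failed attempt
    print 'retrying %s after %s' % (cb_obj.url, cb_obj.exception)

# bare callable: normalized internally to (on_failure, (), {})
g1 = URLGrabber(retry=3, failure_callback=on_failure)

# tuple form: the extra args and kwargs are passed through to the handler
g2 = URLGrabber(retry=3, failure_callback=(on_failure, ('extra',), {'key': 'val'}))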


Example 7: fetchPackages

    def fetchPackages(self, destination=None):
        """Downloads packages to destination directory """
        from urlgrabber.grabber import URLGrabber
        from urlgrabber.progress import TextMeter
        from os import path, chdir

        if destination:
            chdir(destination)
        else:
            chdir(self.dest_dir)

        ### URLGrabber objects ###
        t = TextMeter()
        g = URLGrabber(progress_obj=t)

        ### Start Iteration over list of packages' URIs ###
        for uri in self.getPackageList():
            pisifile = uri.split("/")[-1]
            if path.exists(pisifile):
                print pisifile, "--- No Update! Skipping..."
                continue
            try:
                g.urlgrab(uri)
            except:
                print "Error while downloading file %s" % pisifile
                break
        print "Finished."
Developer: havan, Project: porsuk, Lines: 27, Source: pisi_crawler.py


Example 8: moosWeb2dict

def moosWeb2dict(vehicle_host, vehicle_port):

    def moosHTML2dict(data):
        soup = BeautifulSoup(data)
        istrtd = (lambda tag : tag.name == "tr" and len(tag.findAll("td")) > 0)
        ret = {}
        for tr in soup.table.table.findAll(istrtd):
            tds = tr.findAll("td")
            vartag = tds[0].a
            if 0 < len(vartag) and "pending" != tds[2].contents[0]:
                key = vartag.contents[0]
                val = tds[6].contents[0]
                ret[str(key)] = str(val)
        return ret


    UG = URLGrabber()

    # fetch a fresh copy of the page
    data = UG.urlread("http://" + vehicle_host + ":" + str(vehicle_port))

    # the MOOS web server emits unquoted href attributes; quote them so
    # BeautifulSoup can parse the page reliably
    p = re.compile('<A href = ([^>]*)>')
    fixed_data = p.sub(r'<A href="\1">', data)

    return moosHTML2dict(fixed_data)
Developer: Hoffman408, Project: MOOS-python-utils, Lines: 26, Source: MOOSDBparser.py


Example 9: __init__

		def __init__(self, awsAccessKey, awsSecretKey, baseurl):
			self.logger = logging.getLogger("yum.verbose.main")
			self.logger.log(logginglevels.DEBUG_4, "s3: creating empty URLGrabber instance")
			URLGrabber.__init__(self)
			self.logger.log(logginglevels.DEBUG_4, "s3: BotoGrabber init BASE_URL=%s" % baseurl)
			if not baseurl:
				raise Exception("s3: BotoGrabberInit got blank baseurl")
			try:
				baseurl = baseurl[0]
			except:
				pass
			self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
			self.baseurl = urlparse(baseurl)
			if hasattr(self.baseurl, 'netloc'):
				self.bucket_name = self.baseurl.netloc
				self.key_prefix = self.baseurl.path[1:]
			else:
				self.bucket_name = self.baseurl[1]
				self.key_prefix = self.baseurl[2]
			if self.key_prefix.startswith("/"):
				self.key_prefix = self.key_prefix[1:]
			m = re.match('(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
			if (m):
				self.bucket_name = m.group(1)
			if sys.stdout.isatty():
				print "%s - %s" % (self.bucket_name, self.key_prefix)
Developer: aripringle, Project: yum-s3-plugin, Lines: 26, Source: s3.py


Example 10: __init__

	def __init__(self, pakfire, *args, **kwargs):
		kwargs.update({
			"quote" : 0,
			"user_agent" : "pakfire/%s" % PAKFIRE_VERSION,

			"ssl_verify_host" : False,
			"ssl_verify_peer" : False,
		})

		if isinstance(pakfire, _Config):
			config = pakfire
		else:
			config = pakfire.config
		self.config = config

		# Set throttle setting.
		bandwidth_throttle = config.get("downloader", "bandwidth_throttle")
		if bandwidth_throttle:
			try:
				bandwidth_throttle = int(bandwidth_throttle)
			except ValueError:
				log.error("Configuration value for bandwidth_throttle is invalid.")
				bandwidth_throttle = 0

			kwargs.update({ "throttle" : bandwidth_throttle })

		# Configure HTTP proxy.
		http_proxy = config.get("downloader", "http_proxy")
		if http_proxy:
			kwargs.update({ "proxies" : { "http" : http_proxy, "https" : http_proxy }})

		URLGrabber.__init__(self, *args, **kwargs)
Developer: ipfire, Project: pakfire, Lines: 32, Source: downloader.py


Example 11: __init__

        def __init__(self, awsAccessKey, awsSecretKey, baseurl):
            self.logger.debug("BotoGrabber init BASE_URL=%s" % baseurl)

            URLGrabber.__init__(self)
            self._handle_baseurl(baseurl)
            self._handle_s3(awsAccessKey, awsSecretKey)
            self._dump_attributes()
            interactive_notify("%s - %s" % (self.bucket_name, self.key_prefix))
Developer: toxsick, Project: yum-s3-plugin, Lines: 8, Source: s3.py


Example 12: ProxyHTTPAuthTests

class ProxyHTTPAuthTests(BaseProxyTests):
    def setUp(self):
        self.url = ref_http
        if not self.have_proxy():
            self.skip()
        self.g = URLGrabber()

    def test_good_password(self):
        self.g.urlopen(self.url, proxies=self.good_proxies)

    def test_bad_password(self):
        self.assertRaises(URLGrabError, self.g.urlopen,
                          self.url, proxies=self.bad_proxies)
Developer: pombredanne, Project: urlgrabber-1, Lines: 13, Source: test_grabber.py
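
These tests pass proxies per call rather than at construction time. The mapping goes from URL scheme to proxy URL, with any credentials embedded in the proxy URL itself; a sketch with placeholder host and credentials:

from urlgrabber.grabber import URLGrabber

# scheme -> proxy URL; user:password@ carries the proxy credentials
proxies = {'http': 'http://user:password@proxy.example.com:3128/'}

g = URLGrabber()
fo = g.urlopen('http://example.com/', proxies=proxies)  # file-like object
try:
    data = fo.read()
finally:
    fo.close()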


Example 13: urlgrab

	def urlgrab(self, url, *args, **kwargs):
		self.check_offline_mode()

		# This is for older versions of urlgrabber which are packaged in Debian
		# and Ubuntu and cannot handle filenames as a normal Python string but need
		# a unicode string.
		return URLGrabber.urlgrab(self, url.encode("utf-8"), *args, **kwargs)
Developer: ipfire, Project: pakfire, Lines: 7, Source: downloader.py


Example 14: _getTreeInfo

    def _getTreeInfo(self, url, proxy_url, sslverify):
        """ Retrieve treeinfo and return the path to the local file.

            :param baseurl: url of the repo
            :type baseurl: string
            :param proxy_url: Optional full proxy URL of or ""
            :type proxy_url: string
            :param sslverify: True if SSL certificate should be varified
            :type sslverify: bool
            :returns: Path to retrieved .treeinfo file or None
            :rtype: string or None
        """
        if not url:
            return None

        log.debug("retrieving treeinfo from %s (proxy: %s ; sslverify: %s)",
                  url, proxy_url, sslverify)

        ugopts = {"ssl_verify_peer": sslverify,
                  "ssl_verify_host": sslverify}

        proxies = {}
        if proxy_url:
            try:
                proxy = ProxyString(proxy_url)
                proxies = {"http": proxy.url,
                           "https": proxy.url}
            except ProxyStringError as e:
                log.info("Failed to parse proxy for _getTreeInfo %s: %s",
                         proxy_url, e)

        ug = URLGrabber()
        try:
            treeinfo = ug.urlgrab("%s/.treeinfo" % url,
                                  "/tmp/.treeinfo", copy_local=True,
                                  proxies=proxies, **ugopts)
        except URLGrabError as e:
            try:
                treeinfo = ug.urlgrab("%s/treeinfo" % url,
                                      "/tmp/.treeinfo", copy_local=True,
                                      proxies=proxies, **ugopts)
            except URLGrabError as e:
                log.info("Error downloading treeinfo: %s", e)
                treeinfo = None

        return treeinfo
Developer: akozumpl, Project: anaconda, Lines: 46, Source: __init__.py
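
Note how the SSL and proxy settings travel as per-call keyword arguments here: a kwarg passed to urlgrab() overrides the instance default for that single transfer, leaving the instance configuration untouched. A sketch of the same pattern with placeholder values:

from urlgrabber.grabber import URLGrabber

g = URLGrabber(timeout=30)

# this one transfer gets a longer timeout and relaxed SSL checks;
# the instance default of timeout=30 still applies to later calls
g.urlgrab('http://example.com/.treeinfo', '/tmp/.treeinfo',
          timeout=120, ssl_verify_peer=False, ssl_verify_host=False)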


Example 15: download

def download(url, filename=None, associated_task=None, web_proxy = None):
    if associated_task:
        associated_task.description = _("Downloading %s") % os.path.basename(url)
        associated_task.unit = "KB"
    log.debug("downloading %s > %s" % (url, filename))
    progress_obj = DownloadProgress(associated_task)
    if web_proxy:
        web_proxy={'http':web_proxy}
    urlgrabber = URLGrabber(
        reget = 'simple',
        proxies = web_proxy,
        progress_obj = progress_obj)
    if filename and os.path.isdir(filename):
        basename = os.path.basename(url)
        filename = os.path.join(filename, basename)
    filename = urlgrabber.urlgrab(url, filename=filename)
    return filename
Developer: hechaoyuyu, Project: swinst, Lines: 17, Source: downloader.py
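
The DownloadProgress object wired in above implements urlgrabber's progress-object protocol. A minimal stand-in is sketched below; the start()/update()/end() method names follow urlgrabber.progress, but the signatures are simplified and the class itself is hypothetical:

from urlgrabber.grabber import URLGrabber

class QuietMeter(object):
    # urlgrabber calls start() once, update() as bytes arrive,
    # and end() when the transfer completes
    def start(self, filename=None, url=None, basename=None,
              size=None, now=None, text=None):
        self.size = size
    def update(self, amount_read, now=None):
        pass
    def end(self, amount_read, now=None):
        print 'finished: %s bytes' % amount_read

grabber = URLGrabber(progress_obj=QuietMeter())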


Example 16: __init__

    def __init__(self, config={}):
        self.gotLibUrlGrabber = False
        try:
            from urlgrabber.grabber import URLGrabber
            self.gotLibUrlGrabber = True   # record that the import succeeded
        except ImportError:
            writeError('This script is better with URLGrabber.')
            writeError('See http://linux.duke.edu/projects/urlgrabber/')

        if not self.gotLibUrlGrabber:
            return
        if config.has_key('proxy'):
            writeInfo("URLGrabber with proxy: %s" % config['proxy'])
            self.g = URLGrabber(proxies={'http': config['proxy']})
        else:
            writeInfo("URLGrabber without proxy")
            self.g = URLGrabber()
Developer: naparuba, Project: pyndsgest, Lines: 17, Source: webGrabber.py


Example 17: download_file

def download_file(url, dirname):
    """
        Download @url and save to @dirname.
        @return - filename of saved file
    """
    # pycurl is picky about Unicode URLs, see rhbz #515797
    url = url.encode('ascii', 'ignore')

    if not os.path.exists(dirname):
        os.makedirs(dirname)

    basename = os.path.basename(url)
    filename = "%s/%s" % (dirname, basename)

    if os.path.exists(filename):
        raise Exception("File %s already exists! Not downloading!" % filename)

    g = URLGrabber(reget=None)
    local_filename = g.urlgrab(url, filename)
    return local_filename
Developer: Acidburn0zzz, Project: difio, Lines: 20, Source: grabber.py


Example 18: __init__

		def __init__(self, awsAccessKey, awsSecretKey, baseurl):
			if self.DEBUG:
				print "creating empty URLGrabber instance"
			URLGrabber.__init__(self)
			if self.DEBUG:
				print "BotoGrabber init BASE_URL=%s" % baseurl
			if not baseurl: raise Exception("BotoGrabberInit got blank baseurl")
			try: baseurl = baseurl[0]
			except: pass
			self.s3 = boto.connect_s3(awsAccessKey, awsSecretKey)
			self.baseurl = urlparse(baseurl)
			if hasattr(self.baseurl, 'netloc'):
				self.bucket_name = self.baseurl.netloc
				self.key_prefix = self.baseurl.path[1:]
			else:
				self.bucket_name = self.baseurl[1]
				self.key_prefix = self.baseurl[2]
			m = re.match('(.*)\.s3.*\.amazonaws\.com', self.bucket_name)
			if (m):
				self.bucket_name = m.group(1)
			if sys.stdout.isatty():
				print "%s - %s" % (self.bucket_name, self.key_prefix)
Developer: NumberFour, Project: yum-s3-plugin, Lines: 22, Source: s3.py


Example 19: run

    def run(self):
        #Check if file exists
        if os.path.isfile(self.file):
            os.chmod(self.file, stat.S_IWUSR)
            os.remove(self.file)

        ##Init url/path pointers
        #response     = urllib2.urlopen(self.url)
        #total_size   = response.info().getheader('Content-Length').strip()
        #self.total_size   = int(total_size)

        #freespace
        #freespace = get_free_space(self.app, path)

        #check if enough freespace
        #if self.freespace < total_size and self.freespace != 0:
        #    self.app.gui.ShowDialogNotification('Not enough freespace to download the item')
        #    self.active = False
        #    return

        self.app.gui.SetVisible(4000, True)
        progress = TextMeter(self.app)
        try:
            Log(self.app, 'Download started' )
            g = URLGrabber(reget='simple')
            g.urlgrab(self.url, filename=self.file, reget='simple', progress_obj=progress, text=self.filename)

            #Create info file as json
            json_dumps(self.infodata, self.infopath)
            self.app.gui.ShowDialogNotification('Download Complete')
        except:
            Log(self.app, traceback.format_exc() )
            self.app.gui.ShowDialogNotification('Error during download')

        self.app.gui.SetVisible(4000, False)
        self.active = False
        Log(self.app, 'Download finished' )
Developer: D34dmeat, Project: boxeehack, Lines: 37, Source: download.py


Example 20: get_file_if_size_diff

def get_file_if_size_diff(url, d):
    fn = url.split('/')[-1]
    out_fnp = os.path.join(d, fn)
    g = URLGrabber(reget = "simple")
    locFnp = g.urlgrab(url, out_fnp)
    return locFnp
Developer: umass-bib, Project: guides, Lines: 6, Source: downloadSraFromTable.py
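
The name get_file_if_size_diff hints at the design choice: with reget="simple", an existing local file is not discarded; the grabber requests only the bytes beyond the current local size and appends them, so an already-complete file costs little to nothing to "re-download". A sketch with a placeholder URL:

from urlgrabber.grabber import URLGrabber

g = URLGrabber(reget='simple')
g.urlgrab('http://example.com/big.iso', 'big.iso')  # first run, possibly interrupted
g.urlgrab('http://example.com/big.iso', 'big.iso')  # resumes from the partial file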



Note: The urlgrabber.grabber.URLGrabber class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other source-code and documentation platforms. The snippets are drawn from open-source projects contributed by their respective developers; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.

