• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

Java Pipeline类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Java中us.codecraft.webmagic.pipeline.Pipeline的典型用法代码示例。如果您正苦于以下问题:Java Pipeline类的具体用法?Java Pipeline怎么用?Java Pipeline使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。



Pipeline类属于us.codecraft.webmagic.pipeline包,在下文中一共展示了Pipeline类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: scratch

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Crawls from the configured start URL and hands every processed page to a persist.
 *
 * <p>For each page, the "html" result is parsed with Jsoup. The persist is looked up
 * by the configured host and category; when none is found a warning is logged and
 * the page is dropped. The spider is run with 5 threads.
 */
public void scratch(){
    SimplePageProcessor pageProcessor =
            new SimplePageProcessor(config.getStartUrl(), config.getTargetUrlPattern());

    // Pipeline that parses the page HTML and forwards it to the matching persist.
    Pipeline persistPipeline = new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            Document doc = Jsoup.parse(resultItems.get("html"));
            IPersist persist = PersistManager.getInstance().getPersist(config.getHost(), config.getCategory());
            if (persist != null) {
                persist.persist(resultItems.getRequest().getUrl(), config, doc);
                return;
            }
            // No persist registered for this host/category: report it and skip the page.
            logger.warn("persistNotExists: host={}, category={}", config.getHost(), config.getCategory());
        }
    };

    us.codecraft.webmagic.Spider.create(pageProcessor)
            .addPipeline(persistPipeline)
            .thread(5)
            .run();
}
 
开发者ID:wangdamu,项目名称:SpiderApplication,代码行数:17,代码来源:Spider.java


示例2: processRequest

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Downloads a single request and runs the resulting page through the login
 * processor and every registered pipeline.
 *
 * <p>A {@code null} download result sleeps for the site's sleep time and reports the
 * request through {@code onError}. A page flagged for cycle retry only has its
 * requests re-added, then sleeps for the site's retry sleep time.
 *
 * @param request the request to download and process
 */
protected void processRequest(Request request) {
    Page fetched = downloader.download(request, this);
    if (fetched == null) {
        sleep(site.getSleepTime());
        onError(request);
        return;
    }

    // Cycle retry: re-queue the page's requests and back off before returning.
    if (fetched.isNeedCycleRetry()) {
        extractAndAddRequests(fetched, true);
        sleep(site.getRetrySleepTime());
        return;
    }

    pageLoginProcessor.process(fetched);
    extractAndAddRequests(fetched, spawnUrl);

    boolean skipPipelines = fetched.getResultItems().isSkip();
    if (!skipPipelines) {
        for (Pipeline each : pipelines) {
            each.process(fetched.getResultItems(), this);
        }
    }

    // Record the status code on the request (used for proxy status management).
    request.putExtra(Request.STATUS_CODE, fetched.getStatusCode());
    sleep(site.getSleepTime());
}
 
开发者ID:hexiaohong-code,项目名称:LoginCrawler,代码行数:25,代码来源:SpiderLogin.java


示例3: testStartAndStop

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Starts a single-threaded spider, stops it, and starts it again, waiting ten
 * seconds after each step. Ignored by default because of the long waits.
 */
@Ignore("long time")
@Test
public void testStartAndStop() throws InterruptedException {
    // Pipeline that only prints a marker for each processed result.
    Pipeline printingPipeline = new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            System.out.println(1);
        }
    };

    Spider spider = Spider.create(new SimplePageProcessor( "http://www.oschina.net/*"))
            .addPipeline(printingPipeline)
            .thread(1)
            .addUrl("http://www.oschina.net/");

    spider.start();
    Thread.sleep(10000);

    spider.stop();
    Thread.sleep(10000);

    spider.start();
    Thread.sleep(10000);
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:17,代码来源:SpiderTest.java


示例4: startSpider

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Builds a script-driven spider from the given parameters and runs it.
 *
 * <p>The site is configured with the requested sleep time, 3 retries and the
 * accepted status codes 200, 404, 403, 500 and 502. Existing pipelines are cleared
 * and replaced by one that does nothing. When no URLs are supplied, a usage message
 * is printed and the JVM exits with status -1.
 *
 * @param params parsed command-line parameters
 */
private static void startSpider(Params params) {
    ScriptProcessor scriptProcessor = ScriptProcessorBuilder.custom()
            .language(params.getLanguage())
            .scriptFromFile(params.getScriptFileName())
            .thread(params.getThread())
            .build();
    scriptProcessor.getSite().setSleepTime(params.getSleepTime());
    scriptProcessor.getSite().setRetryTimes(3);
    scriptProcessor.getSite().setAcceptStatCode(WMCollections.<Integer>newHashSet(200, 404,403, 500,502));

    Spider crawler = Spider.create(scriptProcessor).thread(params.getThread());

    // Drop any registered pipelines and install one that ignores all results.
    Pipeline discardResults = new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            // intentionally empty
        }
    };
    crawler.clearPipeline().addPipeline(discardResults);

    boolean noUrls = params.getUrls() == null || params.getUrls().isEmpty();
    if (noUrls) {
        System.err.println("Need at least one argument");
        System.out.println("Usage: java -jar webmagic.jar [-l language] -f script file [-t threadnum] [-s sleep time] url1 [url2 url3]");
        System.exit(-1);
    }

    for (String startUrl : params.getUrls()) {
        crawler.addUrl(startUrl);
    }
    crawler.run();
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:24,代码来源:ScriptConsole.java


示例5: close

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Releases the spider's components and shuts down its thread pool.
 *
 * <p>{@code destroyEach} is applied, in order, to the downloader, the login page
 * processor, the scheduler and every pipeline before the thread pool is shut down.
 */
public void close() {
    destroyEach(downloader);
    destroyEach(pageLoginProcessor);
    destroyEach(scheduler);

    for (Pipeline each : pipelines) {
        destroyEach(each);
    }

    threadPool.shutdown();
}
 
开发者ID:hexiaohong-code,项目名称:LoginCrawler,代码行数:10,代码来源:SpiderLogin.java


示例6: destroy

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Applies {@code destroyEach} to the downloader, the page processor and then to
 * every registered pipeline, in that order.
 */
protected void destroy() {
	destroyEach(downloader);
	destroyEach(pageProcessor);

	for (Pipeline each : pipelines) {
		destroyEach(each);
	}
}
 
开发者ID:yuany,项目名称:en-webmagic,代码行数:8,代码来源:Spider.java


示例7: processRequest

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Downloads one request, processes the page and feeds its results to the pipelines.
 *
 * <p>When the download yields {@code null} nothing is processed. In every case the
 * method ends by sleeping for the site's configured sleep time.
 *
 * @param request the request to download and process
 */
protected void processRequest(Request request) {
	Page fetched = downloader.download(request, this);

	if (fetched != null) {
		pageProcessor.process(fetched);
		addRequest(fetched);

		// Results marked as skipped are not handed to any pipeline.
		if (!fetched.getResultItems().isSkip()) {
			for (Pipeline each : pipelines) {
				each.process(fetched.getResultItems(), this);
			}
		}
	}

	sleep(site.getSleepTime());
}
 
开发者ID:yuany,项目名称:en-webmagic,代码行数:16,代码来源:Spider.java


示例8: close

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Releases the spider's components and shuts down its thread pool.
 *
 * <p>{@code destroyEach} is applied, in order, to the downloader, the page
 * processor, the scheduler and every pipeline before the thread pool is shut down.
 */
public void close() {
    destroyEach(downloader);
    destroyEach(pageProcessor);
    destroyEach(scheduler);

    for (Pipeline each : pipelines) {
        destroyEach(each);
    }

    threadPool.shutdown();
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:10,代码来源:Spider.java


示例9: onDownloadSuccess

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Handles a downloaded page.
 *
 * <p>When the page's status code is among the site's accepted codes, the page is
 * processed, its requests are extracted and its results are passed to every
 * pipeline unless marked as skipped. Otherwise the rejected status code is logged.
 * Either way the method then sleeps for the site's sleep time.
 *
 * @param request the request that produced the page
 * @param page    the downloaded page
 */
private void onDownloadSuccess(Request request, Page page) {
    boolean accepted = site.getAcceptStatCode().contains(page.getStatusCode());

    if (!accepted) {
        logger.info("page status code error, page {} , code: {}", request.getUrl(), page.getStatusCode());
    } else {
        pageProcessor.process(page);
        extractAndAddRequests(page, spawnUrl);

        boolean skipPipelines = page.getResultItems().isSkip();
        if (!skipPipelines) {
            for (Pipeline each : pipelines) {
                each.process(page.getResultItems(), this);
            }
        }
    }

    sleep(site.getSleepTime());
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:16,代码来源:Spider.java


示例10: test_github

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Runs {@code GithubRepoPageProcessor} against the webmagic repository URL using
 * {@code MockGithubDownloader} and checks the extracted "name" and "author" values.
 */
@Test
public void test_github() throws Exception {
    // Pipeline that asserts on the trimmed "name" and "author" results.
    Pipeline verifyingPipeline = new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            String name = (String) resultItems.get("name");
            assertThat(name.trim()).isEqualTo("webmagic");

            String author = (String) resultItems.get("author");
            assertThat(author.trim()).isEqualTo("code4craft");
        }
    };

    Spider.create(new GithubRepoPageProcessor())
            .addPipeline(verifyingPipeline)
            .setDownloader(new MockGithubDownloader())
            .test("https://github.com/code4craft/webmagic");
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:11,代码来源:GithubRepoPageProcessorTest.java


示例11: test

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Runs {@code GithubRepoProcessor} through {@code OOSpider} against the webmagic
 * repository URL using {@code MockGithubDownloader} and checks the extracted
 * "star" and "fork" values.
 */
@Test
public void test() {
    // Pipeline that asserts on the trimmed "star" and "fork" results.
    Pipeline verifyingPipeline = new Pipeline() {
        @Override
        public void process(ResultItems resultItems, Task task) {
            String star = (String) resultItems.get("star");
            Assert.assertEquals("78", star.trim());

            String fork = (String) resultItems.get("fork");
            Assert.assertEquals("65", fork.trim());
        }
    };

    OOSpider.create(new GithubRepoProcessor())
            .addPipeline(verifyingPipeline)
            .setDownloader(new MockGithubDownloader())
            .test("https://github.com/code4craft/webmagic");
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:11,代码来源:GithubRepoProcessor.java


示例12: getPipelineList

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Returns the pipeline list held by this spider.
 *
 * <p>The stored list reference itself is returned, not a copy.
 *
 * @return the current pipeline list
 */
public List<Pipeline> getPipelineList() {
    return this.pipelineList;
}
 
开发者ID:bruceq,项目名称:Gather-Platform,代码行数:4,代码来源:CommonSpider.java


示例13: setPipelineList

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Replaces this spider's pipeline list with the given list.
 *
 * <p>The list reference is stored as-is; no copy is made.
 *
 * @param pipelineList the list to store
 * @return this instance, so calls can be chained
 */
public CommonSpider setPipelineList(List<Pipeline> pipelineList) {
    this.pipelineList = pipelineList;
    return this;
}
 
开发者ID:bruceq,项目名称:Gather-Platform,代码行数:5,代码来源:CommonSpider.java


示例14: getPipelines

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Returns the pipelines held by this spider.
 *
 * <p>The stored list reference itself is returned, not a copy.
 *
 * @return the current pipeline list
 */
public List<Pipeline> getPipelines() {
    return this.pipelines;
}
 
开发者ID:TransientBuckwheat,项目名称:nest-spider,代码行数:4,代码来源:CommonSpider.java


示例15: setPipelines

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Replaces this spider's pipelines with the given list.
 *
 * <p>The list reference is stored as-is; no copy is made.
 *
 * @param pipelines the list to store
 * @return this instance, so calls can be chained
 */
public CommonSpider setPipelines(List<Pipeline> pipelines) {
    this.pipelines = pipelines;
    return this;
}
 
开发者ID:TransientBuckwheat,项目名称:nest-spider,代码行数:5,代码来源:CommonSpider.java


示例16: clearPipeline

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Discards the current pipelines by replacing them with a new, empty list.
 *
 * @return this instance, so calls can be chained
 */
public Spider clearPipeline() {
	this.pipelines = new ArrayList<Pipeline>();
	return this;
}
 
开发者ID:yuany,项目名称:en-webmagic,代码行数:5,代码来源:Spider.java


示例17: addPipeline

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Appends the given pipeline to this spider's pipeline list.
 *
 * <p>{@code checkIfRunning()} is invoked first; presumably it rejects changes while
 * the spider is running (confirm against its definition).
 *
 * @param pipeline the pipeline to append
 * @return this instance, so calls can be chained
 * @see Pipeline
 * @since 0.2.1
 */
public SpiderLogin addPipeline(Pipeline pipeline) {
    checkIfRunning();
    pipelines.add(pipeline);
    return this;
}
 
开发者ID:hexiaohong-code,项目名称:LoginCrawler,代码行数:14,代码来源:SpiderLogin.java


示例18: setPipelines

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Replaces this spider's pipelines with the given list.
 *
 * <p>{@code checkIfRunning()} is invoked before the assignment; presumably it
 * rejects changes while the spider is running (confirm against its definition).
 * The list reference is stored as-is; no copy is made.
 *
 * @param pipelines the list to store
 * @return this instance, so calls can be chained
 * @see Pipeline
 * @since 0.4.1
 */
public SpiderLogin setPipelines(List<Pipeline> pipelines) {
    checkIfRunning();
    this.pipelines = pipelines;
    return this;
}
 
开发者ID:hexiaohong-code,项目名称:LoginCrawler,代码行数:14,代码来源:SpiderLogin.java


示例19: clearPipeline

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Discards the current pipelines by replacing them with a new, empty list.
 *
 * @return this instance, so calls can be chained
 */
public SpiderLogin clearPipeline() {
    this.pipelines = new ArrayList<Pipeline>();
    return this;
}
 
开发者ID:hexiaohong-code,项目名称:LoginCrawler,代码行数:10,代码来源:SpiderLogin.java


示例20: addPipeline

import us.codecraft.webmagic.pipeline.Pipeline; //导入依赖的package包/类
/**
 * Appends the given pipeline to this spider's pipeline list.
 *
 * <p>{@code checkIfRunning()} is invoked first; presumably it rejects changes while
 * the spider is running (confirm against its definition).
 *
 * @param pipeline the pipeline to append
 * @return this instance, so calls can be chained
 * @see Pipeline
 * @since 0.2.1
 */
public Spider addPipeline(Pipeline pipeline) {
    checkIfRunning();
    pipelines.add(pipeline);
    return this;
}
 
开发者ID:code4craft,项目名称:webmagic,代码行数:14,代码来源:Spider.java



注:本文中的us.codecraft.webmagic.pipeline.Pipeline类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Java DynamicMBean2类代码示例发布时间:2022-05-21
下一篇:
Java InvalidInputException类代码示例发布时间:2022-05-21
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap