本文整理汇总了Scala中org.jsoup.nodes.Element类的典型用法代码示例。如果您正苦于以下问题:Scala Element类的具体用法?Scala Element怎么用?Scala Element使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Element类的16个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: HtmlConcatCompiler
//设置package包名称以及导入依赖的类
package com.karasiq.scalajsbundler.compilers
import com.karasiq.scalajsbundler.ScalaJSBundler.PageTypedContent
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import scala.collection.JavaConversions._
/**
  * Asset compiler that merges several HTML pages into one document.
  *
  * The first page is the base; the `<head>` and `<body>` of every following page are merged
  * into the matching element of the base page.
  */
object HtmlConcatCompiler extends AssetCompiler {
  private implicit class ElementOps(val e: Element) extends AnyVal {
    /**
      * Merges `src` into the wrapped element: `class` and `style` values are joined,
      * every other attribute is overwritten, and the inner HTML of `src` is appended.
      */
    def concatWith(src: Element): Unit = {
      // Joins two attribute values with a single delimiter. When the left value is empty
      // (the attribute is absent on the target) no leading delimiter is emitted.
      @inline
      def delimit(delimiter: String, s1: String, s2: String): String = {
        if (s1.isEmpty || s1.endsWith(delimiter)) s1 + s2
        else s1 + delimiter + s2
      }

      src.attributes().foreach {
        case a if a.getKey == "class" =>
          e.attr(a.getKey, delimit(" ", e.attr(a.getKey), a.getValue))
        case a if a.getKey == "style" =>
          e.attr(a.getKey, delimit(";", e.attr(a.getKey), a.getValue))
        case a => // Replaces attribute value
          e.attr(a.getKey, a.getValue)
      }
      e.append(src.html())
    }
  }

  /**
    * Concatenates the given HTML pages into a single page.
    *
    * @param htmlList pages to merge, in order; the first one is the base document
    * @return outer HTML of the merged document, or an empty string when `htmlList` is empty
    */
  def concat(htmlList: Seq[String]): String = {
    if (htmlList.isEmpty) ""
    else {
      val result = Jsoup.parse(htmlList.head)
      htmlList.tail.foreach { h =>
        val html = Jsoup.parse(h)
        result.head().concatWith(html.head())
        result.body().concatWith(html.body())
      }
      result.outerHtml()
    }
  }

  /** Compiles the page contents by concatenating their string representations. */
  override def compile(contents: Seq[PageTypedContent]): String = {
    concat(contents.map(_.asset.asString))
  }
}
开发者ID:Karasiq,项目名称:sbt-scalajs-bundler,代码行数:46,代码来源:HtmlConcatCompiler.scala
示例2:
//设置package包名称以及导入依赖的类
import com.mashape.unirest.http.Unirest
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import purecsv.safe._
// Requests the overall-ranking page. `pagesize` and `year` are not defined in this excerpt
// and must come from the enclosing scope.
val results = Unirest.post("http://nturanking.lis.ntu.edu.tw/DataPage/OverallRanking.aspx")
  .queryString("pagesize", pagesize)
  .queryString("y", year)
  .asString.getBody
val jsoup = Jsoup.parse(results)
// Every row of the grid except the first. drop(1) also tolerates an empty result,
// where tail would throw.
val jsoupResults = jsoup.body.select("#MainContain_GridView1 > tbody").select("tr").toArray.drop(1)
// (1-based position, html of the first child of cells 1, 2 and 3) per row. The type pattern
// replaces an unchecked tuple cast from the untyped array.
val csvResults = jsoupResults.toIndexedSeq.zipWithIndex.collect {
  case (row: Element, idx) =>
    (idx + 1, row.child(1).child(0).html, row.child(2).child(0).html, row.child(3).child(0).html)
}
println(csvResults.map(t => s"${t._1},${t._2},${t._3},${t._4}").mkString("\n"))
}
}
开发者ID:sguzman,项目名称:UniversityRankingWebScraper,代码行数:22,代码来源:CScraper.scala
示例3: Convert
//设置package包名称以及导入依赖的类
package com.javaconverter.model
import org.jsoup.Jsoup
import org.jsoup.select.NodeVisitor
import org.jsoup.nodes.Node
import org.jsoup.nodes.TextNode
import org.jsoup.nodes.Comment
import scala.collection.JavaConversions._
import org.jsoup.nodes.Element
import org.jsoup.nodes.DataNode
/**
  * Converts an HTML string either into a nested builder-style expression (`toJavaTags`)
  * or into an entity-escaped listing of the parsed document (`toHtmlFormat`).
  *
  * @param html raw HTML markup; it is parsed with jsoup on every call
  */
class Convert(html: String) {
  // One space per element of the range, used as indentation.
  private def pad(r: Range) = r.map(_ => " ").mkString

  // Opening fragment for a node on its own line, indented by `depth` spaces.
  // The matching ")" is emitted later by the visitor's `tail`.
  private def render(node: Node, depth: Int) = {
    val opening = node match {
      case el: Element => el.nodeName + "("
      case t: TextNode => s"""text("${t.text()}""""
      case c: Comment => s"""text("<!--${c.getData}-->""""
      case d: DataNode => s"""text("${d.getWholeData}""""
    }
    "\n" + pad(0 until depth) + opening
  }

  /**
    * Walks the first child of the parsed document depth-first and renders it as a nested
    * call expression: `name(attr(...), children...)` for elements, `text("...")` for text,
    * comment and data nodes.
    *
    * @return the rendered expression
    */
  def toJavaTags() = {
    val doc = Jsoup.parse(html)
    val result = new StringBuilder
    doc.child(0).traverse(new NodeVisitor() {
      override def head(node: Node, depth: Int): Unit = {
        result ++= render(node, depth)
        node match {
          case el: Element =>
            val attributes = el.attributes().asList().map { attr => s""""${attr.getKey} -> ${attr.getValue}"""" }
            if (attributes.nonEmpty) {
              result ++= s"""attr(${attributes.mkString(",")})"""
              // Children follow as further arguments, so separate them from attr(...).
              if (!el.childNodes().isEmpty()) {
                result ++= ","
              }
            }
          case _ => // attributes are only rendered for elements
        }
      }

      override def tail(node: Node, depth: Int): Unit = {
        if (node.childNodes().isEmpty()) {
          result ++= ")"
        } else {
          // Nodes with children close on their own line at the node's indentation.
          result ++= "\n" + pad(depth until 0 by -1) + ")"
        }
        if (node.nextSibling() != null) {
          result ++= ","
        }
      }
    })
    result.toString
  }

  /**
    * Returns the parsed document's markup with angle brackets turned into HTML entities,
    * so it can be displayed as text. Only `<` and `>` are escaped.
    */
  def toHtmlFormat() = {
    // The entity replacements had been decoded to the bare characters, which made
    // both calls no-ops.
    Jsoup.parse(html).toString()
      .replace("<", "&lt;")
      .replace(">", "&gt;")
  }
}
开发者ID:manlioGit,项目名称:javatagsconverter,代码行数:62,代码来源:Convert.scala
示例4: Link
//设置package包名称以及导入依赖的类
package utils
import java.net.{MalformedURLException, URL}
import models.{MessageButton, Photo, Message}
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
//import org.jsoup.nodes.Element
import scala.collection.JavaConversions._
import scala.util.control.Exception._
/**
  * A link taken from a page: its title, target, image source and description text.
  */
sealed case class Link(title: String, href: String, imageSrc:String, desc:String) {
  /** Renders the four fields as `name : value` pairs separated by ", ". */
  override def toString(): String =
    Seq("title" -> title, "href" -> href, "imageSrc" -> imageSrc, "desc" -> desc)
      .map { case (label, value) => s"$label : $value" }
      .mkString(", ")

  /** Builds a Message with a "[title]\ndesc" caption, a 300x250 photo and a button to `href`. */
  def toMessage:Message = {
    val caption = s"[$title]\n$desc"
    val photo = Photo(imageSrc,300,250)
    val button = MessageButton("?????",href)
    Message(caption, Option(photo), Option(button))
  }
}
/** jsoup-based helpers for fetching a page and pulling text and links out of it. */
object HtmlParser {
  type JDoc = org.jsoup.nodes.Document

  /** Connects to `url` and returns the document jsoup parses from it. */
  def get(url: String): JDoc = {
    val connection = Jsoup.connect(url)
    connection.get()
  }

  /** Text of the elements matched by the `title` selector. */
  def titleText(doc: JDoc): String = {
    val titles = doc.select("title")
    titles.text
  }

  /** Text of the elements matched by the `body` selector. */
  def bodyText(doc: JDoc): String = {
    val bodies = doc.select("body")
    bodies.text
  }

  /**
    * Collects every anchor whose `href` contains `containStr`, together with the source of
    * an image inside it and the text of any descendant whose class contains "desc".
    */
  def linkSequence(doc: JDoc, containStr : String): Seq[Link] = {
    val anchors = doc.select(s"a[href*=$containStr]")
    for (anchor <- anchors.iterator.toList) yield {
      val image = anchor.select("img[src]").attr("src")
      val description = anchor.select("[class*=desc]").text
      Link(anchor.text, anchor.attr("href"), image, description)
    }
  }

  /** Returns the URL's string form, or None when it is malformed. */
  def safeURL(url: String): Option[String] = {
    val parsed = catching(classOf[MalformedURLException]) opt new URL(url)
    parsed.map(_.toString)
  }
}
开发者ID:suya55,项目名称:kakaoYellowIdBot,代码行数:48,代码来源:HtmlParser.scala
示例5: LinksSelector
//设置package包名称以及导入依赖的类
package haishu.crawler.selector
import java.nio.charset.Charset
import org.jsoup.helper.StringUtil
import org.jsoup.nodes.Element
import scala.collection.JavaConverters._
/**
  * Selector that extracts the targets of all `<a>` elements below an element.
  * Only `selectSeq` is supported; the other selection methods throw.
  */
class LinksSelector extends BaseElementSelector {
  /** Not supported. */
  override def select(element: Element): String =
    throw new UnsupportedOperationException

  /**
    * Returns the link target of every anchor under `element`.
    *
    * @return absolute URLs when the anchor has a base URI, the raw `href` value otherwise
    */
  override def selectSeq(element: Element): Seq[String] = {
    val elements = element.select("a")
    elements.asScala.map { elem =>
      // "abs:href" is resolved against the base URI and yields "" without one, so it is
      // only used when a base URI is present. The condition used to be inverted.
      if (StringUtil.isBlank(elem.baseUri())) {
        elem.attr("href")
      } else {
        elem.attr("abs:href")
      }
    }
  }

  /** Not supported. */
  override def selectElement(element: Element): Element =
    throw new UnsupportedOperationException

  /** Not supported. */
  override def selectElements(element: Element): Seq[Element] =
    throw new UnsupportedOperationException

  override def hasAttribute = true
}
/** Factory for [[LinksSelector]]. */
object LinksSelector {
  /** Creates a new selector instance. */
  def apply(): LinksSelector = {
    val selector = new LinksSelector
    selector
  }
}
开发者ID:hualongdata,项目名称:hl-crawler,代码行数:38,代码来源:LinksSelector.scala
示例6: checkElementAndConvert
//设置package包名称以及导入依赖的类
package haishu.crawler.selector
import org.jsoup.nodes.{Document, Element}
/**
  * Returns `element` unchanged when it already is a Document; otherwise wraps a clone of it
  * in a new Document that carries the original base URI.
  */
private def checkElementAndConvert(element: Element): Element = element match {
  case d: Document => d
  case _ =>
    // ownerDocument() is null for an element that is not attached to a document;
    // fall back to the element's own base URI instead of failing with an NPE.
    val baseUri = Option(element.ownerDocument()).fold(element.baseUri())(_.baseUri())
    val root = new Document(baseUri)
    root.appendChild(element.clone())
    root
}
/** Selects with a CSS selector built from `selector`. */
override def css(selector: String): Selectable =
  selectElements(Selectors.css(selector))
/** Selects with a CSS selector built from `selector` and `attrName`. */
override def css(selector: String, attrName: String): Selectable =
  selectElements(Selectors.css(selector, attrName))
}
开发者ID:hualongdata,项目名称:hl-crawler,代码行数:24,代码来源:HtmlNode.scala
示例7: CssSelector
//设置package包名称以及导入依赖的类
package haishu.crawler.selector
import org.jsoup.nodes.Element
import org.jsoup.nodes.TextNode
import scala.collection.JavaConverters._
/**
  * Element selector driven by a CSS query.
  *
  * @param selectorText CSS query passed to jsoup's `select`
  * @param attrName     what to read from each match: `null` for outer HTML, "innerHtml",
  *                     "text" (direct text nodes only), "allText", or an attribute name;
  *                     the keywords are compared case-insensitively
  */
class CssSelector(selectorText: String, attrName: String) extends BaseElementSelector {
  // Reads the value described by `attrName` from a single matched element.
  private def getValue(element: Element): String = attrName match {
    case null => element.outerHtml
    case name if name.equalsIgnoreCase("innerHtml") => element.html
    case name if name.equalsIgnoreCase("text") => getText(element)
    case name if name.equalsIgnoreCase("allText") => element.text
    case name => element.attr(name)
  }

  /** Concatenates the text of the element's direct TextNode children. */
  protected def getText(element: Element): String = {
    val ownText = new StringBuilder
    element.childNodes.asScala.foreach {
      case node: TextNode => ownText.append(node.text())
      case _ => // other node kinds contribute nothing
    }
    ownText.toString
  }

  /** Value of the first match, or null when nothing matches. */
  override def select(element: Element): String =
    selectElements(element).headOption.map(getValue).orNull

  /** Values of all matches, with null values dropped. */
  override def selectSeq(doc: Element): Seq[String] =
    selectElements(doc) match {
      case found if found.nonEmpty => found.map(getValue).filterNot(_ == null)
      case _ => Seq()
    }

  /** First matching element, or null when nothing matches. */
  override def selectElement(element: Element): Element =
    selectElements(element).headOption.orNull

  /** All elements under `element` that match the CSS query. */
  override def selectElements(element: Element): Seq[Element] = {
    val matches = element.select(selectorText)
    matches.asScala
  }

  /** True when an attribute name (or keyword) was supplied. */
  override def hasAttribute: Boolean = !(attrName == null)
}
/** Factories for [[CssSelector]]. */
object CssSelector {
  /** Selector without an attribute name (null). */
  def apply(selectorText: String): CssSelector = apply(selectorText, null)

  /** Selector that reads `attrName` from each match. */
  def apply(selectorText: String, attrName: String): CssSelector =
    new CssSelector(selectorText, attrName)
}
开发者ID:hualongdata,项目名称:hl-crawler,代码行数:54,代码来源:CssSelector.scala
示例8: StyleguideSpider
//设置package包名称以及导入依赖的类
package com.themillhousegroup.witchhunt
import org.jsoup.nodes.{ Element, Document }
import scala.concurrent.Future
import com.themillhousegroup.scoup.{ ScoupImplicits, Scoup }
import scala.concurrent.ExecutionContext.Implicits.global
import java.net.URL
/**
  * Crawls a page and, recursively, the local pages it links to, collecting the parsed
  * documents.
  */
object StyleguideSpider extends ScoupImplicits {
  /**
    * Visits `url` and, unless `thisPageOnly` is set, the local links reachable from it.
    *
    * @param url          page to start from
    * @param thisPageOnly when true, only `url` itself is fetched
    * @return the documents that were fetched
    */
  def visit(url: URL, thisPageOnly: Boolean = false): Future[Set[Document]] = {
    visitLink(url, Set.empty, thisPageOnly)
  }

  // Fetches one page, then either stops or continues with its outgoing local links.
  private def visitLink(url: URL, alreadyVisited: Set[URL], thisPageOnly: Boolean): Future[Set[Document]] = {
    Scoup.parse(url.toString).flatMap { doc =>
      if (thisPageOnly) {
        Future.successful(Set(doc))
      } else {
        visitLinks(url, doc, alreadyVisited)
      }
    }
  }

  // Follows the local links of `doc` one after another and merges the resulting documents.
  private def visitLinks(url: URL, doc: Document, alreadyVisited: Set[URL]): Future[Set[Document]] = {
    // The page being processed counts as visited, so links back to it are not re-fetched.
    val visited = alreadyVisited + url
    val links = doc.select("a").filter(isLocal).map(_.attr("href"))
    // The same target often appears several times on a page; fetch it only once.
    val targets = links.map(createFullLocalUrl(url)).filter(!visited.contains(_)).toSeq.distinct
    targets.foldLeft(Future.successful(Set(doc))) {
      case (acc, link) =>
        for {
          existingDocs <- acc
          newDocs <- visitLink(link, visited, thisPageOnly = false)
        } yield (existingDocs ++ newDocs)
    }
  }

  // A link is local when its href starts with "/".
  private def isLocal(link: Element): Boolean = {
    val href = link.attr("href")
    href.startsWith("/")
  }

  /** Resolves `link` against `base`. */
  def createFullLocalUrl(base: URL)(link: String): URL = {
    (new java.net.URL(base, link))
  }
}
开发者ID:themillhousegroup,项目名称:witchhunt,代码行数:47,代码来源:StyleguideSpider.scala
示例9: Article
//设置package包名称以及导入依赖的类
package gander
import gander.images.Image
import gander.opengraph.OpenGraphData
import org.joda.time.DateTime
import org.jsoup.nodes.{Document, Element}
/**
  * Immutable record of everything known about one article: textual metadata, the jsoup
  * documents and nodes it was built from, and optional image and date information.
  *
  * NOTE(review): the field meanings below are read from their names and types only;
  * confirm against the code that populates them.
  *
  * @param doc    a jsoup document for the article (presumably the processed one)
  * @param rawDoc a second jsoup document (presumably the unprocessed one)
  */
final case class Article(
  title: String,
  cleanedArticleText: Option[String],
  metaDescription: String,
  metaKeywords: String,
  canonicalLink: String,
  domain: String,
  topNode: Option[Element],
  topImage: Option[Image],
  tags: Set[String],
  movies: List[Element],
  finalUrl: String,
  linkHash: String,
  rawHtml: String,
  doc: Document,
  rawDoc: Document,
  publishDate: Option[DateTime],
  additionalData: Map[String, String],
  openGraphData: OpenGraphData
)
开发者ID:lloydmeta,项目名称:gander,代码行数:27,代码来源:Article.scala
示例10: HtmlLifter
//设置package包名称以及导入依赖的类
package com.twitter.diffy.lifter
import org.jsoup.Jsoup
import org.jsoup.nodes.{Document, Element}
import org.jsoup.select.Elements
import scala.collection.JavaConversions._
/** Lifts jsoup elements into nested FieldMap structures. */
object HtmlLifter {
  /**
    * Converts a node into a FieldMap.
    *
    * A Document becomes a map with its lifted "head" and "body"; any other element becomes
    * a map with its "tag", own "text", "attributes" and recursively lifted "children".
    */
  def lift(node: Element): FieldMap[Any] = node match {
    case document: Document =>
      FieldMap(
        Map(
          "head" -> lift(document.head),
          "body" -> lift(document.body)
        )
      )
    case element: Element =>
      val attributePairs = element.attributes.asList.map(a => a.getKey -> a.getValue)
      val attributes = FieldMap[String](attributePairs.toMap)
      val children: Elements = element.children
      FieldMap(
        Map(
          "tag" -> element.tagName,
          "text" -> element.ownText,
          "attributes" -> attributes,
          "children" -> children.map(child => lift(child))
        )
      )
  }

  /** Parses an HTML string into a jsoup Document. */
  def decode(html: String): Document = Jsoup.parse(html)
}
开发者ID:sachinmanchanda,项目名称:diffy_unicast,代码行数:40,代码来源:HtmlLifter.scala
示例11: SosachPngSource
//设置package包名称以及导入依赖的类
package com.karasiq.nanoboard.sources.png
import akka.actor.ActorSystem
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import com.karasiq.nanoboard.encoding.DataEncodingStage
import org.jsoup.nodes.Element
/**
  * PNG source for 2ch.hk pages: image links and page URLs pointing at 2ch.hk are rewritten
  * to the m2-ch.ru host.
  */
final class SosachPngSource(encoding: DataEncodingStage)(implicit as: ActorSystem, am: ActorMaterializer) extends BoardPngSource(encoding) {
  // Matches a complete 2ch.hk PNG URL and captures the "<board>/src/<n>/<n>.png" path.
  private val regex = """https?://2ch\.hk/(\w+/src/\d+/\d+\.png)""".r

  /**
    * Returns the m2-ch.ru URL for the attribute value when it is a 2ch.hk PNG link,
    * None otherwise.
    */
  override protected def getUrl(e: Element, attr: String): Option[String] = {
    e.attr(attr) match {
      case regex(path) =>
        Some(s"http://m2-ch.ru/$path")
      case _ =>
        None
    }
  }

  /** Loads the page from m2-ch.ru when the URL points at https://2ch.hk/. */
  override def imagesFromPage(url: String): Source[String, akka.NotUsed] = {
    super.imagesFromPage(url.replace("https://2ch.hk/", "http://m2-ch.ru/"))
  }
}
开发者ID:Karasiq,项目名称:nanoboard,代码行数:27,代码来源:SosachPngSource.scala
示例12: BoardPngSource
//设置package包名称以及导入依赖的类
package com.karasiq.nanoboard.sources.png
import java.net.URL
import akka.actor.ActorSystem
import akka.http.scaladsl.Http
import akka.http.scaladsl.model.HttpRequest
import akka.stream.ActorMaterializer
import akka.stream.scaladsl.Source
import akka.util.ByteString
import com.karasiq.nanoboard.NanoboardMessage
import com.karasiq.nanoboard.encoding.DataEncodingStage
import org.jsoup.Jsoup
import org.jsoup.nodes.{Document, Element}
import scala.collection.JavaConversions._
import scala.util.Try
/**
  * PNG source that downloads board pages and images over HTTP.
  *
  * @param encoding stage used to decode the downloaded image bytes
  */
class BoardPngSource(encoding: DataEncodingStage)(implicit as: ActorSystem, am: ActorMaterializer) extends UrlPngSource {
  protected final val http = Http()

  /**
    * Downloads the image at `url`, decodes its bytes with `encoding` and emits the messages
    * parsed from the result. Any non-fatal failure produces an empty source.
    */
  def messagesFromImage(url: String): Source[NanoboardMessage, akka.NotUsed] = {
    Source.fromFuture(http.singleRequest(HttpRequest(uri = url)))
      // Collapse the chunked entity into one ByteString before decoding.
      .flatMapConcat(_.entity.dataBytes.fold(ByteString.empty)(_ ++ _))
      .mapConcat { data =>
        NanoboardMessage.parseMessages(encoding.decode(data))
      }
      .recoverWith { case scala.util.control.NonFatal(_) => Source.empty }
  }

  /**
    * Downloads the page at `url` and emits the image URLs found in it.
    * Any non-fatal failure produces an empty source.
    */
  def imagesFromPage(url: String): Source[String, akka.NotUsed] = {
    Source.fromFuture(http.singleRequest(HttpRequest(uri = url)))
      .flatMapConcat(_.entity.dataBytes.fold(ByteString.empty)(_ ++ _))
      // The page URL is passed as base URI so relative links can be made absolute.
      .flatMapConcat(data => imagesFromPage(Jsoup.parse(data.utf8String, url)))
      .recoverWith { case scala.util.control.NonFatal(_) => Source.empty }
  }

  /** Emits the distinct image URLs extracted from the anchors of an already parsed page. */
  protected def imagesFromPage(page: Document): Source[String, akka.NotUsed] = {
    val urls = page.select("a").flatMap(getUrl(_, "href"))
    Source(urls.distinct.toVector)
  }

  /**
    * Returns the absolute URL stored in `attr` when it is a valid URL whose path contains
    * "/src/" and ends in ".png"; None otherwise.
    */
  protected def getUrl(e: Element, attr: String): Option[String] = {
    Try(new URL(e.absUrl(attr)))
      .toOption
      .filter(_.getPath.matches("([^\\?\\s]+)?/src/([^\\?\\s]+)?\\.png"))
      .map(_.toString)
  }
}
开发者ID:Karasiq,项目名称:nanoboard,代码行数:51,代码来源:BoardPngSource.scala
示例13: FloggerPageSpec
//设置package包名称以及导入依赖的类
package com.themillhousegroup.flogger
import org.specs2.mutable.Specification
import com.themillhousegroup.flogger.test.TestFixtures
import com.themillhousegroup.scoup.ScoupImplicits
import org.jsoup.nodes.Element
// Specification for the page-related API, exercised through `testFlogger`, `waitFor` and
// `getTestPage` (presumably supplied by TestFixtures; confirm there).
class FloggerPageSpec extends Specification with TestFixtures with ScoupImplicits {
"Page API" should {
// The list of blog pages must contain at least one entry.
"list all pages" in {
waitFor(testFlogger.blogPages) must not beEmpty
}
// The test page's document content must be queryable with a CSS selector.
"be able to parse HTML as a JSoup Document from a page" in {
val testPage = getTestPage
val html = testPage.documentContent
html must not beNull
// The selection is typed as Iterable[Element]; NOTE(review): presumably via a
// conversion from ScoupImplicits - confirm.
val h4s: Iterable[Element] = html.select("h4")
h4s must not beEmpty
h4s.head.text must beEqualTo("This is an H4")
}
}
}
开发者ID:themillhousegroup,项目名称:flogger,代码行数:26,代码来源:FloggerPageSpec.scala
示例14: JsoupHelper
//设置package包名称以及导入依赖的类
package util
import java.util.stream.Collectors
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
/** Implicit enrichment of jsoup's Elements for use from Scala. */
object JsoupHelper {
  /** Wraps `elements` so the methods of Elements4Scala can be called on it. */
  implicit def elementsToElements4Scala(elements: Elements): Elements4Scala =
    new Elements4Scala(elements)
}
/**
  * Wrapper that adds Scala-friendly conversions to a jsoup Elements collection.
  *
  * @param that the wrapped collection
  */
class Elements4Scala(that: Elements) {
  /** Copies the wrapped elements, in order, into a new array. */
  def toElementArray: Array[Element] = {
    val target = new Array[Element](that.size())
    that.toArray(target)
  }
}
开发者ID:ktr-skmt,项目名称:FelisCatusZero,代码行数:21,代码来源:JsoupHelper.scala
示例15: TorrentSearchResult
//设置package包名称以及导入依赖的类
package providers
import org.jsoup.Jsoup
import org.jsoup.nodes.Element
import org.jsoup.select.Elements
import play.api.libs.json.Json
import play.api.libs.ws.WSClient
import scala.collection.JavaConversions._
import scala.concurrent.ExecutionContext.Implicits.global
/**
  * One torrent hit returned by a search provider. All values are kept as the text scraped
  * from the result page.
  *
  * @param provider name of the provider that produced the hit, e.g. "kat.cr"
  */
case class TorrentSearchResult(
  name:String,
  magnetLink:String,
  size:String,
  age:String,
  seeders:String,
  leechers:String,
  provider:String
) extends SearchResult

/** Companion providing the JSON writer for [[TorrentSearchResult]]. */
object TorrentSearchResult {
  implicit val torrentSearchResultWrites = Json.writes[TorrentSearchResult]
}
/** Search provider that scrapes the kat.cr search result page. */
class KatcrProvider extends SearchProvider {
  /**
    * Requests the search page for `searchString` and extracts the first
    * `numberofResulstToReturn` results from the response body.
    */
  override def search(searchString:String, ws: WSClient, numberofResulstToReturn:Int) = {
    val searchProviderURL = "https://kat.cr/usearch/"
    ws.url(searchProviderURL + searchString).get().map { response =>
      extractTopTorrents(response.body, numberofResulstToReturn)
    }
  }

  /**
    * Builds a result from the cells of one table row.
    *
    * @param tds the row's `td` cells; at least five are required
    * @return the result; the magnet link is "" when the row contains none
    */
  def processKatcrTableRow(tds: Elements):TorrentSearchResult =
    TorrentSearchResult(
      tds.get(0).text,
      // Elements.attr yields "" when nothing matches, instead of failing on a null first().
      tds.select("a[title=\"Torrent magnet link\"]").attr("href"),
      tds.get(1).text,
      tds.get(2).text, tds.get(3).text, tds.get(4).text, "kat.cr")

  /**
    * Extracts up to `numberOfResults` results, in page order, from the second table of the
    * page, skipping that table's first row.
    *
    * @return the results; empty when the page has fewer than two tables
    */
  def extractTopTorrents(htmlString:String, numberOfResults:Int): List[SearchResult] = {
    val tables: Elements = Jsoup.parse(htmlString).select("table")
    if (tables.size < 2) Nil
    else
      tables.get(1).select("tr").toList
        .drop(1)
        .map(_.select("td"))
        // Rows with fewer than five cells cannot be turned into a result.
        .filter(_.size >= 5)
        .take(numberOfResults)
        .map(processKatcrTableRow)
  }
}
开发者ID:aashiks,项目名称:jIgor,代码行数:53,代码来源:KatcrProvider.scala
示例16: LoginFormExtractor
//设置package包名称以及导入依赖的类
package services.scraper.extractor
import models.ScraperData
import org.jsoup.nodes.{Document, Element}
import scala.collection.JavaConverters._
/** Factory for [[LoginFormExtractor]]. */
object LoginFormExtractor {
  /** Creates a new extractor instance. */
  def apply(): LoginFormExtractor = {
    val extractor = new LoginFormExtractor
    extractor
  }
}
/**
  * Extractor that records whether a document contains a login form.
  *
  * A form counts as a login form when more than one of its `input` elements is named
  * "login", "user", "username", "pwd" or "password".
  */
class LoginFormExtractor extends Extractor[ScraperData] {
  /** Returns a copy of `in` with `haveLoginForm` set from the forms found in `doc`. */
  override def apply(doc: Document, in: ScraperData): ScraperData = {
    val credentialNames = Set("login", "user", "username", "pwd", "password")
    val hasLoginForm = doc.select("form").forms().asScala.exists { form =>
      val credentialInputs =
        form.select("input").asScala.count(input => credentialNames.contains(input.attr("name")))
      credentialInputs > 1
    }
    in.copy(haveLoginForm = hasLoginForm)
  }
}
开发者ID:htimur,项目名称:site-analysis,代码行数:30,代码来源:LoginFormExtractor.scala
注:本文中的org.jsoup.nodes.Element类示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论