本文整理汇总了Scala中java.util.regex.Pattern类的典型用法代码示例。如果您正苦于以下问题：Scala Pattern类的具体用法？Scala Pattern怎么用？Scala Pattern使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Pattern类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Scala代码示例。
示例1: GenerateSCSSAction
//设置package包名称以及导入依赖的类
package com.xcodersteam.idea.plugins.mkupscss
import java.awt.datatransfer.StringSelection
import java.util.regex.Pattern
import com.intellij.openapi.actionSystem.{AnAction, AnActionEvent, CommonDataKeys}
import com.intellij.openapi.ide.CopyPasteManager
import com.intellij.openapi.ui.popup.JBPopupFactory
import com.intellij.openapi.util.text.StringUtil
import com.intellij.openapi.wm.WindowManager
/**
 * Editor action that converts the selected markup to SCSS, copies the result to the
 * clipboard and reports success or failure in a balloon over the status bar.
 */
class GenerateSCSSAction extends AnAction {
  /** Matches anything shaped like a markup tag, e.g. `<div class="a">`. */
  val pattern = Pattern.compile("<[^>]*>")

  /**
   * Shows the action only when a project, an editor and a supported file type are
   * present, and enables it only while the selection contains at least one tag.
   */
  override def update(e: AnActionEvent): Unit = {
    val project = e.getData(CommonDataKeys.PROJECT)
    val editor = e.getData(CommonDataKeys.EDITOR)
    val psiFile = e.getData(CommonDataKeys.PSI_FILE)
    // Each lookup may yield null, so nothing is dereferenced before it is checked.
    val supported = project != null && editor != null && psiFile != null &&
      CodeLangHelper.langs.get(psiFile.getFileType.getName).isDefined
    // getSelectedText is null when nothing is selected, which Option(...) turns into None.
    val selectedText = Option(editor).flatMap(ed => Option(ed.getSelectionModel.getSelectedText))
    e.getPresentation.setVisible(supported)
    e.getPresentation.setEnabled(selectedText.exists(text => pattern.matcher(text).find()))
  }

  /**
   * Converts the current selection and copies the generated SCSS to the clipboard.
   * An unrecognised (or missing) file type is reported through the error balloon.
   */
  override def actionPerformed(anActionEvent: AnActionEvent): Unit = {
    val editor = anActionEvent.getData(CommonDataKeys.EDITOR)
    // Without an editor there is neither a selection to convert nor a project to report to.
    if (editor != null) {
      val lang = Option(anActionEvent.getData(CommonDataKeys.PSI_FILE)).map(_.getFileType.getName)
      val text = editor.getSelectionModel.getSelectedText()
      val statusBar = WindowManager.getInstance().getStatusBar(editor.getProject)
      try {
        // Resolve the language *before* touching it; the prefix is only needed on success.
        val codeLang = lang.flatMap(name => CodeLangHelper.langs.get(name)).getOrElse(
          throw new CodeConvertingException(message = "Failed to recognize file type"))
        val prefix = "[" + codeLang.name + "->SCSS]"
        val generated = CodeConverter(codeLang.prepareText(text), codeLang)
        CopyPasteManager.getInstance().setContents(new StringSelection(StringUtil.convertLineSeparators(generated, "\n")))
        JBPopupFactory.getInstance()
          .createHtmlTextBalloonBuilder(prefix + " Successful generated scss and copied to clipboard", com.intellij.openapi.ui.MessageType.INFO, null)
          .setFadeoutTime(5000)
          .createBalloon().showInCenterOf(statusBar.getComponent)
      } catch {
        case e: CodeConvertingException =>
          JBPopupFactory.getInstance()
            .createHtmlTextBalloonBuilder(e.errorMessage, com.intellij.openapi.ui.MessageType.ERROR, null)
            .setFadeoutTime(5000)
            .createBalloon().showInCenterOf(statusBar.getComponent)
      }
    }
  }
}
开发者ID:semoro,项目名称:MarkupToSCSS,代码行数:54,代码来源:GenerateSCSSAction.scala
示例2: PScoutTranslator
//设置package包名称以及导入依赖的类
package org.argus.amandroid.core.util
import org.argus.jawa.core.util._
import java.util.regex.Pattern
import org.argus.jawa.core.{JavaKnowledge, Signature}
/**
 * Reads a PScout permission-mapping file and groups the method signatures it lists
 * under the permission line that precedes them.
 */
object PScoutTranslator {
  /** Command-line entry point; `args(0)` is the path of the mapping file. */
  def main(args: Array[String]): Unit = {
    val filepath = args(0)
    val fileuri = FileUtil.toUri(filepath)
    translate(fileuri)
  }

  /**
   * Parses the file at `uri`.
   *
   * Lines starting with `Permission:` open a new group; lines starting with `<` are
   * parsed as signatures and added to the current group; all other lines are ignored.
   *
   * @return permission name -> signatures listed under it
   */
  def translate(uri: FileResourceUri): IMap[String, ISet[Signature]] = {
    val permissionMap: MMap[String, MSet[Signature]] = mmapEmpty
    // Stays null until the first "Permission:" line has been seen.
    var currentPermission: String = null
    val source = scala.io.Source.fromFile(FileUtil.toFile(uri))
    // The source holds an open file handle, so release it even when parsing fails.
    try {
      source.getLines().foreach {
        case permission if permission.startsWith("Permission:") =>
          currentPermission = permission.replace("Permission:", "")
        case sigstr if sigstr.startsWith("<") =>
          val sig = formatSignature(sigstr)
          permissionMap.getOrElseUpdate(currentPermission, msetEmpty) ++= sig
        case _ =>
      }
    } finally {
      source.close()
    }
    permissionMap.map{case (k, v) => (k, v.toSet)}.toMap
  }

  // Capture groups: 1 = declaring class, 2 = return type, 3 = method name,
  // 4 = comma-separated parameter types (may be empty).
  private val regex = "<([[^\\s]&&[^:]]+):\\s([^\\s]+)\\s([[^\\s]&&[^\\(]]+)\\(([[^\\s]&&[^\\)]]*)\\)>\\s+\\(.*\\)"
  // Compiled once instead of once per signature line.
  private val signaturePattern: Pattern = Pattern.compile(regex)

  /**
   * Converts one PScout signature line into a [[Signature]].
   *
   * @return the parsed signature, or None (after logging to stderr) when the line
   *         does not have the expected shape
   */
  private def formatSignature(sigstr: String): Option[Signature] = {
    val m = signaturePattern.matcher(sigstr)
    if (m.find()) {
      val classTyp = JavaKnowledge.getTypeFromJawaName(m.group(1))
      val retTypStr = m.group(2)
      val methodName = m.group(3)
      // An empty parameter list yields a single empty string from split, hence the filter.
      val params = m.group(4).split(",")
        .filter(_.nonEmpty)
        .map(p => JavaKnowledge.formatTypeToSignature(JavaKnowledge.getTypeFromJawaName(p)))
        .mkString
      val ret = JavaKnowledge.formatTypeToSignature(JavaKnowledge.getTypeFromJawaName(retTypStr))
      Some(Signature(classTyp, methodName, "(" + params + ")" + ret))
    } else {
      System.err.println("PScoutTranslator, does not match: " + sigstr)
      None
    }
  }
}
开发者ID:arguslab,项目名称:Argus-SAF,代码行数:56,代码来源:PScoutTranslator.scala
示例3: URLInString
//设置package包名称以及导入依赖的类
package org.argus.jawa.core.util
import java.util.regex.Pattern
/** Finds URL-like substrings in free text. */
object URLInString {
  // Compiled once. Compared with the previous literal:
  //  - the optional credentials group is `\w+:\w+@` again (it had been replaced by
  //    an e-mail-obfuscation placeholder and could never match real credentials);
  //  - percent escapes are `%[a-f\d]{2}` everywhere (two occurrences had the `{2}`
  //    inside the character class);
  //  - the dot in the `www.` prefix is escaped so it only matches a literal dot.
  private val urlPattern: Pattern = Pattern.compile(
    "\\b(((ht|f)tp(s?)\\:\\/\\/|~\\/|\\/)|www\\.)" +
      "(\\w+:\\w+@)?(([-\\w]+\\.)+(com|org|net|gov" +
      "|mil|biz|info|mobi|name|aero|jobs|museum" +
      "|travel|[a-z]{2}))(:[\\d]{1,5})?" +
      "(((\\/([-\\w~!$+|.,=]|%[a-f\\d]{2})+)+|\\/)+|\\?|#)?" +
      "((\\?([-\\w~!$+|.,*:]|%[a-f\\d]{2})+=?" +
      "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)" +
      "(&(?:[-\\w~!$+|.,*:]|%[a-f\\d]{2})+=?" +
      "([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)*)*" +
      "(#([-\\w~!$+|.,*:=]|%[a-f\\d]{2})*)?\\b")

  /**
   * Returns every distinct URL-like substring of `str`.
   *
   * A match must start with `http(s)://`, `ftp(s)://`, `~/`, `/` or `www.`, so the
   * former "strip surrounding parentheses" step could never trigger and was dropped.
   *
   * @param str text to scan
   * @return the set of matched substrings (empty when there is none)
   */
  def extract(str: String): Set[String] = {
    val matcher = urlPattern.matcher(str)
    val found = Set.newBuilder[String]
    while (matcher.find()) {
      found += matcher.group()
    }
    found.result()
  }
}
开发者ID:arguslab,项目名称:Argus-SAF,代码行数:32,代码来源:URLInString.scala
示例4: StringUtils
//设置package包名称以及导入依赖的类
package utils
import java.text.Normalizer
import java.util.regex.Pattern
import java.security.SecureRandom
import scala.util.Try
/** Small string helpers: UUID generation, accent stripping and random strings. */
object StringUtils {
  // Compiled once; previously rebuilt on every deAccent call.
  private[this] val diacriticalMarks: Pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+")

  /** Returns a freshly generated random UUID in its canonical string form. */
  def generateUuid(): String = java.util.UUID.randomUUID().toString

  /**
   * Removes accents from `str`.
   *
   * The text is NFD-normalised so accented letters split into a base letter plus
   * combining marks, and the combining marks are then deleted.
   *
   * @param str text to clean; may be null
   * @return the text without combining diacritical marks, or "" for null/empty input
   */
  def deAccent(str: String): String =
    if (str == null || str.isEmpty) ""
    else diacriticalMarks.matcher(Normalizer.normalize(str, Normalizer.Form.NFD)).replaceAll("")

  // Random generator
  private[this] val random = new SecureRandom()

  // Generate a random string of length n from the given alphabet.
  // Iterator.fill draws exactly n characters (none when n <= 0), unlike the previous
  // Stream.continually, which is deprecated and evaluated one draw eagerly.
  private[this] def randomString(alphabet: String)(n: Int): String =
    Iterator.fill(n)(alphabet.charAt(random.nextInt(alphabet.length))).mkString

  /** Generates a random alphanumeric (lower-case letters and digits) string of length `n`. */
  def randomAlphanumericString(n: Int): String =
    randomString("abcdefghijklmnopqrstuvwxyz0123456789")(n)
}
开发者ID:Driox,项目名称:play-app-seed,代码行数:38,代码来源:StringUtil.scala
示例5: TweetCollect
//设置package包名称以及导入依赖的类
package info.matsumana.flink
import java.util.regex.Pattern
import java.util.{HashMap, Properties}
import com.fasterxml.jackson.databind.ObjectMapper
import org.apache.flink.api.java.utils.ParameterTool
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010
import org.apache.flink.streaming.connectors.twitter.TwitterSource
import org.apache.flink.streaming.util.serialization.SimpleStringSchema
/**
 * Flink job that reads the Twitter stream, keeps tweets whose text contains hiragana
 * and writes them, re-serialised as JSON, to a Kafka topic.
 */
object TweetCollect {
  /** Prefix of a tweet-deletion notice; meant to be tested with `find()`. */
  val DELETED_TWEET_PATTERN = Pattern.compile("""^\{"delete":\{""")
  // Text containing at least one character in U+3040..U+3096 (hiragana).
  // DOTALL lets `.` cross line breaks, so multi-line tweets can still match as a whole.
  val TARGET_TWEET_PATTERN = Pattern.compile("^.*[\u3040-\u3096]+.*$", Pattern.DOTALL)
  val mapper = new ObjectMapper()

  /**
   * Builds and runs the pipeline.
   *
   * @param args `args(0)` is the path of a properties file providing the Twitter
   *             credentials and, optionally, `bootstrap.servers` and `topic`
   */
  def main(args: Array[String]): Unit = {
    val params = ParameterTool.fromPropertiesFile(args(0))
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(60000)

    // source (twitter)
    val twitterProps = new Properties()
    twitterProps.setProperty(TwitterSource.CONSUMER_KEY, params.get("consumer_key", ""))
    twitterProps.setProperty(TwitterSource.CONSUMER_SECRET, params.get("consumer_secret", ""))
    twitterProps.setProperty(TwitterSource.TOKEN, params.get("token", ""))
    twitterProps.setProperty(TwitterSource.TOKEN_SECRET, params.get("token_secret", ""))
    val sourceStream = env.addSource(new TwitterSource(twitterProps))

    // sink (kafka)
    val kafkaProps = new Properties()
    kafkaProps.setProperty("bootstrap.servers", params.get("bootstrap.servers", "localhost:9092"))
    val topic = params.get("topic", "twitter")
    val sink = new FlinkKafkaProducer010[String](
      topic,
      new SimpleStringSchema,
      kafkaProps)

    // stream processing
    sourceStream
      // find(), not matches(): the pattern only describes the start of the message,
      // and matches() would require it to cover the entire JSON document.
      .filter(!DELETED_TWEET_PATTERN.matcher(_).find())
      .map(mapper.readValue(_, classOf[HashMap[String, Object]]))
      .filter(m => {
        val text = m.get("text")
        text != null && TARGET_TWEET_PATTERN.matcher(String.valueOf(text)).matches()
      })
      .map(mapper.writeValueAsString(_))
      .addSink(sink)

    env.execute("TweetCollect")
  }
}
开发者ID:matsumana,项目名称:scala-fukuoka-lt,代码行数:59,代码来源:TweetCollect.scala
示例6: getImageType
//设置package包名称以及导入依赖的类
package org.aj.awslambda
import java.awt.image.BufferedImage
import java.io.IOException
import java.net.URLDecoder
import com.amazonaws.services.lambda.runtime.events.S3Event
import java.util.regex.Pattern
import scala.concurrent.{Await, Future}
import scala.concurrent.ExecutionContext.Implicits.global
/**
 * Takes the text after the last dot of `srcKey` as the image type and returns it
 * only when `imageTypes` has an entry for it.
 *
 * @return the extension, or None when the key has no dot or the type is not listed
 */
private def getImageType(srcKey: String): Option[String] = {
  val extension = Pattern.compile(".*\\.([^\\.]*)").matcher(srcKey)
  if (extension.matches()) {
    // get source image type and validate
    val imageType = extension.group(1)
    imageTypes.get(imageType).map(_ => imageType)
  } else {
    None
  }
}
/** Turns `+` into spaces and then URL-decodes `key` as UTF-8. */
private def decodeS3Key(key: String): String = {
  val withSpaces = key.replace("+", " ")
  URLDecoder.decode(withSpaces, "utf-8")
}
/**
 * Processes the source image once with its own dimensions and once for every entry
 * of `sizes` that differs from the source width.
 *
 * @return a future of all results, the source-sized one first
 */
private def process(srcBucket: String, srcKey: String, imageType: String, dstBucket: String): Future[List[Url]] = {
  // source image together with its dimensions
  val (image, sourceUrl): (BufferedImage, Url) = getImage(srcBucket, srcKey)
  val original: Future[Url] = process(image, sourceUrl, imageType, dstBucket, srcKey)
  // one resize per target size, skipping the size the source already has
  val resized: List[Future[Url]] = sizes.collect {
    case size if sourceUrl.width != size => process(image, size, imageType, dstBucket, srcKey)
  }
  Future.sequence(original :: resized)
}
}
开发者ID:ajmnsk,项目名称:awslambda-resize,代码行数:47,代码来源:ImageHandler.scala
示例7: RawCellContentExtractor
//设置package包名称以及导入依赖的类
package org.opencompare.io.wikipedia.parser
import java.util.regex.Pattern
import org.joda.time.DateTime
import org.sweble.wikitext.engine.PageTitle
import org.sweble.wikitext.engine.config.WikiConfig
import org.sweble.wikitext.parser.nodes._
import org.sweble.wom3.swcadapter.AstToWomConverter
import org.sweble.wom3.util.Wom3Toolbox
/** Renders a wikitext table cell back to text and trims cell markers and whitespace. */
class RawCellContentExtractor(val wikiConfig : WikiConfig) {

  // Skips leading whitespace, '|' and '!' characters, captures the remainder lazily
  // and leaves trailing whitespace outside the captured group.
  private val trimPattern : Pattern = Pattern.compile("[\\s|!]*([\\s\\S]*?)\\s*")
  private val nestedTableChecker : NestedTableChecker = new NestedTableChecker
  private val wtToStringConverter : WtToStringConverter = new WtToStringConverter(wikiConfig)

  /**
   * @param cell node of the cell to render
   * @return the trimmed cell text; "" for cells with a nested table or when the
   *         converter produces nothing
   */
  def extract(cell : WtNode) : String =
    if (nestedTableChecker.hasNestedTable(cell)) {
      "" // FIXME : we do not support nested tables for now
    } else {
      wtToStringConverter.convert(cell).map(trim).getOrElse("")
    }

  /** Returns the part of `s` captured by the trim pattern, or "" when it does not match. */
  def trim(s: String): String = {
    val m = trimPattern.matcher(s)
    val usable = m.matches() && m.groupCount() == 1
    if (!usable) "" else m.group(1)
  }
}
开发者ID:Bastcloa,项目名称:testJenkins,代码行数:45,代码来源:RawCellContentExtractor.scala
示例8: RegexPatternInterpreter
//设置package包名称以及导入依赖的类
package org.opencompare.formalizer.interpreters
import java.util.regex.{Matcher, Pattern}
import org.opencompare.api.java.{Value, Feature, Product}
/**
 * Pattern interpreter driven by a regular expression: a cell value is interpreted
 * only when the whole string matches `regex` (case-insensitive, Unicode-aware,
 * with `.` also matching line terminators).
 */
abstract class RegexPatternInterpreter(initValidHeaders : List[String],
                                       regex : String,
                                       initParameters : List[String],
                                       initConfident : Boolean)
  extends PatternInterpreter(initValidHeaders, initParameters, initConfident) {

  private val pattern : Pattern = {
    val flags = Pattern.UNICODE_CHARACTER_CLASS |
      Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE | Pattern.DOTALL
    Pattern.compile(regex, flags)
  }

  /** Delegates to [[createValue]] when `s` matches entirely; otherwise yields None. */
  override def matchAndCreateValue(s: String, product: Product, feature: Feature): Option[Value] = {
    val matcher = pattern.matcher(s)
    if (!matcher.matches()) None
    else createValue(s, matcher, parameters, product, feature)
  }

  /**
   * Builds the value from a successful match.
   *
   * @param matcher matcher that has already matched `s` in full
   */
  def createValue(s : String, matcher : Matcher, parameters : List[String], product : Product, feature : Feature) : Option[Value]
}
开发者ID:Bastcloa,项目名称:testJenkins,代码行数:27,代码来源:RegexPatternInterpreter.scala
示例9: SVNDirEntryExt
//设置package包名称以及导入依赖的类
package csw.services.config.server.commons
import java.util.regex.Pattern
import csw.services.config.api.models.FileType
import org.tmatesoft.svn.core.{SVNDirEntry, SVNNodeKind}
/** Extension methods for [[SVNDirEntry]]. */
object SVNDirEntryExt {

  implicit class RichSvnDirEntry(val entry: SVNDirEntry) extends AnyVal {

    /** True when the entry's node kind is FILE. */
    def isFile: Boolean = entry.getKind == SVNNodeKind.FILE

    /** True when the entry name does not end with `activeFileName`. */
    def isNotActiveFile(activeFileName: String): Boolean = !entry.getName.endsWith(activeFileName)

    /** Removes `annexSuffix` from the end of the entry's relative path (mutates the entry). */
    def stripAnnexSuffix(annexSuffix: String): Unit =
      entry.setRelativePath(entry.getRelativePath.stripSuffix(annexSuffix))

    /** True when no pattern is given, or when the whole relative path matches it. */
    def matches(maybePattern: Option[Pattern]): Boolean = maybePattern match {
      case None          => true
      case Some(pattern) => pattern.matcher(entry.getRelativePath).matches()
    }

    /**
     * True when no file type is requested, or when the entry's annex status (name
     * ending in `annexSuffix`) agrees with the requested type.
     */
    def matchesFileType(maybeFileType: Option[FileType], annexSuffix: String): Boolean = maybeFileType match {
      case None                  => true
      case Some(FileType.Annex)  => isAnnex(annexSuffix)
      case Some(FileType.Normal) => !isAnnex(annexSuffix)
    }

    private def isAnnex(annexSuffix: String): Boolean = entry.getName.endsWith(annexSuffix)
  }
}
开发者ID:tmtsoftware,项目名称:csw-prod,代码行数:27,代码来源:SVNDirEntryExt.scala
示例10: message
//设置package包名称以及导入依赖的类
package com.wunder.pets.validations
import java.util.regex.{Matcher, Pattern}
import org.postgresql.util.PSQLException
/** A validation failure that can describe itself in a human-readable message. */
trait ValidationError {
  /** Human-readable description of the failure. */
  def message: String
}

/** The named field was left empty. */
final class IsEmpty(val field: String) extends ValidationError {
  override def message: String = s"${field} cannot be empty"
}

/** The named field is not above `lowerBound`. */
final class NotGreaterThan[T](val field: String, val lowerBound: T) extends ValidationError {
  override def message: String = s"${field} must be greater than ${lowerBound}"
}

/** The named field is not below `upperBound`. */
final class NotLessThan[T](val field: String, val upperBound: T) extends ValidationError {
  override def message: String = s"${field} must be less than ${upperBound}"
}
/**
 * A uniqueness violation reported by PostgreSQL; the message is derived from the
 * "Key (field)=(value) already exists." detail of the server error.
 */
final class DuplicateValue(val e: PSQLException) extends ValidationError {
  /**
   * @return "&lt;field&gt; has a duplicate value of &lt;value&gt;" when the server detail has
   *         the expected shape, otherwise a generic fallback (also used when the
   *         exception carries no server message or no detail)
   */
  override def message: String = {
    val regex = "Key \\((.*)\\)=\\((.*)\\) already exists."
    // Both the server message and its detail may be absent; treat null as "no detail"
    // instead of letting Pattern.matcher(null) throw.
    val detail = for {
      serverMessage <- Option(e.getServerErrorMessage)
      text <- Option(serverMessage.getDetail)
    } yield text
    detail.map(text => Pattern.compile(regex).matcher(text)).filter(_.matches()) match {
      case Some(m) => s"${m.group(1)} has a duplicate value of ${m.group(2)}"
      case None => "Could not determine field and value."
    }
  }
}
/** A validation error carrying a caller-supplied message verbatim. */
final class GeneralError(val message: String)
  extends ValidationError
开发者ID:wunderteam,项目名称:battle-pets-api,代码行数:36,代码来源:ValidationError.scala
示例11: KStreamBuilderS
//设置package包名称以及导入依赖的类
package com.github.aseigneurin.kafka.streams.scala
import java.util.regex.Pattern
import com.github.aseigneurin.kafka.streams.scala.ImplicitConversions._
import org.apache.kafka.common.serialization.Serde
import org.apache.kafka.streams.kstream.{GlobalKTable, KStreamBuilder}
import org.apache.kafka.streams.processor.TopologyBuilder
/**
 * Scala-flavoured entry points over a single shared `KStreamBuilder`. Every method
 * forwards to `inner`, passing the implicit serdes explicitly.
 */
object KStreamBuilderS {

  /** The wrapped builder shared by every method of this object. */
  val inner = new KStreamBuilder

  /** Stream over the given topic names. */
  def stream[K, V](topics: String*)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] = {
    val underlying = inner.stream[K, V](keySerde, valSerde, topics: _*)
    underlying
  }

  /** Stream over the given topic names with an explicit offset-reset policy. */
  def stream[K, V](offsetReset: TopologyBuilder.AutoOffsetReset,
                   topics: String*)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] = {
    val underlying = inner.stream[K, V](offsetReset, keySerde, valSerde, topics: _*)
    underlying
  }

  /** Stream over every topic whose name matches `topicPattern`. */
  def stream[K, V](topicPattern: Pattern)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] = {
    val underlying = inner.stream[K, V](keySerde, valSerde, topicPattern)
    underlying
  }

  /** Pattern-based stream with an explicit offset-reset policy. */
  def stream[K, V](offsetReset: TopologyBuilder.AutoOffsetReset,
                   topicPattern: Pattern)
                  (implicit keySerde: Serde[K], valSerde: Serde[V]): KStreamS[K, V] = {
    val underlying = inner.stream[K, V](offsetReset, keySerde, valSerde, topicPattern)
    underlying
  }

  /** Table over `topic`, backed by the store named `storeName`. */
  def table[K, V](topic: String,
                  storeName: String)
                 (implicit keySerde: Serde[K], valSerde: Serde[V]): KTableS[K, V] = {
    val underlying = inner.table[K, V](keySerde, valSerde, topic, storeName)
    underlying
  }

  /** Table over `topic` with an explicit offset-reset policy. */
  def table[K, V](offsetReset: TopologyBuilder.AutoOffsetReset,
                  topic: String,
                  storeName: String)
                 (implicit keySerde: Serde[K], valSerde: Serde[V]): KTableS[K, V] = {
    val underlying = inner.table[K, V](offsetReset, keySerde, valSerde, topic, storeName)
    underlying
  }

  /** Global table over `topic`, backed by the store named `storeName`. */
  def globalTable[K, V](topic: String,
                        storeName: String)
                       (implicit keySerde: Serde[K], valSerde: Serde[V]): GlobalKTable[K, V] =
    inner.globalTable(keySerde, valSerde, topic, storeName)

  /** Merges the given streams into one by merging their wrapped streams. */
  def merge[K, V](streams: KStreamS[K, V]*): KStreamS[K, V] =
    inner.merge(streams.map(_.inner): _*)
}
开发者ID:aseigneurin,项目名称:kafka-streams-scala,代码行数:55,代码来源:KStreamBuilderS.scala
示例12: RegexConstraint
//设置package包名称以及导入依赖的类
package be.dataminded.wharlord.constraints
import java.util.regex.Pattern
import org.apache.spark.sql.functions._
import org.apache.spark.sql.{Column, DataFrame}
import scala.util.Try
/**
 * Constraint requiring every non-null value of `columnName` to contain a match of
 * `regex` (`find` semantics, so unanchored patterns match substrings).
 */
case class RegexConstraint(columnName: String, regex: String) extends Constraint {

  /** Counts the violating rows of a data frame and wraps the outcome in a result. */
  val fun = (df: DataFrame) => {
    val compiled = Pattern.compile(regex)
    // null values are never counted as violations
    val violates = udf { (value: String) =>
      if (value == null) false else !compiled.matcher(value).find()
    }
    val violationCount = Try {
      df.filter(violates(new Column(columnName))).count
    }
    RegexConstraintResult(
      constraint = this,
      data = violationCount.toOption.map(RegexConstraintResultData),
      status = tryToStatus[Long](violationCount, _ == 0)
    )
  }
}
/**
 * Outcome of a [[RegexConstraint]] check.
 *
 * `message` is computed at construction time and throws
 * `IllegalConstraintResultException` for inconsistent status/data combinations.
 */
case class RegexConstraintResult(constraint: RegexConstraint,
                                 data: Option[RegexConstraintResultData],
                                 status: ConstraintStatus) extends ConstraintResult[RegexConstraint] {

  val message: String = {
    val columnName = constraint.columnName
    val regex = constraint.regex
    (status, data.map(_.failedRows)) match {
      case (ConstraintSuccess, Some(0)) =>
        s"Column $columnName matches $regex"
      case (ConstraintFailure, Some(failedRows)) =>
        // singular wording for exactly one failing row
        val (pluralS, verb) = if (failedRows == 1) ("", "does") else ("s", "do")
        s"Column $columnName contains $failedRows row$pluralS that $verb not match $regex"
      case (ConstraintError(throwable), None) =>
        s"Checking whether column $columnName matches $regex failed: $throwable"
      case _ =>
        throw IllegalConstraintResultException(this)
    }
  }
}
/** Payload of a regex constraint result: the number of rows that failed the check. */
case class RegexConstraintResultData(
  failedRows: Long
)
开发者ID:datamindedbe,项目名称:wharlord,代码行数:48,代码来源:RegexConstraint.scala
示例13: uuid
//设置package包名称以及导入依赖的类
package events
import eventstore.JsonMapping._
import java.util.UUID
import java.util.regex.Pattern
import play.api.libs.json._
import scala.util.control.Exception.catching
/** Anything identified by a UUID. */
trait Identifier {
  /** The UUID backing this identifier. */
  def uuid: UUID
}
/**
 * Base class for companions of [[Identifier]] types: creates identifiers, parses the
 * textual form `prefix(uuid)` and provides the JSON format.
 *
 * @param prefix literal text expected in front of the parenthesised UUID
 */
abstract class IdentifierCompanion[A <: Identifier](val prefix: String) {
  // `prefix` is quoted, so it is matched literally; the group captures 36 hex/dash chars.
  private val IdentifierRegex = (Pattern.quote(prefix) + """\(([a-fA-F0-9-]{36})\)""").r

  /** Wraps a UUID into the concrete identifier type. */
  def apply(uuid: UUID): A

  /** Creates an identifier from a random UUID. */
  def generate(): A = apply(UUID.randomUUID)

  /**
   * Parses `prefix(uuid)`.
   *
   * @return the identifier, or None when `s` has a different shape or building the
   *         identifier throws a RuntimeException
   */
  def fromString(s: String): Option[A] = {
    val guarded = catching(classOf[RuntimeException])
    s match {
      case IdentifierRegex(rawUuid) => guarded.opt(apply(UUID.fromString(rawUuid)))
      case _ => None
    }
  }

  implicit val IdentifierFormat: Format[A] = valueFormat(apply)(_.uuid)

  implicit val IdentifierCompanionObject: IdentifierCompanion[A] = this
}
开发者ID:cubean,项目名称:play-blog-example,代码行数:28,代码来源:Identifier.scala
示例14: Template
//设置package包名称以及导入依赖的类
package de.frosner.broccoli.models
import java.util.regex.Pattern
import org.apache.commons.codec.digest.DigestUtils
import play.api.libs.json._
import play.api.libs.functional.syntax._
import scala.collection.mutable.ArrayBuffer
import ParameterInfo.parameterInfoWrites
/**
 * A job template whose text contains `{{name}}` placeholders.
 *
 * `parameters` and `version` are computed lazily and excluded from Java serialisation.
 */
case class Template(id: String, template: String, description: String, parameterInfos: Map[String, ParameterInfo])
    extends Serializable {

  /**
   * Names of all placeholders found in the template text.
   * Fails with IllegalArgumentException (via `require`) when there is no `id` placeholder.
   */
  @transient
  lazy val parameters: Set[String] = {
    val matcher = Template.TemplatePattern.matcher(template)
    // advance the matcher until it runs dry, collecting the captured name each time
    val found = Iterator
      .continually(matcher)
      .takeWhile(_.find())
      .map(_.group(1))
      .toSet
    require(
      found.contains("id"),
      s"There needs to be an 'id' field in the template for Broccoli to work. Parameters defined: ${found}")
    found
  }

  /** MD5 hex digest of the trimmed template text, "_" and the parameter infos' string form. */
  @transient
  lazy val version: String = {
    val fingerprintInput = template.trim() + "_" + parameterInfos.toString
    DigestUtils.md5Hex(fingerprintInput)
  }
}
/** Placeholder pattern and JSON (de)serialisers for [[Template]]. */
object Template {

  /** Matches `{{name}}`; group 1 is the name (a letter, then letters, digits, '-' or '_'). */
  val TemplatePattern = Pattern.compile("\\{\\{([A-Za-z][A-Za-z0-9\\-\\_\\_]*)\\}\\}")

  /** API representation: omits the template text, adds the derived parameters and version. */
  implicit val templateApiWrites: Writes[Template] = (
    (JsPath \ "id").write[String] and
      (JsPath \ "description").write[String] and
      (JsPath \ "parameters").write[Set[String]] and
      (JsPath \ "parameterInfos").write[Map[String, ParameterInfo]] and
      (JsPath \ "version").write[String]
  )((t: Template) => (t.id, t.description, t.parameters, t.parameterInfos, t.version))

  /** Persistence format: generated from the case class fields. */
  implicit val templatePersistenceReads: Reads[Template] = Json.reads[Template]

  implicit val templatePersistenceWrites: Writes[Template] = Json.writes[Template]
}
开发者ID:FRosner,项目名称:cluster-broccoli,代码行数:53,代码来源:Template.scala
示例15: DataType
//设置package包名称以及导入依赖的类
package com.epam.streaming
import java.io.File
import java.util.regex.Pattern
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import scala.collection.mutable
/** The two kinds of input data the generator can load. */
object DataType extends Enumeration {
  val ActorData = Value
  val RatingData = Value
}
/**
 * Builds per-year batches of IMDb data from `*_<year>.tsv` files.
 *
 * Only years that have both an actor file and a rating file are used.
 *
 * @param actorDataPath  directory holding `actor_data_<year>.tsv` files
 * @param ratingDataPath directory holding `movie_ratings_<year>.tsv` files
 */
class ImdbEventGenerator(actorDataPath: String, ratingDataPath: String) {

  /** Captures the digits between the last underscore and the `.tsv` extension. */
  val YEAR_IN_FILE_NAME_PATTERN: Pattern = Pattern.compile("^.+_(\\d+)\\.tsv$")

  val yearsOfRatings: Set[String] = getYearsFromListOfFiles(ratingDataPath)
  val yearsOfActors: Set[String] = getYearsFromListOfFiles(actorDataPath)
  /** Years present in both directories, sorted as strings. */
  val years: List[String] = yearsOfRatings.intersect(yearsOfActors).toList.sorted

  /**
   * Loads the file of the requested kind for every known year.
   *
   * @return a queue with one RDD of non-blank lines per year, in `years` order
   */
  def buildImdbEventStream(sc: SparkContext, dataType: DataType.Value): mutable.Queue[RDD[String]] = {
    val queue = mutable.Queue[RDD[String]]()
    val yearRDDs = years.map(year => sc.parallelize(loadLinesFromFile(year, dataType)))
    queue ++= yearRDDs
  }

  /**
   * Collects the year part of every regular file in `dir` whose name matches
   * [[YEAR_IN_FILE_NAME_PATTERN]].
   *
   * @return the years found; empty when `dir` is missing, not a directory or unreadable
   */
  def getYearsFromListOfFiles(dir: String): Set[String] = {
    // listFiles yields null for a non-directory and also on an I/O error, so a single
    // null check covers the former exists/isDirectory test plus the unreadable case.
    val entries = Option(new File(dir).listFiles).getOrElse(Array.empty[File])
    entries.iterator
      .filter(_.isFile)
      .map(file => YEAR_IN_FILE_NAME_PATTERN.matcher(file.getName))
      .filter(_.matches())
      .map(_.group(1))
      .toSet
  }

  /**
   * Reads the data file of the given kind and year.
   *
   * @return its lines, blank ones removed
   */
  def loadLinesFromFile(year: String, dataType: DataType.Value): Seq[String] = {
    val fileName = dataType match {
      case DataType.ActorData => s"$actorDataPath/actor_data_$year.tsv"
      case DataType.RatingData => s"$ratingDataPath/movie_ratings_$year.tsv"
    }
    val source = scala.io.Source.fromFile(fileName)
    // materialise the lines before closing the underlying file
    try source.getLines().filterNot(_.trim.isEmpty).toList finally source.close()
  }
}
开发者ID:kgyozo,项目名称:sparkstreaminghw,代码行数:56,代码来源:ImdbEventGenerator.scala
示例16: Expressions
//设置package包名称以及导入依赖的类
package edu.knowitall
package chunkedextractor
import java.util.regex.Pattern
import edu.knowitall.tool.stem.Lemmatized
import edu.knowitall.tool.chunk.ChunkedToken
/** Token-level predicates for the chunked extractor. */
object Expressions {
  type Token = Lemmatized[ChunkedToken]

  /** Predicate that holds when a token's chunk tag matches `pattern` in full. */
  class ChunkExpression(val pattern: Pattern) extends Function[Token, Boolean] {

    /** Compiles `string` with the given `Pattern` flags. */
    def this(string: String, flags: Int) = this(Pattern.compile(string, flags))

    /** Compiles `string` case-insensitively. */
    def this(string: String) = this(string, Pattern.CASE_INSENSITIVE)

    override def apply(token: Token): Boolean = {
      val chunkTag = token.token.chunk
      pattern.matcher(chunkTag).matches()
    }
  }
}
开发者ID:schmmd,项目名称:openie-standalone,代码行数:25,代码来源:Expressions.scala
示例17: Normalizer
//设置package包名称以及导入依赖的类
package com.fustigatedcat.heystk.agent.common.normalization
import java.util.regex.Pattern
import com.fustigatedcat.heystk.common.normalization.{Log, Normalization}
import com.typesafe.config.Config
import org.slf4j.LoggerFactory
import scala.collection.JavaConverters._
/** Factory building a [[Normalizer]] tree from configuration. */
object Normalizer {

  /**
   * Reads `name`, `expression`, `extractors` and `child-normalizers` from `config`;
   * child normalizers are built recursively with this same method.
   */
  def create(config : Config) : Normalizer = {
    val name = config.getString("name")
    val expression = config.getString("expression")
    val extractors = config.getConfigList("extractors").asScala.map(Extractor.create).toList
    val children = config.getConfigList("child-normalizers").asScala.map(Normalizer.create).toList
    new Normalizer(name, expression, extractors, children)
  }
}
/**
 * A node in a tree of normalizers. When `exp` is found in a log message, the node
 * contributes its own extractors plus those of every matching descendant.
 */
class Normalizer(name : String, exp : String, extractors : List[Extractor], childNormalizers : List[Normalizer]) {

  val regex = Pattern.compile(exp, Pattern.DOTALL|Pattern.UNIX_LINES|Pattern.MULTILINE)

  val logger = LoggerFactory.getLogger(this.getClass)

  /**
   * @return extractors keyed by field name; entries from children override earlier
   *         entries with the same field. Empty when the expression is not found.
   */
  def process(log : Log) : Map[String, Extractor] = {
    if (!regex.matcher(log.message).find()) {
      logger.trace("Skipping normalizer [{}]", name)
      Map()
    } else {
      logger.trace("Processing normalizer [{}]", name)
      // this node's own extractors first ...
      val own : Map[String, Extractor] = extractors.map(e => e.field -> e).toMap
      // ... then each child's, in order
      val merged : Map[String, Extractor] =
        childNormalizers.foldLeft(own)((collected, child) => collected ++ child.process(log))
      logger.trace("Done processing normalizer [{}]", name)
      merged
    }
  }
}
开发者ID:fustigatedcat,项目名称:heystk,代码行数:45,代码来源:Normalizer.scala
示例18: evaluate
//设置package包名称以及导入依赖的类
package de.fuberlin.wiwiss.silk.plugins.transformer.linguistic
import de.fuberlin.wiwiss.silk.linkagerule.input.SimpleTransformer
import java.text.Normalizer
import java.util.regex.Pattern
import de.fuberlin.wiwiss.silk.runtime.plugin.Plugin
// Runs of combining diacritical marks, modifier letters (Lm) and modifier symbols (Sk).
private val DIACRITICS_AND_FRIENDS =
  Pattern.compile("[\\p{InCombiningDiacriticalMarks}\\p{IsLm}\\p{IsSk}]+")
// Characters that NFD normalisation does not decompose, mapped to ASCII stand-ins.
private val NONDIACRITICS = Map(
  // l with stroke, as in BiaLystock / Bialystock
  '\u0141' -> "l",
  '\u0142' -> "l",
  // ligatures and other single letters
  'ß' -> "ss",
  'æ' -> "ae",
  'ø' -> "o",
  '©' -> "c",
  // all Ð ð variants from http://de.wikipedia.org/wiki/%C3%90
  '\u00D0' -> "d",
  '\u00F0' -> "d",
  '\u0110' -> "d",
  '\u0111' -> "d",
  '\u0189' -> "d",
  '\u0256' -> "d",
  // thorn Þ þ
  '\u00DE' -> "th",
  '\u00FE' -> "th"
)
/** Transforms one value by delegating to `simplifyString`. */
def evaluate(value: String) = simplifyString(value)
/** Strips diacritics first, then replaces the remaining special letters; null stays null. */
private def simplifyString(str: String): String =
  Option(str)
    .map(present => stripNonDiacritics(stripDiacritics(present)))
    .orNull
/** Replaces every character listed in `NONDIACRITICS` by its stand-in; others are kept as is. */
private def stripNonDiacritics(orig: String): String =
  orig.flatMap(ch => NONDIACRITICS.getOrElse(ch, ch.toString))
/** NFD-normalises `str` and deletes everything matched by `DIACRITICS_AND_FRIENDS`. */
private def stripDiacritics(str: String): String =
  DIACRITICS_AND_FRIENDS
    .matcher(Normalizer.normalize(str, Normalizer.Form.NFD))
    .replaceAll("")
}
开发者ID:petrovskip,项目名称:silk.2.6-GenLinkSA,代码行数:58,代码来源:NormalizeCharsTransformer.scala
示例19: ConcatMultipleValuesTransformer
//设置package包名称以及导入依赖的类
package de.fuberlin.wiwiss.silk.plugins.transformer.combine
import de.fuberlin.wiwiss.silk.linkagerule.input.Transformer
import de.fuberlin.wiwiss.silk.runtime.plugin.Plugin
import java.util.regex.Pattern
@Plugin(
  id = "concatMultiValues",
  categories = Array("Combine"),
  label = "ConcatenateMultipleValues",
  description = "Concatenates multiple values received for an input. If applied to multiple inputs, yields at most one value per input. Optionally removes duplicate values."
)
/**
 * Joins all values of each input into a single string.
 *
 * @param glue             separator placed between the joined values
 * @param removeDuplicates when true (and `glue` is non-empty), tokens repeated across
 *                         the joined values are kept only once
 */
case class ConcatMultipleValuesTransformer(glue: String = "", removeDuplicates:Boolean = false) extends Transformer {

  /**
   * @param values one set of strings per input
   * @return one concatenated string per non-empty input (identical results collapse)
   */
  override def apply(values: Seq[Set[String]]): Set[String] =
    values.filter(_.nonEmpty).map(concat).toSet

  /**
   * Joins `strings` with `glue`, optionally de-duplicating the glue-separated tokens,
   * e.g. "Albert", "Einstein", "Albert Einstein" -> "Albert Einstein" instead of
   * "Albert Einstein Albert Einstein".
   */
  private def concat(strings: Set[String]): String = {
    val joined = strings.mkString(glue)
    // With an empty glue there are no token boundaries to split on: splitting on ""
    // would break the text into single characters and de-duplicate those instead.
    if (removeDuplicates && glue.nonEmpty) {
      // distinct keeps the first occurrence of each token in order, and mkString also
      // copes with the split producing no tokens at all.
      joined.split(Pattern.quote(glue)).distinct.mkString(glue)
    } else {
      joined
    }
  }
}
开发者ID:petrovskip,项目名称:silk.2.6-GenLinkSA,代码行数:29,代码来源:ConcatMultipleValuesTransformer.scala
示例20: ShouldNotTypecheck
//设置package包名称以及导入依赖的类
package com.lightbend.lagom.macrotestkit
import scala.language.experimental.macros
import java.util.regex.Pattern
import scala.reflect.macros.TypecheckException
import scala.reflect.macros.blackbox.Context
/**
 * Compile-time assertion that a code snippet does NOT typecheck, optionally checking
 * that the compiler error matches a regular expression (case-insensitive, full match).
 */
object ShouldNotTypecheck {
  def apply(name: String, code: String): Unit = macro applyImplNoExp
  def apply(name: String, code: String, expected: String): Unit = macro applyImpl

  /** Macro implementation for the variant without an expected error message. */
  def applyImplNoExp(ctx: Context)(name: ctx.Expr[String], code: ctx.Expr[String]) = applyImpl(ctx)(name, code, null)

  /**
   * Typechecks `code` during macro expansion. Aborts compilation when the snippet
   * typechecks, or when it fails with an error that does not match `expected`;
   * otherwise prints "<name> passed." and expands to `()`.
   */
  def applyImpl(ctx: Context)(name: ctx.Expr[String], code: ctx.Expr[String], expected: ctx.Expr[String]): ctx.Expr[Unit] = {
    import ctx.universe._

    // all arguments must be string literals so their values are known at expansion time
    val Expr(Literal(Constant(codeStr: String))) = code
    val Expr(Literal(Constant(nameStr: String))) = name
    val (expPat, expMsg) = expected match {
      case null => (null, "Expected some error.")
      case Expr(Literal(Constant(s: String))) =>
        (Pattern.compile(s, Pattern.CASE_INSENSITIVE), "Expected error matching: " + s)
    }

    // None = the snippet typechecked; Some(message) = it failed with that error
    val typecheckError: Option[String] =
      try {
        ctx.typecheck(ctx.parse("{ " + codeStr + " }"))
        None
      } catch {
        case e: TypecheckException => Some(e.getMessage)
      }

    typecheckError match {
      case None =>
        ctx.abort(ctx.enclosingPosition, s"$nameStr succeeded unexpectedly.\n$expMsg")
      case Some(msg) if (expected ne null) && !expPat.matcher(msg).matches =>
        ctx.abort(ctx.enclosingPosition, s"$nameStr failed in an unexpected way.\n$expMsg\nActual error: $msg")
      case Some(_) =>
        println(s"$nameStr passed.")
        reify(())
    }
  }
}
开发者ID:lagom,项目名称:lagom,代码行数:41,代码来源:ShouldNotTypecheck.scala
注：本文中的java.util.regex.Pattern类示例整理自Github/MSDocs等源码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。
请发表评论