Scala MatrixEntry Class Code Examples


This article collects typical usage examples of the org.apache.spark.mllib.linalg.distributed.MatrixEntry class in Scala. If you are wondering what the MatrixEntry class is for, how to use it, or want to see it in real code, the curated examples below should help.



Three code examples of the MatrixEntry class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Scala code examples.
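
Before the examples, here is a minimal, self-contained sketch (written for this article, not taken from the projects below) of the basic workflow: MatrixEntry(i, j, value) is a case class holding one nonzero cell of a distributed sparse matrix, and an RDD[MatrixEntry] backs a CoordinateMatrix. The app name and local master are placeholder choices.

// Minimal MatrixEntry sketch: build a CoordinateMatrix from entries.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}

object MatrixEntryBasics {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("matrix-entry-basics").setMaster("local[*]"))

    // Three nonzero cells of a 2 x 3 sparse matrix.
    val entries = sc.parallelize(Seq(
      MatrixEntry(0, 0, 1.5),
      MatrixEntry(0, 2, 2.0),
      MatrixEntry(1, 1, -3.0)))

    val mat = new CoordinateMatrix(entries, 2, 3)
    println(s"${mat.numRows} x ${mat.numCols}, ${mat.entries.count} nonzeros")

    // Fields are accessible directly or via pattern matching.
    mat.entries.collect.foreach { case MatrixEntry(i, j, v) => println(s"($i, $j) -> $v") }

    sc.stop()
  }
}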

Example 1: Utils

// Package declaration and imported dependencies
package com.github.aadamson.spark_glove

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.{Vector, Vectors, Matrix, Matrices, DenseMatrix}
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, BlockMatrix, RowMatrix, MatrixEntry, IndexedRow, IndexedRowMatrix}
import org.apache.spark.rdd.RDD

object Utils {
  // A sparse matrix in coordinate form: an RDD of ((row, col), value) triples.
  type CoordinateRDD[T] = RDD[((Long, Long), T)]

  // Implicitly convert a CoordinateRDD into an MLlib CoordinateMatrix.
  implicit def CoordinateRDD2CoordinateMatrix(a: CoordinateRDD[Float]): CoordinateMatrix = {
    val entries: RDD[MatrixEntry] = a.map { case ((i, j), value) => MatrixEntry(i, j, value) }
    new CoordinateMatrix(entries)
  }

  // Replicate the vector v into each of the numRows rows of an IndexedRowMatrix.
  def broadcastVector(v: Vector, numRows: Int, sc: SparkContext): IndexedRowMatrix = {
    val rows: RDD[IndexedRow] = sc.parallelize(0 until numRows).map(i => IndexedRow(i, v))
    new IndexedRowMatrix(rows)
  }

  // Element-wise (Hadamard) product, defined for BlockMatrix and Vector pairs.
  def elementwiseProduct[T](a: T, b: T): T = (a, b) match {
    case (x: BlockMatrix, y: BlockMatrix) =>
      val aIRM = x.toIndexedRowMatrix()
      val bIRM = y.toIndexedRowMatrix()
      // Zip the two row RDDs and multiply matching rows element-wise.
      val rows = aIRM.rows.zip(bIRM.rows).map {
        case (aRow: IndexedRow, bRow: IndexedRow) =>
          IndexedRow(aRow.index, elementwiseProduct(aRow.vector, bRow.vector))
      }
      new IndexedRowMatrix(rows).toBlockMatrix().asInstanceOf[T]
    case (x: Vector, y: Vector) =>
      // Pair up corresponding coordinates and multiply each pair.
      val values = Array(x.toArray, y.toArray)
      Vectors.dense(values.transpose.map(_.product)).asInstanceOf[T]
  }
}
Developer: aadamson | Project: spark-glove | Lines: 38 | Source: Utils.scala
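
Example 1 only defines the helpers and never shows them in use. A hypothetical calling sketch (the UtilsUsage object and its sample data are invented here for illustration) might look like this:

// Hypothetical usage of the Utils helpers above (not part of the original project).
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix
import org.apache.spark.rdd.RDD
import com.github.aadamson.spark_glove.Utils._

object UtilsUsage {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setAppName("utils-usage").setMaster("local[*]"))

    // ((row, col), value) triples; the implicit conversion turns this
    // CoordinateRDD[Float] into a CoordinateMatrix on demand.
    val coords: RDD[((Long, Long), Float)] =
      sc.parallelize(Seq(((0L, 0L), 1.0f), ((1L, 2L), 4.0f)))
    val mat: CoordinateMatrix = coords // via CoordinateRDD2CoordinateMatrix

    // Replicate one vector into every row of a 3-row matrix.
    val ones = broadcastVector(Vectors.dense(1.0, 1.0, 1.0), 3, sc)

    // Element-wise product of two local vectors: (1*3, 2*4) = (3.0, 8.0).
    val p = elementwiseProduct(Vectors.dense(1.0, 2.0), Vectors.dense(3.0, 4.0))

    println(s"mat: ${mat.numRows} x ${mat.numCols}, ones rows: ${ones.rows.count}, p: $p")
    sc.stop()
  }
}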


Example 2: MapperSpec

// Package declaration and imported dependencies
package com.github.log0ymxm.mapper

import org.scalatest._
import com.holdenkarau.spark.testing.SharedSparkContext
import org.apache.spark.sql.{ SparkSession, Row }
import org.apache.spark.mllib.linalg.distributed.{ CoordinateMatrix, IndexedRow, IndexedRowMatrix, MatrixEntry }
import org.apache.spark.mllib.linalg.{ DenseVector, Vector, Vectors }

class MapperSpec extends FunSuite with SharedSparkContext {

  test("simple mapper on noisy circle") {
    val spark = SparkSession.builder().getOrCreate()

    val fileLoc = getClass.getClassLoader.getResource("circles.csv").getPath()
    val circle = spark.read
      .option("header", false)
      .option("inferSchema", true)
      .csv(fileLoc)

    assert(circle.count == 400)

    // Turn each (x, y) point into an IndexedRow keyed by its position in the file.
    val indexedRDD = circle.rdd.zipWithIndex.map {
      case (Row(x: Double, y: Double), i) =>
        val v: Vector = new DenseVector(Array(x, y))
        IndexedRow(i, v)
    }
    val matrix = new IndexedRowMatrix(indexedRDD)
    // Pairwise cosine similarities between points ...
    val similarities = matrix.toCoordinateMatrix
      .transpose()
      .toIndexedRowMatrix()
      .columnSimilarities()
    // ... converted to distances by rebuilding each MatrixEntry as 1 - similarity.
    val distances = new CoordinateMatrix(
      similarities
        .entries
        .map((entry) => new MatrixEntry(entry.i, entry.j, 1 - entry.value))
    )

    // Filtration function: the L2 norm of each point.
    val filtration = new IndexedRowMatrix(indexedRDD.map({ row =>
      IndexedRow(row.index, new DenseVector(Array(
        Vectors.norm(row.vector, 2)
      )))
    }))

    val graph = Mapper.mapper(sc, distances, filtration, 100, 2.0)
    // Mapper.writeAsJson(graph, "mapper-vis/circle-graph.json")

    assert(graph.vertices.count == 160)
    assert(graph.edges.count == 327)
  }
}
Developer: log0ymxm | Project: spark-mapper | Lines: 51 | Source: MapperSpec.scala


Example 3: Blocks

// Package declaration and imported dependencies
package hr.fer.ztel.thesis.multiplication.block

import hr.fer.ztel.thesis.datasource.MatrixEntryDataSource._
import hr.fer.ztel.thesis.spark.SparkSessionHandler
import hr.fer.ztel.thesis.sparse_linalg.SparseVectorOperators._
import org.apache.spark.mllib.linalg.MLlibBreezeConversions._
import org.apache.spark.mllib.linalg.distributed.MLlibBlockMatrixMultiplyVersion220._
import org.apache.spark.mllib.linalg.distributed.{CoordinateMatrix, MatrixEntry}

object Blocks {

  def main(args: Array[String]): Unit = {

    val handler = new SparkSessionHandler(args)
    implicit val spark = handler.getSparkSession

    val userItemEntries = readUserItemEntries(handler.userItemPath)
    val itemItemEntries = readItemItemEntries(handler.itemItemPath, handler.measure, handler.normalize)

    // Precomputed upper bounds produced by a C++ indexer; some users may have
    // been filtered out by a quantity threshold.
    val numUsers = spark.read.textFile(handler.usersSizePath).first.toInt
    val numItems = spark.read.textFile(handler.itemsSizePath).first.toInt

    val B = handler.blockSize

    // C: user-item matrix, S: item-item similarity matrix, both in B x B blocks.
    val C = new CoordinateMatrix(userItemEntries, numUsers, numItems).toBlockMatrix(B, B)
    val S = new CoordinateMatrix(itemItemEntries, numItems, numItems).toBlockMatrix(B, B)

    // R = C * S holds the recommendation score of every item for every user.
    val R = multiply(C, S)

    // Map of user -> items already seen, built by destructuring each MatrixEntry.
    val userSeenItemsBroadcast = spark.sparkContext.broadcast(
      userItemEntries
        .map { case MatrixEntry(user, item, _) => (user.toInt, item.toInt) }
        .groupByKey.mapValues(_.toSet)
        .collectAsMap.toMap
    )

    // For each user, keep the top-K highest-scoring items they have not seen.
    val recommendations = R.toIndexedRowMatrix.rows.mapPartitions {
      val localUserSeenItems = userSeenItemsBroadcast.value
      _.filter(row => localUserSeenItems.contains(row.index.toInt))
        .map { row =>
          val user = row.index.toInt
          val unseenItems = row.vector.toBreeze.activeIterator
            .filterNot { case (item, _) => localUserSeenItems(user).contains(item) }
          val unseenTopKItems = argTopK(unseenItems.toArray, handler.topK)

          s"$user:${unseenTopKItems.mkString(",")}"
        }
    }

    recommendations.saveAsTextFile(handler.recommendationsPath)

    println(s"Recommendations saved in: ${handler.recommendationsPath}")
  }
}
Developer: fpopic | Project: master_thesis | Lines: 57 | Source: Blocks.scala
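
Example 3 depends on thesis-specific helpers (SparkSessionHandler, readUserItemEntries, argTopK, and a custom multiply), but its core pattern can be reproduced with stock MLlib alone. Below is a minimal sketch with made-up data, using only standard APIs; BlockMatrix.multiply stands in for the project's custom multiply.

// Standard-MLlib sketch of the block-multiplication pattern from Example 3.
import org.apache.spark.SparkContext
import org.apache.spark.mllib.linalg.distributed.{BlockMatrix, CoordinateMatrix, MatrixEntry}

object BlockMultiplySketch {
  def multiplySketch(sc: SparkContext): BlockMatrix = {
    // User-item interaction matrix C (2 users x 3 items), as MatrixEntry triples.
    val cEntries = sc.parallelize(Seq(
      MatrixEntry(0, 0, 1.0), MatrixEntry(0, 2, 1.0), MatrixEntry(1, 1, 1.0)))
    // Item-item similarity matrix S (3 x 3).
    val sEntries = sc.parallelize(Seq(
      MatrixEntry(0, 0, 1.0), MatrixEntry(0, 2, 0.5),
      MatrixEntry(1, 1, 1.0), MatrixEntry(2, 2, 1.0)))

    val blockSize = 2
    val C = new CoordinateMatrix(cEntries, 2, 3).toBlockMatrix(blockSize, blockSize)
    val S = new CoordinateMatrix(sEntries, 3, 3).toBlockMatrix(blockSize, blockSize)

    // R(u, i) = score of item i for user u.
    C.multiply(S)
  }
}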



Note: the org.apache.spark.mllib.linalg.distributed.MatrixEntry class examples in this article were compiled from source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors. Refer to each project's license before redistributing or using the code, and do not republish this article without permission.


