• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

C++ kokkos::View类代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中kokkos::View的典型用法代码示例。如果您正苦于以下问题:C++ View类的具体用法?C++ View怎么用?C++ View使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。



在下文中一共展示了View类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: a

TEST_F( KokkosThreads, SerialInitialize)
{
  // allocate a rank 2 array witn that is RUN_TIME_DIMENSION x COMPILE_TIME_DIMENSION

  // View will default initialize all the values unless it is explicitly disabled, ie,
  // Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a("node views", RUN_TIME_DIMENSION);
  // zero fills the array, but
  // Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a( Kokkos::ViewAllocateWithoutInitializing("node views"), RUN_TIME_DIMENSION);
  // will allocate without initializing the array

  Kokkos::View<unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> a( Kokkos::ViewAllocateWithoutInitializing("node views"), RUN_TIME_DIMENSION);

  for (size_t i=0; i < a.dimension_0(); ++i) {
    for (size_t x=0; x < a.dimension_1(); ++x) {
      a(i,x) = i;
    }
  }

  // get a const view to the same array
  // this view shares the same memory as a, but cannot modify the values
  Kokkos::View<const unsigned*[COMPILE_TIME_DIMENSION], KOKKOS_THREAD_DEVICE> b = a;

  for (size_t i=0; i < b.dimension_0(); ++i) {
    for (size_t x=0; x < b.dimension_1(); ++x) {
      EXPECT_EQ(i, b(i,x));
    }
  }
}
开发者ID:agrippa,项目名称:Trilinos,代码行数:28,代码来源:unit_test_kokkos.cpp


示例2: rcp

  Teuchos::RCP<const Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> > >
  Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  replaceCommWithSubset (const Teuchos::RCP<const Teuchos::Comm<int> >& newComm) const
  {
    using Teuchos::ArrayView;
    using Teuchos::outArg;
    using Teuchos::RCP;
    using Teuchos::REDUCE_MIN;
    using Teuchos::reduceAll;
    typedef global_size_t GST;
    typedef LocalOrdinal LO;
    typedef GlobalOrdinal GO;
    typedef Map<LO, GO, node_type> map_type;

    // mfh 26 Mar 2013: The lazy way to do this is simply to recreate
    // the Map by calling its ordinary public constructor, using the
    // original Map's data.  This only involves O(1) all-reduces over
    // the new communicator, which in the common case only includes a
    // small number of processes.

    // Create the Map to return.
    if (newComm.is_null ()) {
      return Teuchos::null; // my process does not participate in the new Map
    } else {
      // Map requires that the index base equal the global min GID.
      // Figuring out the global min GID requires a reduction over all
      // processes in the new communicator.  It could be that some (or
      // even all) of these processes contain zero entries.  (Recall
      // that this method, unlike removeEmptyProcesses(), may remove
      // an arbitrary subset of processes.)  We deal with this by
      // doing a min over the min GID on each process if the process
      // has more than zero entries, or the global max GID, if that
      // process has zero entries.  If no processes have any entries,
      // then the index base doesn't matter anyway.
      const GO myMinGid = (this->getNodeNumElements () == 0) ?
        this->getMaxAllGlobalIndex () : this->getMinGlobalIndex ();
      GO newIndexBase = this->getInvalidGlobalIndex ();
      reduceAll<int, GO> (*newComm, REDUCE_MIN, myMinGid, outArg (newIndexBase));

      // Make Map's constructor compute the global number of indices.
      const GST globalNumInds = Teuchos::OrdinalTraits<GST>::invalid ();

      if (mapDevice_.initialized ()) {
        Kokkos::View<const GO*, DeviceType> myGIDs =
          mapDevice_.getMyGlobalIndices ();
        return rcp (new map_type (globalNumInds, myGIDs, newIndexBase,
                                  newComm, this->getNode ()));
      }
      else {
        Kokkos::View<const GO*, host_mirror_device_type> myGidsHostView =
          mapHost_.getMyGlobalIndices ();
        ArrayView<const GO> myGidsArrayView (myGidsHostView.ptr_on_device (),
                                             myGidsHostView.dimension_0 ());
        return rcp (new map_type (globalNumInds, myGidsArrayView, newIndexBase,
                                  newComm, this->getNode ()));
      }
    }
  }
开发者ID:00liujj,项目名称:trilinos,代码行数:58,代码来源:Tpetra_KokkosRefactor_Map_def.hpp


示例3: GEMM

		static void GEMM(Teuchos::ETransp transA, Teuchos::ETransp transB, Scalar alpha,
           Kokkos::View<Scalar***,Kokkos::LayoutLeft,Kokkos::DefaultExecutionSpace> A,  Kokkos::View<Scalar***,Kokkos::LayoutLeft,Kokkos::DefaultExecutionSpace> B,
          Scalar beta, Kokkos::View<Scalar***,Kokkos::LayoutLeft,Kokkos::DefaultExecutionSpace> C){
		const int m = static_cast<int> (C.dimension_1()),
        n = static_cast<int> (C.dimension_2 ()),
        k = (transA == Teuchos::NO_TRANS ? A.dimension_2 () : A.dimension_1 ());
   //     printf("m:%d,n:%d,k:%d",m,n,k);
	Kokkos::parallel_for(C.dimension(0),blasOpenMPBatchLeft<Scalar>(A,B,C,m,n,k,transA,transB,alpha,beta));
	}
开发者ID:giorgiobornia,项目名称:trilinos,代码行数:9,代码来源:example_04.cpp


示例4: getNodeElementList

  Teuchos::ArrayView<const GlobalOrdinal>
  Map<LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType> >::
  getNodeElementList () const
  {
    typedef GlobalOrdinal GO;
    Kokkos::View<const GO*, host_mirror_device_type> myGlobalInds =
      mapHost_.getMyGlobalIndices (); // creates it if it doesn't exist

    return Teuchos::ArrayView<const GO> (myGlobalInds.ptr_on_device (),
                                         myGlobalInds.dimension_0 ());
  }
开发者ID:00liujj,项目名称:trilinos,代码行数:11,代码来源:Tpetra_KokkosRefactor_Map_def.hpp


示例5:

void NearestNeighborOperator<DeviceType>::apply(
    Kokkos::View<double const *, DeviceType> source_values,
    Kokkos::View<double *, DeviceType> target_values ) const
{
    // Precondition: check that the source and target are properly sized
    DTK_REQUIRE( _indices.extent( 0 ) == target_values.extent( 0 ) );
    DTK_REQUIRE( _size == source_values.extent_int( 0 ) );

    auto values = Details::NearestNeighborOperatorImpl<DeviceType>::fetch(
        _comm, _ranks, _indices, source_values );

    Kokkos::deep_copy( target_values, values );
}
开发者ID:Rombur,项目名称:DataTransferKit,代码行数:13,代码来源:DTK_NearestNeighborOperator_def.hpp


示例6:

FieldContainer_Kokkos(Kokkos::View<ScalarPoindex_typeer,Kokkos::LayoutLeft,Kokkos::OpenMP>& InContainer){
dim0=dim[0]=InContainer.dimension(0);
dim1=dim[1]=InContainer.dimension(1);
dim2=dim[2]=InContainer.dimension(2);
dim3=dim[3]=InContainer.dimension(3);
dim4=dim[4]=InContainer.dimension(4);
dim5=dim[5]=InContainer.dimension(5);
dim6=dim[6]=InContainer.dimension(6);
dim7=dim[7]=InContainer.dimension(7);
rankValue=Kokkos::View<ScalarPoindex_typeer,Kokkos::LayoutLeft,Kokkos::OpenMP>::Rank;
intepidManaged=false;
switch(rankValue){
case 1:
sizeValue=dim0;
break;

case 2:
sizeValue=dim0*dim1;
break;

case 3:
sizeValue=dim0*dim1*dim2;
break;

case 4:
sizeValue=dim0*dim1*dim2*dim3;
break;

case 5:
sizeValue=dim0*dim1*dim2*dim3*dim4;
break;

case 6:
sizeValue=dim0*dim1*dim2*dim3*dim4*dim5;
break;

case 7:
sizeValue=dim0*dim1*dim2*dim3*dim4*dim5*dim6;
break;

case 8:
sizeValue=dim0*dim1*dim2*dim3*dim4*dim5*dim6*dim7;
break;

}
containerMemory=InContainer.ptr_on_device();


}
开发者ID:prwolfe,项目名称:Trilinos,代码行数:49,代码来源:Intrepid2_FieldContainer_Kokkos_OpenMP_Left.hpp


示例7:

 KOKKOS_INLINE_FUNCTION
 NestedView & operator = ( const Kokkos::View<int*,Space> & lhs )
   {
     member = lhs ;
     if ( member.dimension_0() ) Kokkos::atomic_add( & member(0) , 1 );
     return *this ;
   }
开发者ID:aajarven,项目名称:matfys-kesakoulu,代码行数:7,代码来源:TestViewOfClass.hpp


示例8: member

 KOKKOS_INLINE_FUNCTION
 ~NestedView()
 { 
   if ( member.dimension_0() ) {
     Kokkos::atomic_add( & member(0) , -1 );
   }
 }
开发者ID:aajarven,项目名称:matfys-kesakoulu,代码行数:7,代码来源:TestViewOfClass.hpp


示例9: kk_inspector_matvec

void kk_inspector_matvec(AType A, XType x, YType y, int rows_per_thread, int team_size, int vector_length) {

  typedef typename XType::non_const_value_type Scalar;
  typedef typename AType::execution_space execution_space;
  typedef KokkosSparse::CrsMatrix<const Scalar,int,execution_space,void,int> matrix_type ;
  typedef typename Kokkos::View<Scalar*,Kokkos::LayoutLeft,execution_space> y_type;
  typedef typename Kokkos::View<const Scalar*,Kokkos::LayoutLeft,execution_space,Kokkos::MemoryRandomAccess > x_type;

  //int rows_per_team = launch_parameters<execution_space>(A.numRows(),A.nnz(),rows_per_thread,team_size,vector_length);
  //static int worksets = (y.extent(0)+rows_per_team-1)/rows_per_team;
  static int worksets = std::is_same<Schedule,Kokkos::Static>::value ?
                        team_size>0?execution_space::concurrency()/team_size:execution_space::concurrency() : //static
                        team_size>0?execution_space::concurrency()*32/team_size:execution_space::concurrency()*32 ; //dynamic
  static Kokkos::View<int*> workset_offsets;
  if(workset_offsets.extent(0) == 0) {
    workset_offsets = Kokkos::View<int*> ("WorksetOffsets",worksets+1);
    const size_t nnz = A.nnz();
    int nnz_per_workset = (nnz+worksets-1)/worksets;
    workset_offsets(0) = 0;
    int ws = 1;
    for(int row = 0; row<A.numRows(); row++) {
      if(A.graph.row_map(row) > ws*nnz_per_workset) {
        workset_offsets(ws) = row;
        ws++;
      }
    }
    if(workset_offsets(ws-1) < A.numRows()) {
      workset_offsets(ws) = A.numRows();
    }
    printf("Worksets: %i %i\n",worksets,ws);
    worksets = ws;
  }
  double s_a = 1.0;
  double s_b = 0.0;
  SPMV_Inspector_Functor<matrix_type,x_type,y_type,0,false,int> func (s_a,A,x,workset_offsets,s_b,y);

  Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> > policy(1,1);

  if(team_size>0)
    policy = Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> >(worksets,team_size,vector_length);
  else
    policy = Kokkos::TeamPolicy<Kokkos::Schedule<Schedule> >(worksets,Kokkos::AUTO,vector_length);

  Kokkos::parallel_for("KokkosSparse::PerfTest::SpMV_Inspector", policy,func);
}
开发者ID:jwillenbring,项目名称:Trilinos,代码行数:45,代码来源:Kokkos_SPMV_Inspector.hpp


示例10: getElementLIDs

   /** Access the local IDs for an element. The local ordering is according to
     * the <code>getOwnedAndSharedIndices</code> method. Note
     */
   void getElementLIDs(Kokkos::View<const int*,PHX::Device> cellIds,
                       Kokkos::View<LocalOrdinalT**,PHX::Device> lids) const
   { 
     CopyCellLIDsFunctor functor;
     functor.cellIds = cellIds;
     functor.global_lids = localIDs_k_;
     functor.local_lids = lids; // we assume this array is sized correctly!

     Kokkos::parallel_for(cellIds.dimension_0(),functor);
   }
开发者ID:Russell-Jones-OxPhys,项目名称:Trilinos,代码行数:13,代码来源:Panzer_UniqueGlobalIndexer.hpp


示例11: multiply

void multiply(const CrsMatrix< float , Kokkos::OpenMP >& A,
              const Kokkos::View< float* , Kokkos::OpenMP >& x,
              Kokkos::View< float* , Kokkos::OpenMP >& y,
              MKLMultiply tag)
{
  MKL_INT n = A.graph.row_map.dimension_0() - 1 ;
  float *A_values = A.values.ptr_on_device() ;
  MKL_INT *col_indices = A.graph.entries.ptr_on_device() ;
  MKL_INT *row_beg = const_cast<MKL_INT*>(A.graph.row_map.ptr_on_device()) ;
  MKL_INT *row_end = row_beg+1;
  char matdescra[6] = { 'G', 'x', 'N', 'C', 'x', 'x' };
  char trans = 'N';
  float alpha = 1.0;
  float beta = 0.0;

  float *x_values = x.ptr_on_device() ;
  float *y_values = y.ptr_on_device() ;

  mkl_scsrmv(&trans, &n, &n, &alpha, matdescra, A_values, col_indices,
             row_beg, row_end, x_values, &beta, y_values);
}
开发者ID:00liujj,项目名称:trilinos,代码行数:21,代码来源:Stokhos_OpenMP_MKL_CrsMatrix.hpp


示例12: indices

NearestNeighborOperator<DeviceType>::NearestNeighborOperator(
    MPI_Comm comm, Kokkos::View<Coordinate const **, DeviceType> source_points,
    Kokkos::View<Coordinate const **, DeviceType> target_points )
    : _comm( comm )
    , _indices( "indices" )
    , _ranks( "ranks" )
    , _size( source_points.extent_int( 0 ) )
{
    // NOTE: instead of checking the pre-condition that there is at least one
    // source point passed to one of the rank, we let the tree handle the
    // communication and just check that the tree is not empty.

    // Build distributed search tree over the source points.
    DistributedSearchTree<DeviceType> search_tree( _comm, source_points );

    // Tree must have at least one leaf, otherwise it makes little sense to
    // perform the search for nearest neighbors.
    DTK_CHECK( !search_tree.empty() );

    // Query nearest neighbor for all target points.
    auto nearest_queries = Details::NearestNeighborOperatorImpl<
        DeviceType>::makeNearestNeighborQueries( target_points );

    // Perform the actual search.
    Kokkos::View<int *, DeviceType> indices( "indices" );
    Kokkos::View<int *, DeviceType> offset( "offset" );
    Kokkos::View<int *, DeviceType> ranks( "ranks" );
    search_tree.query( nearest_queries, indices, offset, ranks );

    // Check post-condition that we did find a nearest neighbor to all target
    // points.
    DTK_ENSURE( lastElement( offset ) == target_points.extent_int( 0 ) );

    // Save results.
    // NOTE: we don't bother keeping `offset` around since it is just `[0, 1, 2,
    // ..., n_target_poins]`
    _indices = indices;
    _ranks = ranks;
}
开发者ID:Rombur,项目名称:DataTransferKit,代码行数:39,代码来源:DTK_NearestNeighborOperator_def.hpp


示例13: sortQueriesAlongZOrderCurve

    static Kokkos::View<size_t *, DeviceType>
    sortQueriesAlongZOrderCurve( Box const &scene_bounding_box,
                                 Kokkos::View<Query *, DeviceType> queries )
    {
        auto const n_queries = queries.extent( 0 );

        Kokkos::View<unsigned int *, DeviceType> morton_codes(
            Kokkos::ViewAllocateWithoutInitializing( "morton" ), n_queries );
        Kokkos::parallel_for(
            ARBORX_MARK_REGION( "assign_morton_codes_to_queries" ),
            Kokkos::RangePolicy<ExecutionSpace>( 0, n_queries ),
            KOKKOS_LAMBDA( int i ) {
                Point xyz = Details::returnCentroid( queries( i )._geometry );
                translateAndScale( xyz, xyz, scene_bounding_box );
                morton_codes( i ) = morton3D( xyz[0], xyz[1], xyz[2] );
            } );
开发者ID:ORNL-CEES,项目名称:DataTransferKit,代码行数:16,代码来源:ArborX_DetailsBatchedQueries.hpp


示例14: pointInCell

void pointInCell( double threshold,
                  Kokkos::View<Coordinate **, DeviceType> physical_points,
                  Kokkos::View<Coordinate ***, DeviceType> cells,
                  Kokkos::View<int *, DeviceType> coarse_search_output_cells,
                  Kokkos::View<Coordinate **, DeviceType> reference_points,
                  Kokkos::View<bool *, DeviceType> point_in_cell )
{
    using ExecutionSpace = typename DeviceType::execution_space;
    int const n_ref_pts = reference_points.extent( 0 );

    Functor::PointInCell<CellType, DeviceType> search_functor(
        threshold, physical_points, cells, coarse_search_output_cells,
        reference_points, point_in_cell );
    Kokkos::parallel_for( DTK_MARK_REGION( "point_in_cell" ),
                          Kokkos::RangePolicy<ExecutionSpace>( 0, n_ref_pts ),
                          search_functor );
}
开发者ID:ORNL-CEES,项目名称:DataTransferKit,代码行数:17,代码来源:DTK_PointInCell_def.hpp


示例15: modified_gram_schmidt

void modified_gram_schmidt(

  const Kokkos::View< ScalarQ ** ,
                           Kokkos::LayoutLeft ,
                           DeviceType ,
                           Management > & Q ,

  const Kokkos::View< ScalarR ** ,
                           Kokkos::LayoutLeft ,
                           DeviceType ,
                           Management > & R ,

  comm::Machine machine )
{
  const Kokkos::ALL ALL ;

  typedef Kokkos::View< ScalarQ * ,
                             Kokkos::LayoutLeft ,
                             DeviceType ,
                             Kokkos::MemoryUnmanaged >
    vector_view_type ;

  const typename
    Kokkos::View< ScalarR** ,
                       Kokkos::LayoutLeft ,
                       DeviceType >::
    HostMirror hostR = Kokkos::create_mirror_view( R );

  const int length = Q.dimension_0();
  const int count  = Q.dimension_1();

  for ( int j = 0 ; j < count ; ++j ) {

    const vector_view_type  Qj = Kokkos::subview< vector_view_type >( Q , ALL , j );

    // reads  += length
    // writes += 0
    // flops  += 1 + 2 * length
    const double norm_Qj = Kokkos::norm2( length , Qj , machine );

    hostR(j,j) = norm_Qj ;

    // reads  += length
    // writes += length
    // flops  += 1 + length
    Kokkos::scale( length , 1.0 / norm_Qj , Qj );

    for ( int k = j + 1 ; k < count ; ++k ) {

      const vector_view_type  Qk = Kokkos::subview< vector_view_type >( Q , ALL , k );

      // reads  += 2 * length
      // writes += 0
      // flops  += 2 * length
      const double Qj_dot_Qk =
        Kokkos::dot( length , Qj , Qk , machine );

      hostR(j,k) = Qj_dot_Qk ;

      // reads  += 2 * length
      // writes += length
      // flops += 2 * length
      Kokkos::axpy( length , - Qj_dot_Qk , Qj , Qk );
    }
  }

  // reads  += 0
  // writes += count * count
  Kokkos::deep_copy( R , hostR );
}
开发者ID:00liujj,项目名称:trilinos,代码行数:70,代码来源:GramSchmidt.cpp


示例16: DataTransferKitNotImplementedException

void PointInCell<DeviceType>::search(
    Kokkos::View<Coordinate **, DeviceType> physical_points,
    Kokkos::View<Coordinate ***, DeviceType> cells,
    Kokkos::View<int *, DeviceType> coarse_search_output_cells,
    DTK_CellTopology cell_topo,
    Kokkos::View<Coordinate **, DeviceType> reference_points,
    Kokkos::View<bool *, DeviceType> point_in_cell )
{
    // Check the size of the Views
    DTK_REQUIRE( reference_points.extent( 0 ) == point_in_cell.extent( 0 ) );
    DTK_REQUIRE( reference_points.extent( 0 ) == physical_points.extent( 0 ) );
    DTK_REQUIRE( reference_points.extent( 1 ) == physical_points.extent( 1 ) );
    DTK_REQUIRE( reference_points.extent( 1 ) == cells.extent( 2 ) );

    // Perform the point in cell search. We hide the template parameters used by
    // Intrepid2, using the CellType template.
    // Note that if the Newton solver does not converge, Intrepid2 will just
    // return the last results and there is no way to know that the coordinates
    // in the reference frames where not found.
    switch ( cell_topo )
    {
    case DTK_HEX_8:
    {
        internal::pointInCell<HEX_8, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_HEX_27:
    {
        internal::pointInCell<HEX_27, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_PYRAMID_5:
    {
        internal::pointInCell<PYRAMID_5, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_QUAD_4:
    {
        internal::pointInCell<QUAD_4, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_QUAD_9:
    {
        internal::pointInCell<QUAD_9, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_TET_4:
    {
        internal::pointInCell<TET_4, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_TET_10:
    {
        internal::pointInCell<TET_10, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_TRI_3:
    {
        internal::pointInCell<TRI_3, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_TRI_6:
    {
        internal::pointInCell<TRI_6, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_WEDGE_6:
    {
        internal::pointInCell<WEDGE_6, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    case DTK_WEDGE_18:
    {
        internal::pointInCell<WEDGE_18, DeviceType>(
            threshold, physical_points, cells, coarse_search_output_cells,
            reference_points, point_in_cell );
        break;
    }
    default:
    {
//.........这里部分代码省略.........
开发者ID:ORNL-CEES,项目名称:DataTransferKit,代码行数:101,代码来源:DTK_PointInCell_def.hpp


示例17: SortView

 SortView( const Kokkos::View<ValueType*,ExecSpace> v , int begin , int end )
   {
     std::sort( v.ptr_on_device() + begin , v.ptr_on_device() + end );
   }
开发者ID:arielzn,项目名称:lammps,代码行数:4,代码来源:grow_array.hpp


示例18: operator

 KOKKOS_INLINE_FUNCTION
 void operator() (int i) const {
   double tmp = 0.0;
   for(int j = 0; j < idx.dimension_1(); j++) {
     const double val = src(idx(i,j));
     tmp += val*val + 0.5*(idx.dimension_0()*val -idx.dimension_1()*val);
   }
   dest(i) += tmp;
 }
开发者ID:rorypeck,项目名称:trilinos,代码行数:9,代码来源:dual_view.cpp


示例19: operator

  KOKKOS_INLINE_FUNCTION
  void operator() ( const team_member & thread) const {
    int i = thread.league_rank();

    // Allocate a shared array for the team.
    shared_1d_int count(thread.team_shmem(),data.dimension_1());

    // With each team run a parallel_for with its threads
    Kokkos::parallel_for(Kokkos::TeamThreadRange(thread,data.dimension_1()), [=] (const int& j) {
      int tsum;
      // Run a vector loop reduction over the inner dimension of data
      // Count how many values are multiples of 4
      // Every vector lane gets the same reduction value (tsum) back, it is broadcast to all vector lanes
      Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(thread,data.dimension_2()), [=] (const int& k, int & vsum) {
        vsum+= (data(i,j,k) % 4 == 0)?1:0;
      },tsum);

      // Make sure only one vector lane adds the reduction value to the shared array, i.e. execute
      // the next line only once PerThread
      Kokkos::single(Kokkos::PerThread(thread),[=] () {
        count(j) = tsum;
      });
    });

    // Wait for all threads to finish the parallel_for so that all shared memory writes are done
    thread.team_barrier();

    // Check with one vector lane from each thread how many consecutive
    // data segments have the same number of values divisible by 4
    // The team reduction value is again broadcast to every team member (and every vector lane)
    int team_sum = 0;
    Kokkos::parallel_reduce(Kokkos::TeamThreadRange(thread, data.dimension_1()-1), [=] (const int& j, int& thread_sum) {
      // It is not valid to directly add to thread_sum
      // Use a single function with broadcast instead
      // team_sum will be used as input to the operator (i.e. it is used to initialize sum)
      // the end value of sum will be broadcast to all vector lanes in the thread.
      Kokkos::single(Kokkos::PerThread(thread),[=] (int& sum) {
        if(count(j)==count(j+1)) sum++;
      },thread_sum);
    },team_sum);

    // Add with one thread and vectorlane of the team the team_sum to the global value
    Kokkos::single(Kokkos::PerTeam(thread),[=] () {
      Kokkos::atomic_add(&gsum(),team_sum);
    });
  }
开发者ID:BrianMoths,项目名称:lammps,代码行数:46,代码来源:vectorization.cpp


示例20: operator

    KOKKOS_INLINE_FUNCTION
    void operator()( size_t i ) const
    {
        if ( i < m_elem_node.dimension_0() * m_elem_node.dimension_1() ) {

            const size_t ielem = i / ElemNode ;
            const size_t inode = i % ElemNode ;

            unsigned elem_grid[SpaceDim] ;
            unsigned node_grid[SpaceDim] ;

            m_box_part.uses_elem_coord( ielem , elem_grid );

            enum { elem_node_scale = Order == BoxElemPart::ElemLinear ? 1 :
                                     Order == BoxElemPart::ElemQuadratic ? 2 : 0
                 };

            node_grid[0] = elem_node_scale * elem_grid[0] + m_elem_node_local[inode][0] ;
            node_grid[1] = elem_node_scale * elem_grid[1] + m_elem_node_local[inode][1] ;
            node_grid[2] = elem_node_scale * elem_grid[2] + m_elem_node_local[inode][2] ;

            m_elem_node(ielem,inode) = m_box_part.local_node_id( node_grid );
        }

        if ( i < m_node_grid.dimension_0() ) {
            unsigned node_grid[SpaceDim] ;
            m_box_part.local_node_coord( i , node_grid );
            m_node_grid(i,0) = node_grid[0] ;
            m_node_grid(i,1) = node_grid[1] ;
            m_node_grid(i,2) = node_grid[2] ;

            m_coord_map( node_grid[0] ,
                         node_grid[1] ,
                         node_grid[2] ,
                         m_node_coord(i,0) ,
                         m_node_coord(i,1) ,
                         m_node_coord(i,2) );
        }

        if ( i < m_recv_node.dimension_0() ) {
            m_recv_node(i,0) = m_box_part.recv_node_rank(i);
            m_recv_node(i,1) = m_box_part.recv_node_count(i);
        }

        if ( i < m_send_node.dimension_0() ) {
            m_send_node(i,0) = m_box_part.send_node_rank(i);
            m_send_node(i,1) = m_box_part.send_node_count(i);
        }

        if ( i < m_send_node_id.dimension_0() ) {
            m_send_node_id(i) = m_box_part.send_node_id(i);
        }
    }
开发者ID:gitter-badger,项目名称:quinoa,代码行数:53,代码来源:BoxElemFixture.hpp



注:本文中的kokkos::View类示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ impl::Timer类代码示例发布时间:2022-05-31
下一篇:
C++ kokkos::DynRankView类代码示例发布时间:2022-05-31
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap