• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

C++ saxpy函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中saxpy函数的典型用法代码示例。如果您正苦于以下问题:C++ saxpy函数的具体用法?C++ saxpy怎么用?C++ saxpy使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了saxpy函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: axpy

 static vl::Error
 axpy(vl::Context & context,
      ptrdiff_t n,
      type alpha,
      type const *x, ptrdiff_t incx,
      type *y, ptrdiff_t incy)
 {
   saxpy(&n,
         &alpha,
         (float*)x, &incx,
         (float*)y, &incy) ;
   return vl::vlSuccess ;
 }
开发者ID:BenJamesbabala,项目名称:twostreamfusion,代码行数:13,代码来源:blashelper.hpp


示例2: MocCompute2WayPartitionParams

/*************************************************************************
* This function computes the initial id/ed
**************************************************************************/
void MocCompute2WayPartitionParams(CtrlType *ctrl, GraphType *graph)
{
    int i, j, /*k, l,*/ nvtxs, ncon, nbnd, mincut;
    idxtype *xadj, *adjncy, *adjwgt;
    float *nvwgt, *npwgts;
    idxtype *id, *ed, *where;
    idxtype *bndptr, *bndind;
    int me/*, other*/;

    nvtxs = graph->nvtxs;
    ncon = graph->ncon;
    xadj = graph->xadj;
    nvwgt = graph->nvwgt;
    adjncy = graph->adjncy;
    adjwgt = graph->adjwgt;

    where = graph->where;
    npwgts = sset(2*ncon, 0.0, graph->npwgts);
    id = idxset(nvtxs, 0, graph->id);
    ed = idxset(nvtxs, 0, graph->ed);
    bndptr = idxset(nvtxs, -1, graph->bndptr);
    bndind = graph->bndind;


    /*------------------------------------------------------------
    / Compute now the id/ed degrees
    /------------------------------------------------------------*/
    nbnd = mincut = 0;
    for (i=0; i<nvtxs; i++) {
        ASSERT(where[i] >= 0 && where[i] <= 1);
        me = where[i];
        saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);

        for (j=xadj[i]; j<xadj[i+1]; j++) {
            if (me == where[adjncy[j]])
                id[i] += adjwgt[j];
            else
                ed[i] += adjwgt[j];
        }

        if (ed[i] > 0 || xadj[i] == xadj[i+1]) {
            mincut += ed[i];
            bndptr[i] = nbnd;
            bndind[nbnd++] = i;
        }
    }

    graph->mincut = mincut/2;
    graph->nbnd = nbnd;

}
开发者ID:netsym,项目名称:minissf,代码行数:54,代码来源:mrefine.c


示例3: interpolate_initial

        //! @{
        virtual void interpolate_initial(shared_ptr<ISweeper<time>> dst,
                                         shared_ptr<const ISweeper<time>> src) override
        {
          auto& fine = as_encap_sweeper(dst);
          auto& crse = as_encap_sweeper(src);

          auto crse_factory = crse.get_factory();
          auto fine_factory = fine.get_factory();

          auto crse_delta = crse_factory->create(solution);
          this->restrict(crse_delta, fine.get_start_state());
          crse_delta->saxpy(-1.0, crse.get_start_state());

          auto fine_delta = fine_factory->create(solution);
          this->interpolate(fine_delta, crse_delta);
          fine.get_start_state()->saxpy(-1.0, fine_delta);

          fine.reevaluate(true);
        }
开发者ID:memmett,项目名称:PFASST,代码行数:20,代码来源:poly_interp.hpp


示例4: main

int main()
{

	uint32_t num_cpus = std::thread::hardware_concurrency();
	std::vector<std::thread>threads(num_cpus);

	float *S = new float[num_cpus], a = 1.0f, b = 2.0f, *X = new float[num_cpus], *Y = new float[num_cpus];

	for(uint32_t i=0;i<num_cpus;i++){
		X[i] = 1.0f*i;
		Y[i] = 2.0f*(i+1);
		Saxpy saxpy(std::ref(S[i]), a, X[i], b, Y[i]);
		threads[i] = std::thread(saxpy);
	}

	for(uint32_t i=0;i<num_cpus;i++){
		threads[i].join();
		std::cout<<S[i]<<std::endl;
	}
}
开发者ID:gpu0,项目名称:cpp-threads,代码行数:20,代码来源:ch02.4.cpp


示例5: main

int main (int argc, char * argv[])
{
    const int N = 16;
    const int iters = 1;

    float *x, *y, *z; 
    
    posix_memalign((void **)&x, VECTOR_SIZE, N*sizeof(float));
    posix_memalign((void **)&y, VECTOR_SIZE, N*sizeof(float));
    posix_memalign((void **)&z, VECTOR_SIZE, N*sizeof(float));
    
    float a = 0.93f;

    int i, j;

    for (i=0; i<N; i++)
    {
        x[i] = i+1;
        y[i] = i-1;
        z[i] = 0.0f;
    }

    for (i=0; i<iters; i++)
    {
        saxpy(x, y, z, a, N);
    }

    for (i=0; i<N; i++)
    {
        if (z[i] != (a * x[i] + y[i]))
        {
            printf("Error\n");
            return (1);
        }
    }

    printf("SUCCESS!\n");
    return 0;
}
开发者ID:drpicox,项目名称:mcxx,代码行数:39,代码来源:success_simd_03_parallel_saxpy.c


示例6: main

int main(int argc, char* argv[]) {
    float  a, x[N], y[N];
	a=5;
	int i;
	for (i=0; i<N; ++i){
		x[i]=i;
		y[i]=i+2;
	}

    saxpy(N, a, x,  y);

#pragma omp taskwait
	
	//Check results	
	for (i=0; i<N; ++i){
		if (y[i]!=a*i+(i+2)){
			printf("Error when checking results, in position %d\n",i);
			return -1;
		}
	}
	printf("Results are correct\n");
    return 0;
}
开发者ID:mfarrera,项目名称:ompss-tests,代码行数:23,代码来源:saxpy.c


示例7: interpolate

        virtual void interpolate(shared_ptr<ISweeper<time>> dst,
                                 shared_ptr<const ISweeper<time>> src,
                                 bool interp_initial) override
        {
          auto& fine = as_encap_sweeper(dst);
          auto& crse = as_encap_sweeper(src);

          if (tmat.rows() == 0) {
            tmat = pfasst::quadrature::compute_interp<time>(fine.get_nodes(), crse.get_nodes());
          }

          if (interp_initial) {
            this->interpolate_initial(dst, src);
          }

          size_t nfine = fine.get_nodes().size();
          size_t ncrse = crse.get_nodes().size();

          auto crse_factory = crse.get_factory();
          auto fine_factory = fine.get_factory();

          EncapVecT fine_state(nfine), fine_delta(ncrse);

          for (size_t m = 0; m < nfine; m++) { fine_state[m] = fine.get_state(m); }
          for (size_t m = 0; m < ncrse; m++) { fine_delta[m] = fine_factory->create(solution); }

          auto crse_delta = crse_factory->create(solution);
          for (size_t m = 0; m < ncrse; m++) {
            crse_delta->copy(crse.get_state(m));
            crse_delta->saxpy(-1.0, crse.get_saved_state(m));
            interpolate(fine_delta[m], crse_delta);
          }

          fine.get_state(0)->mat_apply(fine_state, 1.0, tmat, fine_delta, false);

          fine.reevaluate();
        }
开发者ID:memmett,项目名称:PFASST,代码行数:37,代码来源:poly_interp.hpp


示例8: operator

 void operator()(float a, float *x, float *y, std::size_t n)
 {
   saxpy(a,x,y,n);
 }
开发者ID:jaredhoberock,项目名称:personal,代码行数:4,代码来源:saxpy.cpp


示例9: sgefa

int sgefa ( float a[], int lda, int n, int ipvt[] )

/*******************************************************************************/
/*
  Purpose:

    SGEFA factors a matrix by gaussian elimination.

  Discussion:

    Matrix references which would, mathematically, be written A(I,J)
    must be written here as:
    * A[I+J*LDA], when the value is needed, or
    * A+I+J*LDA, when the address is needed.

  Modified:

    07 March 2008

  Author:

    FORTRAN77 original version by Cleve Moler.
    C version by John Burkardt.

  Reference:

    Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
    LINPACK User's Guide,
    SIAM, 1979,
    ISBN13: 978-0-898711-72-1,
    LC: QA214.L56.

  Parameters:

    Input/output, float A[LDA*N].  On input, the matrix to be factored.
    On output, an upper triangular matrix and the multipliers which were
    used to obtain it.  The factorization can be written A = L * U where
    L is a product of permutation and unit lower triangular matrices and
    U is upper triangular.

    Input, int LDA, the leading dimension of the matrix.

    Input, int N, the order of the matrix.

    Output, int IPVT[N], the pivot indices.

    Output, int SGEFA, indicates singularity.
    If 0, this is the normal value, and the algorithm succeeded.
    If K, then on the K-th elimination step, a zero pivot was encountered.
    The matrix is numerically not invertible.
*/
{
  int j;
  int info;
  int k;
  int kp1;
  int l;
  int nm1;
  float t;

  info = 0;

  for ( k = 1; k <= n - 1; k++ )
  {
/*
  Find l = pivot index.
*/
    l = isamax ( n-k+1, &a[k-1+(k-1)*lda], 1 ) + k - 1;
    ipvt[k-1] = l;
/*
  Zero pivot implies this column already triangularized.
*/
    if ( a[l-1+(k-1)*lda] != 0.0 )
    {
/*
  Interchange if necessary.
*/
      if ( l != k )
      {
        t                = a[l-1+(k-1)*lda];
        a[l-1+(k-1)*lda] = a[k-1+(k-1)*lda];
        a[k-1+(k-1)*lda] = t;
      }
/*
  Compute multipliers.
*/
      t = - 1.0 / a[k-1+(k-1)*lda];
      sscal ( n-k, t, &a[k+(k-1)*lda], 1 );
/*
  Row elimination with column indexing.
*/
      for ( j = k + 1; j <= n; j++ )
      {
        t = a[l-1+(j-1)*lda];
        if (l != k)
        {
          a[l-1+(j-1)*lda] = a[k-1+(j-1)*lda];
          a[k-1+(j-1)*lda] = t;
        }
        saxpy ( n-k, t, &a[k+(k-1)*lda], 1, &a[k+(j-1)*lda], 1 );
//.........这里部分代码省略.........
开发者ID:8l,项目名称:insieme,代码行数:101,代码来源:sgefa.c


示例10: main

int
main()
{
#define N 8
  int i;
  float x_ref[N], y_ref[N];
  float x[N], y[N];
  cublasHandle_t h;
  float a = 2.0;

  for (i = 0; i < N; i++)
    {
      x[i] = x_ref[i] = 4.0 + i;
      y[i] = y_ref[i] = 3.0;
    }

  saxpy (N, a, x_ref, y_ref);

  cublasCreate (&h);

#pragma acc data copyin (x[0:N]) copy (y[0:N])
  {
#pragma acc host_data use_device (x, y)
    {
      cublasSaxpy (h, N, &a, x, 1, y, 1);
    }
  }

  validate_results (N, y, y_ref);

#pragma acc data create (x[0:N]) copyout (y[0:N])
  {
#pragma acc kernels
    for (i = 0; i < N; i++)
      y[i] = 3.0;

#pragma acc host_data use_device (x, y)
    {
      cublasSaxpy (h, N, &a, x, 1, y, 1);
    }
  }

  cublasDestroy (h);

  validate_results (N, y, y_ref);

  for (i = 0; i < N; i++)
    y[i] = 3.0;

  /* There's no need to use host_data here.  */
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
  {
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
    saxpy (N, a, x, y);
  }

  validate_results (N, y, y_ref);

  /* Exercise host_data with data transferred with acc enter data.  */

  for (i = 0; i < N; i++)
    y[i] = 3.0;

#pragma acc enter data copyin (x, a, y)
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
  {
    saxpy (N, a, x, y);
  }
#pragma acc exit data delete (x, a) copyout (y)

  validate_results (N, y, y_ref);

  return 0;
}
开发者ID:vinriviere,项目名称:m68k-atari-mint-gcc,代码行数:74,代码来源:host_data-1.c


示例11: CreateCoarseGraphNoMask

/*************************************************************************
* This function creates the coarser graph
**************************************************************************/
void CreateCoarseGraphNoMask(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm)
{
  int i, j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize;
  idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj;
  idxtype *cmap, *htable;
  idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt, *cadjwgtsum;
  float *nvwgt, *cnvwgt;
  GraphType *cgraph;

  dovsize = (ctrl->optype == OP_KVMETIS ? 1 : 0);

  IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr));

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  vwgt = graph->vwgt;
  vsize = graph->vsize;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;
  adjwgtsum = graph->adjwgtsum;
  cmap = graph->cmap;


  /* Initialize the coarser graph */
  cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize);
  cxadj = cgraph->xadj;
  cvwgt = cgraph->vwgt;
  cvsize = cgraph->vsize;
  cnvwgt = cgraph->nvwgt;
  cadjwgtsum = cgraph->adjwgtsum;
  cadjncy = cgraph->adjncy;
  cadjwgt = cgraph->adjwgt;


  htable = idxset(cnvtxs, -1, idxwspacemalloc(ctrl, cnvtxs));

  iend = xadj[nvtxs];
  auxadj = ctrl->wspace.auxcore; 
  memcpy(auxadj, adjncy, iend*sizeof(idxtype)); 
  for (i=0; i<iend; i++)
    auxadj[i] = cmap[auxadj[i]];

  cxadj[0] = cnvtxs = cnedges = 0;
  for (i=0; i<nvtxs; i++) {
    v = perm[i];
    if (cmap[v] != cnvtxs) 
      continue;

    u = match[v];
    if (ncon == 1)
      cvwgt[cnvtxs] = vwgt[v];
    else
      scopy(ncon, nvwgt+v*ncon, cnvwgt+cnvtxs*ncon);

    if (dovsize)
      cvsize[cnvtxs] = vsize[v];

    cadjwgtsum[cnvtxs] = adjwgtsum[v];
    nedges = 0;

    istart = xadj[v];
    iend = xadj[v+1];
    for (j=istart; j<iend; j++) {
      k = auxadj[j];
      if ((m = htable[k]) == -1) {
        cadjncy[nedges] = k;
        cadjwgt[nedges] = adjwgt[j];
        htable[k] = nedges++;
      }
      else {
        cadjwgt[m] += adjwgt[j];
      }
    }

    if (v != u) { 
      if (ncon == 1)
        cvwgt[cnvtxs] += vwgt[u];
      else
        saxpy(ncon, 1.0, nvwgt+u*ncon, 1, cnvwgt+cnvtxs*ncon, 1);

      if (dovsize)
        cvsize[cnvtxs] += vsize[u];

      cadjwgtsum[cnvtxs] += adjwgtsum[u];

      istart = xadj[u];
      iend = xadj[u+1];
      for (j=istart; j<iend; j++) {
        k = auxadj[j];
        if ((m = htable[k]) == -1) {
          cadjncy[nedges] = k;
          cadjwgt[nedges] = adjwgt[j];
          htable[k] = nedges++;
        }
        else {
//.........这里部分代码省略.........
开发者ID:aceskpark,项目名称:osfeo,代码行数:101,代码来源:ccgraph.c


示例12: MCRandom_KWayEdgeRefineHorizontal


//.........这里部分代码省略.........
          if (gain >= 0 && 
              (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
               IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)))
            break;
        }
        if (k == myndegrees)
          continue;  /* break out if you did not find a candidate */

        for (j=k+1; j<myndegrees; j++) {
          to = myedegrees[j].pid;
          if ((myedegrees[j].ed > myedegrees[k].ed &&
               (AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) || 
               IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) ||
              (myedegrees[j].ed == myedegrees[k].ed && 
               IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec)))
            k = j;
        }

        to = myedegrees[k].pid;

        if (myedegrees[k].ed-myrinfo->id == 0 
            && !IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)
            && AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon)) 
          continue;

        /*=====================================================================
        * If we got here, we can now move the vertex from 'from' to 'to' 
        *======================================================================*/
        graph->mincut -= myedegrees[k].ed-myrinfo->id;

        IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));

        /* Update where, weight, and ID/ED information of the vertex you moved */
        saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
        saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
        where[i] = to;
        myrinfo->ed += myrinfo->id-myedegrees[k].ed;
        SWAP(myrinfo->id, myedegrees[k].ed, j);
        if (myedegrees[k].ed == 0) 
          myedegrees[k] = myedegrees[--myrinfo->ndegrees];
        else
          myedegrees[k].pid = from;

        if (myrinfo->ed-myrinfo->id < 0)
          BNDDelete(nbnd, bndind, bndptr, i);

        /* Update the degrees of adjacent vertices */
        for (j=xadj[i]; j<xadj[i+1]; j++) {
          ii = adjncy[j];
          me = where[ii];

          myrinfo = graph->rinfo+ii;
          if (myrinfo->edegrees == NULL) {
            myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
            ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
          }
          myedegrees = myrinfo->edegrees;

          ASSERT(CheckRInfo(myrinfo));

          if (me == from) {
            INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);

            if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1)
              BNDInsert(nbnd, bndind, bndptr, ii);
          }
开发者ID:Vignesh2208,项目名称:Awlsim_Ins,代码行数:67,代码来源:mkwayfmh.c


示例13: sgesl

void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job )

/******************************************************************************/
/*
  Purpose:

    SGESL solves a real general linear system A * X = B.

  Discussion:

    SGESL can solve either of the systems A * X = B or A' * X = B.

    The system matrix must have been factored by SGECO or SGEFA.

    A division by zero will occur if the input factor contains a
    zero on the diagonal.  Technically this indicates singularity
    but it is often caused by improper arguments or improper
    setting of LDA.  It will not occur if the subroutines are
    called correctly and if SGECO has set 0.0 < RCOND
    or SGEFA has set INFO == 0.

  Modified:

    04 April 2006

  Author:

    FORTRAN77 original by Dongarra, Moler, Bunch and Stewart.
    C translation by John Burkardt.

  Reference:

    Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
    LINPACK User's Guide,
    SIAM, (Society for Industrial and Applied Mathematics),
    3600 University City Science Center,
    Philadelphia, PA, 19104-2688.
    ISBN: 0-89871-172-X

  Parameters:

    Input, float A[LDA*N], the output from SGECO or SGEFA.

    Input, int LDA, the leading dimension of A.

    Input, int N, the order of the matrix A.

    Input, int IPVT[N], the pivot vector from SGECO or SGEFA.

    Input/output, float B[N].
    On input, the right hand side vector.
    On output, the solution vector.

    Input, int JOB.
    0, solve A * X = B;
    nonzero, solve A' * X = B.
*/
{
  int k;
  int l;
  float t;
/*
  Solve A * X = B.
*/
  if ( job == 0 )
  {
    for ( k = 1; k <= n-1; k++ )
    {
      l = ipvt[k-1];
      t = b[l-1];

      if ( l != k )
      {
        b[l-1] = b[k-1];
        b[k-1] = t;
      }
      saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 );
    }

    for ( k = n; 1 <= k; k-- )
    {
      b[k-1] = b[k-1] / a[k-1+(k-1)*lda];
      t = -b[k-1];
      saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 );
    }
  }
/*
  Solve A' * X = B.
*/
  else
  {
    for ( k = 1; k <= n; k++ )
    {
      t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 );
      b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda];
    }

    for ( k = n-1; 1 <= k; k-- )
    {
      b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 );
//.........这里部分代码省略.........
开发者ID:8l,项目名称:insieme,代码行数:101,代码来源:sgefa.c


示例14: ConjGrad2

/*************************************************************************
* This function implements the CG solver used during the directed diffusion
**************************************************************************/
void ConjGrad2(MatrixType *A, floattype *b, floattype *x, floattype tol, floattype *workspace)
{
  int i, k, n;
  floattype *p, *r, *q, *z, *M;
  floattype alpha, beta, rho, rho_1 = -1.0, error, bnrm2, tmp;
  idxtype *rowptr, *colind;
  floattype *values;

  n = A->nrows;
  rowptr = A->rowptr;
  colind = A->colind;
  values = A->values;

  /* Initial Setup */
  p = workspace;
  r = workspace + n;
  q = workspace + 2*n;
  z = workspace + 3*n;
  M = workspace + 4*n;

  for (i=0; i<n; i++) {
    x[i] = 0.0;
    if (values[rowptr[i]] != 0.0)
      M[i] = 1.0/values[rowptr[i]];
    else
      M[i] = 0.0;
  }

  /* r = b - Ax */
  mvMult2(A, x, r);
  for (i=0; i<n; i++)
    r[i] = b[i]-r[i];

  bnrm2 = snorm2(n, b);
  if (bnrm2 > 0.0) {
    error = snorm2(n, r) / bnrm2;

    if (error > tol) {
      /* Begin Iterations */
      for (k=0; k<n; k++) {
        for (i=0; i<n; i++)
          z[i] = r[i]*M[i];

        rho = sdot(n, r, z);

        if (k == 0)
          scopy(n, z, p);
        else {
          if (rho_1 != 0.0)
            beta = rho/rho_1;
          else
            beta = 0.0;
          for (i=0; i<n; i++)
            p[i] = z[i] + beta*p[i];
        }

        mvMult2(A, p, q); /* q = A*p */

        tmp = sdot(n, p, q);
        if (tmp != 0.0)
          alpha = rho/tmp;
        else
          alpha = 0.0;
        saxpy(n, alpha, p, x);    /* x = x + alpha*p */
        saxpy(n, -alpha, q, r);   /* r = r - alpha*q */
        error = snorm2(n, r) / bnrm2;
        if (error < tol)
          break;

        rho_1 = rho;
      }
    }
  }
}
开发者ID:davidheryanto,项目名称:sc14,代码行数:77,代码来源:diffutil.c


示例15: sqrdc


//.........这里部分代码省略.........
				jpvt[j] = jpvt[pl];
				jpvt[pl] = j;
				pl++;
			}
		}
		pu = p-1;
		for (j=p-1; j>=0; j--) {
			if (jpvt[j]<0) {
				jpvt[j] = -jpvt[j];
				if (j!=pu) {
					sswap(n,x[pu],1,x[j],1);
					jp = jpvt[pu];
					jpvt[pu] = jpvt[j];
					jpvt[j] = jp;
				}
				pu--;
			}
		}
	}
	
	/* compute the norms of the free columns */
	for (j=pl; j<=pu; j++) {
		qraux[j] = snrm2(n,x[j],1);
		work[j] = qraux[j];
	}
	
	/* perform the Householder reduction of x */
	lup = MIN(n,p);
	for (l=0; l<lup; l++) {
		if (l>=pl && l<pu) {
			
			/* 
			 * locate the column of largest norm and
			 * bring it into pivot position.
			 */
			maxnrm = 0.0;
			maxj = l;
			for (j=l; j<=pu; j++) {
			 	if (qraux[j]>maxnrm) {
					maxnrm = qraux[j];
					maxj = j;
				}
			}
			if (maxj!=l) {
				sswap(n,x[l],1,x[maxj],1);
				qraux[maxj] = qraux[l];
				work[maxj] = work[l];
				jp = jpvt[maxj];
				jpvt[maxj] = jpvt[l];
				jpvt[l] = jp;
			}
		}
		qraux[l] = 0.0;
		if (l!=n-1) {
		
			/*
			 * compute the Householder transformation
			 * for column l
			 */
			nrmxl = snrm2(n-l,&x[l][l],1);
			if (nrmxl!=0.0) {
				if (x[l][l]!=0.0)
					nrmxl = (x[l][l]>0.0) ?
						ABS(nrmxl) :
						-ABS(nrmxl);
				sscal(n-l,1.0/nrmxl,&x[l][l],1);
				x[l][l] += 1.0;
				
				/*
				 * apply the transformation to the remaining
				 * columns, updating the norms
				 */
				 for (j=l+1; j<p; j++) {
					 t = -sdot(n-l,&x[l][l],1,&x[j][l],1)/
					 	x[l][l];
					saxpy(n-l,t,&x[l][l],1,&x[j][l],1);
					if (j>=pl && j<=pu && qraux[j]!=0.0) {
						tt = ABS(x[j][l])/qraux[j];
						tt = 1.0-tt*tt;
						tt = MAX(tt,0.0);
						t = tt;
						ttt = qraux[j]/work[j];
						tt = 1.0+0.05*tt*ttt*ttt;
						if (tt!=1.0) {
							qraux[j] *= sqrt(t);
						} else {
							qraux[j] = snrm2(n-l-1,
								&x[j][l+1],1);
							work[j] = qraux[j];
						}
					}
				}
				
				/* save the transformation */
				qraux[l] = x[l][l];
				x[l][l] = -nrmxl;
			}
		}
	}
}	
开发者ID:gwowen,项目名称:seismicunix,代码行数:101,代码来源:sqr.c


示例16: MocGeneral2WayBalance


//.........这里部分代码省略.........
  newcut = mincut = graph->mincut;
  mincutorder = -1;

  if (ctrl->dbglvl&DBG_REFINE) {
    printf("Parts: [");
    for (l=0; l<ncon; l++)
      printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
    printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
  }

  idxset(nvtxs, -1, moved);

  ASSERT(ComputeCut(graph, where) == graph->mincut);
  ASSERT(CheckBnd(graph));

  /* Insert all nodes in the priority queues */
  nbnd = graph->nbnd;
  RandomPermute(nvtxs, perm, 1);
  for (ii=0; ii<nvtxs; ii++) {
    i = perm[ii];
    PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
  }

  for (nswaps=0; nswaps<nvtxs; nswaps++) {
    if (minbal < lbfactor)
      break;

    SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
    to = (from+1)%2;

    if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
      break;

    saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
    saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
    newcut -= (ed[higain]-id[higain]);
    newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);

    if (newbal < minbal || (newbal == minbal && 
        (newcut < mincut || (newcut == mincut && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
      mincut = newcut;
      minbal = newbal;
      mincutorder = nswaps;
      for (i=0; i<ncon; i++)
        mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
    }
    else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
      newcut += (ed[higain]-id[higain]);
      saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
      saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
      break;
    }

    where[higain] = to;
    moved[higain] = nswaps;
    swaps[nswaps] = higain;

    if (ctrl->dbglvl&DBG_MOVEINFO) {
      printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
      for (l=0; l<ncon; l++) 
        printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
      printf(", %.3f LB: %.3f\n", minbal, newbal);
    }


    /**************************************************************
开发者ID:iyer-arvind,项目名称:gmsh,代码行数:67,代码来源:mbalance.c


示例17: main

int main()
{
    const int n = 1000000;
    const int incx = 2;
    const int incy = 3;
    const float a = float(rand()) / RAND_MAX;

    float * x, * y, * dev_x, * dev_y, * y_verify;

    x = new float[n * incx];
	y = new float[n * incy];
	y_verify = new float[n * incy];
    
	cudaMalloc((void **) &dev_x, n * incx * sizeof(float));
    cudaMalloc((void **) &dev_y, n * incy * sizeof(float));

    for (int i = 0; i < n * incx; ++i)
        x[i] = float(rand()) / RAND_MAX;
    for (int i = 0; i < n * incy; ++i)
        y[i] = float(rand()) / RAND_MAX;

    cudaMemcpy(dev_x, x, n * incx * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(dev_y, y, n * incy * sizeof(float), cudaMemcpyHostToDevice);

	clock_t start = clock();
    saxpy(n, a, x, incx, y, incy);
    clock_t finish = clock();

	std::cout.setf(std::ios_base::fixed);
    std::cout.precision(5);

    double cpu_time = (finish - start) /(double) CLOCKS_PER_SEC;
    std::cout << "CPU " << cpu_time << " seconds.\n";

	start = clock();
    saxpy_openmp(n, a, x, incx, y, incy);
    finish = clock();
	double cpu_openmp_time = (finish - start) /(double) CLOCKS_PER_SEC;
    std::cout << "CPU_openmp " << cpu_openmp_time << " seconds.\n";

	start = clock();
    saxpy_gpu(n, a, dev_x, incx, dev_y, incy);
    cudaThreadSynchronize();
    finish = clock();

    double gpu_time = (float) (finish - start) / (double)CLOCKS_PER_SEC;

    std::cout << "GPU " << gpu_time << " seconds.\n";

    cudaMemcpy(y_verify, dev_y, n * incy * sizeof(float), cudaMemcpyDeviceToHost);

    double diff = 0;
    for (int i = 0; i < n * incy; ++i)
        diff += (y[i] - y_verify[i]) * (y[i] - y_verify[i]);

    if (diff < 1e-4f)
        std::cout << "Correct.\n";
    else
        std::cout << "Correct.\n";
	
	////
	//// а теперь тоже самое для double
	////

	//double * xd, * yd, * dev_xd, * dev_yd, * y_verifyd;
	//const float ad = float(rand()) / RAND_MAX;

 //   xd = new double[n * incx];
	//yd = new double[n * incy];
	//y_verifyd = new double[n * incy];
 //   
	//cudaMalloc((void **) &dev_xd, n * incx * sizeof(double));
 //   cudaMalloc((void **) &dev_yd, n * incy * sizeof(double));

 //   for (int i = 0; i < n * incx; ++i)
 //       xd[i] = double(rand()) / RAND_MAX;
 //   for (int i = 0; i < n * incy; ++i)
 //       yd[i] = double(rand()) / RAND_MAX;

 //   cudaMemcpy(dev_xd, xd, n * incx * sizeof(double), cudaMemcpyHostToDevice);
 //   cudaMemcpy(dev_yd, yd, n * incy * sizeof(double), cudaMemcpyHostToDevice);

	//start = clock();
 //   for (int i = 0; i < iterations; ++i)
 //       daxpy(n, ad, xd, incx, yd, incy);
 //   finish = clock();

 //   cpu_time = (finish - start) / CLOCKS_PER_SEC;
 //   std::cout << "CPU " << cpu_time << " seconds.\n";

	//start = clock();
 //   for (int i = 0; i < iterations; ++i)
 //       daxpy_gpu(n, ad, dev_xd, incx, dev_yd, incy);
 //   
 //   cudaThreadSynchronize();
 //   finish = clock();

 //   gpu_time = (float) (finish - start) / CLOCKS_PER_SEC;

 //   std::cout << "GPU " << gpu_time << " seconds.\n";
//.........这里部分代码省略.........
开发者ID:ViktorRoy94,项目名称:Completed-projects,代码行数:101,代码来源:main.cpp


示例18: main

main()
{
	int i,n=N;

	printf("isamax = %d\n",isamax(n,sx,1));
	printf("isamax = %d\n",isamax(n/2,sx,2));
	printf("isamax = %d\n",isamax(n,sy,1));

	printf("sasum = %g\n",sasum(n,sx,1));
	printf("sasum = %g\n",sasum(n/2,sx,2));
	printf("sasum = %g\n",sasum(n,sy,1));

	printf("snrm2 = %g\n",snrm2(n,sx,1));
	printf("snrm2 = %g\n",snrm2(n/2,sx,2));
	printf("snrm2 = %g\n",snrm2(n,sy,1));

	printf("sdot = %g\n",sdot(n,sx,1,sy,1));
	printf("sdot = %g\n",sdot(n/2,sx,2,sy,2));
	printf("sdot = %g\n",sdot(n/2,sx,-2,sy,2));
	printf("sdot = %g\n",sdot(n,sy,1,sy,1));

	printf("sscal\n");
	sscal(n,2.0,sx,1);
	pvec(n,sx);
	sscal(n,0.5,sx,1);
	pvec(n,sx);
	sscal(n/2,2.0,sx,2);
	pvec(n,sx);
	sscal(n/2,0.5,sx,2);
	pvec(n,sx);

	printf("sswap\n");
	sswap(n,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	sswap(n,sy,1,sx,1);
	pvec(n,sx); pvec(n,sy);
	sswap(n/2,sx,1,sx+n/2,-1);
	pvec(n,sx);
	sswap(n/2,sx,1,sx+n/2,-1);
	pvec(n,sx);
	sswap(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	sswap(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);

	printf("saxpy\n");
	saxpy(n,2.0,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n,-2.0,sx,1,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,2.0,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,-2.0,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,2.0,sx,-2,sy,1);
	pvec(n,sx); pvec(n,sy);
	saxpy(n/2,-2.0,sx,-2,sy,1);
	pvec(n,sx); pvec(n,sy);

	printf("scopy\n");
	scopy(n/2,sx,2,sy,2);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx+1,2,sy+1,2);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx,2,sy,1);
	pvec(n,sx); pvec(n,sy);
	scopy(n/2,sx+1,-2,sy+n/2,-1);
	pvec(n,sx); pvec(n,sy);
}
开发者ID:JOravetz,项目名称:SeisUnix,代码行数:69,代码来源:blast.c


示例19: MocComputeKWayPartitionParams

/*************************************************************************
* This function computes the initial id/ed 
**************************************************************************/
void MocComputeKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts)
{
  int i, j, k, l, nvtxs, ncon, nbnd, mincut, me, other;
  idxtype *xadj, *adjncy, *adjwgt, *where, *bndind, *bndptr;
  RInfoType *rinfo, *myrinfo;
  EDegreeType *myedegrees;
  float *nvwgt, *npwgts;

  nvtxs = graph->nvtxs;
  ncon = graph->ncon;
  xadj = graph->xadj;
  nvwgt = graph->nvwgt;
  adjncy = graph->adjncy;
  adjwgt = graph->adjwgt;

  where = graph->where;
  npwgts = sset(ncon*nparts, 0.0, graph->npwgts);
  bndind = graph->bndind;
  bndptr = idxset(nvtxs, -1, graph->bndptr);
  rinfo = graph->rinfo;


  /*------------------------------------------------------------
  / Compute now the id/ed degrees
  /------------------------------------------------------------*/
  ctrl->wspace.cdegree = 0;
  nbnd = mincut = 0;
  for (i=0; i<nvtxs; i++) {
    me = where[i];
    saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);

    myrinfo = rinfo+i;
    myrinfo->id = myrinfo->ed = myrinfo->ndegrees = 0;
    myrinfo->edegrees = NULL;

    for (j=xadj[i]; j<xadj[i+1]; j++) {
      if (me != where[adjncy[j]])
        myrinfo->ed += adjwgt[j];
    }
    myrinfo->id = graph->adjwgtsum[i] - myrinfo->ed;

    if (myrinfo->ed > 0) 
      mincut += myrinfo->ed;

    if (myrinfo->ed-myrinfo->id >= 0)
      BNDInsert(nbnd, bndind, bndptr, i);

    /* Time to compute the particular external degrees */
    if (myrinfo->ed > 0) { 
      myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
      ctrl->wspace.cdegree += xadj[i+1]-xadj[i];

      for (j=xadj[i]; j<xadj[i+1]; j++) {
        other = where[adjncy[j]];
        if (me != other) {
          for (k=0; k<myrinfo->ndegrees; k++) {
            if (myedegrees[k].pid == other) {
              myedegrees[k].ed += adjwgt[j];
              break;
            }
          }
          if (k == myrinfo->ndegrees) {
            myedegrees[myrinfo->ndegrees].pid = other;
            myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
          }
        }
      }

      ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
    }
  }

  graph->mincut = mincut/2;
  graph->nbnd = nbnd;

}
开发者ID:cran,项目名称:BigQuic,代码行数:79,代码来源:mkwayrefine.c


示例20: sqrsl


//.........这里部分代码省略.........
	/* determine what is to be computed */
	cqy = job/10000!=0;
	cqty = job%10000!=0;
	cb = (job%1000)/100!=0;
	cr = (job%100)/10!=0;
	cxb = job%10!=0;
	ju = MIN(k,n-1);
	
	/* special action when n=1 */
	if (ju==0) {
		if (cqy) qy[0] = y[0];
		if (cqty) qty[0] = y[0];
		if (cxb) xb[0] = y[0];
		if (cb) {
			if (x[0][0]==0.0)
				*info = 1;
			else
				b[0] = y[0]/x[0][0];
		}
		if (cr) rsd[0] = 0.0;
		return;
	}
	
	/* set up to compute Qy or Q'y */
	if (cqy) scopy(n,y,1,qy,1);
	if (cqty) scopy(n,y,1,qty,1);
	if (cqy) {
	
		/* compute Qy */
		for (j=ju-1; j>=0; j--) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				t = -sdot(n-j,&x[j][j],1,&qy[j],1)/x[j][j];
				saxpy(n-j,t,&x[j][j],1,&qy[j],1);
				x[j][j] = temp;
			}
		}
	}
	if (cqty) {
		
		/* compute Q'y */
		for (j=0; j<ju; j++) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				t = -sdot(n-j,&x[j][j],1,&qty[j],1)/x[j][j];
				saxpy(n-j,t,&x[j][j],1,&qty[j],1);
				x[j][j] = temp;
			}
		}
	}
	
	/* set up to compute b, rsd, or xb */
	if (cb) scopy(k,qty,1,b,1);
	if (cxb) scopy(k,qty,1,xb,1);
	if (cr && k<n) scopy(n-k,&qty[k],1,&rsd[k],1);
	if (cxb && k<n)
		for (i=k; i<n; i++)
			xb[i] = 0.0;
	if (cr)
		for (i=0; i<k; i++)
			rsd[i] = 0.0;
	if (cb) {
	
		/* compute b */
		for (j=k-1; j>=0; j--) {
			if (x[j][j]==0.0) {
				*info = j;
				break;
			}
			b[j] /= x[j][j];
			if (j!=0) {
				t = -b[j];
				saxpy(j,t,x[j],1,b,1);
			}
		}
	}
	if (cr || cxb) {
	
		/* compute rsd or xb as requested */
		for (j=ju-1; j>=0; j--) {
			if (qraux[j]!=0.0) {
				temp = x[j][j];
				x[j][j] = qraux[j];
				if (cr) {
					t = -sdot(n-j,&x[j][j],1,&rsd[j],1)/
						x[j][j];
					saxpy(n-j,t,&x[j][j],1,&rsd[j],1);
				}
				if (cxb) {
					t = -sdot(n-j,&x[j][j],1,&xb[j],1)/
						x[j][j];
					saxpy(n-j,t,&x[j][j],1,&xb[j],1);
				}
				x[j][j] = temp;
			}
		}
	}
}
开发者ID:gwowen,项目名称:seismicunix,代码行数:101,代码来源:sqr.c



注:本文中的saxpy函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ saxpy_函数代码示例发布时间:2022-05-30
下一篇:
C++ saw_object函数代码示例发布时间:2022-05-30
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap