本文整理汇总了C++中saxpy函数的典型用法代码示例。如果您正苦于以下问题:C++ saxpy函数的具体用法?C++ saxpy怎么用?C++ saxpy使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了saxpy函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: axpy
static vl::Error
axpy(vl::Context & context,
ptrdiff_t n,
type alpha,
type const *x, ptrdiff_t incx,
type *y, ptrdiff_t incy)
{
saxpy(&n,
&alpha,
(float*)x, &incx,
(float*)y, &incy) ;
return vl::vlSuccess ;
}
开发者ID:BenJamesbabala,项目名称:twostreamfusion,代码行数:13,代码来源:blashelper.hpp
示例2: MocCompute2WayPartitionParams
/*************************************************************************
* This function computes the initial id/ed
**************************************************************************/
void MocCompute2WayPartitionParams(CtrlType *ctrl, GraphType *graph)
{
int i, j, /*k, l,*/ nvtxs, ncon, nbnd, mincut;
idxtype *xadj, *adjncy, *adjwgt;
float *nvwgt, *npwgts;
idxtype *id, *ed, *where;
idxtype *bndptr, *bndind;
int me/*, other*/;
nvtxs = graph->nvtxs;
ncon = graph->ncon;
xadj = graph->xadj;
nvwgt = graph->nvwgt;
adjncy = graph->adjncy;
adjwgt = graph->adjwgt;
where = graph->where;
npwgts = sset(2*ncon, 0.0, graph->npwgts);
id = idxset(nvtxs, 0, graph->id);
ed = idxset(nvtxs, 0, graph->ed);
bndptr = idxset(nvtxs, -1, graph->bndptr);
bndind = graph->bndind;
/*------------------------------------------------------------
/ Compute now the id/ed degrees
/------------------------------------------------------------*/
nbnd = mincut = 0;
for (i=0; i<nvtxs; i++) {
ASSERT(where[i] >= 0 && where[i] <= 1);
me = where[i];
saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);
for (j=xadj[i]; j<xadj[i+1]; j++) {
if (me == where[adjncy[j]])
id[i] += adjwgt[j];
else
ed[i] += adjwgt[j];
}
if (ed[i] > 0 || xadj[i] == xadj[i+1]) {
mincut += ed[i];
bndptr[i] = nbnd;
bndind[nbnd++] = i;
}
}
graph->mincut = mincut/2;
graph->nbnd = nbnd;
}
开发者ID:netsym,项目名称:minissf,代码行数:54,代码来源:mrefine.c
示例3: interpolate_initial
//! @{
virtual void interpolate_initial(shared_ptr<ISweeper<time>> dst,
shared_ptr<const ISweeper<time>> src) override
{
auto& fine = as_encap_sweeper(dst);
auto& crse = as_encap_sweeper(src);
auto crse_factory = crse.get_factory();
auto fine_factory = fine.get_factory();
auto crse_delta = crse_factory->create(solution);
this->restrict(crse_delta, fine.get_start_state());
crse_delta->saxpy(-1.0, crse.get_start_state());
auto fine_delta = fine_factory->create(solution);
this->interpolate(fine_delta, crse_delta);
fine.get_start_state()->saxpy(-1.0, fine_delta);
fine.reevaluate(true);
}
开发者ID:memmett,项目名称:PFASST,代码行数:20,代码来源:poly_interp.hpp
示例4: main
int main()
{
uint32_t num_cpus = std::thread::hardware_concurrency();
std::vector<std::thread>threads(num_cpus);
float *S = new float[num_cpus], a = 1.0f, b = 2.0f, *X = new float[num_cpus], *Y = new float[num_cpus];
for(uint32_t i=0;i<num_cpus;i++){
X[i] = 1.0f*i;
Y[i] = 2.0f*(i+1);
Saxpy saxpy(std::ref(S[i]), a, X[i], b, Y[i]);
threads[i] = std::thread(saxpy);
}
for(uint32_t i=0;i<num_cpus;i++){
threads[i].join();
std::cout<<S[i]<<std::endl;
}
}
开发者ID:gpu0,项目名称:cpp-threads,代码行数:20,代码来源:ch02.4.cpp
示例5: main
int main (int argc, char * argv[])
{
const int N = 16;
const int iters = 1;
float *x, *y, *z;
posix_memalign((void **)&x, VECTOR_SIZE, N*sizeof(float));
posix_memalign((void **)&y, VECTOR_SIZE, N*sizeof(float));
posix_memalign((void **)&z, VECTOR_SIZE, N*sizeof(float));
float a = 0.93f;
int i, j;
for (i=0; i<N; i++)
{
x[i] = i+1;
y[i] = i-1;
z[i] = 0.0f;
}
for (i=0; i<iters; i++)
{
saxpy(x, y, z, a, N);
}
for (i=0; i<N; i++)
{
if (z[i] != (a * x[i] + y[i]))
{
printf("Error\n");
return (1);
}
}
printf("SUCCESS!\n");
return 0;
}
开发者ID:drpicox,项目名称:mcxx,代码行数:39,代码来源:success_simd_03_parallel_saxpy.c
示例6: main
int main(int argc, char* argv[]) {
float a, x[N], y[N];
a=5;
int i;
for (i=0; i<N; ++i){
x[i]=i;
y[i]=i+2;
}
saxpy(N, a, x, y);
#pragma omp taskwait
//Check results
for (i=0; i<N; ++i){
if (y[i]!=a*i+(i+2)){
printf("Error when checking results, in position %d\n",i);
return -1;
}
}
printf("Results are correct\n");
return 0;
}
开发者ID:mfarrera,项目名称:ompss-tests,代码行数:23,代码来源:saxpy.c
示例7: interpolate
virtual void interpolate(shared_ptr<ISweeper<time>> dst,
shared_ptr<const ISweeper<time>> src,
bool interp_initial) override
{
auto& fine = as_encap_sweeper(dst);
auto& crse = as_encap_sweeper(src);
if (tmat.rows() == 0) {
tmat = pfasst::quadrature::compute_interp<time>(fine.get_nodes(), crse.get_nodes());
}
if (interp_initial) {
this->interpolate_initial(dst, src);
}
size_t nfine = fine.get_nodes().size();
size_t ncrse = crse.get_nodes().size();
auto crse_factory = crse.get_factory();
auto fine_factory = fine.get_factory();
EncapVecT fine_state(nfine), fine_delta(ncrse);
for (size_t m = 0; m < nfine; m++) { fine_state[m] = fine.get_state(m); }
for (size_t m = 0; m < ncrse; m++) { fine_delta[m] = fine_factory->create(solution); }
auto crse_delta = crse_factory->create(solution);
for (size_t m = 0; m < ncrse; m++) {
crse_delta->copy(crse.get_state(m));
crse_delta->saxpy(-1.0, crse.get_saved_state(m));
interpolate(fine_delta[m], crse_delta);
}
fine.get_state(0)->mat_apply(fine_state, 1.0, tmat, fine_delta, false);
fine.reevaluate();
}
开发者ID:memmett,项目名称:PFASST,代码行数:37,代码来源:poly_interp.hpp
示例8: operator
void operator()(float a, float *x, float *y, std::size_t n)
{
saxpy(a,x,y,n);
}
开发者ID:jaredhoberock,项目名称:personal,代码行数:4,代码来源:saxpy.cpp
示例9: sgefa
int sgefa ( float a[], int lda, int n, int ipvt[] )
/*******************************************************************************/
/*
Purpose:
SGEFA factors a matrix by gaussian elimination.
Discussion:
Matrix references which would, mathematically, be written A(I,J)
must be written here as:
* A[I+J*LDA], when the value is needed, or
* A+I+J*LDA, when the address is needed.
Modified:
07 March 2008
Author:
FORTRAN77 original version by Cleve Moler.
C version by John Burkardt.
Reference:
Jack Dongarra, Jim Bunch, Cleve Moler, Pete Stewart,
LINPACK User's Guide,
SIAM, 1979,
ISBN13: 978-0-898711-72-1,
LC: QA214.L56.
Parameters:
Input/output, float A[LDA*N]. On input, the matrix to be factored.
On output, an upper triangular matrix and the multipliers which were
used to obtain it. The factorization can be written A = L * U where
L is a product of permutation and unit lower triangular matrices and
U is upper triangular.
Input, int LDA, the leading dimension of the matrix.
Input, int N, the order of the matrix.
Output, int IPVT[N], the pivot indices.
Output, int SGEFA, indicates singularity.
If 0, this is the normal value, and the algorithm succeeded.
If K, then on the K-th elimination step, a zero pivot was encountered.
The matrix is numerically not invertible.
*/
{
int j;
int info;
int k;
int kp1;
int l;
int nm1;
float t;
info = 0;
for ( k = 1; k <= n - 1; k++ )
{
/*
Find l = pivot index.
*/
l = isamax ( n-k+1, &a[k-1+(k-1)*lda], 1 ) + k - 1;
ipvt[k-1] = l;
/*
Zero pivot implies this column already triangularized.
*/
if ( a[l-1+(k-1)*lda] != 0.0 )
{
/*
Interchange if necessary.
*/
if ( l != k )
{
t = a[l-1+(k-1)*lda];
a[l-1+(k-1)*lda] = a[k-1+(k-1)*lda];
a[k-1+(k-1)*lda] = t;
}
/*
Compute multipliers.
*/
t = - 1.0 / a[k-1+(k-1)*lda];
sscal ( n-k, t, &a[k+(k-1)*lda], 1 );
/*
Row elimination with column indexing.
*/
for ( j = k + 1; j <= n; j++ )
{
t = a[l-1+(j-1)*lda];
if (l != k)
{
a[l-1+(j-1)*lda] = a[k-1+(j-1)*lda];
a[k-1+(j-1)*lda] = t;
}
saxpy ( n-k, t, &a[k+(k-1)*lda], 1, &a[k+(j-1)*lda], 1 );
//.........这里部分代码省略.........
开发者ID:8l,项目名称:insieme,代码行数:101,代码来源:sgefa.c
示例10: main
int
main()
{
#define N 8
int i;
float x_ref[N], y_ref[N];
float x[N], y[N];
cublasHandle_t h;
float a = 2.0;
for (i = 0; i < N; i++)
{
x[i] = x_ref[i] = 4.0 + i;
y[i] = y_ref[i] = 3.0;
}
saxpy (N, a, x_ref, y_ref);
cublasCreate (&h);
#pragma acc data copyin (x[0:N]) copy (y[0:N])
{
#pragma acc host_data use_device (x, y)
{
cublasSaxpy (h, N, &a, x, 1, y, 1);
}
}
validate_results (N, y, y_ref);
#pragma acc data create (x[0:N]) copyout (y[0:N])
{
#pragma acc kernels
for (i = 0; i < N; i++)
y[i] = 3.0;
#pragma acc host_data use_device (x, y)
{
cublasSaxpy (h, N, &a, x, 1, y, 1);
}
}
cublasDestroy (h);
validate_results (N, y, y_ref);
for (i = 0; i < N; i++)
y[i] = 3.0;
/* There's no need to use host_data here. */
#pragma acc data copyin (x[0:N]) copyin (a) copy (y[0:N])
{
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
saxpy (N, a, x, y);
}
validate_results (N, y, y_ref);
/* Exercise host_data with data transferred with acc enter data. */
for (i = 0; i < N; i++)
y[i] = 3.0;
#pragma acc enter data copyin (x, a, y)
#pragma acc parallel present (x[0:N]) pcopy (y[0:N]) present (a)
{
saxpy (N, a, x, y);
}
#pragma acc exit data delete (x, a) copyout (y)
validate_results (N, y, y_ref);
return 0;
}
开发者ID:vinriviere,项目名称:m68k-atari-mint-gcc,代码行数:74,代码来源:host_data-1.c
示例11: CreateCoarseGraphNoMask
/*************************************************************************
* This function creates the coarser graph
**************************************************************************/
void CreateCoarseGraphNoMask(CtrlType *ctrl, GraphType *graph, int cnvtxs, idxtype *match, idxtype *perm)
{
int i, j, k, m, istart, iend, nvtxs, nedges, ncon, cnedges, v, u, dovsize;
idxtype *xadj, *vwgt, *vsize, *adjncy, *adjwgt, *adjwgtsum, *auxadj;
idxtype *cmap, *htable;
idxtype *cxadj, *cvwgt, *cvsize, *cadjncy, *cadjwgt, *cadjwgtsum;
float *nvwgt, *cnvwgt;
GraphType *cgraph;
dovsize = (ctrl->optype == OP_KVMETIS ? 1 : 0);
IFSET(ctrl->dbglvl, DBG_TIME, starttimer(ctrl->ContractTmr));
nvtxs = graph->nvtxs;
ncon = graph->ncon;
xadj = graph->xadj;
vwgt = graph->vwgt;
vsize = graph->vsize;
nvwgt = graph->nvwgt;
adjncy = graph->adjncy;
adjwgt = graph->adjwgt;
adjwgtsum = graph->adjwgtsum;
cmap = graph->cmap;
/* Initialize the coarser graph */
cgraph = SetUpCoarseGraph(graph, cnvtxs, dovsize);
cxadj = cgraph->xadj;
cvwgt = cgraph->vwgt;
cvsize = cgraph->vsize;
cnvwgt = cgraph->nvwgt;
cadjwgtsum = cgraph->adjwgtsum;
cadjncy = cgraph->adjncy;
cadjwgt = cgraph->adjwgt;
htable = idxset(cnvtxs, -1, idxwspacemalloc(ctrl, cnvtxs));
iend = xadj[nvtxs];
auxadj = ctrl->wspace.auxcore;
memcpy(auxadj, adjncy, iend*sizeof(idxtype));
for (i=0; i<iend; i++)
auxadj[i] = cmap[auxadj[i]];
cxadj[0] = cnvtxs = cnedges = 0;
for (i=0; i<nvtxs; i++) {
v = perm[i];
if (cmap[v] != cnvtxs)
continue;
u = match[v];
if (ncon == 1)
cvwgt[cnvtxs] = vwgt[v];
else
scopy(ncon, nvwgt+v*ncon, cnvwgt+cnvtxs*ncon);
if (dovsize)
cvsize[cnvtxs] = vsize[v];
cadjwgtsum[cnvtxs] = adjwgtsum[v];
nedges = 0;
istart = xadj[v];
iend = xadj[v+1];
for (j=istart; j<iend; j++) {
k = auxadj[j];
if ((m = htable[k]) == -1) {
cadjncy[nedges] = k;
cadjwgt[nedges] = adjwgt[j];
htable[k] = nedges++;
}
else {
cadjwgt[m] += adjwgt[j];
}
}
if (v != u) {
if (ncon == 1)
cvwgt[cnvtxs] += vwgt[u];
else
saxpy(ncon, 1.0, nvwgt+u*ncon, 1, cnvwgt+cnvtxs*ncon, 1);
if (dovsize)
cvsize[cnvtxs] += vsize[u];
cadjwgtsum[cnvtxs] += adjwgtsum[u];
istart = xadj[u];
iend = xadj[u+1];
for (j=istart; j<iend; j++) {
k = auxadj[j];
if ((m = htable[k]) == -1) {
cadjncy[nedges] = k;
cadjwgt[nedges] = adjwgt[j];
htable[k] = nedges++;
}
else {
//.........这里部分代码省略.........
开发者ID:aceskpark,项目名称:osfeo,代码行数:101,代码来源:ccgraph.c
示例12: MCRandom_KWayEdgeRefineHorizontal
//.........这里部分代码省略.........
if (gain >= 0 &&
(AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)))
break;
}
if (k == myndegrees)
continue; /* break out if you did not find a candidate */
for (j=k+1; j<myndegrees; j++) {
to = myedegrees[j].pid;
if ((myedegrees[j].ed > myedegrees[k].ed &&
(AreAllHVwgtsBelow(ncon, 1.0, npwgts+to*ncon, 1.0, nvwgt, maxwgt+to*ncon) ||
IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec))) ||
(myedegrees[j].ed == myedegrees[k].ed &&
IsHBalanceBetterTT(ncon, nparts, npwgts+myedegrees[k].pid*ncon, npwgts+to*ncon, nvwgt, ubvec)))
k = j;
}
to = myedegrees[k].pid;
if (myedegrees[k].ed-myrinfo->id == 0
&& !IsHBalanceBetterFT(ncon, nparts, npwgts+from*ncon, npwgts+to*ncon, nvwgt, ubvec)
&& AreAllHVwgtsBelow(ncon, 1.0, npwgts+from*ncon, 0.0, npwgts+from*ncon, maxwgt+from*ncon))
continue;
/*=====================================================================
* If we got here, we can now move the vertex from 'from' to 'to'
*======================================================================*/
graph->mincut -= myedegrees[k].ed-myrinfo->id;
IFSET(ctrl->dbglvl, DBG_MOVEINFO, printf("\t\tMoving %6d to %3d. Gain: %4d. Cut: %6d\n", i, to, myedegrees[k].ed-myrinfo->id, graph->mincut));
/* Update where, weight, and ID/ED information of the vertex you moved */
saxpy(ncon, 1.0, nvwgt, 1, npwgts+to*ncon, 1);
saxpy(ncon, -1.0, nvwgt, 1, npwgts+from*ncon, 1);
where[i] = to;
myrinfo->ed += myrinfo->id-myedegrees[k].ed;
SWAP(myrinfo->id, myedegrees[k].ed, j);
if (myedegrees[k].ed == 0)
myedegrees[k] = myedegrees[--myrinfo->ndegrees];
else
myedegrees[k].pid = from;
if (myrinfo->ed-myrinfo->id < 0)
BNDDelete(nbnd, bndind, bndptr, i);
/* Update the degrees of adjacent vertices */
for (j=xadj[i]; j<xadj[i+1]; j++) {
ii = adjncy[j];
me = where[ii];
myrinfo = graph->rinfo+ii;
if (myrinfo->edegrees == NULL) {
myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
ctrl->wspace.cdegree += xadj[ii+1]-xadj[ii];
}
myedegrees = myrinfo->edegrees;
ASSERT(CheckRInfo(myrinfo));
if (me == from) {
INC_DEC(myrinfo->ed, myrinfo->id, adjwgt[j]);
if (myrinfo->ed-myrinfo->id >= 0 && bndptr[ii] == -1)
BNDInsert(nbnd, bndind, bndptr, ii);
}
开发者ID:Vignesh2208,项目名称:Awlsim_Ins,代码行数:67,代码来源:mkwayfmh.c
示例13: sgesl
void sgesl ( float a[], int lda, int n, int ipvt[], float b[], int job )
/******************************************************************************/
/*
Purpose:
SGESL solves a real general linear system A * X = B.
Discussion:
SGESL can solve either of the systems A * X = B or A' * X = B.
The system matrix must have been factored by SGECO or SGEFA.
A division by zero will occur if the input factor contains a
zero on the diagonal. Technically this indicates singularity
but it is often caused by improper arguments or improper
setting of LDA. It will not occur if the subroutines are
called correctly and if SGECO has set 0.0 < RCOND
or SGEFA has set INFO == 0.
Modified:
04 April 2006
Author:
FORTRAN77 original by Dongarra, Moler, Bunch and Stewart.
C translation by John Burkardt.
Reference:
Jack Dongarra, Cleve Moler, Jim Bunch, Pete Stewart,
LINPACK User's Guide,
SIAM, (Society for Industrial and Applied Mathematics),
3600 University City Science Center,
Philadelphia, PA, 19104-2688.
ISBN: 0-89871-172-X
Parameters:
Input, float A[LDA*N], the output from SGECO or SGEFA.
Input, int LDA, the leading dimension of A.
Input, int N, the order of the matrix A.
Input, int IPVT[N], the pivot vector from SGECO or SGEFA.
Input/output, float B[N].
On input, the right hand side vector.
On output, the solution vector.
Input, int JOB.
0, solve A * X = B;
nonzero, solve A' * X = B.
*/
{
int k;
int l;
float t;
/*
Solve A * X = B.
*/
if ( job == 0 )
{
for ( k = 1; k <= n-1; k++ )
{
l = ipvt[k-1];
t = b[l-1];
if ( l != k )
{
b[l-1] = b[k-1];
b[k-1] = t;
}
saxpy ( n-k, t, a+k+(k-1)*lda, 1, b+k, 1 );
}
for ( k = n; 1 <= k; k-- )
{
b[k-1] = b[k-1] / a[k-1+(k-1)*lda];
t = -b[k-1];
saxpy ( k-1, t, a+0+(k-1)*lda, 1, b, 1 );
}
}
/*
Solve A' * X = B.
*/
else
{
for ( k = 1; k <= n; k++ )
{
t = sdot ( k-1, a+0+(k-1)*lda, 1, b, 1 );
b[k-1] = ( b[k-1] - t ) / a[k-1+(k-1)*lda];
}
for ( k = n-1; 1 <= k; k-- )
{
b[k-1] = b[k-1] + sdot ( n-k, a+k+(k-1)*lda, 1, b+k, 1 );
//.........这里部分代码省略.........
开发者ID:8l,项目名称:insieme,代码行数:101,代码来源:sgefa.c
示例14: ConjGrad2
/*************************************************************************
* This function implements the CG solver used during the directed diffusion
**************************************************************************/
void ConjGrad2(MatrixType *A, floattype *b, floattype *x, floattype tol, floattype *workspace)
{
int i, k, n;
floattype *p, *r, *q, *z, *M;
floattype alpha, beta, rho, rho_1 = -1.0, error, bnrm2, tmp;
idxtype *rowptr, *colind;
floattype *values;
n = A->nrows;
rowptr = A->rowptr;
colind = A->colind;
values = A->values;
/* Initial Setup */
p = workspace;
r = workspace + n;
q = workspace + 2*n;
z = workspace + 3*n;
M = workspace + 4*n;
for (i=0; i<n; i++) {
x[i] = 0.0;
if (values[rowptr[i]] != 0.0)
M[i] = 1.0/values[rowptr[i]];
else
M[i] = 0.0;
}
/* r = b - Ax */
mvMult2(A, x, r);
for (i=0; i<n; i++)
r[i] = b[i]-r[i];
bnrm2 = snorm2(n, b);
if (bnrm2 > 0.0) {
error = snorm2(n, r) / bnrm2;
if (error > tol) {
/* Begin Iterations */
for (k=0; k<n; k++) {
for (i=0; i<n; i++)
z[i] = r[i]*M[i];
rho = sdot(n, r, z);
if (k == 0)
scopy(n, z, p);
else {
if (rho_1 != 0.0)
beta = rho/rho_1;
else
beta = 0.0;
for (i=0; i<n; i++)
p[i] = z[i] + beta*p[i];
}
mvMult2(A, p, q); /* q = A*p */
tmp = sdot(n, p, q);
if (tmp != 0.0)
alpha = rho/tmp;
else
alpha = 0.0;
saxpy(n, alpha, p, x); /* x = x + alpha*p */
saxpy(n, -alpha, q, r); /* r = r - alpha*q */
error = snorm2(n, r) / bnrm2;
if (error < tol)
break;
rho_1 = rho;
}
}
}
}
开发者ID:davidheryanto,项目名称:sc14,代码行数:77,代码来源:diffutil.c
示例15: sqrdc
//.........这里部分代码省略.........
jpvt[j] = jpvt[pl];
jpvt[pl] = j;
pl++;
}
}
pu = p-1;
for (j=p-1; j>=0; j--) {
if (jpvt[j]<0) {
jpvt[j] = -jpvt[j];
if (j!=pu) {
sswap(n,x[pu],1,x[j],1);
jp = jpvt[pu];
jpvt[pu] = jpvt[j];
jpvt[j] = jp;
}
pu--;
}
}
}
/* compute the norms of the free columns */
for (j=pl; j<=pu; j++) {
qraux[j] = snrm2(n,x[j],1);
work[j] = qraux[j];
}
/* perform the Householder reduction of x */
lup = MIN(n,p);
for (l=0; l<lup; l++) {
if (l>=pl && l<pu) {
/*
* locate the column of largest norm and
* bring it into pivot position.
*/
maxnrm = 0.0;
maxj = l;
for (j=l; j<=pu; j++) {
if (qraux[j]>maxnrm) {
maxnrm = qraux[j];
maxj = j;
}
}
if (maxj!=l) {
sswap(n,x[l],1,x[maxj],1);
qraux[maxj] = qraux[l];
work[maxj] = work[l];
jp = jpvt[maxj];
jpvt[maxj] = jpvt[l];
jpvt[l] = jp;
}
}
qraux[l] = 0.0;
if (l!=n-1) {
/*
* compute the Householder transformation
* for column l
*/
nrmxl = snrm2(n-l,&x[l][l],1);
if (nrmxl!=0.0) {
if (x[l][l]!=0.0)
nrmxl = (x[l][l]>0.0) ?
ABS(nrmxl) :
-ABS(nrmxl);
sscal(n-l,1.0/nrmxl,&x[l][l],1);
x[l][l] += 1.0;
/*
* apply the transformation to the remaining
* columns, updating the norms
*/
for (j=l+1; j<p; j++) {
t = -sdot(n-l,&x[l][l],1,&x[j][l],1)/
x[l][l];
saxpy(n-l,t,&x[l][l],1,&x[j][l],1);
if (j>=pl && j<=pu && qraux[j]!=0.0) {
tt = ABS(x[j][l])/qraux[j];
tt = 1.0-tt*tt;
tt = MAX(tt,0.0);
t = tt;
ttt = qraux[j]/work[j];
tt = 1.0+0.05*tt*ttt*ttt;
if (tt!=1.0) {
qraux[j] *= sqrt(t);
} else {
qraux[j] = snrm2(n-l-1,
&x[j][l+1],1);
work[j] = qraux[j];
}
}
}
/* save the transformation */
qraux[l] = x[l][l];
x[l][l] = -nrmxl;
}
}
}
}
开发者ID:gwowen,项目名称:seismicunix,代码行数:101,代码来源:sqr.c
示例16: MocGeneral2WayBalance
//.........这里部分代码省略.........
newcut = mincut = graph->mincut;
mincutorder = -1;
if (ctrl->dbglvl&DBG_REFINE) {
printf("Parts: [");
for (l=0; l<ncon; l++)
printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
printf("] T[%.3f %.3f], Nv-Nb[%5d, %5d]. ICut: %6d, LB: %.3f [B]\n", tpwgts[0], tpwgts[1], graph->nvtxs, graph->nbnd, graph->mincut, origbal);
}
idxset(nvtxs, -1, moved);
ASSERT(ComputeCut(graph, where) == graph->mincut);
ASSERT(CheckBnd(graph));
/* Insert all nodes in the priority queues */
nbnd = graph->nbnd;
RandomPermute(nvtxs, perm, 1);
for (ii=0; ii<nvtxs; ii++) {
i = perm[ii];
PQueueInsert(&parts[qnum[i]][where[i]], i, ed[i]-id[i]);
}
for (nswaps=0; nswaps<nvtxs; nswaps++) {
if (minbal < lbfactor)
break;
SelectQueue(ncon, npwgts, tpwgts, &from, &cnum, parts);
to = (from+1)%2;
if (from == -1 || (higain = PQueueGetMax(&parts[cnum][from])) == -1)
break;
saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
newcut -= (ed[higain]-id[higain]);
newbal = Compute2WayHLoadImbalance(ncon, npwgts, tpwgts);
if (newbal < minbal || (newbal == minbal &&
(newcut < mincut || (newcut == mincut && BetterBalance(ncon, npwgts, tpwgts, mindiff))))) {
mincut = newcut;
minbal = newbal;
mincutorder = nswaps;
for (i=0; i<ncon; i++)
mindiff[i] = fabs(tpwgts[0]-npwgts[i]);
}
else if (nswaps-mincutorder > limit) { /* We hit the limit, undo last move */
newcut += (ed[higain]-id[higain]);
saxpy(ncon, 1.0, nvwgt+higain*ncon, 1, npwgts+from*ncon, 1);
saxpy(ncon, -1.0, nvwgt+higain*ncon, 1, npwgts+to*ncon, 1);
break;
}
where[higain] = to;
moved[higain] = nswaps;
swaps[nswaps] = higain;
if (ctrl->dbglvl&DBG_MOVEINFO) {
printf("Moved %6d from %d(%d). Gain: %5d, Cut: %5d, NPwgts: ", higain, from, cnum, ed[higain]-id[higain], newcut);
for (l=0; l<ncon; l++)
printf("(%.3f, %.3f) ", npwgts[l], npwgts[ncon+l]);
printf(", %.3f LB: %.3f\n", minbal, newbal);
}
/**************************************************************
开发者ID:iyer-arvind,项目名称:gmsh,代码行数:67,代码来源:mbalance.c
示例17: main
int main()
{
const int n = 1000000;
const int incx = 2;
const int incy = 3;
const float a = float(rand()) / RAND_MAX;
float * x, * y, * dev_x, * dev_y, * y_verify;
x = new float[n * incx];
y = new float[n * incy];
y_verify = new float[n * incy];
cudaMalloc((void **) &dev_x, n * incx * sizeof(float));
cudaMalloc((void **) &dev_y, n * incy * sizeof(float));
for (int i = 0; i < n * incx; ++i)
x[i] = float(rand()) / RAND_MAX;
for (int i = 0; i < n * incy; ++i)
y[i] = float(rand()) / RAND_MAX;
cudaMemcpy(dev_x, x, n * incx * sizeof(float), cudaMemcpyHostToDevice);
cudaMemcpy(dev_y, y, n * incy * sizeof(float), cudaMemcpyHostToDevice);
clock_t start = clock();
saxpy(n, a, x, incx, y, incy);
clock_t finish = clock();
std::cout.setf(std::ios_base::fixed);
std::cout.precision(5);
double cpu_time = (finish - start) /(double) CLOCKS_PER_SEC;
std::cout << "CPU " << cpu_time << " seconds.\n";
start = clock();
saxpy_openmp(n, a, x, incx, y, incy);
finish = clock();
double cpu_openmp_time = (finish - start) /(double) CLOCKS_PER_SEC;
std::cout << "CPU_openmp " << cpu_openmp_time << " seconds.\n";
start = clock();
saxpy_gpu(n, a, dev_x, incx, dev_y, incy);
cudaThreadSynchronize();
finish = clock();
double gpu_time = (float) (finish - start) / (double)CLOCKS_PER_SEC;
std::cout << "GPU " << gpu_time << " seconds.\n";
cudaMemcpy(y_verify, dev_y, n * incy * sizeof(float), cudaMemcpyDeviceToHost);
double diff = 0;
for (int i = 0; i < n * incy; ++i)
diff += (y[i] - y_verify[i]) * (y[i] - y_verify[i]);
if (diff < 1e-4f)
std::cout << "Correct.\n";
else
std::cout << "Correct.\n";
////
//// а теперь тоже самое для double
////
//double * xd, * yd, * dev_xd, * dev_yd, * y_verifyd;
//const float ad = float(rand()) / RAND_MAX;
// xd = new double[n * incx];
//yd = new double[n * incy];
//y_verifyd = new double[n * incy];
//
//cudaMalloc((void **) &dev_xd, n * incx * sizeof(double));
// cudaMalloc((void **) &dev_yd, n * incy * sizeof(double));
// for (int i = 0; i < n * incx; ++i)
// xd[i] = double(rand()) / RAND_MAX;
// for (int i = 0; i < n * incy; ++i)
// yd[i] = double(rand()) / RAND_MAX;
// cudaMemcpy(dev_xd, xd, n * incx * sizeof(double), cudaMemcpyHostToDevice);
// cudaMemcpy(dev_yd, yd, n * incy * sizeof(double), cudaMemcpyHostToDevice);
//start = clock();
// for (int i = 0; i < iterations; ++i)
// daxpy(n, ad, xd, incx, yd, incy);
// finish = clock();
// cpu_time = (finish - start) / CLOCKS_PER_SEC;
// std::cout << "CPU " << cpu_time << " seconds.\n";
//start = clock();
// for (int i = 0; i < iterations; ++i)
// daxpy_gpu(n, ad, dev_xd, incx, dev_yd, incy);
//
// cudaThreadSynchronize();
// finish = clock();
// gpu_time = (float) (finish - start) / CLOCKS_PER_SEC;
// std::cout << "GPU " << gpu_time << " seconds.\n";
//.........这里部分代码省略.........
开发者ID:ViktorRoy94,项目名称:Completed-projects,代码行数:101,代码来源:main.cpp
示例18: main
main()
{
int i,n=N;
printf("isamax = %d\n",isamax(n,sx,1));
printf("isamax = %d\n",isamax(n/2,sx,2));
printf("isamax = %d\n",isamax(n,sy,1));
printf("sasum = %g\n",sasum(n,sx,1));
printf("sasum = %g\n",sasum(n/2,sx,2));
printf("sasum = %g\n",sasum(n,sy,1));
printf("snrm2 = %g\n",snrm2(n,sx,1));
printf("snrm2 = %g\n",snrm2(n/2,sx,2));
printf("snrm2 = %g\n",snrm2(n,sy,1));
printf("sdot = %g\n",sdot(n,sx,1,sy,1));
printf("sdot = %g\n",sdot(n/2,sx,2,sy,2));
printf("sdot = %g\n",sdot(n/2,sx,-2,sy,2));
printf("sdot = %g\n",sdot(n,sy,1,sy,1));
printf("sscal\n");
sscal(n,2.0,sx,1);
pvec(n,sx);
sscal(n,0.5,sx,1);
pvec(n,sx);
sscal(n/2,2.0,sx,2);
pvec(n,sx);
sscal(n/2,0.5,sx,2);
pvec(n,sx);
printf("sswap\n");
sswap(n,sx,1,sy,1);
pvec(n,sx); pvec(n,sy);
sswap(n,sy,1,sx,1);
pvec(n,sx); pvec(n,sy);
sswap(n/2,sx,1,sx+n/2,-1);
pvec(n,sx);
sswap(n/2,sx,1,sx+n/2,-1);
pvec(n,sx);
sswap(n/2,sx,2,sy,2);
pvec(n,sx); pvec(n,sy);
sswap(n/2,sx,2,sy,2);
pvec(n,sx); pvec(n,sy);
printf("saxpy\n");
saxpy(n,2.0,sx,1,sy,1);
pvec(n,sx); pvec(n,sy);
saxpy(n,-2.0,sx,1,sy,1);
pvec(n,sx); pvec(n,sy);
saxpy(n/2,2.0,sx,2,sy,2);
pvec(n,sx); pvec(n,sy);
saxpy(n/2,-2.0,sx,2,sy,2);
pvec(n,sx); pvec(n,sy);
saxpy(n/2,2.0,sx,-2,sy,1);
pvec(n,sx); pvec(n,sy);
saxpy(n/2,-2.0,sx,-2,sy,1);
pvec(n,sx); pvec(n,sy);
printf("scopy\n");
scopy(n/2,sx,2,sy,2);
pvec(n,sx); pvec(n,sy);
scopy(n/2,sx+1,2,sy+1,2);
pvec(n,sx); pvec(n,sy);
scopy(n/2,sx,2,sy,1);
pvec(n,sx); pvec(n,sy);
scopy(n/2,sx+1,-2,sy+n/2,-1);
pvec(n,sx); pvec(n,sy);
}
开发者ID:JOravetz,项目名称:SeisUnix,代码行数:69,代码来源:blast.c
示例19: MocComputeKWayPartitionParams
/*************************************************************************
* This function computes the initial id/ed
**************************************************************************/
void MocComputeKWayPartitionParams(CtrlType *ctrl, GraphType *graph, int nparts)
{
int i, j, k, l, nvtxs, ncon, nbnd, mincut, me, other;
idxtype *xadj, *adjncy, *adjwgt, *where, *bndind, *bndptr;
RInfoType *rinfo, *myrinfo;
EDegreeType *myedegrees;
float *nvwgt, *npwgts;
nvtxs = graph->nvtxs;
ncon = graph->ncon;
xadj = graph->xadj;
nvwgt = graph->nvwgt;
adjncy = graph->adjncy;
adjwgt = graph->adjwgt;
where = graph->where;
npwgts = sset(ncon*nparts, 0.0, graph->npwgts);
bndind = graph->bndind;
bndptr = idxset(nvtxs, -1, graph->bndptr);
rinfo = graph->rinfo;
/*------------------------------------------------------------
/ Compute now the id/ed degrees
/------------------------------------------------------------*/
ctrl->wspace.cdegree = 0;
nbnd = mincut = 0;
for (i=0; i<nvtxs; i++) {
me = where[i];
saxpy(ncon, 1.0, nvwgt+i*ncon, 1, npwgts+me*ncon, 1);
myrinfo = rinfo+i;
myrinfo->id = myrinfo->ed = myrinfo->ndegrees = 0;
myrinfo->edegrees = NULL;
for (j=xadj[i]; j<xadj[i+1]; j++) {
if (me != where[adjncy[j]])
myrinfo->ed += adjwgt[j];
}
myrinfo->id = graph->adjwgtsum[i] - myrinfo->ed;
if (myrinfo->ed > 0)
mincut += myrinfo->ed;
if (myrinfo->ed-myrinfo->id >= 0)
BNDInsert(nbnd, bndind, bndptr, i);
/* Time to compute the particular external degrees */
if (myrinfo->ed > 0) {
myedegrees = myrinfo->edegrees = ctrl->wspace.edegrees+ctrl->wspace.cdegree;
ctrl->wspace.cdegree += xadj[i+1]-xadj[i];
for (j=xadj[i]; j<xadj[i+1]; j++) {
other = where[adjncy[j]];
if (me != other) {
for (k=0; k<myrinfo->ndegrees; k++) {
if (myedegrees[k].pid == other) {
myedegrees[k].ed += adjwgt[j];
break;
}
}
if (k == myrinfo->ndegrees) {
myedegrees[myrinfo->ndegrees].pid = other;
myedegrees[myrinfo->ndegrees++].ed = adjwgt[j];
}
}
}
ASSERT(myrinfo->ndegrees <= xadj[i+1]-xadj[i]);
}
}
graph->mincut = mincut/2;
graph->nbnd = nbnd;
}
开发者ID:cran,项目名称:BigQuic,代码行数:79,代码来源:mkwayrefine.c
示例20: sqrsl
//.........这里部分代码省略.........
/* determine what is to be computed */
cqy = job/10000!=0;
cqty = job%10000!=0;
cb = (job%1000)/100!=0;
cr = (job%100)/10!=0;
cxb = job%10!=0;
ju = MIN(k,n-1);
/* special action when n=1 */
if (ju==0) {
if (cqy) qy[0] = y[0];
if (cqty) qty[0] = y[0];
if (cxb) xb[0] = y[0];
if (cb) {
if (x[0][0]==0.0)
*info = 1;
else
b[0] = y[0]/x[0][0];
}
if (cr) rsd[0] = 0.0;
return;
}
/* set up to compute Qy or Q'y */
if (cqy) scopy(n,y,1,qy,1);
if (cqty) scopy(n,y,1,qty,1);
if (cqy) {
/* compute Qy */
for (j=ju-1; j>=0; j--) {
if (qraux[j]!=0.0) {
temp = x[j][j];
x[j][j] = qraux[j];
t = -sdot(n-j,&x[j][j],1,&qy[j],1)/x[j][j];
saxpy(n-j,t,&x[j][j],1,&qy[j],1);
x[j][j] = temp;
}
}
}
if (cqty) {
/* compute Q'y */
for (j=0; j<ju; j++) {
if (qraux[j]!=0.0) {
temp = x[j][j];
x[j][j] = qraux[j];
t = -sdot(n-j,&x[j][j],1,&qty[j],1)/x[j][j];
saxpy(n-j,t,&x[j][j],1,&qty[j],1);
x[j][j] = temp;
}
}
}
/* set up to compute b, rsd, or xb */
if (cb) scopy(k,qty,1,b,1);
if (cxb) scopy(k,qty,1,xb,1);
if (cr && k<n) scopy(n-k,&qty[k],1,&rsd[k],1);
if (cxb && k<n)
for (i=k; i<n; i++)
xb[i] = 0.0;
if (cr)
for (i=0; i<k; i++)
rsd[i] = 0.0;
if (cb) {
/* compute b */
for (j=k-1; j>=0; j--) {
if (x[j][j]==0.0) {
*info = j;
break;
}
b[j] /= x[j][j];
if (j!=0) {
t = -b[j];
saxpy(j,t,x[j],1,b,1);
}
}
}
if (cr || cxb) {
/* compute rsd or xb as requested */
for (j=ju-1; j>=0; j--) {
if (qraux[j]!=0.0) {
temp = x[j][j];
x[j][j] = qraux[j];
if (cr) {
t = -sdot(n-j,&x[j][j],1,&rsd[j],1)/
x[j][j];
saxpy(n-j,t,&x[j][j],1,&rsd[j],1);
}
if (cxb) {
t = -sdot(n-j,&x[j][j],1,&xb[j],1)/
x[j][j];
saxpy(n-j,t,&x[j][j],1,&xb[j],1);
}
x[j][j] = temp;
}
}
}
}
开发者ID:gwowen,项目名称:seismicunix,代码行数:101,代码来源:sqr.c
注:本文中的saxpy函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。 |
请发表评论