• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    公众号

C++ ATL_AlignPtr函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了C++中ATL_AlignPtr函数的典型用法代码示例。如果您正苦于以下问题:C++ ATL_AlignPtr函数的具体用法?C++ ATL_AlignPtr怎么用?C++ ATL_AlignPtr使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了ATL_AlignPtr函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: cblas_cher2

void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                 const int N, const void *alpha,
                 const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   void *vx, *vy;
   float *x0, *y0;
   const float *x=X, *y=Y, *alp=alpha;
   const float one[2]={ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (lda < N || lda < 1)
      info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cher2", "");
      return;
   }
#endif

   if (incX < 0) x += (1-N)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_cher2(Uplo, N, alpha, x, incX, y, incY, A, lda);
   else if (alp[0] != ATL_rzero || alp[1] != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx != NULL && vy != NULL);
      x0 = ATL_AlignPtr(vx);
      y0 = ATL_AlignPtr(vy);
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);
      ATL_ccopyConj(N, x, incX, x0, 1);
      ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                N, one, y0, 1, x0, 1, A, lda);
      free(vx);
      free(vy);
   }
   else ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                  N, alpha, y, incY, x, incX, A, lda);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:56,代码来源:cblas_cher2.c


示例2: Mjoin

/*
 * herk variant (NoTrans case).  When K does not exceed HERK_Xover the
 * reference routine is used directly.  Otherwise CgemmNC computes into an
 * aligned N x N temporary (real alpha, zero beta) and a her_put variant,
 * picked from the value of beta, is called with the temporary and C.
 * Only the first TYPE behind valpha and vbeta is read.
 */
void Mjoin(Mjoin(Mjoin(PATL,herk),UploNM),N)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   const TYPE czero[2] = {0.0, 0.0};
   const TYPE rbeta = *( (const TYPE *)vbeta  );
   TYPE calpha[2];
   TYPE *prod;
   void *vc;

   calpha[0] = *( (const TYPE *)valpha );
   if (!(K > HERK_Xover))
   {
      Mjoin(PATL,refherk)(Uplo_, AtlasNoTrans, N, K, *calpha, A, lda,
                          rbeta, C, ldc);
      return;
   }

   calpha[1] = 0.0;
/*
 * vc keeps its name because ATL_assert may print its argument text
 */
   vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
   ATL_assert(vc);
   prod = ATL_AlignPtr(vc);
   CgemmNC(N, N, K, calpha, A, lda, A, lda, czero, prod, N);
   if ( rbeta == 1.0 )
      Mjoin(her_put,_b1)(N, prod, vbeta, C, ldc);
   else if ( rbeta == 0.0 )
      Mjoin(her_put,_b0)(N, prod, vbeta, C, ldc);
   else
      Mjoin(her_put,_bXi0)(N, prod, vbeta, C, ldc);
   free(vc);
}
开发者ID:certik,项目名称:vendor,代码行数:26,代码来源:ATL_herk_N.c


示例3: Mjoin

      /*
       * Attempts the update through a full N x N temporary: ATL_ammm is
       * called with valpha and a zero beta so that its result lands in the
       * aligned workspace c, and a her2k_put variant chosen from the value
       * of beta is then called with c and C.
       * Returns 1, before C is used, if the workspace size exceeds
       * ATL_MaxMalloc or malloc fails; returns 0 otherwise.
       * NOTE(review): the #if directives matching the two #endif lines
       * below (and whatever alternatives they select) are outside this
       * excerpt.
       */
      int Mjoin(PATL,her2kLN)
   #endif
#endif
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   int i;
   void *vc=NULL;
   TYPE *c;
   /* only the first TYPE behind vbeta is read */
   const TYPE beta =*( (const TYPE *)vbeta  );
   const TYPE zero[2]={0.0, 0.0};

   /* workspace size handed to malloc; NOTE(review): computed in int */
   i = ATL_MulBySize(N)*N;
   if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);
   /* vc is still NULL when the size test failed, so both failures exit here */
   if (vc == NULL) return(1);
   c = ATL_AlignPtr(vc);
   /* the two branches differ only in which operand is conjugate-transposed */
   #ifdef Transpose_
      ATL_ammm(AtlasConjTrans, AtlasNoTrans, N, N, K, valpha, A, lda, B, ldb,
   #else
      ATL_ammm(AtlasNoTrans, AtlasConjTrans, N, N, K, valpha, A, lda, B, ldb,
   #endif
               zero, c, N);
   /* choose the put routine from beta: 1, 0, or anything else */
   if ( beta == 1.0 ) Mjoin(her2k_put,_b1)(N, c, vbeta, C, ldc);
   else if ( beta == 0.0 ) Mjoin(her2k_put,_b0)(N, c, vbeta, C, ldc);
   else Mjoin(her2k_put,_bXi0)(N, c, vbeta, C, ldc);
   free(vc);
   return(0);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:28,代码来源:ATL_her2k.c


示例4: Mjoin

/*
 * Left-side symm variant.  When N exceeds SYMM_Xover, A is expanded by a
 * sycopy routine into an aligned M x M temporary and CgemmTN is called on
 * it; otherwise the reference symm routine is called.
 * In the real (TREAL) build alpha/beta are loaded as scalars and alpha is
 * applied during the copy; in the other build alpha/beta are macros that
 * alias valpha/vbeta (they remain defined after this function).
 */
void Mjoin(Mjoin(PATL,symmL),UploNM)
(const int M, const int N, const void *valpha, const void *A, const int lda,
 const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
#ifdef TREAL
    const SCALAR alpha=*( (const SCALAR *)valpha );
    const SCALAR beta =*( (const SCALAR *)vbeta  );
    const SCALAR one=1.0;
#else
#define alpha valpha
#define beta vbeta
#endif
    TYPE *a;
    void *va;

    if (N > SYMM_Xover)
    {
        /* raw allocation; a is the pointer ATL_AlignPtr derives from it */
        va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
        ATL_assert(va);
        a = ATL_AlignPtr(va);
#ifdef TREAL
        /* copy routine is selected by whether alpha is one (_a1 vs _aX) */
        if ( SCALAR_IS_ONE(alpha) )
            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(M, alpha, A, lda, a);
        else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(M, alpha, A, lda, a);
        /* alpha was passed to the copy, so the multiply uses one */
        CgemmTN(M, N, M, one, a, M, B, ldb, beta, C, ldc);
#else
        Mjoin(Mjoin(PATL,sycopy),UploNM)(M, A, lda, a);
        CgemmTN(M, N, M, valpha, a, M, B, ldb, vbeta, C, ldc);
#endif
        free(va);
    }
    else Mjoin(PATL,refsymm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                                 beta, C, ldc);
}
开发者ID:Leobin7,项目名称:Kaldi,代码行数:34,代码来源:ATL_symmL.c


示例5: Mjoin

   /*
    * Attempts the update through a full N x N temporary: CgemmTN is called
    * with alpha and a zero beta so that its result lands in the aligned
    * workspace c, and a syr2k_put variant chosen from beta is then called
    * with c and C.
    * Returns 1, before C is used, if the workspace size exceeds
    * ATL_MaxMalloc or malloc fails; returns 0 otherwise.
    * NOTE(review): the #if matching the #endif below is outside this
    * excerpt.
    */
   int Mjoin(PATL,syr2kLT)
#endif
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   int i;
   void *vc=NULL;
   TYPE *c;
   /* real build: scalars by value; other build: alpha aliases valpha */
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   /* workspace size handed to malloc; NOTE(review): computed in int */
   i = ATL_MulBySize(N)*N;
   if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);
   /* vc is still NULL when the size test failed, so both failures exit here */
   if (vc == NULL) return(1);
   c = ATL_AlignPtr(vc);
   CgemmTN(N, N, K, alpha, A, lda, B, ldb, zero, c, N);
   /* pick the put routine from beta; the TCPLX build has two extra cases */
   if ( SCALAR_IS_ONE(beta) ) Mjoin(syr2k_put,_b1)(N, c, beta, C, ldc);
   else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr2k_put,_b0)(N, c, beta, C, ldc);
   #ifdef TCPLX
      else if (SCALAR_IS_NONE(beta)) Mjoin(syr2k_put,_bn1)(N, c, beta, C, ldc);
      else if (beta[1] == *zero) Mjoin(syr2k_put,_bXi0)(N, c, beta, C, ldc);
   #endif
   else Mjoin(syr2k_put,_bX)(N, c, beta, C, ldc);
   free(vc);
   return(0);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:33,代码来源:ATL_syr2k_T.c


示例6: Mjoin

/*
 * syrk variant (Trans case).  When K exceeds SYRK_Xover, CgemmTN is called
 * with alpha and a zero beta so that its result lands in an aligned N x N
 * temporary, and a syr_put variant chosen from beta is called with the
 * temporary and C; otherwise the reference syrk routine is called.
 * In the non-TREAL build alpha is a macro aliasing valpha (it remains
 * defined after this function).
 */
void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *vc;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   if (K > SYRK_Xover)
   {
      /* raw allocation; c is the pointer ATL_AlignPtr derives from it */
      vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
      ATL_assert(vc);
      c = ATL_AlignPtr(vc);
      /* A is passed as both operands */
      CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, c, N);
      /* pick the put routine from beta; the TCPLX build has two extra cases */
      if ( SCALAR_IS_ONE(beta) ) Mjoin(syr_put,_b1)(N, c, beta, C, ldc);
      else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr_put,_b0)(N, c, beta, C, ldc);
      #ifdef TCPLX
         else if ( SCALAR_IS_NONE(beta) )
            Mjoin(syr_put,_bn1)(N, c, beta, C, ldc);
         else if (beta[1] == *zero) Mjoin(syr_put,_bXi0)(N, c, beta, C, ldc);
      #endif
      else Mjoin(syr_put,_bX)(N, c, beta, C, ldc);
      free(vc);
   }
   else Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda,
                            beta, C, ldc);
}
开发者ID:certik,项目名称:vendor,代码行数:35,代码来源:ATL_syrk_T.c


示例7: Mjoin

/*
 * Right-side symm variant.  When M exceeds SYMM_Xover, A is expanded by a
 * sycopy routine into an aligned N x N temporary and ATL_ammm is called with
 * B as the first operand and the temporary as the second; otherwise the
 * reference symm routine is called.
 * In the real (TREAL) build alpha is applied during the copy and the multiply
 * uses one; in the other build alpha/beta are macros aliasing valpha/vbeta
 * (they remain defined after this function).
 */
void Mjoin(Mjoin(PATL,symmR),UploNM)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta  );
      const SCALAR one=1.0;
   #else
      #define alpha valpha
      #define beta  vbeta
   #endif
   void *va;
   TYPE *a;

   if (M > SYMM_Xover)
   {
      /* raw allocation; a is the pointer ATL_AlignPtr derives from it */
      va = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         /* copy routine is selected by whether alpha is one (_a1 vs _aX) */
         if ( SCALAR_IS_ONE(alpha) )
            Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(N, alpha, A, lda, a);
         else Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(N, alpha, A, lda, a);
         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, one, B, ldb, a, N, beta, C, ldc);
      #else
         Mjoin(Mjoin(PATL,sycopy),UploNM)(N, A, lda, a);
         ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, valpha, B, ldb, a, N, vbeta, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,refsymm)(AtlasRight, Uplo_, M, N, alpha, A, lda, B, ldb,
                            beta, C, ldc);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:34,代码来源:ATL_symmR.c


示例8: Mjoin

/*
 * Left-side trmm variant.  When N exceeds TRMM_Xover, A is copied by a
 * trcopy routine into an aligned M x M temporary and CAgemmTN is called with
 * C as both the second operand and the output (zero beta); otherwise the
 * reference trmm routine is called.
 * In the real (TREAL) build alpha is applied during the copy and the multiply
 * uses one; in the other build alpha is a macro aliasing valpha (it remains
 * defined after this function).
 */
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      #define alpha valpha
   #endif
   void *va;
   TYPE *a;

   if (N > TRMM_Xover)
   {
      /* raw allocation; a is the pointer ATL_AlignPtr derives from it */
      va = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);
      ATL_assert(va);
      a = ATL_AlignPtr(va);
      #ifdef TREAL
         /* copy routine is selected by whether alpha is one (_a1 vs _aX) */
         if ( SCALAR_IS_ONE(alpha) ) Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);
         else Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);
         CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);
      #else
         ATL_trcopy(M, A, lda, a);
         CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);
      #endif
      free(va);
   }
   else Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                            A, lda, C, ldc);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:32,代码来源:ATL_trmmL.c


示例9: cblas_zgerc

/*
 * C interface to the double-precision complex conjugated rank-1 update.
 * Unless NoCblasErrorChecks is defined the arguments are validated first and
 * any problem is reported through cblas_errprn/cblas_xerbla, after which the
 * routine returns without calling the kernel.
 * Column-major input goes straight to ATL_zgerc.  For row-major input Y is
 * run through ATL_zmoveConj (with alpha) into an aligned unit-stride
 * temporary, and ATL_zgeru is called with M/N and the vector roles exchanged
 * and a unit scalar.
 */
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
                 const void *alpha, const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   const double *x = X, *y = Y;
   void *vy;
   double *y0;
   double one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0) info = cblas_errprn(2, info,
                        "M cannot be less than zero; is set to %d.", M);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
/*
 *    The value printed here is N, so the label must say N (it used to say M)
 */
      if (lda < N || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zgerc", "");
      return;
   }
#endif

/*
 * For a negative stride, shift the base pointer by (1-len)*inc complex
 * elements (two doubles each)
 */
   if (incX < 0) x += (1-M)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;

   if (Order == CblasColMajor)
      ATL_zgerc(M, N, alpha, x, incX, y, incY, A, lda);
   else
   {
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      ATL_zmoveConj(N, alpha, y, incY, y0, 1);
      ATL_zgeru(N, M, one, y0, 1, x, incX, A, lda);
      free(vy);
   }
}
开发者ID:GorgonCryoEM,项目名称:Gorgon-CVS,代码行数:56,代码来源:cblas_zgerc.c


示例10: cblas_zher

void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                const int N, const double alpha,
                const void *X, const int incX, void *A, const int lda)
{
   int info = 2000;
   void *vx;
   double one[2] = {ATL_rone, ATL_rzero};
   double *x0;
   const double *x=X;

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (lda < N || lda < 1)
      info = cblas_errprn(8, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zher", "");
      return;
   }
#endif

   if (incX < 0) x += (1-N)*incX<<1;

   if (Order == CblasColMajor)
      ATL_zher(Uplo, N, alpha, x, incX, A, lda);
   else if (alpha != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx);
      x0 = ATL_AlignPtr(vx);
      ATL_zmoveConj(N, one, x, incX, x0, 1);
      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
               N, alpha, x0, 1, A, lda);
      free(vx);
   }
   else
      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
               N, ATL_rzero, x, incX, A, lda);
}
开发者ID:apollos,项目名称:atlas,代码行数:49,代码来源:cblas_zher.c


示例11: ATL_flushcache

double ATL_flushcache(long long size)
/*
 * Manages a private buffer whose contents are read in order to flush the
 * cache.  The sign of size selects the operation:
 *    size > 0  : allocate a buffer of size bytes (plus alignment slack) and
 *                zero it with ATL_dzero; a buffer left over from an earlier
 *                initialization is released first so it cannot leak
 *    size == 0 : free the buffer
 *    size < 0  : read every double in the buffer and accumulate the sum
 * RETURNS: the accumulated sum for size < 0, 0.0 otherwise.
 *
 * flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers
 */
{
  static void *vp=NULL;        /* raw malloc pointer, kept between calls */
  static long long N = 0;      /* number of doubles in the buffer */
  double *cache;
  double dret=0.0;
  long long i;

  if (size < 0) /* flush cache */
  {
     ATL_assert(vp);
     cache = ATL_AlignPtr(vp);
     for (i=0; i < N; i++) dret += cache[i];
  }
  else if (size > 0) /* initialize */
  {
     free(vp);  /* free(NULL) is a no-op; avoids leaking on re-initialize */
     vp = malloc(ATL_Cachelen + size);
     ATL_assert(vp);
     N = size / sizeof(double);
     cache = ATL_AlignPtr(vp);
     ATL_dzero(N, cache, 1);
  }
  else /* size == 0: free cache */
  {
     free(vp);
     vp = NULL;
     N = 0;
  }
  return(dret);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:36,代码来源:ATL_flushcache.c


示例12: Mjoin

/*
 * Left-side hemm variant.  When N does not exceed HEMM_Xover the reference
 * hemm routine is called.  Otherwise A is expanded by the hecopy routine
 * into an aligned M x M temporary and ATL_ammm is called with that temporary
 * as the first operand and B as the second.
 */
void Mjoin(Mjoin(PATL,hemmL),UploNM)
   (const int M, const int N, const void *alpha, const void *A, const int lda,
    const void *B, const int ldb, const void *beta, void *C, const int ldc)
{
   void *va;
   TYPE *dense;

   if (!(N > HEMM_Xover))
   {
      Mjoin(PATL,refhemm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

/*
 * va keeps its name because ATL_assert may print its argument text
 */
   va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
   ATL_assert(va);
   dense = ATL_AlignPtr(va);
   Mjoin(Mjoin(PATL,hecopy),UploNM)(M, A, lda, dense);
   ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, M, alpha, dense, M, B, ldb,
            beta, C, ldc);
   free(va);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:20,代码来源:ATL_hemmL.c


示例13: ATL_ptflushcache

double ATL_ptflushcache(long long size)
/*
 * Threaded counterpart of the cache flusher: manages one private buffer that
 * is split into ATL_NTHREADS pieces of N doubles each.  The sign of size
 * selects the operation:
 *    size > 0  : allocate size*ATL_NTHREADS bytes (plus alignment slack) and
 *                zero them with ATL_dzero; a buffer left over from an earlier
 *                initialization is released first so it cannot leak
 *    size == 0 : free the buffer
 *    size < 0  : give each thread its piece and run ATL_DoWorkFC on the
 *                pieces through ATL_goparallel
 * RETURNS: 0.0 in every case (dret is never updated here).
 *
 * flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers
 */
{
    static void *vp=NULL;         /* raw malloc pointer, kept between calls */
    static double *cache=NULL;    /* aligned pointer derived from vp */
    static long long N = 0;       /* doubles per thread */
    double dret=0.0;
    long long i;
    ATL_FC fct[ATL_NTHREADS];

    if (size < 0) /* flush cache */
    {
        ATL_assert(cache);
        for (i=0; i < ATL_NTHREADS; i++)
        {
            fct[i].N = N;
            fct[i].dp = cache+i*N;
        }
        ATL_goparallel(ATL_NTHREADS, ATL_DoWorkFC, fct, NULL);
    }
    else if (size > 0) /* initialize */
    {
        free(vp);  /* free(NULL) is a no-op; avoids leaking on re-initialize */
        vp = malloc(ATL_Cachelen + (size * ATL_NTHREADS));
        ATL_assert(vp);
        cache = ATL_AlignPtr(vp);
        N = size / sizeof(double);
        ATL_dzero(N*ATL_NTHREADS, cache, 1);
    }
    else /* size == 0: free cache */
    {
        free(vp);
        vp = cache = NULL;
        N = 0;
    }
    return(dret);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:40,代码来源:ATL_ptflushcache.c


示例14: Mjoin

/*
 * Right-side trsm dispatcher; the path taken is fixed at compile time.
 *  - TREAL with Transpose_ or ConjTrans_: when M > 4*N, A is copied by a
 *    trcopy routine into an aligned N x N temporary and the trsmKR kernel
 *    (ATLPt suffix) is called on it; otherwise the reference trsm is called.
 *  - TREAL without those macros: the trsmKR kernel (ATLP suffix) is called
 *    on A directly.
 *  - not TREAL: CtrsmKR is called when M > 4*N and N <= 4, otherwise the
 *    reference trsm is called.
 */
void Mjoin(Mjoin(PATL,trsmR),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   const TYPE *alpha=valpha;
#ifdef TREAL
   #if defined(Transpose_) || defined(ConjTrans_)
      if ( M > (N<<2) )
      {
         void *va;
	 TYPE *a;

         /* raw allocation; a is the pointer ATL_AlignPtr derives from it */
         va = malloc(ATL_Cachelen + (ATL_MulBySize(N*N)));
         ATL_assert(va);
         a = ATL_AlignPtr(va);
         /* this region is already inside #ifdef TREAL, so only the first
            branch of the nested test below can be compiled */
         #ifdef TREAL
            Mjoin(ATL_trcopy,_a1)(N, ATL_rone, A, lda, a);
         #else
            ATL_trcopy(N, A, lda, a);
         #endif
         Mjoin(Mjoin(PATL,trsmKR),ATLPt)(M, N, *alpha, a, N, C, ldc);
         free(va);
      }
      else Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, *alpha,
                               A, lda, C, ldc);
   #else
      Mjoin(Mjoin(PATL,trsmKR),ATLP)(M, N, *alpha, A, lda, C, ldc);
   #endif
#else
   /* alpha is passed as a pointer in this build */
   if (M > (N<<2) && N <= 4)
      Mjoin(PATL,CtrsmKR)(Uplo_, Trans_, Unit_, M, N, valpha, A, lda, C, ldc);
   else
      Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);
#endif
}
开发者ID:onenoc,项目名称:QuantSoftwareToolkit,代码行数:36,代码来源:ATL_trsmR.c


示例15: Mjoin

/*
 * Convenience wrapper around the ormrq_wrk routine: asks it for its
 * workspace size (call with length -1, answer read from work[0]), allocates
 * an aligned buffer of that many elements, runs the real computation with
 * it, and releases the buffer.
 * RETURNS: whatever the second ormrq_wrk call returns.
 */
int Mjoin(PC2F,ormrq)
   (const enum CBLAS_SIDE Side, const enum CBLAS_TRANSPOSE TA,
    ATL_CINT M, ATL_CINT N, ATL_CINT K, TYPE *A, ATL_CINT lda, TYPE *TAU,
    TYPE *C, ATL_CINT ldc)
{
   TYPE work[2];
   TYPE *aligned;
   void *vp;
   ATL_INT nwork;
   int status;

/*
 * Workspace query; the asserted expression is kept verbatim because
 * ATL_assert may print its argument text
 */
   ATL_assert(!Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU, C, ldc,
                                     work, -1));
   nwork = work[0];

   vp = malloc(ATL_MulBySize(nwork) + ATL_Cachelen);
   ATL_assert(vp);
   aligned = ATL_AlignPtr(vp);

   status = Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU, C, ldc,
                                  aligned, nwork);
   free(vp);
   return(status);
}
开发者ID:apollos,项目名称:atlas,代码行数:24,代码来源:ATL_C2Formrq.c


示例16: clapack_sgetri

/*
 * C LAPACK-style wrapper that allocates workspace for ATL_getri and calls it.
 * The workspace length is min(GetNB(), N) * N elements.
 * RETURNS: the value returned by ATL_getri, or -7 (after reporting through
 * cblas_xerbla) when the workspace cannot be allocated.
 */
int clapack_sgetri(const enum CBLAS_ORDER Order, const int N, float *A,
                   const int lda, const int *ipiv)
{
   int Mjoin(PATL,GetNB)();
   int status = 0;
   int lwrk;
   void *vp;

   lwrk = Mjoin(PATL,GetNB)();
   lwrk = ((lwrk <= N) ? lwrk : N) * N;

   vp = malloc(ATL_Cachelen + ATL_MulBySize(lwrk));
   if (!vp)
   {
      cblas_xerbla(7, "clapack_sgetri",
                   "Cannot allocate workspace of %d\n", lwrk);
      return(-7);
   }

   status = ATL_getri(Order, N, A, lda, ipiv, ATL_AlignPtr(vp), &lwrk);
   free(vp);
   return(status);
}
开发者ID:apollos,项目名称:atlas,代码行数:24,代码来源:clapack_sgetri.c


示例17: cblas_cger2c

/*
 * C interface to the single-precision complex conjugated rank-2 update,
 * which takes two vector pairs: (X, Y) scaled by alpha and (W, Z) scaled by
 * beta.  X and W have M elements, Y and Z have N elements.
 * Unless NoCblasErrorChecks is defined the arguments are validated first and
 * any problem is reported through cblas_errprn/cblas_xerbla, after which the
 * routine returns without calling the kernel.
 * Column-major input goes straight to ATL_cger2c.  For row-major input the
 * matrix is treated as its N x M column-major transpose: Y and Z are run
 * through ATL_cmoveConj (with alpha and beta respectively) into aligned
 * unit-stride temporaries, and ATL_cger2u is called with M/N exchanged, the
 * temporaries as the length-N vectors, X and W as the length-M vectors, and
 * unit scalars.
 */
void cblas_cger2c(const enum CBLAS_ORDER Order, ATL_CINT M, ATL_CINT N,
                 const void *alpha, const void *X, ATL_CINT incX,
                 const void *Y, ATL_CINT incY, const void *beta,
                 const void *W, ATL_CINT incW,
                 const void *Z, ATL_CINT incZ, void *A, ATL_CINT lda)
{
   int info = 2000;
   const float *x = X, *y = Y, *w = W, *z = Z;
   void *vy;
   float *y0, *z0;
   float one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0) info = cblas_errprn(2, info,
                        "M cannot be less than zero; is set to %d.", M);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                                  "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                                  "incY cannot be zero; is set to %d.", incY);
   if (!incW) info = cblas_errprn(11, info,
                                  "incW cannot be zero; is set to %d.", incW);
   if (!incZ) info = cblas_errprn(13, info,
                                  "incZ cannot be zero; is set to %d.", incZ);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(15, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
/*
 *    The value printed here is N, so the label must say N (it used to say M)
 */
      if (lda < N || lda < 1)
         info = cblas_errprn(15, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cger2c", "");
      return;
   }
#endif

/*
 * For a negative stride, shift the base pointer by (1-len)*inc complex
 * elements (two floats each)
 */
   if (incX < 0) x += (1-M)*incX<<1;
   if (incY < 0) y += (1-N)*incY<<1;
   if (incW < 0) w += (1-M)*incW<<1;
   if (incZ < 0) z += (1-N)*incZ<<1;

   if (Order == CblasColMajor)
      ATL_cger2c(M, N, alpha, x, incX, y, incY, beta, w, incW, z, incZ, A, lda);
   else
   {
/*
 *    One allocation holds both temporaries: two alignment pads plus 2*N
 *    complex elements
 */
      vy = malloc(ATL_Cachelen+ATL_Cachelen + ATL_MulBySize(N+N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
/*
 *    y0 holds N complex values = 2*N floats, so the second temporary must
 *    start at least N+N floats later or the two copies overlap
 */
      z0 = y0 + N+N;
      z0 = ATL_AlignPtr(z0);
/*
 *    Fold each pair's scalar into its conjugated copy: alpha goes with Y,
 *    beta goes with Z
 */
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);
      ATL_cmoveConj(N, beta, z, incZ, z0, 1);
/*
 *    With M and N exchanged the length-N vectors (y0, z0) take the X/W
 *    slots and the length-M vectors (x, w) take the Y/Z slots; both scalars
 *    are one because alpha and beta are already in the copies
 */
      ATL_cger2u(N, M, one, y0, 1, x, incX, one, z0, 1, w, incW, A, lda);
      free(vy);
   }
}
开发者ID:apollos,项目名称:atlas,代码行数:67,代码来源:cblas_cger2c.c


示例18: Mmin


//.........这里部分代码省略.........

   nb = clapack_ilaenv(LAIS_OPT_NB, LAormqr, MYOPT+LARight+LAUpper, M, N, K,-1);

/*
 * If it is a workspace query, return the size of work required.
 *    wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2
 */
   if (LWORK < 0)
   {
      if(SIDE == CblasLeft)
      {
         *WORK = ( N*nb + nb*nb + maxMN )  ;
      }
      else
      {
         *WORK = ( M*nb + nb*nb + maxMN )  ;
      }
      return(0);
   }
   else if (M < 1 || N < 1)                 /* quick return if no work to do  */
      return(0);
/*
 * If the user gives us too little space, see if we can allocate it ourselves
 */
   else
   {
      if(SIDE == CblasLeft)
      {
         if (LWORK < (N*nb + nb*nb + maxMN))
         {
            vp = malloc(ATL_MulBySize(N*nb + nb*nb + maxMN) + ATL_Cachelen);
            if (!vp)
               return(-7);
            WORK = ATL_AlignPtr(vp);
         }
      }
      else
      {
         if (LWORK < (M*nb + nb*nb + maxMN))
         {
            vp = malloc(ATL_MulBySize(M*nb + nb*nb + maxMN) + ATL_Cachelen);
            if (!vp)
               return(-7);
            WORK = ATL_AlignPtr(vp);
         }
      } /* if CblasRight */
   }

/*
 * Assign workspace areas for ATL_larft, ATL_geqr2, ATL_larfb
 */

   ws_T = WORK;                             /* T at begining of work          */
   ws_QR2 = WORK +(nb SHIFT)*nb;            /* After T Work space             */
   ws_larfb = ws_QR2 + (maxMN SHIFT);       /* After workspace for T and QR2  */


   if (SIDE == CblasLeft)
   {
      if ( TRANS == CblasNoTrans )
      {
         j = (K/nb)*nb;
         if (j == K)
         {
            j=K -nb;
         }
开发者ID:AIDman,项目名称:Kaldi,代码行数:67,代码来源:ATL_ormqr.c


示例19: Mjoin

int Mjoin(PATL,mmJKI)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,
                      const int M, const int N, const int K,
                      const SCALAR alpha, const TYPE *A, const int lda,
                      const TYPE *B, const int ldb, const SCALAR beta,
                      TYPE *C, const int ldc)
/*
 * This gemm is for small K, so we build gemm out of AXPY (outer product)
 * rather than dot (inner product).
 * The M dimension is processed in panels of at most Mp rows, each panel
 * being handed to the mm_axpy routine.  When TA is not AtlasNoTrans, each
 * panel of A is first copied (and scaled by alpha) into a malloc'd
 * workspace, and mm_axpy is then called with CONE in place of alpha.
 * Offsets are doubled throughout (k+k, mp+mp, <<1): two TYPE values per
 * element.
 * RETURNS: 0 on success, -1 if the workspace cannot be allocated.
 */
{
   int Mp, mp, m, k, ldaa=lda;
   void *vA=NULL;
   TYPE *pA;
   const TYPE CONE[2]={ATL_rone, ATL_rzero}, CNONE[2]={ATL_rnone, ATL_rzero};
   const SCALAR alp=alpha;

/*
 * Compute M partition necessary to promote reuse in the L1 cache.  Check
 * NB^2 in addition to L1elts, to catch machines where L1 is not used by FPU.
 * If this gives a small Mp, use CacheEdge instead (reuse in L2 instead of L1).
 */
   Mp = NB*NB;
   m = ATL_L1elts >> 1;
   Mp = (m > Mp) ? m : Mp;
   Mp /= ((K+2)<<1);
   if (Mp < 128)
   {
      #if !defined(CacheEdge) || CacheEdge == 0
         Mp = M;
      #else
         Mp = (CacheEdge) / ((K+2)*ATL_sizeof);
         if (Mp < 128)
            Mp = M;
      #endif
   }
   if (Mp > M)
      Mp = M;
/*
 * Change Mp if remainder is very small
 * NOTE(review): this branch is also taken when Mp == M; for M <= 16 the
 * subtraction below leaves Mp <= 0 (and M / Mp divides by zero at M == 16).
 * Presumably callers never pass such a small M -- confirm.
 */
   else
   {
      Mp -= 16;      /* small safety margin on filling cache */
      mp = M / Mp;
      m = M - mp*Mp;
      if (m && m < 32)
         Mp += (m+mp-1)/mp;
   }
/*
 * If A not in NoTrans format, need to copy so it can use axpy wt stride=1.
 * NOTE: this routine should not be called when you can't afford this copy
 */
   if (TA != AtlasNoTrans)
   {
      vA = malloc(ATL_Cachelen + Mp*ATL_MulBySize(K));
      if (!vA) return(-1);
      pA = ATL_AlignPtr(vA);
      alp = CONE;
      ldaa = Mp;
      /* pre-advance by one panel; the loop below backs up by the same amount
         before each copy and re-advances by mp+mp after each multiply */
      pA += Mp+Mp;
   }
   else
      pA = (TYPE *) A;
   for (m=0; m < M; m += Mp)
   {
      /* mp = rows in this panel (the last one may be short) */
      mp = M - m;
      if (mp > Mp)
         mp = Mp;
/*
 *    If the thing is in Trans format, copy to NoTrans
 */
      if (vA)
      {
         pA -= (Mp+Mp);
         if (TA == AtlasConjTrans)
         {
            for (k=0; k < K; k++)
            {
               /* copy one column, scale every second TYPE (starting at
                  offset 1) by ATL_rnone, then apply alpha unless it is one */
               Mjoin(PATL,copy)(mp, A+k+k, lda, pA+((k*ldaa)<<1), 1);
               Mjoin(PATLU,scal)(mp, ATL_rnone, pA+1+((k*ldaa)<<1), 2);
               if (!SCALAR_IS_ONE(alpha))
                  Mjoin(PATL,scal)(mp, alpha, pA+((k*ldaa)<<1), 1);
            }
         }
         else
         {
            /* plain transpose: copy and scale by alpha in one call */
            for (k=0; k < K; k++)
               Mjoin(PATL,cpsc)(mp, alpha, A+k+k, lda, pA+((k*ldaa)<<1), 1);
         }
         A += mp*(lda+lda);
      }
      Mjoin(PATL,mm_axpy)(AtlasNoTrans, TB, mp, N, K, alp, pA, ldaa, B, ldb,
                          beta, C, ldc);
      pA += mp+mp;
      C += mp+mp;
   }
   if (vA) free(vA);
   return(0);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:99,代码来源:ATL_cmmJKI.c


示例20: ATL_her

/*
 * Hermitian rank-1 update driver.  Returns immediately when N < 1 or alpha
 * equals ATL_rzero, and uses the reference routine for N < 50 or when the
 * workspace cannot be allocated.  Otherwise it obtains a rank-1 kernel from
 * ATL_GetR1Kern, builds a conjugated (and, unless alpha is ATL_rone, scaled)
 * copy of X in aligned workspace, optionally builds an aligned plain copy of
 * X as well, and calls her_kU or her_kL according to Uplo.
 */
void ATL_her(const enum ATLAS_UPLO Uplo, ATL_CINT N, const TYPE alpha,
               const TYPE *X, ATL_CINT incX, TYPE *A, ATL_CINT lda)
{
   /* alpha as a two-element value with ATL_rzero in the second slot */
   const TYPE calpha[2] = {alpha, ATL_rzero};
   void *vp=NULL;
   TYPE *x, *xt;
   ATL_r1kern_t gerk;
   ATL_INT CacheElts;
   const int ALP1 = (alpha == ATL_rone);
   int COPYX, COPYXt;
   int mu, nu, minM, minN, alignX, alignXt, FNU, ALIGNX2A;
   if (N < 1 || (alpha == ATL_rzero))
      return;
/*
 * For very small problems, avoid overhead of func calls & data copy
 */
   if (N < 50)
   {
      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);
      return;
   }
/*
 * Determine the GER kernel to use, and its parameters
 */
   gerk = ATL_GetR1Kern(N-ATL_s1L_NU, ATL_s1L_NU, A, lda, &mu, &nu,
                        &minM, &minN, &alignX, &ALIGNX2A, &alignXt,
                        &FNU, &CacheElts);
/*
 * Determine if we need to copy the vectors
 */
   COPYX = (incX != 1);
   if (!COPYX)  /* may still need to copy due to alignment issues */
   {
/*
 *    ATL_Cachelen is the highest alignment that can be requested, so
 *    make X's % with Cachelen match that of A if you want A & X to have
 *    the same alignment
 */
      if (ALIGNX2A)
      {
         /* copy when A and X differ in their offset within a Cachelen unit */
         size_t t1 = (size_t) A, t2 = (size_t) X;
         COPYX = (t1 - ATL_MulByCachelen(ATL_DivByCachelen(t1))) !=
                 (t2 - ATL_MulByCachelen(ATL_DivByCachelen(t2)));
      }
      else if (alignX)
      {
         /* copy when X's address is not a multiple of alignX */
         size_t t1 = (size_t) X;
         COPYX = ((t1/alignX)*alignX != t1);
      }
   }
   /* one vector (xt) always; room for a second one (x) when COPYX is set */
   vp = malloc((ATL_Cachelen+ATL_MulBySize(N))*(1+COPYX));
   if (!vp)
   {
      /* no workspace: fall back to the reference routine */
      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);
      return;
   }
   xt = ATL_AlignPtr(vp);
   if (COPYX)
   {
      /* x starts N+N TYPE values after xt, then is re-aligned */
      x = xt + N+N;
      x = ALIGNX2A ? ATL_Align2Ptr(x, A) : ATL_AlignPtr(x);
      Mjoin(PATL,copy)(N, X, incX, x, 1);
   }
   else
      x = (TYPE*) X;
   /* xt gets the conjugated copy; alpha is applied unless it is ATL_rone */
   if (ALP1)
      Mjoin(PATL,copyConj)(N, X, incX, xt, 1);
   else
      Mjoin(PATL,moveConj)(N, calpha, X, incX, xt, 1);
   if (Uplo == AtlasUpper)
      Mjoin(PATL,her_kU)(gerk, N, alpha, x, xt, A, lda);
   else
      Mjoin(PATL,her_kL)(gerk, N, alpha, x, xt, A, lda);
   if (vp)
     free(vp);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:76,代码来源:ATL_her.c



注:本文中的ATL_AlignPtr函数示例整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
C++ ATL_MulBySize函数代码示例发布时间:2022-05-30
下一篇:
C++ ATLVERIFY函数代码示例发布时间:2022-05-30
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap