本文整理汇总了C++中plasma_context_self函数的典型用法代码示例。如果您正苦于以下问题:C++ plasma_context_self函数的具体用法?C++ plasma_context_self怎么用?C++ plasma_context_self使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了plasma_context_self函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: PLASMA_dgecfi
/** ****************************************************************************
*
* @ingroup InPlaceTransformation
*
* PLASMA_dgecfi converts the matrix A in place from format f_in to
* format f_out
*
*******************************************************************************
*
* @param[in] m
* Number of rows of matrix A
*
* @param[in] n
* Number of columns of matrix A
*
* @param[in,out] A
* Matrix of size L*m*n
*
* @param[in] f_in
* Original format of the matrix A. Must be part of (PlasmaCM, PlasmaRM,
* PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
*
* @param[in] imb
* Number of rows of each block in original format
*
* @param[in] inb
* Number of columns of each block in original format
*
* @param[in] f_out
* Format requested for the matrix A. Must be part of (PlasmaCM, PlasmaRM,
* PlasmaCCRB, PlasmaCRRB, PlasmaRCRB, PlasmaRRRB)
*
* @param[in] omb
* Number of rows of each block in requested format
*
* @param[in] onb
* Number of columns of each block in requested format
*
*******************************************************************************
*
* @sa PLASMA_dgecfi_Async
*
******************************************************************************/
int PLASMA_dgecfi(int m, int n, double *A,
                  PLASMA_enum f_in,  int imb, int inb,
                  PLASMA_enum f_out, int omb, int onb)
{
    /*
     * Blocking driver: creates a private sequence, submits
     * PLASMA_dgecfi_Async() on it, waits in plasma_dynamic_sync() and
     * returns the status stored in the sequence.
     *
     * Returns PLASMA_ERR_NOT_INITIALIZED when plasma_context_self() yields
     * NULL, the status of plasma_sequence_create() when that call fails,
     * and sequence->status otherwise.
     */
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error(__func__, "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* sequence->status is dereferenced below, so a failed creation must
     * stop here instead of being ignored. */
    status = plasma_sequence_create(plasma, &sequence);
    if (status != PLASMA_SUCCESS)
        return status;

    PLASMA_dgecfi_Async(m, n, A,
                        f_in,  imb, inb,
                        f_out, omb, onb,
                        sequence, &request);

    /* Wait for the submitted work before reading the outcome. */
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:70,代码来源:dgecfi.c
示例2: plasma_pdaxpy_quark
/***************************************************************************//**
 *  Submits one QUARK_CORE_daxpy task per tile of A, pairing tile A(m,n) with
 *  tile B(m,n) (dynamic scheduling). Nothing is submitted when the sequence
 *  status is not PLASMA_SUCCESS.
 **/
void plasma_pdaxpy_quark(double alpha, PLASMA_desc A, PLASMA_desc B,
                         PLASMA_sequence *sequence, PLASMA_request *request)
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    plasma_context_t *plasma = plasma_context_self();
    int m, n;

    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task inserted below to the caller's sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < A.mt; m++) {
        int lda  = BLKLDD(A, m);
        int ldb  = BLKLDD(B, m);
        int rows = A.mb;

        /* The last tile row only holds what is left of A.m. */
        if (m == A.mt-1)
            rows = A.m - m*A.mb;

        for (n = 0; n < A.nt; n++) {
            int cols = A.nb;

            /* Same for the last tile column. */
            if (n == A.nt-1)
                cols = A.n - n*A.nb;

            QUARK_CORE_daxpy(
                plasma->quark, &task_flags,
                rows, cols, A.mb,
                alpha, A(m, n), lda,
                       B(m, n), ldb);
        }
    }
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:33,代码来源:pdaxpy.c
示例3: PLASMA_zgetri_Tile
/***************************************************************************//**
*
* @ingroup PLASMA_Complex64_t_Tile
*
* PLASMA_zgetri_Tile - Computes the inverse of a matrix using the LU factorization
* computed by PLASMA_zgetrf.
* This method inverts U and then computes inv(A) by solving the system
* inv(A)*L = inv(U) for inv(A).
* Tile equivalent of PLASMA_zgetri().
* Operates on matrices stored by tiles.
* All matrices are passed through descriptors.
* All dimensions are taken from the descriptors.
*
*******************************************************************************
*
* @param[in,out] A
* On entry, the triangular factor L or U from the
* factorization A = P*L*U as computed by PLASMA_zgetrf.
* On exit, if return value = 0, the inverse of the original
* matrix A.
*
* @param[in] IPIV
* The pivot indices that define the permutations
* as returned by PLASMA_zgetrf.
*
*******************************************************************************
*
* @return
* \retval PLASMA_SUCCESS successful exit
* \retval >0 if i, the (i,i) element of the factor U is
* exactly zero; The matrix is singular
* and its inverse could not be computed.
*
*******************************************************************************
*
* @sa PLASMA_zgetri
* @sa PLASMA_zgetri_Tile_Async
* @sa PLASMA_cgetri_Tile
* @sa PLASMA_dgetri_Tile
* @sa PLASMA_sgetri_Tile
* @sa PLASMA_zgetrf_Tile
*
******************************************************************************/
int PLASMA_zgetri_Tile(PLASMA_desc *A, int *IPIV)
{
    /*
     * Blocking driver: creates a private sequence and a workspace
     * descriptor, submits PLASMA_zgetri_Tile_Async(), waits in
     * plasma_dynamic_sync(), releases the workspace and returns the status
     * stored in the sequence. Setup failures are returned directly.
     */
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descW;
    int status;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zgetri_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* sequence->status is read below; do not continue without a sequence. */
    status = plasma_sequence_create(plasma, &sequence);
    if (status != PLASMA_SUCCESS)
        return status;

    /* Allocate workspace; descW must not be used if this step failed. */
    status = PLASMA_Alloc_Workspace_zgetri_Tile_Async(A, &descW);
    if (status != PLASMA_SUCCESS) {
        plasma_sequence_destroy(plasma, sequence);
        return status;
    }

    PLASMA_zgetri_Tile_Async(A, IPIV, &descW, sequence, &request);

    /* The workspace is released only after the submitted work has finished. */
    plasma_dynamic_sync();
    plasma_desc_mat_free(&(descW));

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
开发者ID:gpichon,项目名称:eigenproblems,代码行数:69,代码来源:zgetri.c
示例4: plasma_pdplrnt_quark
/***************************************************************************//**
 *  Submits one QUARK_CORE_dplrnt task per tile of A (dynamic scheduling).
 *  Each task receives the tile extents, the tile address and leading
 *  dimension, the global row count A.m, the global offsets of the tile
 *  (m*A.mb, n*A.nb) and the seed. Nothing is submitted when the sequence
 *  status is not PLASMA_SUCCESS.
 **/
void plasma_pdplrnt_quark( PLASMA_desc A, unsigned long long int seed,
                           PLASMA_sequence *sequence, PLASMA_request *request )
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    plasma_context_t *plasma = plasma_context_self();
    int m, n;

    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task inserted below to the caller's sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < A.mt; m++) {
        int lda  = BLKLDD(A, m);
        int rows = A.mb;

        /* The last tile row only holds what is left of A.m. */
        if (m == A.mt-1)
            rows = A.m - m*A.mb;

        for (n = 0; n < A.nt; n++) {
            int cols = A.nb;

            /* Same for the last tile column. */
            if (n == A.nt-1)
                cols = A.n - n*A.nb;

            QUARK_CORE_dplrnt(
                plasma->quark, &task_flags,
                rows, cols, A(m, n), lda,
                A.m, m*A.mb, n*A.nb, seed );
        }
    }
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:32,代码来源:pdplrnt.c
示例5: plasma_pslag2d_quark
/***************************************************************************//**
 *  Submits one QUARK_CORE_slag2d task per tile of SA, with tile SA(m,n) as
 *  the first operand and tile B(m,n) as the second (dynamic scheduling).
 *  NOTE(review): judging by the kernel name this is a single-to-double
 *  precision copy - confirm against the kernel documentation.
 *  Nothing is submitted when the sequence status is not PLASMA_SUCCESS.
 **/
void plasma_pslag2d_quark(PLASMA_desc SA, PLASMA_desc B,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    plasma_context_t *plasma = plasma_context_self();
    int m, n;

    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task inserted below to the caller's sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    for (m = 0; m < SA.mt; m++) {
        int ldsa = BLKLDD(SA, m);
        int ldb  = BLKLDD(B, m);
        int rows = SA.mb;

        /* The last tile row only holds what is left of SA.m. */
        if (m == SA.mt-1)
            rows = SA.m - m*SA.mb;

        for (n = 0; n < SA.nt; n++) {
            int cols = SA.nb;

            /* Same for the last tile column. */
            if (n == SA.nt-1)
                cols = SA.n - n*SA.nb;

            QUARK_CORE_slag2d(
                plasma->quark, &task_flags,
                rows, cols, SA.mb,
                SA(m, n), ldsa,
                B(m, n), ldb);
        }
    }
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:32,代码来源:pdlag2s.c
示例6: PLASMA_zlaswp_Tile_Async
/***************************************************************************//**
*
* @ingroup PLASMA_Complex64_t_Tile_Async
*
* PLASMA_zlaswp_Tile_Async - performs a series of row interchanges
* on the matrix A. One row interchange is initiated for each of
* rows K1 through K2 of A.
* Non-blocking equivalent of PLASMA_zlaswp_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa PLASMA_zlaswp
* @sa PLASMA_zlaswp_Tile
* @sa PLASMA_claswp_Tile_Async
* @sa PLASMA_dlaswp_Tile_Async
* @sa PLASMA_slaswp_Tile_Async
* @sa PLASMA_zgetrf_Tile_Async
*
******************************************************************************/
int PLASMA_zlaswp_Tile_Async(PLASMA_desc *A, int K1, int K2, int *IPIV, int INCX,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    /* The descriptor is copied by value before any validation takes place. */
    PLASMA_desc descA = *A;
    plasma_context_t *plasma = plasma_context_self();

    /* Hard preconditions: a context, a sequence and a request must exist. */
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_zlaswp_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* A sequence that already carries an error rejects this request. */
    if (sequence->status != PLASMA_SUCCESS)
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
    request->status = PLASMA_SUCCESS;

    /* Argument validation; failures are recorded through the request. */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (!(K1 == 1 && K2 == descA.m)) {
        plasma_error("PLASMA_zlaswp_Tile", "invalid K1 or K2 (1..M is the only interval supported right now)");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Three submissions in order: barrier tl2pnl, the swap, barrier pnl2tl. */
    plasma_dynamic_call_3(
        plasma_pzbarrier_tl2pnl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    plasma_dynamic_call_5(
        plasma_pzlaswp,
        PLASMA_desc, descA,
        int *, IPIV,
        int, INCX,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    plasma_dynamic_call_3(
        plasma_pzbarrier_pnl2tl,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:89,代码来源:zlaswp.c
示例7: PLASMA_dsygst_Tile_Async
/***************************************************************************//**
*
* @ingroup double_Tile_Async
*
* PLASMA_dsygst_Tile_Async - reduces a real symmetric-definite
* generalized eigenproblem to standard form.
* If PlasmaItype == 1, the problem is A*x = lambda*B*x, and A is
* overwritten by inv(U**T)*A*inv(U) or inv(L)*A*inv(L**T)
* If PlasmaItype == 2 or 3, the problem is A*B*x = lambda*x or B*A*x
* = lambda*x, and A is overwritten by U*A*U**T or L**T*A*L. B must
* have been previously factorized as U**T*U or L*L**T by
* PLASMA_DPOTRF.
* ONLY PlasmaItype == 1 and PlasmaLower supported!
* Non-blocking equivalent of PLASMA_dsygst_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa PLASMA_dsygst
* @sa PLASMA_dsygst_Tile
* @sa PLASMA_chegst_Tile_Async
* @sa PLASMA_dsygst_Tile_Async
* @sa PLASMA_ssygst_Tile_Async
* @sa PLASMA_dsygv_Tile_Async
*
******************************************************************************/
int PLASMA_dsygst_Tile_Async(PLASMA_enum itype, PLASMA_enum uplo,
                             PLASMA_desc *A,
                             PLASMA_desc *B,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    /* Both descriptors are copied by value before any validation. */
    PLASMA_desc descA = *A;
    PLASMA_desc descB = *B;
    plasma_context_t *plasma = plasma_context_self();

    /* Hard preconditions: a context, a sequence and a request must exist. */
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_dsygst_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* A sequence that already carries an error rejects this request. */
    if (sequence->status != PLASMA_SUCCESS)
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
    request->status = PLASMA_SUCCESS;

    /* Descriptor validation, A first and then B. */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst_Tile", "invalid first descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (plasma_desc_check(&descB) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_dsygst_Tile", "invalid second descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Only square tiles of A are accepted. */
    if (descA.mb != descA.nb) {
        plasma_error("PLASMA_dsygst_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Submit the reduction of the generalized eigenproblem to standard form. */
    plasma_dynamic_call_6(plasma_pdsygst,
        PLASMA_enum, itype,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_desc, descB,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:94,代码来源:dsygst.c
示例8: plasma_alloc_ibnb_tile
/***************************************************************************//**
 *  Allocates a tile descriptor together with its matrix storage, sized from
 *  the IB and NB values selected by plasma_tune() for (func, M, N).
 *
 *  The tile counts are MT = ceil(M/NB) and NT = ceil(N/NB); NT is doubled
 *  for the GELS and GESVD functions when the context's householder mode is
 *  not PLASMA_FLAT_HOUSEHOLDER. The descriptor describes IB-by-NB tiles over
 *  an (MT*IB)-by-(NT*NB) matrix.
 *
 *  On success *desc points to the new descriptor. On any failure after the
 *  descriptor has been allocated, everything allocated here is released and
 *  *desc is set to NULL, so no memory is leaked and no dangling pointer is
 *  handed back.
 *
 *  Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_UNEXPECTED
 *  (tuning failed), PLASMA_ERR_OUT_OF_RESOURCES (allocation failed) or the
 *  status reported by plasma_desc_check().
 **/
int plasma_alloc_ibnb_tile(int M, int N, PLASMA_enum func, int type, PLASMA_desc **desc)
{
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb_tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT (tile counts, rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        ((func == PLASMA_FUNC_SGELS)  ||
         (func == PLASMA_FUNC_DGELS)  ||
         (func == PLASMA_FUNC_CGELS)  ||
         (func == PLASMA_FUNC_ZGELS)  ||
         (func == PLASMA_FUNC_SGESVD) ||
         (func == PLASMA_FUNC_DGESVD) ||
         (func == PLASMA_FUNC_CGESVD) ||
         (func == PLASMA_FUNC_ZGESVD)))
        NT *= 2;

    /* Allocate and initialize descriptor */
    *desc = malloc(sizeof **desc);
    if (*desc == NULL) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    **desc = plasma_desc_init(type, IB, NB, IB*NB, MT*IB, NT*NB, 0, 0, MT*IB, NT*NB);

    /* Allocate matrix; on failure give the descriptor back */
    if (plasma_desc_mat_alloc(*desc)) {
        plasma_error("plasma_alloc_ibnb_tile", "malloc() failed");
        free(*desc);
        *desc = NULL;
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }

    /* Check that everything is ok; on failure release matrix and descriptor */
    status = plasma_desc_check(*desc);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb_tile", "invalid descriptor");
        plasma_desc_mat_free(*desc);
        free(*desc);
        *desc = NULL;
        return status;
    }
    return PLASMA_SUCCESS;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:61,代码来源:workspace.c
示例9: PLASMA_zpotrs
/***************************************************************************//**
*
* @ingroup PLASMA_Complex64_t
*
* PLASMA_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive
* definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky
* factorization A = U**H*U or A = L*L**H computed by PLASMA_zpotrf.
*
*******************************************************************************
*
* @param[in] uplo
* = PlasmaUpper: Upper triangle of A is stored;
* = PlasmaLower: Lower triangle of A is stored.
*
* @param[in] N
* The order of the matrix A. N >= 0.
*
* @param[in] NRHS
* The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
*
* @param[in] A
* The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H,
* computed by PLASMA_zpotrf.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[in,out] B
* On entry, the N-by-NRHS right hand side matrix B.
* On exit, if return value = 0, the N-by-NRHS solution matrix X.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,N).
*
*******************************************************************************
*
* @return
* \retval PLASMA_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*******************************************************************************
*
* @sa PLASMA_zpotrs_Tile
* @sa PLASMA_zpotrs_Tile_Async
* @sa PLASMA_cpotrs
* @sa PLASMA_dpotrs
* @sa PLASMA_spotrs
* @sa PLASMA_zpotrf
*
******************************************************************************/
int PLASMA_zpotrs(PLASMA_enum uplo, int N, int NRHS,
PLASMA_Complex64_t *A, int LDA,
PLASMA_Complex64_t *B, int LDB)
{
int NB;
int status;
plasma_context_t *plasma;
PLASMA_sequence *sequence = NULL;
PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_fatal_error("PLASMA_zpotrs", "PLASMA not initialized");
return PLASMA_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if (uplo != PlasmaUpper && uplo != PlasmaLower) {
plasma_error("PLASMA_zpotrs", "illegal value of uplo");
return -1;
}
if (N < 0) {
plasma_error("PLASMA_zpotrs", "illegal value of N");
return -2;
}
if (NRHS < 0) {
plasma_error("PLASMA_zpotrs", "illegal value of NRHS");
return -3;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_zpotrs", "illegal value of LDA");
return -5;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_zpotrs", "illegal value of LDB");
return -7;
}
/* Quick return */
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
/* Tune NB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_ZPOSV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_zpotrs", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
//.........这里部分代码省略.........
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:101,代码来源:zpotrs.c
示例10: PLASMA_sgesv
/***************************************************************************//**
*
* @ingroup float
*
* PLASMA_sgesv - Computes the solution to a system of linear equations A * X = B,
* where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
* The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A.
* The factored form of A is then used to solve the system of equations A * X = B.
*
*******************************************************************************
*
* @param[in] N
* The number of linear equations, i.e., the order of the matrix A. N >= 0.
*
* @param[in] NRHS
* The number of right hand sides, i.e., the number of columns of the matrix B.
* NRHS >= 0.
*
* @param[in,out] A
* On entry, the N-by-N coefficient matrix A.
* On exit, the tile L and U factors from the factorization.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[out] IPIV
* On exit, the pivot indices that define the permutations.
*
* @param[in,out] B
* On entry, the N-by-NRHS matrix of right hand side matrix B.
* On exit, if return value = 0, the N-by-NRHS solution matrix X.
*
* @param[in] LDB
* The leading dimension of the array B. LDB >= max(1,N).
*
*******************************************************************************
*
* @return
* \retval PLASMA_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
* \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
* but the factor U is exactly singular, so the solution could not be computed.
*
*******************************************************************************
*
* @sa PLASMA_sgesv_Tile
* @sa PLASMA_sgesv_Tile_Async
* @sa PLASMA_cgesv
* @sa PLASMA_dgesv
* @sa PLASMA_sgesv
*
******************************************************************************/
int PLASMA_sgesv(int N, int NRHS,
float *A, int LDA,
int *IPIV,
float *B, int LDB)
{
int NB, IB, IBNB, NT;
int status;
plasma_context_t *plasma;
PLASMA_sequence *sequence = NULL;
PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
PLASMA_desc descA, descB;
plasma = plasma_context_self();
if (plasma == NULL) {
plasma_error("PLASMA_sgesv", "PLASMA not initialized");
return PLASMA_ERR_NOT_INITIALIZED;
}
/* Check input arguments */
if (N < 0) {
plasma_error("PLASMA_sgesv", "illegal value of N");
return -1;
}
if (NRHS < 0) {
plasma_error("PLASMA_sgesv", "illegal value of NRHS");
return -2;
}
if (LDA < max(1, N)) {
plasma_error("PLASMA_sgesv", "illegal value of LDA");
return -4;
}
if (LDB < max(1, N)) {
plasma_error("PLASMA_sgesv", "illegal value of LDB");
return -8;
}
/* Quick return */
if (min(N, NRHS) == 0)
return PLASMA_SUCCESS;
/* Tune NB & IB depending on M, N & NRHS; Set NBNB */
status = plasma_tune(PLASMA_FUNC_SGESV, N, N, NRHS);
if (status != PLASMA_SUCCESS) {
plasma_error("PLASMA_sgesv", "plasma_tune() failed");
return status;
}
/* Set NT & NTRHS */
NB = PLASMA_NB;
IB = PLASMA_IB;
//.........这里部分代码省略.........
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:101,代码来源:sgesv.c
示例11: PLASMA_splgsy
/***************************************************************************//**
*
* @ingroup float
*
* PLASMA_splgsy - Generate a random hermitian matrix by tiles.
*
*******************************************************************************
*
* @param[in] bump
* The value to add to the diagonal to be sure
* to have a positive definite matrix.
*
* @param[in] N
* The order of the matrix A. N >= 0.
*
* @param[out] A
* On exit, The random hermitian matrix A generated.
*
* @param[in] LDA
* The leading dimension of the array A. LDA >= max(1,N).
*
* @param[in] seed
* The seed used in the random generation.
*
*******************************************************************************
*
* @return
* \retval PLASMA_SUCCESS successful exit
* \retval <0 if -i, the i-th argument had an illegal value
*
*******************************************************************************
*
* @sa PLASMA_splgsy_Tile
* @sa PLASMA_splgsy_Tile_Async
* @sa PLASMA_cplgsy
* @sa PLASMA_dplgsy
* @sa PLASMA_splgsy
* @sa PLASMA_splrnt
* @sa PLASMA_splgsy
*
******************************************************************************/
int PLASMA_splgsy( float bump, int N,
                   float *A, int LDA,
                   unsigned long long int seed )
{
    /*
     * Blocking driver: validates N and LDA, tunes NB, wraps the caller's
     * array A in a descriptor, submits PLASMA_splgsy_Tile_Async() followed
     * by plasma_siptile2lap(), waits in plasma_dynamic_sync() and returns
     * the status stored in the sequence.
     */
    int NB;
    int status;
    plasma_context_t *plasma;
    PLASMA_sequence *sequence = NULL;
    PLASMA_request request = PLASMA_REQUEST_INITIALIZER;
    PLASMA_desc descA;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Check input arguments */
    if (N < 0) {
        plasma_error("PLASMA_splgsy", "illegal value of N");
        return -2;
    }
    if (LDA < max(1, N)) {
        plasma_error("PLASMA_splgsy", "illegal value of LDA");
        return -4;
    }

    /* Quick return: N < 0 was rejected above, so only N == 0 is left */
    if (N == 0)
        return PLASMA_SUCCESS;

    /* Tune NB depending on M, N & NRHS; Set NBNB */
    status = plasma_tune(PLASMA_FUNC_SGEMM, N, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy", "plasma_tune() failed");
        return status;
    }

    /* Set NT */
    NB = PLASMA_NB;

    /* sequence->status is read below; do not continue without a sequence. */
    status = plasma_sequence_create(plasma, &sequence);
    if (status != PLASMA_SUCCESS)
        return status;

    /* The descriptor points at the caller's storage; nothing is copied here. */
    descA = plasma_desc_init(
        PlasmaRealFloat, NB, NB, NB*NB,
        LDA, N, 0, 0, N, N);
    descA.mat = A;

    /* Call the tile interface */
    PLASMA_splgsy_Tile_Async( bump, &descA, seed, sequence, &request );

    plasma_siptile2lap( descA, A, NB, NB, LDA, N );
    plasma_dynamic_sync();

    status = sequence->status;
    plasma_sequence_destroy(plasma, sequence);
    return status;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:97,代码来源:splgsy.c
示例12: plasma_pdorgqr_quark
/***************************************************************************//**
 *  Parallel construction of Q using tile V (application to identity) -
 *  dynamic scheduling.
 *
 *  Walks the panels k from min(A.mt, A.nt)-1 down to 0. For each panel it
 *  first submits QUARK_CORE_dtsmqr tasks for the tile rows m = Q.mt-1 down
 *  to k+1 (all tile columns of Q), then QUARK_CORE_dormqr tasks for tile
 *  row k. Nothing is submitted when the sequence status is not
 *  PLASMA_SUCCESS.
 **/
void plasma_pdorgqr_quark(PLASMA_desc A, PLASMA_desc Q, PLASMA_desc T,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    plasma_context_t *plasma = plasma_context_self();
    int k, m, n;
    int ib;

    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task inserted below to the caller's sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;

    k = min(A.mt, A.nt) - 1;
    while (k >= 0) {
        /* Extents of tile (k,k) of A; edge tiles may be partial. */
        int a_rows_k = (k == A.mt-1) ? A.m - k*A.mb : A.mb;
        int a_cols_k = (k == A.nt-1) ? A.n - k*A.nb : A.nb;
        int k_min    = min( a_cols_k, a_rows_k );
        /* Row count of tile row k of Q. */
        int q_rows_k = (k == Q.mt-1) ? Q.m - k*Q.mb : Q.mb;
        int ldak     = BLKLDD(A, k);
        int ldqk     = BLKLDD(Q, k);

        /* Tile rows below the diagonal, bottom-up. */
        for (m = Q.mt - 1; m > k; m--) {
            int q_rows_m = (m == Q.mt-1) ? Q.m - m*Q.mb : Q.mb;
            int ldam     = BLKLDD(A, m);
            int ldqm     = BLKLDD(Q, m);

            for (n = 0; n < Q.nt; n++) {
                int q_cols_n = (n == Q.nt-1) ? Q.n - n*Q.nb : Q.nb;

                QUARK_CORE_dtsmqr(
                    plasma->quark, &task_flags,
                    PlasmaLeft, PlasmaNoTrans,
                    Q.mb, q_cols_n, q_rows_m, q_cols_n, a_cols_k, ib, T.nb,
                    Q(k, n), ldqk,
                    Q(m, n), ldqm,
                    A(m, k), ldam,
                    T(m, k), T.mb);
            }
        }

        /* Diagonal tile row k. */
        for (n = 0; n < Q.nt; n++) {
            int q_cols_n = (n == Q.nt-1) ? Q.n - n*Q.nb : Q.nb;

            QUARK_CORE_dormqr(
                plasma->quark, &task_flags,
                PlasmaLeft, PlasmaNoTrans,
                q_rows_k, q_cols_n, k_min, ib, T.nb,
                A(k, k), ldak,
                T(k, k), T.mb,
                Q(k, n), ldqk);
        }

        k--;
    }
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:56,代码来源:pdorgqr.c
示例13: PLASMA_splgsy_Tile_Async
/***************************************************************************//**
*
* @ingroup float_Tile_Async
*
* PLASMA_splgsy_Tile_Async - Generate a random hermitian matrix by tiles.
* Non-blocking equivalent of PLASMA_splgsy_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa PLASMA_splgsy
* @sa PLASMA_splgsy_Tile
* @sa PLASMA_cplgsy_Tile_Async
* @sa PLASMA_dplgsy_Tile_Async
* @sa PLASMA_splgsy_Tile_Async
* @sa PLASMA_splgsy_Tile_Async
* @sa PLASMA_splgsy_Tile_Async
*
******************************************************************************/
int PLASMA_splgsy_Tile_Async( float bump,
                              PLASMA_desc *A,
                              unsigned long long int seed,
                              PLASMA_sequence *sequence,
                              PLASMA_request *request)
{
    /* The descriptor is copied by value before any validation takes place. */
    PLASMA_desc descA = *A;
    plasma_context_t *plasma = plasma_context_self();

    /* Hard preconditions: a context, a sequence and a request must exist. */
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_splgsy_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* A sequence that already carries an error rejects this request. */
    if (sequence->status != PLASMA_SUCCESS)
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
    request->status = PLASMA_SUCCESS;

    /* Descriptor validation. */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_splgsy_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Only square tiles are accepted. */
    if (descA.mb != descA.nb) {
        plasma_error("PLASMA_splgsy_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Nothing to generate when the smaller dimension is zero. */
    if (min( descA.m, descA.n ) == 0)
        return PLASMA_SUCCESS;

    plasma_parallel_call_5(plasma_psplgsy,
        float, bump,
        PLASMA_desc, descA,
        unsigned long long int, seed,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:81,代码来源:splgsy.c
示例14: PLASMA_cpotrf_Tile_Async
/***************************************************************************//**
*
* @ingroup PLASMA_Complex32_t_Tile_Async
*
* PLASMA_cpotrf_Tile_Async - Computes the Cholesky factorization of a symmetric
* positive definite or Hermitian positive definite matrix.
* Non-blocking equivalent of PLASMA_cpotrf_Tile().
* May return before the computation is finished.
* Allows for pipelining of operations at runtime.
*
*******************************************************************************
*
* @param[in] sequence
* Identifies the sequence of function calls that this call belongs to
* (for completion checks and exception handling purposes).
*
* @param[out] request
* Identifies this function call (for exception handling purposes).
*
*******************************************************************************
*
* @sa PLASMA_cpotrf
* @sa PLASMA_cpotrf_Tile
* @sa PLASMA_cpotrf_Tile_Async
* @sa PLASMA_dpotrf_Tile_Async
* @sa PLASMA_spotrf_Tile_Async
* @sa PLASMA_cpotrs_Tile_Async
*
******************************************************************************/
int PLASMA_cpotrf_Tile_Async(PLASMA_enum uplo, PLASMA_desc *A,
                             PLASMA_sequence *sequence, PLASMA_request *request)
{
    /* The descriptor is copied by value before any validation takes place. */
    PLASMA_desc descA = *A;
    plasma_context_t *plasma = plasma_context_self();

    /* Hard preconditions: a context, a sequence and a request must exist. */
    if (plasma == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }
    if (sequence == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL sequence");
        return PLASMA_ERR_UNALLOCATED;
    }
    if (request == NULL) {
        plasma_fatal_error("PLASMA_cpotrf_Tile", "NULL request");
        return PLASMA_ERR_UNALLOCATED;
    }

    /* A sequence that already carries an error rejects this request. */
    if (sequence->status != PLASMA_SUCCESS)
        return plasma_request_fail(sequence, request, PLASMA_ERR_SEQUENCE_FLUSHED);
    request->status = PLASMA_SUCCESS;

    /* Descriptor validation. */
    if (plasma_desc_check(&descA) != PLASMA_SUCCESS) {
        plasma_error("PLASMA_cpotrf_Tile", "invalid descriptor");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }

    /* Argument validation: square tiles and a recognised uplo value. */
    if (descA.mb != descA.nb) {
        plasma_error("PLASMA_cpotrf_Tile", "only square tiles supported");
        return plasma_request_fail(sequence, request, PLASMA_ERR_ILLEGAL_VALUE);
    }
    if (!(uplo == PlasmaUpper || uplo == PlasmaLower)) {
        plasma_error("PLASMA_cpotrf_Tile", "illegal value of uplo");
        return plasma_request_fail(sequence, request, -1);
    }

    /* No quick-return path is active here; the work is always submitted. */
    plasma_parallel_call_4(plasma_pcpotrf,
        PLASMA_enum, uplo,
        PLASMA_desc, descA,
        PLASMA_sequence*, sequence,
        PLASMA_request*, request);

    return PLASMA_SUCCESS;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:81,代码来源:cpotrf.c
示例15: plasma_alloc_ibnb
/***************************************************************************//**
 *  Allocates a raw buffer sized from the IB and NB values selected by
 *  plasma_tune() for (func, M, N).
 *
 *  The byte count is MT*NT*IB*NB times plasma_element_size(type), with
 *  MT = ceil(M/NB) and NT = ceil(N/NB); NT is doubled for the GELS and GESVD
 *  functions when the context's householder mode is not
 *  PLASMA_FLAT_HOUSEHOLDER.
 *
 *  On success *memptr receives the malloc()ed buffer, or NULL when the
 *  computed size is zero.
 *
 *  Returns PLASMA_SUCCESS, PLASMA_ERR_NOT_INITIALIZED, PLASMA_ERR_UNEXPECTED
 *  (tuning failed) or PLASMA_ERR_OUT_OF_RESOURCES (allocation failed).
 **/
int plasma_alloc_ibnb(int M, int N, PLASMA_enum func, int type, void **memptr)
{
    size_t size;
    int status;
    int IB, NB, MT, NT;
    plasma_context_t *plasma;

    plasma = plasma_context_self();
    if (plasma == NULL) {
        plasma_fatal_error("plasma_alloc_ibnb", "PLASMA not initialized");
        return PLASMA_ERR_NOT_INITIALIZED;
    }

    /* Tune NB & IB depending on M & N; Set IBNBSIZE */
    status = plasma_tune(func, M, N, 0);
    if (status != PLASMA_SUCCESS) {
        plasma_error("plasma_alloc_ibnb", "plasma_tune() failed");
        return PLASMA_ERR_UNEXPECTED;
    }

    /* Set MT & NT (tile counts, rounded up) */
    NB = PLASMA_NB;
    IB = PLASMA_IB;
    MT = (M%NB==0) ? (M/NB) : (M/NB+1);
    NT = (N%NB==0) ? (N/NB) : (N/NB+1);

    /* Size is doubled for RH QR to store the reduction T */
    if ((plasma->householder != PLASMA_FLAT_HOUSEHOLDER) &&
        (func == PLASMA_FUNC_SGELS  ||
         func == PLASMA_FUNC_DGELS  ||
         func == PLASMA_FUNC_CGELS  ||
         func == PLASMA_FUNC_ZGELS  ||
         func == PLASMA_FUNC_SGESVD ||
         func == PLASMA_FUNC_DGESVD ||
         func == PLASMA_FUNC_CGESVD ||
         func == PLASMA_FUNC_ZGESVD ))
        NT *= 2;

    /* size_t is unsigned, so "nothing to allocate" can only mean zero. */
    size = (size_t)MT*NT*IB*NB * plasma_element_size(type);
    if (size == 0) {
        *memptr = NULL;
        return PLASMA_SUCCESS;
    }

    *memptr = malloc(size);
    if (*memptr == NULL) {
        /* Report under this function's own name, not the _tile variant. */
        plasma_error("plasma_alloc_ibnb", "malloc() failed");
        return PLASMA_ERR_OUT_OF_RESOURCES;
    }
    return PLASMA_SUCCESS;
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:53,代码来源:workspace.c
示例16: plasma_pztrsmpl_quark
/***************************************************************************//**
 *  Parallel forward substitution for tile LU - dynamic scheduling.
 *
 *  For every panel k in [0, min(A.mt, A.nt)) it submits one
 *  QUARK_CORE_zgessm task per tile column of B on tile row k, then one
 *  QUARK_CORE_zssssm task per (m, n) pair for the tile rows m > k.
 *  Nothing is submitted when the sequence status is not PLASMA_SUCCESS.
 **/
void plasma_pztrsmpl_quark(PLASMA_desc A, PLASMA_desc B, PLASMA_desc L, int *IPIV,
                           PLASMA_sequence *sequence, PLASMA_request *request)
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    plasma_context_t *plasma = plasma_context_self();
    int k, m, n;
    int ib;
    int panels;

    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task inserted below to the caller's sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    ib = PLASMA_IB;
    panels = min(A.mt, A.nt);

    for (k = 0; k < panels; k++) {
        /* Extents of tile (k,k) of A; edge tiles may be partial. */
        int rows_k = (k == A.mt-1) ? A.m - k*A.mb : A.mb;
        int cols_k = (k == A.nt-1) ? A.n - k*A.nb : A.nb;
        int k_min  = (k == panels-1) ? min(A.m, A.n) - k*A.mb : A.mb;
        int ldak   = BLKLDD(A, k);
        int ldbk   = BLKLDD(B, k);

        /* Tile row k of B. */
        for (n = 0; n < B.nt; n++) {
            int b_cols = (n == B.nt-1) ? B.n - n*B.nb : B.nb;

            QUARK_CORE_zgessm(
                plasma->quark, &task_flags,
                rows_k, b_cols, k_min, ib, L.nb,
                IPIV(k, k),
                A(k, k), ldak,
                B(k, n), ldbk);
        }

        /* Tile rows below k. */
        for (m = k+1; m < A.mt; m++) {
            int rows_m = (m == A.mt-1) ? A.m - m*A.mb : A.mb;
            int ldam   = BLKLDD(A, m);
            int ldbm   = BLKLDD(B, m);

            for (n = 0; n < B.nt; n++) {
                int b_cols = (n == B.nt-1) ? B.n - n*B.nb : B.nb;

                QUARK_CORE_zssssm(
                    plasma->quark, &task_flags,
                    A.nb, b_cols, rows_m, b_cols, cols_k, ib, L.nb,
                    B(k, n), ldbk,
                    B(m, n), ldbm,
                    L(m, k), L.mb,
                    A(m, k), ldam,
                    IPIV(m, k));
            }
        }
    }
}
开发者ID:joao-lima,项目名称:plasma-kaapi,代码行数:53,代码来源:pztrsmpl.c
示例17: plasma_pclaswp_quark
/***************************************************************************//**
 *  Parallel tile row interchanges - dynamic scheduling.
 *
 *  Submits one QUARK_CORE_claswp_ontile task per tile of B. With inc > 0 the
 *  tile rows are visited top to bottom and each task is also handed the
 *  tile B(B.mt-1, n); otherwise they are visited bottom to top and each task
 *  is handed B(0, n). Nothing is submitted when the sequence status is not
 *  PLASMA_SUCCESS.
 **/
void plasma_pclaswp_quark(PLASMA_desc B, int *IPIV, int inc,
                          PLASMA_sequence *sequence, PLASMA_request *request)
{
    Quark_Task_Flags task_flags = Quark_Task_Flags_Initializer;
    plasma_context_t *plasma = plasma_context_self();
    int m, n;
    int visited;
    int step, anchor;

    if (sequence->status != PLASMA_SUCCESS)
        return;

    /* Attach every task inserted below to the caller's sequence. */
    QUARK_Task_Flag_Set(&task_flags, TASK_SEQUENCE, (intptr_t)sequence->quark_sequence);

    /* Direction of the sweep and the tile row passed as the final argument. */
    if ( inc > 0 ) {
        m      = 0;
        step   = 1;
        anchor = B.mt-1;
    } else {
        m      = B.mt-1;
        step   = -1;
        anchor = 0;
    }

    for (visited = 0; visited < B.mt; visited++, m += step) {
        int row_off   = m * B.mb;           /* first global row of tile row m */
        int rows_left = B.m - row_off;      /* rows from there to the bottom  */
        int tile_rows = B.mb;

        if (m == B.mt-1)
            tile_rows = rows_left;

        for (n = 0; n < B.nt; n++) {
            int tile_cols = B.nb;

            if (n == B.nt-1)
                tile_cols = B.n - n * B.nb;

            QUARK_CORE_claswp_ontile(
                plasma->quark, &task_flags,
                plasma_desc_submatrix(B, row_off, n*B.nb, rows_left, tile_cols),
                B(m, n), 1, tile_rows, IPIV(m), inc, B(anchor, n) );
        }
    }
}
开发者ID:joao-lima,项目名称:pla |
请发表评论