本文整理汇总了C++中LDW函数的典型用法代码示例。如果您正苦于以下问题：C++ LDW函数的具体用法？C++ LDW怎么用？C++ LDW使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了LDW函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: t3fv_4
/*
 * t3fv_4 -- in-place 4-element vector butterfly over x = ri.
 *
 * Each iteration (m from mb up to me, step VL) reads two twiddle vectors
 * from W, derives a third one with VZMULJ, scales inputs 1..3 by them and
 * writes the four combined results back to the slots they were read from.
 *
 * ii is not referenced.  rs is only used through WS() to form element
 * offsets; ms is both the per-iteration advance of x (times VL) and the
 * stride argument forwarded to LD/ST.
 *
 * NOTE(review): LD, LDW, ST, VZMULJ and VBYI are defined in the SIMD
 * support header, which is not visible here.  The local names assume the
 * usual FFTW meaning (multiply by a conjugated twiddle / multiply by i);
 * confirm against that header.
 */
static void t3fv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     R *x = ri;
     INT m;

     /* Skip the twiddle entries that belong to iterations before mb. */
     W = W + (mb * ((TWVL / VL) * 4));

     for (m = mb; m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(rs)) {
          V tw_in1, tw_in3, tw_in2;     /* twiddle applied to input 1, 3, 2 */
          V in0, in1, in2, in3;         /* raw inputs */
          V sc1, sc2, sc3;              /* twiddled inputs */
          V even_diff, even_sum, odd_rot, odd_sum;

          tw_in1 = LDW(&(W[0]));
          tw_in3 = LDW(&(W[TWVL * 2]));
          /* Only two twiddles are stored; the one for input 2 is derived. */
          tw_in2 = VZMULJ(tw_in1, tw_in3);

          /* All loads happen before the first store: the transform is in place. */
          in0 = LD(&(x[0]), ms, &(x[0]));
          in1 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
          in2 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
          in3 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));

          sc1 = VZMULJ(tw_in1, in1);
          sc2 = VZMULJ(tw_in2, in2);
          sc3 = VZMULJ(tw_in3, in3);

          /* Odd outputs (slots 1 and 3). */
          even_diff = VSUB(in0, sc2);
          odd_rot = VBYI(VSUB(sc1, sc3));
          ST(&(x[WS(rs, 1)]), VSUB(even_diff, odd_rot), ms, &(x[WS(rs, 1)]));
          ST(&(x[WS(rs, 3)]), VADD(even_diff, odd_rot), ms, &(x[WS(rs, 1)]));

          /* Even outputs (slots 2 and 0). */
          even_sum = VADD(in0, sc2);
          odd_sum = VADD(sc1, sc3);
          ST(&(x[WS(rs, 2)]), VSUB(even_sum, odd_sum), ms, &(x[0]));
          ST(&(x[0]), VADD(even_sum, odd_sum), ms, &(x[0]));
     }
}
开发者ID:BackupTheBerlios,项目名称:openvsipl,代码行数:33,代码来源:t3fv_4.c
示例2: hc2cfdftv_4
/*
 * hc2cfdftv_4 -- in-place vector codelet combining two Rp/Rm element pairs.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0], Rp[WS(rs,1)],
 * Rm[0] and Rm[WS(rs,1)], three twiddle vectors from W, and stores four
 * results, each scaled by KP500000000 (0.5), back into the same four slots.
 * Rp/Ip move forward by VL*ms per iteration and Rm/Im move backward by the
 * same amount; Ip and Im are advanced but never dereferenced here.
 *
 * NOTE(review): the V* macros, LD/ST/LDW, DVK/LDK and VLEAVE come from the
 * SIMD support header, not visible here; confirm their exact semantics
 * there before relying on the local names below.
 */
static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     {
          INT m;

          /* Twiddle rows are indexed from m == 1, hence (mb - 1). */
          W = W + ((mb - 1) * ((TWVL / VL) * 6));

          for (m = mb; m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(16, rs)) {
               V p0, m0, p1, m1;             /* inputs from Rp / Rm */
               V tw0, tw2, tw4;              /* twiddles at W[0], W[TWVL*2], W[TWVL*4] */
               V s0, d0, s1, d1;             /* per-pair sum / twiddled difference */
               V s_sum, s_dif, d_dif, d_sum;
               V out_m0, out_p1, out_p0, out_m1;

               p0 = LD(&(Rp[0]), ms, &(Rp[0]));
               m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
               p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
               m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));

               tw0 = LDW(&(W[0]));
               tw2 = LDW(&(W[TWVL * 2]));
               tw4 = LDW(&(W[TWVL * 4]));

               s0 = VFMACONJ(m0, p0);
               d0 = VZMULIJ(tw0, VFNMSCONJ(m0, p0));
               s1 = VZMULJ(tw2, VFMACONJ(m1, p1));
               d1 = VZMULIJ(tw4, VFNMSCONJ(m1, p1));

               s_sum = VADD(s0, s1);
               s_dif = VSUB(s0, s1);
               d_dif = VSUB(d1, d0);
               d_sum = VADD(d0, d1);

               out_m0 = VCONJ(VMUL(LDK(KP500000000), VFMAI(d_dif, s_dif)));
               out_p1 = VMUL(LDK(KP500000000), VFNMSI(d_dif, s_dif));
               out_p0 = VMUL(LDK(KP500000000), VSUB(s_sum, d_sum));
               out_m1 = VCONJ(VMUL(LDK(KP500000000), VADD(d_sum, s_sum)));

               ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
               ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
               ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
               ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
          }
     }
     VLEAVE();
}
开发者ID:LiZimo,项目名称:interactiveCart,代码行数:35,代码来源:hc2cfdftv_4.c
示例3: t1sv_2
/*
 * t1sv_2 -- in-place 2-element butterfly on split arrays ri / ii.
 *
 * Each iteration (m from mb up to me, step 2*VL) loads element 0 and
 * element WS(rs,1) from both ri and ii, combines element 1 with the two
 * twiddle vectors W[0] and W[TWVL*1], and stores sum and difference back
 * to the same slots.  ri, ii and W all advance inside the loop header.
 *
 * NOTE(review): from the VFMA/VFNMS pattern, tw_a and tw_b look like the
 * two components of one complex twiddle, but that depends on macro
 * definitions not visible here -- confirm against the SIMD header.
 */
static void t1sv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Skip the twiddle entries that belong to iterations before mb. */
     W = W + (mb * 2);

     for (m = mb; m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(4, rs)) {
          V re0, im0, re1, im1;
          V tw_a, tw_b;
          V rot_re, rot_im;

          re0 = LD(&(ri[0]), ms, &(ri[0]));
          im0 = LD(&(ii[0]), ms, &(ii[0]));
          re1 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
          im1 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));

          tw_a = LDW(&(W[0]));
          tw_b = LDW(&(W[TWVL * 1]));

          /* Twiddled element 1. */
          rot_re = VFMA(tw_a, re1, VMUL(tw_b, im1));
          rot_im = VFNMS(tw_b, re1, VMUL(tw_a, im1));

          ST(&(ri[WS(rs, 1)]), VSUB(re0, rot_re), ms, &(ri[WS(rs, 1)]));
          ST(&(ii[WS(rs, 1)]), VSUB(im0, rot_im), ms, &(ii[WS(rs, 1)]));
          ST(&(ri[0]), VADD(re0, rot_re), ms, &(ri[0]));
          ST(&(ii[0]), VADD(rot_im, im0), ms, &(ii[0]));
     }
     VLEAVE();
}
开发者ID:SKA-ScienceDataProcessor,项目名称:FastImaging,代码行数:25,代码来源:t1sv_2.c
示例4: hc2cfdftv_4
/*
 * hc2cfdftv_4 -- in-place vector codelet combining two Rp/Rm element pairs
 * (variant that conjugates the Rm inputs explicitly with VCONJ).
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0], Rp[WS(rs,1)],
 * Rm[0] and Rm[WS(rs,1)], three twiddle vectors from W, and stores four
 * results scaled by KP500000000 (0.5) into the same four slots.  Rp/Ip move
 * forward and Rm/Im backward by VL*ms per iteration; Ip and Im are never
 * dereferenced here.
 *
 * NOTE(review): the V* macros, LD/ST/LDW and DVK/LDK are defined outside
 * this snippet; confirm their semantics in the SIMD support header.
 */
static void hc2cfdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     /* Twiddle rows are indexed from m == 1, hence (mb - 1). */
     W = W + ((mb - 1) * ((TWVL / VL) * 6));

     for (m = mb; m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m0, m0c, p1, m1, m1c;   /* inputs and VCONJ'd Rm inputs */
          V tw0, tw2, tw4;              /* twiddles at W[0], W[TWVL*2], W[TWVL*4] */
          V s0, d0, s1, d1;
          V s_dif, d_rot, s_sum, d_sum;
          V out_p1, out_m0, out_m1, out_p0;

          /* Inputs: every load precedes the first store. */
          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));
          m0c = VCONJ(m0);
          p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
          m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
          m1c = VCONJ(m1);

          tw0 = LDW(&(W[0]));
          tw2 = LDW(&(W[TWVL * 2]));
          tw4 = LDW(&(W[TWVL * 4]));

          s0 = VADD(p0, m0c);
          d0 = VZMULIJ(tw0, VSUB(m0c, p0));
          s1 = VZMULJ(tw2, VADD(p1, m1c));
          d1 = VZMULIJ(tw4, VSUB(m1c, p1));

          /* First output pair: Rp[WS(rs,1)] and Rm[0]. */
          s_dif = VSUB(s0, s1);
          d_rot = VBYI(VSUB(d0, d1));
          out_p1 = VMUL(LDK(KP500000000), VSUB(s_dif, d_rot));
          out_m0 = VCONJ(VMUL(LDK(KP500000000), VADD(s_dif, d_rot)));
          ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));

          /* Second output pair: Rm[WS(rs,1)] and Rp[0]. */
          s_sum = VADD(s0, s1);
          d_sum = VADD(d0, d1);
          out_m1 = VCONJ(VMUL(LDK(KP500000000), VSUB(s_sum, d_sum)));
          out_p0 = VMUL(LDK(KP500000000), VADD(s_sum, d_sum));
          ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
     }
}
开发者ID:BackupTheBerlios,项目名称:openvsipl,代码行数:34,代码来源:hc2cfdftv_4.c
示例5: hc2cbdftv_4
/*
 * hc2cbdftv_4 -- in-place vector codelet combining two Rp/Rm element pairs.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0], Rp[WS(rs,1)],
 * Rm[0] and Rm[WS(rs,1)], reads three twiddle vectors from W, and writes
 * four results back to the same slots.  Rp/Ip move forward and Rm/Im
 * backward by VL*ms per iteration; Ip and Im are never dereferenced here.
 *
 * NOTE(review): the V* macros, LD/ST/LDW and VLEAVE are defined outside
 * this snippet; confirm their semantics in the SIMD support header.
 */
static void hc2cbdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Twiddle rows are indexed from m == 1, hence (mb - 1). */
     W = W + ((mb - 1) * ((TWVL / VL) * 6));

     for (m = mb; m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m1, p1, m0;             /* inputs from Rp / Rm */
          V tw0, tw4, tw2;              /* twiddles at W[0], W[TWVL*4], W[TWVL*2] */
          V sum_a, dif_a, sum_b, dif_b;
          V mid, total, hi, lo;
          V out_m1, out_p1, out_p0, out_m0;

          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
          p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));

          tw0 = LDW(&(W[0]));
          tw4 = LDW(&(W[TWVL * 4]));
          tw2 = LDW(&(W[TWVL * 2]));

          /* Pair Rp[0] with Rm[WS(rs,1)], and Rp[WS(rs,1)] with Rm[0]. */
          sum_a = VFMACONJ(m1, p0);
          dif_a = VFNMSCONJ(m1, p0);
          sum_b = VFMACONJ(m0, p1);
          dif_b = VFNMSCONJ(m0, p1);

          mid = VZMUL(tw2, VSUB(sum_a, sum_b));
          total = VADD(sum_a, sum_b);
          hi = VZMULI(tw4, VFNMSI(dif_b, dif_a));
          lo = VZMULI(tw0, VFMAI(dif_b, dif_a));

          out_m1 = VCONJ(VSUB(mid, hi));
          out_p1 = VADD(hi, mid);
          out_p0 = VADD(lo, total);
          out_m0 = VCONJ(VSUB(total, lo));

          ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
          ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));
     }
     VLEAVE();
}
开发者ID:BGCX261,项目名称:zpr-rozpoznawanie-dzwieku-git,代码行数:34,代码来源:hc2cbdftv_4.c
示例6: t1sv_2
/*
 * t1sv_2 -- in-place 2-element butterfly on split arrays ri / ii
 * (variant that counts down from m and returns the advanced W pointer).
 *
 * The loop runs while the counter, starting at m and decreasing by 2*VL,
 * is positive.  Each pass loads element 0 and element WS(ios,1) of ri and
 * ii, combines element 1 with the twiddle vectors W[0] and W[TWVL*1], and
 * stores sum and difference back to the same slots.
 *
 * Returns W as left by the loop header, i.e. advanced by (2*VL)*2 for
 * every pass executed.
 *
 * NOTE(review): LD/ST/LDW, VMUL/VFMA/VFNMS semantics live in the SIMD
 * support header, which is not visible here.
 */
static const R *t1sv_2(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
     INT remaining;

     for (remaining = m; remaining > 0; remaining = remaining - (2 * VL), ri = ri + ((2 * VL) * dist), ii = ii + ((2 * VL) * dist), W = W + ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(ios)) {
          V re0, im0, re1, im1;
          V tw_a, tw_b;
          V prod_im, prod_re, rot_im, rot_re;

          re0 = LD(&(ri[0]), dist, &(ri[0]));
          im0 = LD(&(ii[0]), dist, &(ii[0]));
          re1 = LD(&(ri[WS(ios, 1)]), dist, &(ri[WS(ios, 1)]));
          im1 = LD(&(ii[WS(ios, 1)]), dist, &(ii[WS(ios, 1)]));

          tw_a = LDW(&(W[0]));
          tw_b = LDW(&(W[TWVL * 1]));

          /* Twiddled element 1, built from a plain product plus a fused step. */
          prod_im = VMUL(tw_a, im1);
          prod_re = VMUL(tw_a, re1);
          rot_im = VFNMS(tw_b, re1, prod_im);
          rot_re = VFMA(tw_b, im1, prod_re);

          ST(&(ii[0]), VADD(rot_im, im0), dist, &(ii[0]));
          ST(&(ii[WS(ios, 1)]), VSUB(im0, rot_im), dist, &(ii[WS(ios, 1)]));
          ST(&(ri[0]), VADD(re0, rot_re), dist, &(ri[0]));
          ST(&(ri[WS(ios, 1)]), VSUB(re0, rot_re), dist, &(ri[WS(ios, 1)]));
     }
     return W;
}
开发者ID:exic,项目名称:last.fm-dbus,代码行数:25,代码来源:t1sv_2.c
示例7: hc2cbdftv_4
/*
 * hc2cbdftv_4 -- in-place vector codelet combining two Rp/Rm element pairs
 * (variant that conjugates the Rm inputs explicitly with VCONJ).
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0], Rp[WS(rs,1)],
 * Rm[0] and Rm[WS(rs,1)], reads three twiddle vectors from W, and writes
 * four results back to the same slots.  Rp/Ip move forward and Rm/Im
 * backward by VL*ms per iteration; Ip and Im are never dereferenced here.
 *
 * The twiddle loads stay interleaved with the stores exactly as in the
 * generated order; only the data loads are grouped at the top.
 *
 * NOTE(review): the V* macros and LD/ST/LDW are defined outside this
 * snippet; confirm their semantics in the SIMD support header.
 */
static void hc2cbdftv_4(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Twiddle rows are indexed from m == 1, hence (mb - 1). */
     W = W + ((mb - 1) * ((TWVL / VL) * 6));

     for (m = mb; m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
          V p0, m1, m1c, p1, m0, m0c;   /* inputs and VCONJ'd Rm inputs */
          V dif_a, sum_a, dif_b_rot, sum_b, total;
          V tw0, tw4, tw2;              /* twiddles at W[0], W[TWVL*4], W[TWVL*2] */
          V lo, hi, mid;
          V out_p0, out_m0, out_p1, out_m1;

          p0 = LD(&(Rp[0]), ms, &(Rp[0]));
          m1 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
          p1 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
          m0 = LD(&(Rm[0]), -ms, &(Rm[0]));

          m1c = VCONJ(m1);
          dif_a = VSUB(p0, m1c);
          sum_a = VADD(p0, m1c);

          m0c = VCONJ(m0);
          dif_b_rot = VBYI(VSUB(p1, m0c));
          sum_b = VADD(p1, m0c);

          total = VADD(sum_a, sum_b);

          /* Outputs for slot 0 of Rp and Rm. */
          tw0 = LDW(&(W[0]));
          lo = VZMULI(tw0, VADD(dif_a, dif_b_rot));
          out_p0 = VADD(lo, total);
          ST(&(Rp[0]), out_p0, ms, &(Rp[0]));
          out_m0 = VCONJ(VSUB(total, lo));
          ST(&(Rm[0]), out_m0, -ms, &(Rm[0]));

          /* Outputs for slot WS(rs,1) of Rp and Rm. */
          tw4 = LDW(&(W[TWVL * 4]));
          hi = VZMULI(tw4, VSUB(dif_a, dif_b_rot));
          tw2 = LDW(&(W[TWVL * 2]));
          mid = VZMUL(tw2, VSUB(sum_a, sum_b));
          out_p1 = VADD(hi, mid);
          ST(&(Rp[WS(rs, 1)]), out_p1, ms, &(Rp[WS(rs, 1)]));
          out_m1 = VCONJ(VSUB(mid, hi));
          ST(&(Rm[WS(rs, 1)]), out_m1, -ms, &(Rm[WS(rs, 1)]));
     }
}
开发者ID:BackupTheBerlios,项目名称:openvsipl,代码行数:33,代码来源:hc2cbdftv_4.c
示例8: hc2cfdftv_6
/*
 * hc2cfdftv_6 -- in-place vector codelet over three Rp/Rm element pairs.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0..2] and Rm[0..2]
 * (indices formed with WS(rs, k)), reads five twiddle vectors from W, and
 * stores six results back into the same slots.  Rp/Ip move forward and
 * Rm/Im backward by VL*ms per iteration; Ip and Im are advanced but never
 * dereferenced in this body.
 *
 * NOTE(review): LD/ST/LDW, the V* arithmetic macros, DVK/LDK and VLEAVE are
 * defined in the SIMD support header, which is not visible here; the
 * statement order is generator output and should not be hand-edited.
 */
static void hc2cfdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants: 0.25, ~sqrt(3)/2 and 0.5; read below through LDK(). */
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
{
INT m;
/* W starts at row (mb - 1) and advances by TWVL * 10 per iteration. */
for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(24, rs)) {
V Ta, Tu, Tn, Tw, Ti, Tv, T1, T8, Tg, Tf, T7, T3, Te, T6, T2;
V T4, T9, T5, Tk, Tm, Tj, Tl, Tc, Th, Tb, Td, Tr, Tp, Tq, To;
V Tt, Ts, TA, Ty, Tz, Tx, TC, TB;
/* Load the three Rp inputs, then the three Rm inputs (stride -ms), each
   Rm input being passed through VCONJ.  All loads precede the first store. */
T1 = LD(&(Rp[0]), ms, &(Rp[0]));
T8 = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
Tg = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));
Te = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));
Tf = VCONJ(Te);
T6 = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
T7 = VCONJ(T6);
T2 = LD(&(Rm[0]), -ms, &(Rm[0]));
T3 = VCONJ(T2);
/* Combine each Rp/Rm pair into a sum and a twiddled term; the twiddles
   come from W[TWVL * k] for k = 4, 0, 6, 2, 8. */
T4 = VADD(T1, T3);
T5 = LDW(&(W[TWVL * 4]));
T9 = VZMULIJ(T5, VSUB(T7, T8));
Ta = VADD(T4, T9);
Tu = VSUB(T4, T9);
Tj = LDW(&(W[0]));
Tk = VZMULIJ(Tj, VSUB(T3, T1));
Tl = LDW(&(W[TWVL * 6]));
Tm = VZMULJ(Tl, VADD(Tf, Tg));
Tn = VADD(Tk, Tm);
Tw = VSUB(Tm, Tk);
Tb = LDW(&(W[TWVL * 2]));
Tc = VZMULJ(Tb, VADD(T7, T8));
Td = LDW(&(W[TWVL * 8]));
Th = VZMULIJ(Td, VSUB(Tf, Tg));
Ti = VADD(Tc, Th);
Tv = VSUB(Tc, Th);
/* First output group, built from Ta, Ti, Tn: Rp[0], Rm[WS(rs,1)], Rp[WS(rs,2)]. */
Tr = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tn, Ti))));
To = VADD(Ti, Tn);
Tp = VMUL(LDK(KP500000000), VADD(Ta, To));
Tq = VFNMS(LDK(KP250000000), To, VMUL(LDK(KP500000000), Ta));
ST(&(Rp[0]), Tp, ms, &(Rp[0]));
Tt = VCONJ(VADD(Tq, Tr));
ST(&(Rm[WS(rs, 1)]), Tt, -ms, &(Rm[WS(rs, 1)]));
Ts = VSUB(Tq, Tr);
ST(&(Rp[WS(rs, 2)]), Ts, ms, &(Rp[0]));
/* Second output group, built from Tu, Tv, Tw: Rm[WS(rs,2)], Rp[WS(rs,1)], Rm[0]. */
TA = VMUL(LDK(KP500000000), VBYI(VMUL(LDK(KP866025403), VSUB(Tw, Tv))));
Tx = VADD(Tv, Tw);
Ty = VCONJ(VMUL(LDK(KP500000000), VADD(Tu, Tx)));
Tz = VFNMS(LDK(KP250000000), Tx, VMUL(LDK(KP500000000), Tu));
ST(&(Rm[WS(rs, 2)]), Ty, -ms, &(Rm[0]));
TC = VADD(Tz, TA);
ST(&(Rp[WS(rs, 1)]), TC, ms, &(Rp[WS(rs, 1)]));
TB = VCONJ(VSUB(Tz, TA));
ST(&(Rm[0]), TB, -ms, &(Rm[0]));
}
}
VLEAVE();
}
开发者ID:LiZimo,项目名称:interactiveCart,代码行数:59,代码来源:hc2cfdftv_6.c
示例9: t2sv_4
/*
 * t2sv_4 -- in-place 4-element butterfly on split arrays ri / ii.
 *
 * Each iteration (m from mb up to me, step 2*VL) reads four twiddle vectors
 * from W, derives a further pair from them, applies the twiddles to
 * elements 1..3 of ri/ii (offsets formed with WS(rs, k)), and stores the
 * four combined results back into the same slots of ri and ii.
 *
 * NOTE(review): LD/ST/LDW, VMUL/VFMA/VFNMS/VADD/VSUB and VLEAVE are defined
 * in the SIMD support header, which is not visible here.
 */
static void t2sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
{
INT m;
/* W starts mb * 4 entries in and advances by (2 * VL) * 4 per iteration. */
for (m = mb, W = W + (mb * 4); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 4), MAKE_VOLATILE_STRIDE(8, rs)) {
V T2, T4, T3, T5, T6, T8;
/* Stored twiddles: (T2, T4) is used with element 1, (T3, T5) with element 3. */
T2 = LDW(&(W[0]));
T4 = LDW(&(W[TWVL * 1]));
T3 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 3]));
/* Derived pair (T6, T8), used with element 2 below. */
T6 = VFMA(T2, T3, VMUL(T4, T5));
T8 = VFNMS(T4, T3, VMUL(T2, T5));
{
V T1, Tp, Ta, To, Te, Tk, Th, Tl, T7, T9;
/* Element 0 is used as loaded; element 2 is combined with (T6, T8). */
T1 = LD(&(ri[0]), ms, &(ri[0]));
Tp = LD(&(ii[0]), ms, &(ii[0]));
T7 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0]));
T9 = LD(&(ii[WS(rs, 2)]), ms, &(ii[0]));
Ta = VFMA(T6, T7, VMUL(T8, T9));
To = VFNMS(T8, T7, VMUL(T6, T9));
{
V Tc, Td, Tf, Tg;
/* Elements 1 and 3, each combined with its own twiddle pair. */
Tc = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
Td = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));
Te = VFMA(T2, Tc, VMUL(T4, Td));
Tk = VFNMS(T4, Tc, VMUL(T2, Td));
Tf = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)]));
Tg = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)]));
Th = VFMA(T3, Tf, VMUL(T5, Tg));
Tl = VFNMS(T5, Tf, VMUL(T3, Tg));
}
{
V Tb, Ti, Tn, Tq;
/* Outputs for slots 0 and 2 (all loads are complete at this point). */
Tb = VADD(T1, Ta);
Ti = VADD(Te, Th);
ST(&(ri[WS(rs, 2)]), VSUB(Tb, Ti), ms, &(ri[0]));
ST(&(ri[0]), VADD(Tb, Ti), ms, &(ri[0]));
Tn = VADD(Tk, Tl);
Tq = VADD(To, Tp);
ST(&(ii[0]), VADD(Tn, Tq), ms, &(ii[0]));
ST(&(ii[WS(rs, 2)]), VSUB(Tq, Tn), ms, &(ii[0]));
}
{
V Tj, Tm, Tr, Ts;
/* Outputs for slots 1 and 3. */
Tj = VSUB(T1, Ta);
Tm = VSUB(Tk, Tl);
ST(&(ri[WS(rs, 3)]), VSUB(Tj, Tm), ms, &(ri[WS(rs, 1)]));
ST(&(ri[WS(rs, 1)]), VADD(Tj, Tm), ms, &(ri[WS(rs, 1)]));
Tr = VSUB(Tp, To);
Ts = VSUB(Te, Th);
ST(&(ii[WS(rs, 1)]), VSUB(Tr, Ts), ms, &(ii[WS(rs, 1)]));
ST(&(ii[WS(rs, 3)]), VADD(Ts, Tr), ms, &(ii[WS(rs, 1)]));
}
}
}
}
VLEAVE();
}
开发者ID:SKA-ScienceDataProcessor,项目名称:FastImaging,代码行数:58,代码来源:t2sv_4.c
示例10: t3fv_5
/*
 * t3fv_5 -- in-place 5-element vector butterfly over x = ri.
 *
 * Each iteration (m from mb up to me, step VL) reads two twiddle vectors
 * from W, derives two more from them, scales inputs 1..4 of x by the four
 * twiddles, and stores five combined results back into the same slots.
 * ii is not referenced in this body.
 *
 * NOTE(review): LD/ST/LDW, the V* arithmetic macros, DVK/LDK and VLEAVE are
 * defined in the SIMD support header, which is not visible here.
 */
static void t3fv_5(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants: 0.25, ~sqrt(5)/4, ~sin(36 deg), ~sin(72 deg); read through LDK(). */
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
{
INT m;
R *x;
x = ri;
/* W starts mb rows in and advances by TWVL * 4 per iteration. */
for (m = mb, W = W + (mb * ((TWVL / VL) * 4)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 4), MAKE_VOLATILE_STRIDE(5, rs)) {
V T1, T4, T5, T9;
/* Two stored twiddles (T1, T4) and two derived ones (T5, T9). */
T1 = LDW(&(W[0]));
T4 = LDW(&(W[TWVL * 2]));
T5 = VZMUL(T1, T4);
T9 = VZMULJ(T1, T4);
{
V Tg, Tk, Tl, T8, Te, Th;
Tg = LD(&(x[0]), ms, &(x[0]));
{
V T3, Td, T7, Tb;
{
V T2, Tc, T6, Ta;
/* Twiddle pairing: input 1 with T1, input 3 with T4, input 4 with T5,
   input 2 with T9. */
T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
T3 = VZMULJ(T1, T2);
Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Td = VZMULJ(T4, Tc);
T6 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
T7 = VZMULJ(T5, T6);
Ta = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Tb = VZMULJ(T9, Ta);
}
/* Differences and sums of the (1,4) and (2,3) twiddled pairs. */
Tk = VSUB(T3, T7);
Tl = VSUB(Tb, Td);
T8 = VADD(T3, T7);
Te = VADD(Tb, Td);
Th = VADD(T8, Te);
}
/* Slot 0 receives input 0 plus the sum of all twiddled inputs. */
ST(&(x[0]), VADD(Tg, Th), ms, &(x[0]));
{
V Tm, Tn, Tj, To, Tf, Ti;
/* Remaining four outputs, written to slots 1, 3, 4 and 2 in that order. */
Tm = VBYI(VFMA(LDK(KP951056516), Tk, VMUL(LDK(KP587785252), Tl)));
Tn = VBYI(VFNMS(LDK(KP587785252), Tk, VMUL(LDK(KP951056516), Tl)));
Tf = VMUL(LDK(KP559016994), VSUB(T8, Te));
Ti = VFNMS(LDK(KP250000000), Th, Tg);
Tj = VADD(Tf, Ti);
To = VSUB(Ti, Tf);
ST(&(x[WS(rs, 1)]), VSUB(Tj, Tm), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 3)]), VSUB(To, Tn), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 4)]), VADD(Tm, Tj), ms, &(x[0]));
ST(&(x[WS(rs, 2)]), VADD(Tn, To), ms, &(x[0]));
}
}
}
}
VLEAVE();
}
开发者ID:Aegisub,项目名称:fftw3,代码行数:57,代码来源:t3fv_5.c
示例11: hc2cbdftv_6
/*
 * hc2cbdftv_6 -- in-place vector codelet over three Rp/Rm element pairs.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0..2] and Rm[0..2]
 * (indices formed with WS(rs, k)), reads five twiddle vectors from W, and
 * stores six results back into the same slots.  Rp/Ip move forward and
 * Rm/Im backward by VL*ms per iteration; Ip and Im are advanced but never
 * dereferenced in this body.
 *
 * NOTE(review): LD/ST/LDW, the V* arithmetic macros and DVK/LDK are defined
 * in the SIMD support header, which is not visible here; the statement
 * order is generator output and should not be hand-edited.
 */
static void hc2cbdftv_6(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants: 0.5 and ~sqrt(3)/2; read below through LDK(). */
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
INT m;
/* W starts at row (mb - 1) and advances by TWVL * 10 per iteration. */
for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 10)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 10), MAKE_VOLATILE_STRIDE(rs)) {
V T5, Th, Te, Ts, Tk, Tm, T2, T4, T3, T6, Tc, T8, Tb, T7, Ta;
V T9, Td, Ti, Tj, TA, Tf, Tn, Tv, Tt, Tz, T1, Tl, Tg, Tu, Tr;
V Tq, Ty, To, Tp, TC, TB, Tx, Tw;
/* Load all six inputs; each Rm input (stride -ms) is passed through VCONJ. */
T2 = LD(&(Rp[0]), ms, &(Rp[0]));
T3 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));
T4 = VCONJ(T3);
T5 = VSUB(T2, T4);
Th = VADD(T2, T4);
T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));
Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
T7 = LD(&(Rm[0]), -ms, &(Rm[0]));
T8 = VCONJ(T7);
Ta = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
Tb = VCONJ(Ta);
/* Twiddle-free combination of the difference terms (T5, T9, Td) and of the
   sum terms (Th, Ti, Tj). */
T9 = VSUB(T6, T8);
Td = VSUB(Tb, Tc);
Te = VADD(T9, Td);
Ts = VBYI(VMUL(LDK(KP866025403), VSUB(T9, Td)));
Ti = VADD(T6, T8);
Tj = VADD(Tb, Tc);
Tk = VADD(Ti, Tj);
Tm = VBYI(VMUL(LDK(KP866025403), VSUB(Ti, Tj)));
TA = VADD(Th, Tk);
/* Apply the twiddles read from W[TWVL * k] for k = 4, 2, 6, 8, 0. */
T1 = LDW(&(W[TWVL * 4]));
Tf = VZMULI(T1, VADD(T5, Te));
Tl = VFNMS(LDK(KP500000000), Tk, Th);
Tg = LDW(&(W[TWVL * 2]));
Tn = VZMUL(Tg, VSUB(Tl, Tm));
Tu = LDW(&(W[TWVL * 6]));
Tv = VZMUL(Tu, VADD(Tm, Tl));
Tr = VFNMS(LDK(KP500000000), Te, T5);
Tq = LDW(&(W[TWVL * 8]));
Tt = VZMULI(Tq, VSUB(Tr, Ts));
Ty = LDW(&(W[0]));
Tz = VZMULI(Ty, VADD(Ts, Tr));
/* Stores: each Rp slot gets a sum, the matching Rm slot the VCONJ of a
   difference of the same two terms. */
To = VADD(Tf, Tn);
ST(&(Rp[WS(rs, 1)]), To, ms, &(Rp[WS(rs, 1)]));
Tp = VCONJ(VSUB(Tn, Tf));
ST(&(Rm[WS(rs, 1)]), Tp, -ms, &(Rm[WS(rs, 1)]));
TC = VCONJ(VSUB(TA, Tz));
ST(&(Rm[0]), TC, -ms, &(Rm[0]));
TB = VADD(Tz, TA);
ST(&(Rp[0]), TB, ms, &(Rp[0]));
Tx = VCONJ(VSUB(Tv, Tt));
ST(&(Rm[WS(rs, 2)]), Tx, -ms, &(Rm[0]));
Tw = VADD(Tt, Tv);
ST(&(Rp[WS(rs, 2)]), Tw, ms, &(Rp[0]));
}
}
开发者ID:BackupTheBerlios,项目名称:openvsipl,代码行数:55,代码来源:hc2cbdftv_6.c
示例12: t1sv_4
/*
 * t1sv_4 -- in-place 4-element butterfly on split arrays ri / ii
 * (variant that counts down from m and returns the advanced W pointer).
 *
 * The loop runs while i, starting at m and decreasing by 2*VL, is positive.
 * Each pass loads elements 0..3 of ri and ii (offsets formed with
 * WS(ios, k)), reads six twiddle vectors from W, combines elements 1..3
 * with them, and stores the four results back into the same slots.
 *
 * Returns W as left by the loop header, i.e. advanced by (2*VL)*6 for every
 * pass executed.
 *
 * NOTE(review): LD/ST/LDW and the V* arithmetic macros are defined in the
 * SIMD support header, which is not visible here.
 */
static const R *t1sv_4(R *ri, R *ii, const R *W, stride ios, INT m, INT dist)
{
INT i;
for (i = m; i > 0; i = i - (2 * VL), ri = ri + ((2 * VL) * dist), ii = ii + ((2 * VL) * dist), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(ios)) {
V T1, Tv, T3, T6, T5, Ta, Td, Tc, Tg, Tj, Tt, T4, Tf, Ti, Tn;
V Tb, T2, T9;
/* Loads.  Twiddle pairing: W[0], W[TWVL*1] go with element 1;
   W[TWVL*2], W[TWVL*3] with element 2; W[TWVL*4], W[TWVL*5] with element 3. */
T1 = LD(&(ri[0]), dist, &(ri[0]));
Tv = LD(&(ii[0]), dist, &(ii[0]));
T3 = LD(&(ri[WS(ios, 2)]), dist, &(ri[0]));
T6 = LD(&(ii[WS(ios, 2)]), dist, &(ii[0]));
T2 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 3]));
Ta = LD(&(ri[WS(ios, 1)]), dist, &(ri[WS(ios, 1)]));
Td = LD(&(ii[WS(ios, 1)]), dist, &(ii[WS(ios, 1)]));
T9 = LDW(&(W[0]));
Tc = LDW(&(W[TWVL * 1]));
Tg = LD(&(ri[WS(ios, 3)]), dist, &(ri[WS(ios, 1)]));
Tj = LD(&(ii[WS(ios, 3)]), dist, &(ii[WS(ios, 1)]));
/* First half of each twiddle application (plain products)... */
Tt = VMUL(T2, T6);
T4 = VMUL(T2, T3);
Tf = LDW(&(W[TWVL * 4]));
Ti = LDW(&(W[TWVL * 5]));
Tn = VMUL(T9, Td);
Tb = VMUL(T9, Ta);
{
V Tu, T7, Tp, Th, To, Te;
/* ...completed here with VFMA / VFNMS. */
Tu = VFNMS(T5, T3, Tt);
T7 = VFMA(T5, T6, T4);
Tp = VMUL(Tf, Tj);
Th = VMUL(Tf, Tg);
To = VFNMS(Tc, Ta, Tn);
Te = VFMA(Tc, Td, Tb);
{
V Tw, Tx, T8, Tm, Tq, Tk;
/* Combine element 0 with twiddled element 2. */
Tw = VADD(Tu, Tv);
Tx = VSUB(Tv, Tu);
T8 = VADD(T1, T7);
Tm = VSUB(T1, T7);
Tq = VFNMS(Ti, Tg, Tp);
Tk = VFMA(Ti, Tj, Th);
{
V Ts, Tr, Tl, Ty;
/* Combine twiddled elements 1 and 3, then write all eight outputs. */
Ts = VADD(To, Tq);
Tr = VSUB(To, Tq);
Tl = VADD(Te, Tk);
Ty = VSUB(Te, Tk);
ST(&(ri[WS(ios, 1)]), VADD(Tm, Tr), dist, &(ri[WS(ios, 1)]));
ST(&(ri[WS(ios, 3)]), VSUB(Tm, Tr), dist, &(ri[WS(ios, 1)]));
ST(&(ii[WS(ios, 2)]), VSUB(Tw, Ts), dist, &(ii[0]));
ST(&(ii[0]), VADD(Ts, Tw), dist, &(ii[0]));
ST(&(ii[WS(ios, 3)]), VADD(Ty, Tx), dist, &(ii[WS(ios, 1)]));
ST(&(ii[WS(ios, 1)]), VSUB(Tx, Ty), dist, &(ii[WS(ios, 1)]));
ST(&(ri[0]), VADD(T8, Tl), dist, &(ri[0]));
ST(&(ri[WS(ios, 2)]), VSUB(T8, Tl), dist, &(ri[0]));
}
}
}
}
return W;
}
开发者ID:exic,项目名称:last.fm-dbus,代码行数:60,代码来源:t1sv_4.c
示例13: t1sv_2
/*
 * t1sv_2 -- in-place 2-element butterfly on split arrays ri / ii.
 *
 * Each iteration (m from mb up to me, step 2*VL) loads element 0 and
 * element WS(rs,1) of ri and ii, combines element 1 with the twiddle
 * vectors W[0] and W[TWVL*1], and stores sum and difference back to the
 * same slots.  ri, ii and W all advance inside the loop header.
 *
 * NOTE(review): LD/ST/LDW and VMUL/VFMA/VFNMS semantics live in the SIMD
 * support header, which is not visible here.
 */
static void t1sv_2(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Skip the twiddle entries that belong to iterations before mb. */
     W = W + (mb * 2);

     for (m = mb; m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 2), MAKE_VOLATILE_STRIDE(rs)) {
          V re0, im0, re1, im1;
          V tw_a, tw_b;
          V prod_im, prod_re, rot_im, rot_re;

          re0 = LD(&(ri[0]), ms, &(ri[0]));
          im0 = LD(&(ii[0]), ms, &(ii[0]));
          re1 = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
          im1 = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));

          tw_a = LDW(&(W[0]));
          tw_b = LDW(&(W[TWVL * 1]));

          /* Twiddled element 1, built from a plain product plus a fused step. */
          prod_im = VMUL(tw_a, im1);
          prod_re = VMUL(tw_a, re1);
          rot_im = VFNMS(tw_b, re1, prod_im);
          rot_re = VFMA(tw_b, im1, prod_re);

          ST(&(ii[0]), VADD(rot_im, im0), ms, &(ii[0]));
          ST(&(ii[WS(rs, 1)]), VSUB(im0, rot_im), ms, &(ii[WS(rs, 1)]));
          ST(&(ri[0]), VADD(re0, rot_re), ms, &(ri[0]));
          ST(&(ri[WS(rs, 1)]), VSUB(re0, rot_re), ms, &(ri[WS(rs, 1)]));
     }
}
开发者ID:phillipstanleymarbell,项目名称:sunflower-simulator,代码行数:24,代码来源:t1sv_2.c
示例14: hc2cfdftv_2
/*
 * hc2cfdftv_2 -- in-place vector codelet on one Rp/Rm element pair.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0] and Rm[0], one
 * twiddle vector from W[0], and stores two results scaled by KP500000000
 * (0.5) back to Rm[0] and Rp[0].  Rp/Ip move forward and Rm/Im backward by
 * VL*ms per iteration; Ip and Im are never dereferenced here.
 *
 * NOTE(review): the V* macros, LD/ST/LDW and DVK/LDK are defined outside
 * this snippet; confirm their semantics in the SIMD support header.
 */
static void hc2cfdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     INT m;

     /* Twiddle rows are indexed from m == 1, hence (mb - 1). */
     W = W + ((mb - 1) * ((TWVL / VL) * 2));

     for (m = mb; m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(rs)) {
          V plus_in, minus_in, tw;
          V sum, rot;
          V out_minus, out_plus;

          plus_in = LD(&(Rp[0]), ms, &(Rp[0]));
          minus_in = LD(&(Rm[0]), -ms, &(Rm[0]));
          tw = LDW(&(W[0]));

          sum = VFMACONJ(minus_in, plus_in);
          rot = VZMULIJ(tw, VFNMSCONJ(minus_in, plus_in));

          out_minus = VCONJ(VMUL(LDK(KP500000000), VADD(sum, rot)));
          out_plus = VMUL(LDK(KP500000000), VSUB(sum, rot));

          ST(&(Rm[0]), out_minus, -ms, &(Rm[0]));
          ST(&(Rp[0]), out_plus, ms, &(Rp[0]));
     }
}
开发者ID:8cH9azbsFifZ,项目名称:wspr,代码行数:17,代码来源:hc2cfdftv_2.c
示例15: hc2cbdftv_2
/*
 * hc2cbdftv_2 -- in-place vector codelet on one Rp/Rm element pair.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0] and Rm[0], one
 * twiddle vector from W[0], and stores two results back to Rm[0] and
 * Rp[0].  Rp/Ip move forward and Rm/Im backward by VL*ms per iteration;
 * Ip and Im are never dereferenced here.
 *
 * NOTE(review): the V* macros, LD/ST/LDW and VLEAVE are defined outside
 * this snippet; confirm their semantics in the SIMD support header.
 */
static void hc2cbdftv_2(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
     INT m;

     /* Twiddle rows are indexed from m == 1, hence (mb - 1). */
     W = W + ((mb - 1) * ((TWVL / VL) * 2));

     for (m = mb; m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 2), MAKE_VOLATILE_STRIDE(8, rs)) {
          V plus_in, minus_in, tw;
          V sum, rot;
          V out_minus, out_plus;

          plus_in = LD(&(Rp[0]), ms, &(Rp[0]));
          minus_in = LD(&(Rm[0]), -ms, &(Rm[0]));
          tw = LDW(&(W[0]));

          sum = VFMACONJ(minus_in, plus_in);
          rot = VZMULI(tw, VFNMSCONJ(minus_in, plus_in));

          out_minus = VCONJ(VSUB(sum, rot));
          out_plus = VADD(rot, sum);

          ST(&(Rm[0]), out_minus, -ms, &(Rm[0]));
          ST(&(Rp[0]), out_plus, ms, &(Rp[0]));
     }
     VLEAVE();
}
开发者ID:Aegisub,项目名称:fftw3,代码行数:19,代码来源:hc2cbdftv_2.c
示例16: t3bv_10
static void t3bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
{
INT m;
R *x;
x = ii;
for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(10, rs)) {
V T1, T7, Th, Tx, Tr, Td, Tp, T6, Tv, Tc, Te, Ti, Tl, T2, T3;
V T5;
T2 = LDW(&(W[0]));
T3 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 4]));
T1 = LD(&(x[0]), ms, &(x[0]));
T7 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
{
V To, Tw, Tq, Tu, Ta, T4, Tt, Tk, Tb;
To = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Tw = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Tq = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Tu = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Ta = VZMULJ(T2, T3);
T4 = VZMUL(T2, T3);
Th = VZMULJ(T2, T5);
Tt = VZMULJ(T3, T5);
Tb = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Tx = VZMUL(T2, Tw);
Tr = VZMUL(T5, Tq);
Tk = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Td = VZMULJ(Ta, T5);
Tp = VZMUL(T4, To);
T6 = VZMULJ(T4, T5);
Tv = VZMUL(Tt, Tu);
Tc = VZMUL(Ta, Tb);
Te = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Ti = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Tl = VZMUL(T3, Tk);
}
{
V TN, Ts, T8, Ty, TO, Tf, Tj;
TN = VADD(Tp, Tr);
Ts = VSUB(Tp, Tr);
T8 = VZMUL(T6, T7);
Ty = VSUB(Tv, Tx);
TO = VADD(Tv, Tx);
Tf = VZMUL(Td, Te);
Tj = VZMUL(Th, Ti);
{
V T9, TJ, TP, TU, Tz, TF, Tg, TK, Tm, TL;
T9 = VSUB(T1, T8);
TJ = VADD(T1, T8);
TP = VADD(TN, TO);
TU = VSUB(TN, TO);
Tz = VADD(Ts, Ty);
TF = VSUB(Ts, Ty);
Tg = VSUB(Tc, Tf);
TK = VADD(Tc, Tf);
Tm = VSUB(Tj, Tl);
TL = VADD(Tj, Tl);
{
V TM, TV, Tn, TE;
TM = VADD(TK, TL);
TV = VSUB(TK, TL);
Tn = VADD(Tg, Tm);
TE = VSUB(Tg, Tm);
{
V TW, TY, TS, TQ, TG, TI, TC, TA, TR, TB;
TW = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TV, TU));
TY = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TU, TV));
TS = VSUB(TM, TP);
TQ = VADD(TM, TP);
TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TF, TE));
TI = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TE, TF));
TC = VSUB(Tn, Tz);
TA = VADD(Tn, Tz);
ST(&(x[0]), VADD(TJ, TQ), ms, &(x[0]));
TR = VFNMS(LDK(KP250000000), TQ, TJ);
ST(&(x[WS(rs, 5)]), VADD(T9, TA), ms, &(x[WS(rs, 1)]));
TB = VFNMS(LDK(KP250000000), TA, T9);
{
V TX, TT, TH, TD;
TX = VFMA(LDK(KP559016994), TS, TR);
TT = VFNMS(LDK(KP559016994), TS, TR);
TH = VFNMS(LDK(KP559016994), TC, TB);
TD = VFMA(LDK(KP559016994), TC, TB);
ST(&(x[WS(rs, 8)]), VFMAI(TW, TT), ms, &(x[0]));
ST(&(x[WS(rs, 2)]), VFNMSI(TW, TT), ms, &(x[0]));
ST(&(x[WS(rs, 6)]), VFMAI(TY, TX), ms, &(x[0]));
ST(&(x[WS(rs, 4)]), VFNMSI(TY, TX), ms, &(x[0]));
ST(&(x[WS(rs, 9)]), VFNMSI(TG, TD), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 1)]), VFMAI(TG, TD), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 7)]), VFNMSI(TI, TH), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 3)]), VFMAI(TI, TH), ms, &(x[WS(rs, 1)]));
}
}
}
}
//.........这里部分代码省略.........
开发者ID:Enny1991,项目名称:ExApp,代码行数:101,代码来源:t3bv_10.c
示例17: t3fv_20
static void t3fv_20(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
{
INT m;
R *x;
x = ri;
for (m = mb, W = W + (mb * ((TWVL / VL) * 8)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 8), MAKE_VOLATILE_STRIDE(20, rs)) {
V T1k, T1w, T1r, T1z, T1o, T1y, T1v, T1h;
{
V T2, T8, T3, Td;
T2 = LDW(&(W[0]));
T8 = LDW(&(W[TWVL * 2]));
T3 = LDW(&(W[TWVL * 4]));
Td = LDW(&(W[TWVL * 6]));
{
V T7, TM, T1F, T23, T1p, Tp, T1j, T27, T1P, T1I, T1i, T1L, T28, T1S, T1q;
V TE, T1n, T1d, T26, T2e;
{
V T1, TK, T5, TH;
T1 = LD(&(x[0]), ms, &(x[0]));
TK = LD(&(x[WS(rs, 15)]), ms, &(x[WS(rs, 1)]));
T5 = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
TH = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
{
V TA, Tx, TU, T1O, T14, Th, T1G, T1R, T1b, T1J, To, Ts, TV, Tv, TO;
V TQ, TT, Ty, TB;
{
V Tq, Tt, T17, T1a, Tk, Tn;
{
V Tl, Ti, T15, T18, TZ, Tc, T6, Tb, Tf, T10, T12, TL;
{
V TJ, Ta, T9, T4;
Ta = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
TA = VZMULJ(T2, T8);
T9 = VZMUL(T2, T8);
Tx = VZMUL(T8, T3);
Tl = VZMULJ(T8, T3);
T4 = VZMUL(T2, T3);
Tq = VZMULJ(T2, T3);
Tt = VZMULJ(T2, Td);
Ti = VZMULJ(T8, Td);
T15 = VZMULJ(TA, Td);
T18 = VZMULJ(TA, T3);
TU = VZMUL(TA, T3);
TJ = VZMULJ(T9, Td);
TZ = VZMUL(T9, T3);
Tc = VZMULJ(T9, T3);
T6 = VZMULJ(T4, T5);
Tb = VZMULJ(T9, Ta);
Tf = LD(&(x[WS(rs, 14)]), ms, &(x[0]));
T10 = LD(&(x[WS(rs, 13)]), ms, &(x[WS(rs, 1)]));
T12 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
TL = VZMULJ(TJ, TK);
}
{
V T1D, T11, T13, T19, T1E, Tg, T16, TI, Te, Tj, Tm;
T16 = LD(&(x[WS(rs, 17)]), ms, &(x[WS(rs, 1)]));
TI = VZMULJ(Tc, TH);
Te = VZMULJ(Tc, Td);
T7 = VSUB(T1, T6);
T1D = VADD(T1, T6);
T11 = VZMULJ(TZ, T10);
T13 = VZMULJ(T8, T12);
T19 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
T17 = VZMULJ(T15, T16);
TM = VSUB(TI, TL);
T1E = VADD(TI, TL);
Tg = VZMULJ(Te, Tf);
Tj = LD(&(x[WS(rs, 16)]), ms, &(x[0]));
Tm = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
T1O = VADD(T11, T13);
T14 = VSUB(T11, T13);
T1a = VZMULJ(T18, T19);
T1F = VSUB(T1D, T1E);
T23 = VADD(T1D, T1E);
Th = VSUB(Tb, Tg);
T1G = VADD(Tb, Tg);
Tk = VZMULJ(Ti, Tj);
Tn = VZMULJ(Tl, Tm);
}
}
{
V Tr, Tu, TN, TP, TS;
Tr = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
T1R = VADD(T17, T1a);
T1b = VSUB(T17, T1a);
Tu = LD(&(x[WS(rs, 18)]), ms, &(x[0]));
TN = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
TP = LD(&(x[WS(rs, 19)]), ms, &(x[WS(rs, 1)]));
TS = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
T1J = VADD(Tk, Tn);
To = VSUB(Tk, Tn);
Ts = VZMULJ(Tq, Tr);
TV = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Tv = VZMULJ(Tt, Tu);
TO = VZMULJ(T3, TN);
//.........这里部分代码省略.........
开发者ID:barnex,项目名称:fftw,代码行数:101,代码来源:dft_simd_sse2_t3fv_20.c
示例18: hc2cfdftv_8
/*
 * hc2cfdftv_8 -- in-place vector codelet over four Rp/Rm element pairs.
 *
 * Each iteration (m from mb up to me, step VL) loads Rp[0..3] and Rm[0..3]
 * (indices formed with WS(rs, k)), reads seven twiddle vectors from W, and
 * stores eight results back into the same slots.  Rp/Ip move forward and
 * Rm/Im backward by VL*ms per iteration; Ip and Im are advanced but never
 * dereferenced in this body.
 *
 * NOTE(review): LD/ST/LDW, the V* arithmetic macros, DVK/LDK and VLEAVE are
 * defined in the SIMD support header, which is not visible here; the
 * statement order is generator output and should not be hand-edited.
 */
static void hc2cfdftv_8(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants: ~1/sqrt(2), ~1/(2*sqrt(2)) and 0.5; read below through LDK(). */
DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
DVK(KP353553390, +0.353553390593273762200422181052424519642417969);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
{
INT m;
/* W starts at row (mb - 1) and advances by TWVL * 14 per iteration. */
for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 14)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 14), MAKE_VOLATILE_STRIDE(32, rs)) {
V Ta, TE, Tr, TF, Tl, TK, Tw, TG, T1, T6, T3, T8, T2, T7, T4;
V T9, T5, To, Tq, Tn, Tp, Tc, Th, Te, Tj, Td, Ti, Tf, Tk, Tb;
V Tg, Tt, Tv, Ts, Tu, Ty, Tz, Tm, Tx, TC, TD, TA, TB, TI, TO;
V TL, TP, TH, TJ, TM, TR, TN, TQ;
/* Slots 0 and 2: load the Rp and Rm inputs (Rm with stride -ms, then VCONJ)
   and combine them using the twiddles at W[TWVL * 6], W[0], W[TWVL * 8]. */
T1 = LD(&(Rp[0]), ms, &(Rp[0]));
T6 = LD(&(Rp[WS(rs, 2)]), ms, &(Rp[0]));
T2 = LD(&(Rm[0]), -ms, &(Rm[0]));
T3 = VCONJ(T2);
T7 = LD(&(Rm[WS(rs, 2)]), -ms, &(Rm[0]));
T8 = VCONJ(T7);
T4 = VADD(T1, T3);
T5 = LDW(&(W[TWVL * 6]));
T9 = VZMULJ(T5, VADD(T6, T8));
Ta = VADD(T4, T9);
TE = VMUL(LDK(KP500000000), VSUB(T4, T9));
Tn = LDW(&(W[0]));
To = VZMULIJ(Tn, VSUB(T3, T1));
Tp = LDW(&(W[TWVL * 8]));
Tq = VZMULIJ(Tp, VSUB(T8, T6));
Tr = VADD(To, Tq);
TF = VSUB(To, Tq);
/* Slots 1 and 3: same pattern with the twiddles at W[TWVL * 2],
   W[TWVL * 10], W[TWVL * 4], W[TWVL * 12].  After this group every input
   has been loaded. */
Tc = LD(&(Rp[WS(rs, 1)]), ms, &(Rp[WS(rs, 1)]));
Th = LD(&(Rp[WS(rs, 3)]), ms, &(Rp[WS(rs, 1)]));
Td = LD(&(Rm[WS(rs, 1)]), -ms, &(Rm[WS(rs, 1)]));
Te = VCONJ(Td);
Ti = LD(&(Rm[WS(rs, 3)]), -ms, &(Rm[WS(rs, 1)]));
Tj = VCONJ(Ti);
Tb = LDW(&(W[TWVL * 2]));
Tf = VZMULJ(Tb, VADD(Tc, Te));
Tg = LDW(&(W[TWVL * 10]));
Tk = VZMULJ(Tg, VADD(Th, Tj));
Tl = VADD(Tf, Tk);
TK = VSUB(Tf, Tk);
Ts = LDW(&(W[TWVL * 4]));
Tt = VZMULIJ(Ts, VSUB(Te, Tc));
Tu = LDW(&(W[TWVL * 12]));
Tv = VZMULIJ(Tu, VSUB(Tj, Th));
Tw = VADD(Tt, Tv);
TG = VSUB(Tv, Tt);
/* Outputs Rm[WS(rs,3)] and Rp[0]. */
Tm = VADD(Ta, Tl);
Tx = VADD(Tr, Tw);
Ty = VCONJ(VMUL(LDK(KP500000000), VSUB(Tm, Tx)));
Tz = VMUL(LDK(KP500000000), VADD(Tm, Tx));
ST(&(Rm[WS(rs, 3)]), Ty, -ms, &(Rm[WS(rs, 1)]));
ST(&(Rp[0]), Tz, ms, &(Rp[0]));
/* Outputs Rm[WS(rs,1)] and Rp[WS(rs,2)]. */
TA = VSUB(Ta, Tl);
TB = VBYI(VSUB(Tw, Tr));
TC = VCONJ(VMUL(LDK(KP500000000), VSUB(TA, TB)));
TD = VMUL(LDK(KP500000000), VADD(TA, TB));
ST(&(Rm[WS(rs, 1)]), TC, -ms, &(Rm[WS(rs, 1)]));
ST(&(Rp[WS(rs, 2)]), TD, ms, &(Rp[0]));
/* Remaining four outputs: Rm[0], Rp[WS(rs,3)], Rp[WS(rs,1)], Rm[WS(rs,2)]. */
TH = VMUL(LDK(KP353553390), VADD(TF, TG));
TI = VADD(TE, TH);
TO = VSUB(TE, TH);
TJ = VMUL(LDK(KP707106781), VSUB(TG, TF));
TL = VMUL(LDK(KP500000000), VBYI(VSUB(TJ, TK)));
TP = VMUL(LDK(KP500000000), VBYI(VADD(TK, TJ)));
TM = VCONJ(VSUB(TI, TL));
ST(&(Rm[0]), TM, -ms, &(Rm[0]));
TR = VADD(TO, TP);
ST(&(Rp[WS(rs, 3)]), TR, ms, &(Rp[WS(rs, 1)]));
TN = VADD(TI, TL);
ST(&(Rp[WS(rs, 1)]), TN, ms, &(Rp[WS(rs, 1)]));
TQ = VCONJ(VSUB(TO, TP));
ST(&(Rm[WS(rs, 2)]), TQ, -ms, &(Rm[0]));
}
}
VLEAVE();
}
开发者ID:barnex,项目名称:fftw,代码行数:77,代码来源:rdft_simd_sse2_hc2cfdftv_8.c
示例19: t3bv_8
/*
 * t3bv_8 -- in-place 8-element vector butterfly over x = ii.
 *
 * Each iteration (m from mb up to me, step VL) reads three twiddle vectors
 * from W, derives four more from them, scales inputs 1..7 of x by the
 * resulting twiddles, and stores eight combined results back into the same
 * slots.  ri is not referenced in this body; the working pointer is ii.
 *
 * NOTE(review): LD/ST/LDW, the V* arithmetic macros, DVK/LDK and VLEAVE are
 * defined in the SIMD support header, which is not visible here.
 */
static void t3bv_8(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constant ~1/sqrt(2); read below through LDK(). */
DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
{
INT m;
R *x;
x = ii;
/* W starts mb rows in and advances by TWVL * 6 per iteration. */
for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs)) {
V T2, T3, Tb, T1, T5, Tn, Tq, T8, Td, T4, Ta, Tp, Tg, Ti, T9;
/* Three stored twiddles. */
T2 = LDW(&(W[0]));
T3 = LDW(&(W[TWVL * 2]));
Tb = LDW(&(W[TWVL * 4]));
/* All eight inputs are loaded before any store (in-place transform). */
T1 = LD(&(x[0]), ms, &(x[0]));
T5 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Tn = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Tq = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
T8 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Td = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
/* Derived twiddles T4, Ta, Tp (and Tc further down). */
T4 = VZMUL(T2, T3);
Ta = VZMULJ(T2, T3);
Tp = VZMULJ(T2, Tb);
Tg = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
Ti = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
T9 = VZMUL(T2, T8);
{
V T6, To, Tc, Tr, Th, Tj;
/* Twiddle pairing: input 4 with T4, 2 with Ta, 6 with Tp, 7 with Tb,
   3 with T3 (input 1 used T2 above, input 5 uses Tc below). */
T6 = VZMUL(T4, T5);
To = VZMUL(Ta, Tn);
Tc = VZMULJ(Ta, Tb);
Tr = VZMUL(Tp, Tq);
Th = VZMUL(Tb, Tg);
Tj = VZMUL(T3, Ti);
{
V Tx, T7, Te, Ts, Ty, Tk, TB;
/* Sums and differences of the (0,4), (2,6) and (7,3) pairs. */
Tx = VADD(T1, T6);
T7 = VSUB(T1, T6);
Te = VZMUL(Tc, Td);
Ts = VSUB(To, Tr);
Ty = VADD(To, Tr);
Tk = VSUB(Th, Tj);
TB = VADD(Th, Tj);
{
V Tf, TA, Tz, TD;
Tf = VSUB(T9, Te);
TA = VADD(T9, Te);
Tz = VSUB(Tx, Ty);
TD = VADD(Tx, Ty);
{
V TC, TE, Tl, Tt;
TC = VSUB(TA, TB);
TE = VADD(TA, TB);
Tl = VADD(Tf, Tk);
Tt = VSUB(Tf, Tk);
{
V Tu, Tw, Tm, Tv;
/* Even-indexed outputs (slots 0, 4, 2, 6). */
ST(&(x[0]), VADD(TD, TE), ms, &(x[0]));
ST(&(x[WS(rs, 4)]), VSUB(TD, TE), ms, &(x[0]));
ST(&(x[WS(rs, 2)]), VFMAI(TC, Tz), ms, &(x[0]));
ST(&(x[WS(rs, 6)]), VFNMSI(TC, Tz), ms, &(x[0]));
/* Odd-indexed outputs (slots 1, 7, 5, 3), which use KP707106781. */
Tu = VFNMS(LDK(KP707106781), Tt, Ts);
Tw = VFMA(LDK(KP707106781), Tt, Ts);
Tm = VFNMS(LDK(KP707106781), Tl, T7);
Tv = VFMA(LDK(KP707106781), Tl, T7);
ST(&(x[WS(rs, 1)]), VFMAI(Tw, Tv), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 7)]), VFNMSI(Tw, Tv), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 5)]), VFMAI(Tu, Tm), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 3)]), VFNMSI(Tu, Tm), ms, &(x[WS(rs, 1)]));
}
}
}
}
}
}
}
VLEAVE();
}
开发者ID:barnex,项目名称:fftw,代码行数:76,代码来源:dft_simd_sse2_t3bv_8.c
示例20: hc2cfdftv_12
static void hc2cfdftv_12(R *Rp, R *Ip, R *Rm, R *Im, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
INT m;
for (m = mb, W = W + ((mb - 1) * ((TWVL / VL) * 22)); m < me; m = m + VL, Rp = Rp + (VL * ms), Ip = Ip + (VL * ms), Rm = Rm - (VL * ms), Im = Im - (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(rs)) {
V T3, T7, TH, TE, Th, TC, Tq, T11, TU, Tx, Tb, Tz, Tu, Tw, Tp;
V Tl, T9, Ta, T8, Ty, Tn, To, Tm, TG, T1, T2, Tt, T5, T6, T4;
V Tv, Tj, Tk, Ti, TD, Tf, Tg, Te, TB, TT, TF, TR, Tr;
T1 = LD(&(Rp[0]), ms, &(Rp[0]));
T2 = LD(&(Rm[0]), -ms, &(Rm[0]));
Tt = LDW(&(W[0]));
T5
|
请发表评论