本文整理汇总了C++中LD函数的典型用法代码示例。如果您正苦于以下问题：C++ LD函数的具体用法？C++ LD怎么用？C++ LD使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了LD函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: q1fv_4
/*
 * q1fv_4: in-place SIMD kernel operating on a 4x4 group of vector elements.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-4
 * forward "q1" twiddle-square kernel); confirm against the generator before
 * hand-editing. The statement order is presumably scheduled by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri      base pointer of the data; all loads and stores go through it
 *   ii      not referenced in this body
 *   W       twiddle table; pre-offset by mb * ((TWVL / VL) * 6) and advanced
 *           by TWVL * 6 per iteration
 *   rs, vs  strides passed to WS() to form the two index dimensions
 *   mb, me  loop bounds: m runs from mb while m < me, in steps of VL
 *   ms      per-m stride; x advances by VL * ms per iteration and ms is also
 *           forwarded to LD/ST
 *
 * Data flow: the 16 inputs are read at offsets WS(vs, j) + WS(rs, k) for
 * j, k in 0..3. Values derived from the inputs at WS(vs, j) are written back
 * at offsets containing WS(rs, j), so the vs/rs roles are swapped between
 * loads and stores.
 */
static void q1fv_4(R *ri, R *ii, const R *W, stride rs, stride vs, INT mb, INT me, INT ms)
{
{
INT m;
R *x;
x = ri;
for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(8, rs), MAKE_VOLATILE_STRIDE(8, vs)) {
/* Tb, Tm, Tx, TI: the four values stored at WS(vs, 2) + WS(rs, 0..3) after the inner scopes close. */
V Tb, Tm, Tx, TI;
{
V Tc, T9, T3, TG, TA, TH, TD, Ta, T6, Td, Tn, To, Tq, Tr, Tf;
V Tg;
{
/* Load all 16 inputs; sums/differences of the WS(vs, 0) and WS(vs, 3) groups are formed here. */
V T1, T2, Ty, Tz, TB, TC, T4, T5;
T1 = LD(&(x[0]), ms, &(x[0]));
T2 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Ty = LD(&(x[WS(vs, 3)]), ms, &(x[WS(vs, 3)]));
Tz = LD(&(x[WS(vs, 3) + WS(rs, 2)]), ms, &(x[WS(vs, 3)]));
TB = LD(&(x[WS(vs, 3) + WS(rs, 1)]), ms, &(x[WS(vs, 3) + WS(rs, 1)]));
TC = LD(&(x[WS(vs, 3) + WS(rs, 3)]), ms, &(x[WS(vs, 3) + WS(rs, 1)]));
T4 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
T5 = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Tc = LD(&(x[WS(vs, 1)]), ms, &(x[WS(vs, 1)]));
T9 = VADD(T1, T2);
T3 = VSUB(T1, T2);
TG = VADD(Ty, Tz);
TA = VSUB(Ty, Tz);
TH = VADD(TB, TC);
TD = VSUB(TB, TC);
Ta = VADD(T4, T5);
T6 = VSUB(T4, T5);
Td = LD(&(x[WS(vs, 1) + WS(rs, 2)]), ms, &(x[WS(vs, 1)]));
Tn = LD(&(x[WS(vs, 2)]), ms, &(x[WS(vs, 2)]));
To = LD(&(x[WS(vs, 2) + WS(rs, 2)]), ms, &(x[WS(vs, 2)]));
Tq = LD(&(x[WS(vs, 2) + WS(rs, 1)]), ms, &(x[WS(vs, 2) + WS(rs, 1)]));
Tr = LD(&(x[WS(vs, 2) + WS(rs, 3)]), ms, &(x[WS(vs, 2) + WS(rs, 1)]));
Tf = LD(&(x[WS(vs, 1) + WS(rs, 1)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));
Tg = LD(&(x[WS(vs, 1) + WS(rs, 3)]), ms, &(x[WS(vs, 1) + WS(rs, 1)]));
}
{
V Tk, Te, Tv, Tp, Tw, Ts, Tl, Th, T7, TE, Tu, TF;
/* The plain sums (no BYTWJ) land at x[WS(rs, 0..3)]; all loads above have completed, so overwriting x is safe. */
ST(&(x[0]), VADD(T9, Ta), ms, &(x[0]));
Tk = VADD(Tc, Td);
Te = VSUB(Tc, Td);
Tv = VADD(Tn, To);
Tp = VSUB(Tn, To);
Tw = VADD(Tq, Tr);
Ts = VSUB(Tq, Tr);
Tl = VADD(Tf, Tg);
Th = VSUB(Tf, Tg);
ST(&(x[WS(rs, 3)]), VADD(TG, TH), ms, &(x[WS(rs, 1)]));
/* Values passed through BYTWJ with W[0] are stored at WS(vs, 1) + ...; those with W[TWVL * 4] at WS(vs, 3) + ... */
T7 = BYTWJ(&(W[0]), VFNMSI(T6, T3));
TE = BYTWJ(&(W[0]), VFNMSI(TD, TA));
{
V Tt, Ti, Tj, T8;
T8 = BYTWJ(&(W[TWVL * 4]), VFMAI(T6, T3));
ST(&(x[WS(rs, 2)]), VADD(Tv, Tw), ms, &(x[0]));
Tt = BYTWJ(&(W[0]), VFNMSI(Ts, Tp));
ST(&(x[WS(rs, 1)]), VADD(Tk, Tl), ms, &(x[WS(rs, 1)]));
Ti = BYTWJ(&(W[0]), VFNMSI(Th, Te));
Tj = BYTWJ(&(W[TWVL * 4]), VFMAI(Th, Te));
ST(&(x[WS(vs, 1)]), T7, ms, &(x[WS(vs, 1)]));
ST(&(x[WS(vs, 1) + WS(rs, 3)]), TE, ms, &(x[WS(vs, 1) + WS(rs, 1)]));
ST(&(x[WS(vs, 3)]), T8, ms, &(x[WS(vs, 3)]));
Tu = BYTWJ(&(W[TWVL * 4]), VFMAI(Ts, Tp));
ST(&(x[WS(vs, 1) + WS(rs, 2)]), Tt, ms, &(x[WS(vs, 1)]));
TF = BYTWJ(&(W[TWVL * 4]), VFMAI(TD, TA));
ST(&(x[WS(vs, 1) + WS(rs, 1)]), Ti, ms, &(x[WS(vs, 1) + WS(rs, 1)]));
ST(&(x[WS(vs, 3) + WS(rs, 1)]), Tj, ms, &(x[WS(vs, 3) + WS(rs, 1)]));
}
/* Differences of the pairwise sums, passed through BYTWJ with W[TWVL * 2]; stored at WS(vs, 2) + ... below. */
Tb = BYTWJ(&(W[TWVL * 2]), VSUB(T9, Ta));
Tm = BYTWJ(&(W[TWVL * 2]), VSUB(Tk, Tl));
Tx = BYTWJ(&(W[TWVL * 2]), VSUB(Tv, Tw));
ST(&(x[WS(vs, 3) + WS(rs, 2)]), Tu, ms, &(x[WS(vs, 3)]));
TI = BYTWJ(&(W[TWVL * 2]), VSUB(TG, TH));
ST(&(x[WS(vs, 3) + WS(rs, 3)]), TF, ms, &(x[WS(vs, 3) + WS(rs, 1)]));
}
}
ST(&(x[WS(vs, 2)]), Tb, ms, &(x[WS(vs, 2)]));
ST(&(x[WS(vs, 2) + WS(rs, 1)]), Tm, ms, &(x[WS(vs, 2) + WS(rs, 1)]));
ST(&(x[WS(vs, 2) + WS(rs, 2)]), Tx, ms, &(x[WS(vs, 2)]));
ST(&(x[WS(vs, 2) + WS(rs, 3)]), TI, ms, &(x[WS(vs, 2) + WS(rs, 1)]));
}
}
/* VLEAVE is defined elsewhere; presumably a SIMD epilogue hook - TODO confirm. */
VLEAVE();
}
开发者ID:SKA-ScienceDataProcessor,项目名称:FastImaging,代码行数:85,代码来源:q1fv_4.c
示例2: n2bv_20
static void n2bv_20(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
{
INT i;
const R *xi;
R *xo;
xi = ii;
xo = io;
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(40, is), MAKE_VOLATILE_STRIDE(40, os)) {
V T1H, T1I, TS, TA, TN, TV, T1M, T1N, T1O, T1P, T1R, T1S, TK, TU, TR;
V Tl;
{
V T3, TE, T1r, T13, Ta, TL, Tz, TG, Ts, TF, Th, TM, T1u, T1C, T1n;
V T1a, T1m, T1h, T1x, T1D, Tk, Ti;
{
V T1, T2, TC, TD;
T1 = LD(&(xi[0]), ivs, &(xi[0]));
T2 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
TC = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
TD = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
{
V T14, T6, T1c, Tv, Tm, T1f, Ty, T17, T9, Tn, Tp, T1b, Td, Tq, Te;
V Tf, T15, To;
{
V Tw, Tx, T7, T8, Tb, Tc;
{
V T4, T5, Tt, Tu, T11, T12;
T4 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
T5 = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
Tt = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
Tu = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Tw = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
T3 = VSUB(T1, T2);
T11 = VADD(T1, T2);
TE = VSUB(TC, TD);
T12 = VADD(TC, TD);
T14 = VADD(T4, T5);
T6 = VSUB(T4, T5);
T1c = VADD(Tt, Tu);
Tv = VSUB(Tt, Tu);
Tx = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
T7 = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
T8 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
T1r = VADD(T11, T12);
T13 = VSUB(T11, T12);
}
Tb = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Tc = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
Tm = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
T1f = VADD(Tw, Tx);
Ty = VSUB(Tw, Tx);
T17 = VADD(T7, T8);
T9 = VSUB(T7, T8);
Tn = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
Tp = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
T1b = VADD(Tb, Tc);
Td = VSUB(Tb, Tc);
Tq = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Te = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Tf = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
}
Ta = VADD(T6, T9);
TL = VSUB(T6, T9);
T15 = VADD(Tm, Tn);
To = VSUB(Tm, Tn);
Tz = VSUB(Tv, Ty);
TG = VADD(Tv, Ty);
{
V T1d, T1v, T18, Tr, T1e, Tg, T16, T1s;
T1d = VSUB(T1b, T1c);
T1v = VADD(T1b, T1c);
T18 = VADD(Tp, Tq);
Tr = VSUB(Tp, Tq);
T1e = VADD(Te, Tf);
Tg = VSUB(Te, Tf);
T16 = VSUB(T14, T15);
T1s = VADD(T14, T15);
{
V T1t, T19, T1w, T1g;
T1t = VADD(T17, T18);
T19 = VSUB(T17, T18);
Ts = VSUB(To, Tr);
TF = VADD(To, Tr);
T1w = VADD(T1e, T1f);
T1g = VSUB(T1e, T1f);
Th = VADD(Td, Tg);
TM = VSUB(Td, Tg);
T1u = VADD(T1s, T1t);
T1C = VSUB(T1s, T1t);
T1n = VSUB(T16, T19);
T1a = VADD(T16, T19);
T1m = VSUB(T1d, T1g);
T1h = VADD(T1d, T1g);
T1x = VADD(T1v, T1w);
T1D = VSUB(T1v, T1w);
}
//.........这里部分代码省略.........
开发者ID:SKA-ScienceDataProcessor,项目名称:FastImaging,代码行数:101,代码来源:n2bv_20.c
示例3: n1fv_10
/*
 * n1fv_10: out-of-place SIMD kernel reading 10 inputs and writing 10 outputs
 * per iteration.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-10
 * forward, no twiddles); confirm before hand-editing. The statement order is
 * presumably scheduled by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri        input base pointer (xi starts here)
 *   ii        not referenced in this body
 *   ro        output base pointer (xo starts here)
 *   io        not referenced in this body
 *   is, os    strides passed to WS() to index inputs / outputs 0..9
 *   v         iteration count; the loop runs while i > 0, decrementing by VL
 *   ivs, ovs  per-iteration strides; xi/xo advance by VL * ivs / VL * ovs and
 *             the same values are forwarded to LD/ST
 */
static void n1fv_10(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
/* Constants declared through DVK and read through LDK; the names encode the leading digits of each value. */
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP618033988, +0.618033988749894848204586834365638117720309180);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
{
INT i;
const R *xi;
R *xo;
xi = ri;
xo = ro;
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(20, is), MAKE_VOLATILE_STRIDE(20, os)) {
V Tb, Tr, T3, Ts, T6, Tw, Tg, Tt, T9, Tc, T1, T2;
/* Inputs are loaded in pairs (k, k + 5 mod 10) and immediately combined into a sum and a difference. */
T1 = LD(&(xi[0]), ivs, &(xi[0]));
T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
{
V T4, T5, Te, Tf, T7, T8;
T4 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
T5 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Te = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
Tf = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
T7 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Tb = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Tr = VADD(T1, T2);
T3 = VSUB(T1, T2);
Ts = VADD(T4, T5);
T6 = VSUB(T4, T5);
Tw = VADD(Te, Tf);
Tg = VSUB(Te, Tf);
Tt = VADD(T7, T8);
T9 = VSUB(T7, T8);
Tc = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
}
{
V TD, Tu, Tm, Ta, Td, Tv;
TD = VSUB(Ts, Tt);
Tu = VADD(Ts, Tt);
Tm = VSUB(T6, T9);
Ta = VADD(T6, T9);
Td = VSUB(Tb, Tc);
Tv = VADD(Tb, Tc);
{
V TC, Tx, Tn, Th;
TC = VSUB(Tv, Tw);
Tx = VADD(Tv, Tw);
Tn = VSUB(Td, Tg);
Th = VADD(Td, Tg);
{
V Ty, TA, TE, TG, Ti, Tk, To, Tq, Tz, Tj;
Ty = VADD(Tu, Tx);
TA = VSUB(Tu, Tx);
TE = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), TD, TC));
TG = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), TC, TD));
Ti = VADD(Ta, Th);
Tk = VSUB(Ta, Th);
To = VMUL(LDK(KP951056516), VFMA(LDK(KP618033988), Tn, Tm));
Tq = VMUL(LDK(KP951056516), VFNMS(LDK(KP618033988), Tm, Tn));
Tz = VFNMS(LDK(KP250000000), Ty, Tr);
/* Output 0 is built from the pairwise sums (Tr, Ty); output 5 from the pairwise differences (T3, Ti). */
ST(&(xo[0]), VADD(Tr, Ty), ovs, &(xo[0]));
Tj = VFNMS(LDK(KP250000000), Ti, T3);
ST(&(xo[WS(os, 5)]), VADD(T3, Ti), ovs, &(xo[WS(os, 1)]));
{
V TB, TF, Tl, Tp;
TB = VFNMS(LDK(KP559016994), TA, Tz);
TF = VFMA(LDK(KP559016994), TA, Tz);
Tl = VFMA(LDK(KP559016994), Tk, Tj);
Tp = VFNMS(LDK(KP559016994), Tk, Tj);
/* Remaining outputs are written in VFMAI/VFNMSI pairs at indices k and 10 - k. */
ST(&(xo[WS(os, 4)]), VFMAI(TG, TF), ovs, &(xo[0]));
ST(&(xo[WS(os, 6)]), VFNMSI(TG, TF), ovs, &(xo[0]));
ST(&(xo[WS(os, 8)]), VFNMSI(TE, TB), ovs, &(xo[0]));
ST(&(xo[WS(os, 2)]), VFMAI(TE, TB), ovs, &(xo[0]));
ST(&(xo[WS(os, 3)]), VFNMSI(Tq, Tp), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 7)]), VFMAI(Tq, Tp), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 9)]), VFMAI(To, Tl), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 1)]), VFNMSI(To, Tl), ovs, &(xo[WS(os, 1)]));
}
}
}
}
}
}
/* VLEAVE is defined elsewhere; presumably a SIMD epilogue hook - TODO confirm. */
VLEAVE();
}
开发者ID:barnex,项目名称:fftw,代码行数:85,代码来源:dft_simd_sse2_n1fv_10.c
示例4: n1bv_12
/*
 * n1bv_12: out-of-place SIMD kernel reading 12 inputs and writing 12 outputs
 * per iteration.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-12
 * backward, no twiddles); confirm before hand-editing. The statement order is
 * presumably scheduled by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri        not referenced in this body
 *   ii        input base pointer (xi starts here)
 *   ro        not referenced in this body
 *   io        output base pointer (xo starts here)
 *   is, os    strides passed to WS() to index inputs / outputs 0..11
 *   v         iteration count; the loop runs while i > 0, decrementing by VL
 *   ivs, ovs  per-iteration strides; xi/xo advance by VL * ivs / VL * ovs and
 *             the same values are forwarded to LD/ST
 */
static void n1bv_12(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
/* Constants declared through DVK and read through LDK; the names encode the leading digits of each value. */
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
{
INT i;
const R *xi;
R *xo;
xi = ii;
xo = io;
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(24, is), MAKE_VOLATILE_STRIDE(24, os)) {
V T1, T6, Tc, Th, Td, Te, Ti, Tz, T4, TA, T9, Tj, Tf, Tw;
{
/* Load all 12 inputs: even indices first, then odd indices. */
V T2, T3, T7, T8;
T1 = LD(&(xi[0]), ivs, &(xi[0]));
T6 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
T2 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
T3 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
T7 = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
Tc = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Th = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Td = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Te = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
Ti = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Tz = VSUB(T2, T3);
T4 = VADD(T2, T3);
TA = VSUB(T7, T8);
T9 = VADD(T7, T8);
Tj = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
}
Tf = VADD(Td, Te);
Tw = VSUB(Td, Te);
{
V T5, Tp, TJ, TB, Ta, Tq, Tk, Tx, Tg, Ts;
T5 = VADD(T1, T4);
Tp = VFNMS(LDK(KP500000000), T4, T1);
TJ = VSUB(Tz, TA);
TB = VADD(Tz, TA);
Ta = VADD(T6, T9);
Tq = VFNMS(LDK(KP500000000), T9, T6);
Tk = VADD(Ti, Tj);
/* Operand order here is (Tj, Ti), the reverse of the VADD on the previous line. */
Tx = VSUB(Tj, Ti);
Tg = VADD(Tc, Tf);
Ts = VFNMS(LDK(KP500000000), Tf, Tc);
{
V Tr, TF, Tb, Tn, TG, Ty, Tl, Tt;
Tr = VADD(Tp, Tq);
TF = VSUB(Tp, Tq);
Tb = VSUB(T5, Ta);
Tn = VADD(T5, Ta);
TG = VADD(Tw, Tx);
Ty = VSUB(Tw, Tx);
Tl = VADD(Th, Tk);
Tt = VFNMS(LDK(KP500000000), Tk, Th);
{
V TC, TE, TH, TL, Tu, TI, Tm, To;
TC = VMUL(LDK(KP866025403), VSUB(Ty, TB));
TE = VMUL(LDK(KP866025403), VADD(TB, Ty));
TH = VFNMS(LDK(KP866025403), TG, TF);
TL = VFMA(LDK(KP866025403), TG, TF);
Tu = VADD(Ts, Tt);
TI = VSUB(Ts, Tt);
Tm = VSUB(Tg, Tl);
To = VADD(Tg, Tl);
{
V TK, TM, Tv, TD;
TK = VFMA(LDK(KP866025403), TJ, TI);
TM = VFNMS(LDK(KP866025403), TJ, TI);
Tv = VSUB(Tr, Tu);
TD = VADD(Tr, Tu);
/* Outputs 0 and 6 are a plain sum/difference; the rest are written in VFMAI/VFNMSI pairs at indices k and 12 - k. */
ST(&(xo[0]), VADD(Tn, To), ovs, &(xo[0]));
ST(&(xo[WS(os, 6)]), VSUB(Tn, To), ovs, &(xo[0]));
ST(&(xo[WS(os, 9)]), VFMAI(Tm, Tb), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 3)]), VFNMSI(Tm, Tb), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 5)]), VFMAI(TM, TL), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 7)]), VFNMSI(TM, TL), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 11)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 1)]), VFMAI(TK, TH), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 8)]), VFNMSI(TE, TD), ovs, &(xo[0]));
ST(&(xo[WS(os, 4)]), VFMAI(TE, TD), ovs, &(xo[0]));
ST(&(xo[WS(os, 2)]), VFMAI(TC, Tv), ovs, &(xo[0]));
ST(&(xo[WS(os, 10)]), VFNMSI(TC, Tv), ovs, &(xo[0]));
}
}
}
}
}
}
/* VLEAVE is defined elsewhere; presumably a SIMD epilogue hook - TODO confirm. */
VLEAVE();
}
开发者ID:dpl0,项目名称:bioinformatics,代码行数:91,代码来源:n1bv_12.c
示例5: t1bv_12
static void t1bv_12(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
INT m;
R *x;
x = ii;
for (m = mb, W = W + (mb * ((TWVL / VL) * 22)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 22), MAKE_VOLATILE_STRIDE(rs)) {
V TI, Ti, TA, T7, Tm, TE, Tw, Tk, Tf, TB, TU, TM;
{
V T9, TK, Tj, TL, Te;
{
V T1, T4, T2, Tp, Tt, Tr;
T1 = LD(&(x[0]), ms, &(x[0]));
T4 = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
T2 = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Tp = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Tt = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
{
V T5, T3, Tq, Tu, Ts, Td, Tb, T8, Tc, Ta;
T8 = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Tc = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
Ta = LD(&(x[WS(rs, 10)]), ms, &(x[0]));
T5 = BYTW(&(W[TWVL * 14]), T4);
T3 = BYTW(&(W[TWVL * 6]), T2);
Tq = BYTW(&(W[TWVL * 16]), Tp);
Tu = BYTW(&(W[TWVL * 8]), Tt);
Ts = BYTW(&(W[0]), Tr);
T9 = BYTW(&(W[TWVL * 10]), T8);
Td = BYTW(&(W[TWVL * 2]), Tc);
Tb = BYTW(&(W[TWVL * 18]), Ta);
{
V Th, T6, Tl, Tv;
Th = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
TK = VSUB(T3, T5);
T6 = VADD(T3, T5);
Tl = LD(&(x[WS(rs, 11)]), ms, &(x[WS(rs, 1)]));
Tv = VADD(Ts, Tu);
TI = VSUB(Tu, Ts);
Tj = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
TL = VSUB(Tb, Td);
Te = VADD(Tb, Td);
Ti = BYTW(&(W[TWVL * 4]), Th);
TA = VFNMS(LDK(KP500000000), T6, T1);
T7 = VADD(T1, T6);
Tm = BYTW(&(W[TWVL * 20]), Tl);
TE = VFNMS(LDK(KP500000000), Tv, Tq);
Tw = VADD(Tq, Tv);
}
}
}
Tk = BYTW(&(W[TWVL * 12]), Tj);
Tf = VADD(T9, Te);
TB = VFNMS(LDK(KP500000000), Te, T9);
TU = VSUB(TK, TL);
TM = VADD(TK, TL);
}
{
V Tn, TH, TC, TQ, Ty, Tg;
Tn = VADD(Tk, Tm);
TH = VSUB(Tk, Tm);
TC = VADD(TA, TB);
TQ = VSUB(TA, TB);
Ty = VADD(T7, Tf);
Tg = VSUB(T7, Tf);
{
V To, TD, TJ, TR;
To = VADD(Ti, Tn);
TD = VFNMS(LDK(KP500000000), Tn, Ti);
TJ = VSUB(TH, TI);
TR = VADD(TH, TI);
{
V TP, TN, TW, TS, TO, TG, TX, TV;
{
V Tz, Tx, TF, TT;
Tz = VADD(To, Tw);
Tx = VSUB(To, Tw);
TF = VADD(TD, TE);
TT = VSUB(TD, TE);
TP = VMUL(LDK(KP866025403), VADD(TM, TJ));
TN = VMUL(LDK(KP866025403), VSUB(TJ, TM));
TW = VFMA(LDK(KP866025403), TR, TQ);
TS = VFNMS(LDK(KP866025403), TR, TQ);
ST(&(x[WS(rs, 6)]), VSUB(Ty, Tz), ms, &(x[0]));
ST(&(x[0]), VADD(Ty, Tz), ms, &(x[0]));
ST(&(x[WS(rs, 9)]), VFMAI(Tx, Tg), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 3)]), VFNMSI(Tx, Tg), ms, &(x[WS(rs, 1)]));
TO = VADD(TC, TF);
TG = VSUB(TC, TF);
TX = VFNMS(LDK(KP866025403), TU, TT);
TV = VFMA(LDK(KP866025403), TU, TT);
}
ST(&(x[WS(rs, 8)]), VFNMSI(TP, TO), ms, &(x[0]));
ST(&(x[WS(rs, 4)]), VFMAI(TP, TO), ms, &(x[0]));
ST(&(x[WS(rs, 2)]), VFMAI(TN, TG), ms, &(x[0]));
ST(&(x[WS(rs, 10)]), VFNMSI(TN, TG), ms, &(x[0]));
ST(&(x[WS(rs, 5)]), VFMAI(TX, TW), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 7)]), VFNMSI(TX, TW), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 11)]), VFNMSI(TV, TS), ms, &(x[WS(rs, 1)]));
//.........这里部分代码省略.........
开发者ID:phillipstanleymarbell,项目名称:sunflower-simulator,代码行数:101,代码来源:t1bv_12.c
示例6: t3bv_10
static void t3bv_10(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
INT m;
R *x;
x = ii;
for (m = mb, W = W + (mb * ((TWVL / VL) * 6)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 6), MAKE_VOLATILE_STRIDE(rs)) {
V T1, T2, T3, Ti, T6, T7, TA, Tb, To;
T1 = LDW(&(W[0]));
T2 = LDW(&(W[TWVL * 2]));
T3 = VZMULJ(T1, T2);
Ti = VZMUL(T1, T2);
T6 = LDW(&(W[TWVL * 4]));
T7 = VZMULJ(T3, T6);
TA = VZMULJ(Ti, T6);
Tb = VZMULJ(T1, T6);
To = VZMULJ(T2, T6);
{
V TD, TQ, Tn, Tt, Tx, TM, TN, TS, Ta, Tg, Tw, TJ, TK, TR, Tz;
V TC, TB;
Tz = LD(&(x[0]), ms, &(x[0]));
TB = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
TC = VZMUL(TA, TB);
TD = VSUB(Tz, TC);
TQ = VADD(Tz, TC);
{
V Tk, Ts, Tm, Tq;
{
V Tj, Tr, Tl, Tp;
Tj = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Tk = VZMUL(Ti, Tj);
Tr = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
Ts = VZMUL(T1, Tr);
Tl = LD(&(x[WS(rs, 9)]), ms, &(x[WS(rs, 1)]));
Tm = VZMUL(T6, Tl);
Tp = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Tq = VZMUL(To, Tp);
}
Tn = VSUB(Tk, Tm);
Tt = VSUB(Tq, Ts);
Tx = VADD(Tn, Tt);
TM = VADD(Tk, Tm);
TN = VADD(Tq, Ts);
TS = VADD(TM, TN);
}
{
V T5, Tf, T9, Td;
{
V T4, Te, T8, Tc;
T4 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
T5 = VZMUL(T3, T4);
Te = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
Tf = VZMUL(T2, Te);
T8 = LD(&(x[WS(rs, 7)]), ms, &(x[WS(rs, 1)]));
T9 = VZMUL(T7, T8);
Tc = LD(&(x[WS(rs, 8)]), ms, &(x[0]));
Td = VZMUL(Tb, Tc);
}
Ta = VSUB(T5, T9);
Tg = VSUB(Td, Tf);
Tw = VADD(Ta, Tg);
TJ = VADD(T5, T9);
TK = VADD(Td, Tf);
TR = VADD(TJ, TK);
}
{
V Ty, TE, TF, Tv, TI, Th, Tu, TH, TG;
Ty = VMUL(LDK(KP559016994), VSUB(Tw, Tx));
TE = VADD(Tw, Tx);
TF = VFNMS(LDK(KP250000000), TE, TD);
Th = VSUB(Ta, Tg);
Tu = VSUB(Tn, Tt);
Tv = VBYI(VFMA(LDK(KP951056516), Th, VMUL(LDK(KP587785252), Tu)));
TI = VBYI(VFNMS(LDK(KP951056516), Tu, VMUL(LDK(KP587785252), Th)));
ST(&(x[WS(rs, 5)]), VADD(TD, TE), ms, &(x[WS(rs, 1)]));
TH = VSUB(TF, Ty);
ST(&(x[WS(rs, 3)]), VSUB(TH, TI), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 7)]), VADD(TI, TH), ms, &(x[WS(rs, 1)]));
TG = VADD(Ty, TF);
ST(&(x[WS(rs, 1)]), VADD(Tv, TG), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 9)]), VSUB(TG, Tv), ms, &(x[WS(rs, 1)]));
}
{
V TV, TT, TU, TP, TY, TL, TO, TX, TW;
TV = VMUL(LDK(KP559016994), VSUB(TR, TS));
TT = VADD(TR, TS);
TU = VFNMS(LDK(KP250000000), TT, TQ);
TL = VSUB(TJ, TK);
TO = VSUB(TM, TN);
TP = VBYI(VFNMS(LDK(KP951056516), TO, VMUL(LDK(KP587785252), TL)));
TY = VBYI(VFMA(LDK(KP951056516), TL, VMUL(LDK(KP587785252), TO)));
ST(&(x[0]), VADD(TQ, TT), ms, &(x[0]));
TX = VADD(TV, TU);
ST(&(x[WS(rs, 4)]), VSUB(TX, TY), ms, &(x[0]));
ST(&(x[WS(rs, 6)]), VADD(TY, TX), ms, &(x[0]));
TW = VSUB(TU, TV);
ST(&(x[WS(rs, 2)]), VADD(TP, TW), ms, &(x[0]));
//.........这里部分代码省略.........
开发者ID:BackupTheBerlios,项目名称:openvsipl,代码行数:101,代码来源:t3bv_10.c
示例7: t1fuv_7
/*
 * t1fuv_7: in-place SIMD kernel on 7 elements, with six of the seven inputs
 * passed through BYTWJ against the table W before being combined.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-7
 * forward twiddle kernel); confirm before hand-editing. The statement order
 * is presumably scheduled by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri      base pointer of the data; all loads and stores go through it
 *   ii      not referenced in this body
 *   W       twiddle table; pre-offset by mb * ((TWVL / VL) * 12) and advanced
 *           by TWVL * 12 per iteration
 *   rs      stride passed to WS() to index elements 0..6
 *   mb, me  loop bounds: m runs from mb while m < me, in steps of VL
 *   ms      per-m stride; x advances by VL * ms per iteration and ms is also
 *           forwarded to LD/ST
 *
 * Unlike some sibling kernels in this listing, this body does not call
 * VLEAVE() and uses the one-argument form of MAKE_VOLATILE_STRIDE.
 */
static void t1fuv_7(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
/* Constants declared through DVK and read through LDK; the names encode the leading digits of each value. */
DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
DVK(KP801937735, +0.801937735804838252472204639014890102331838324);
DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
DVK(KP692021471, +0.692021471630095869627814897002069140197260599);
DVK(KP554958132, +0.554958132087371191422194871006410481067288862);
DVK(KP356895867, +0.356895867892209443894399510021300583399127187);
INT m;
R *x;
x = ri;
for (m = mb, W = W + (mb * ((TWVL / VL) * 12)); m < me; m = m + VL, x = x + (VL * ms), W = W + (TWVL * 12), MAKE_VOLATILE_STRIDE(rs)) {
V T1, T2, T4, Te, Tc, T9, T7;
/* Load all 7 elements before any store, since the kernel writes back into x. */
T1 = LD(&(x[0]), ms, &(x[0]));
T2 = LD(&(x[WS(rs, 1)]), ms, &(x[WS(rs, 1)]));
T4 = LD(&(x[WS(rs, 6)]), ms, &(x[0]));
Te = LD(&(x[WS(rs, 4)]), ms, &(x[0]));
Tc = LD(&(x[WS(rs, 3)]), ms, &(x[WS(rs, 1)]));
T9 = LD(&(x[WS(rs, 5)]), ms, &(x[WS(rs, 1)]));
T7 = LD(&(x[WS(rs, 2)]), ms, &(x[0]));
{
V T3, T5, Tf, Td, Ta, T8;
/* Element k (k = 1..6) is combined with the table entry at W[TWVL * 2 * (k - 1)]; element 0 (T1) is used as loaded. */
T3 = BYTWJ(&(W[0]), T2);
T5 = BYTWJ(&(W[TWVL * 10]), T4);
Tf = BYTWJ(&(W[TWVL * 6]), Te);
Td = BYTWJ(&(W[TWVL * 4]), Tc);
Ta = BYTWJ(&(W[TWVL * 8]), T9);
T8 = BYTWJ(&(W[TWVL * 2]), T7);
{
V T6, Tk, Tg, Tl, Tb, Tm;
/* Sums and differences of the pairs (1, 6), (3, 4) and (2, 5). */
T6 = VADD(T3, T5);
Tk = VSUB(T5, T3);
Tg = VADD(Td, Tf);
Tl = VSUB(Tf, Td);
Tb = VADD(T8, Ta);
Tm = VSUB(Ta, T8);
{
V Th, Ts, Tp, Tu, Tn, Tx, Ti, Tt;
Th = VFNMS(LDK(KP356895867), T6, Tg);
Ts = VFMA(LDK(KP554958132), Tl, Tk);
/* Element 0 of the result is the sum of all seven values. */
ST(&(x[0]), VADD(T1, VADD(T6, VADD(Tb, Tg))), ms, &(x[0]));
Tp = VFNMS(LDK(KP356895867), Tb, T6);
Tu = VFNMS(LDK(KP356895867), Tg, Tb);
Tn = VFMA(LDK(KP554958132), Tm, Tl);
Tx = VFNMS(LDK(KP554958132), Tk, Tm);
Ti = VFNMS(LDK(KP692021471), Th, Tb);
Tt = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Ts, Tm));
{
V Tq, Tv, To, Ty, Tj, Tr, Tw;
Tq = VFNMS(LDK(KP692021471), Tp, Tg);
Tv = VFNMS(LDK(KP692021471), Tu, T6);
To = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tn, Tk));
Ty = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tx, Tl));
Tj = VFNMS(LDK(KP900968867), Ti, T1);
Tr = VFNMS(LDK(KP900968867), Tq, T1);
Tw = VFNMS(LDK(KP900968867), Tv, T1);
/* Remaining results are written in VFMAI/VFNMSI pairs at indices k and 7 - k. */
ST(&(x[WS(rs, 2)]), VFMAI(To, Tj), ms, &(x[0]));
ST(&(x[WS(rs, 5)]), VFNMSI(To, Tj), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 1)]), VFMAI(Tt, Tr), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 6)]), VFNMSI(Tt, Tr), ms, &(x[0]));
ST(&(x[WS(rs, 3)]), VFMAI(Ty, Tw), ms, &(x[WS(rs, 1)]));
ST(&(x[WS(rs, 4)]), VFNMSI(Ty, Tw), ms, &(x[0]));
}
}
}
}
}
}
开发者ID:BackupTheBerlios,项目名称:openvsipl,代码行数:68,代码来源:t1fuv_7.c
示例8: n1fv_9
/*
 * n1fv_9: out-of-place SIMD kernel reading 9 inputs and writing 9 outputs
 * per iteration.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-9
 * forward, no twiddles); confirm before hand-editing. The statement order is
 * presumably scheduled by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri        input base pointer (xi starts here)
 *   ii        not referenced in this body
 *   ro        output base pointer (xo starts here)
 *   io        not referenced in this body
 *   is, os    strides passed to WS() to index inputs / outputs 0..8
 *   v         iteration count; the loop runs while i > 0, decrementing by VL
 *   ivs, ovs  per-iteration strides; xi/xo advance by VL * ivs / VL * ovs and
 *             the same values are forwarded to LD/ST
 *
 * Unlike some sibling kernels in this listing, this body does not call
 * VLEAVE() and uses the one-argument form of MAKE_VOLATILE_STRIDE.
 */
static void n1fv_9(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
/* Constants declared through DVK and read through LDK; the names encode the leading digits of each value. */
DVK(KP939692620, +0.939692620785908384054109277324731469936208134);
DVK(KP826351822, +0.826351822333069651148283373230685203999624323);
DVK(KP879385241, +0.879385241571816768108218554649462939872416269);
DVK(KP984807753, +0.984807753012208059366743024589523013670643252);
DVK(KP666666666, +0.666666666666666666666666666666666666666666667);
DVK(KP852868531, +0.852868531952443209628250963940074071936020296);
DVK(KP907603734, +0.907603734547952313649323976213898122064543220);
DVK(KP420276625, +0.420276625461206169731530603237061658838781920);
DVK(KP673648177, +0.673648177666930348851716626769314796000375677);
DVK(KP898197570, +0.898197570222573798468955502359086394667167570);
DVK(KP347296355, +0.347296355333860697703433253538629592000751354);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP439692620, +0.439692620785908384054109277324731469936208134);
DVK(KP203604859, +0.203604859554852403062088995281827210665664861);
DVK(KP152703644, +0.152703644666139302296566746461370407999248646);
DVK(KP586256827, +0.586256827714544512072145703099641959914944179);
DVK(KP968908795, +0.968908795874236621082202410917456709164223497);
DVK(KP726681596, +0.726681596905677465811651808188092531873167623);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
INT i;
const R *xi;
R *xo;
xi = ri;
xo = ro;
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
V T1, T2, T3, T6, Tb, T7, T8, Tc, Td, Tv, T4;
/* Load all 9 inputs; they are then combined in three groups of three: (0, 3, 6), (1, 4, 7) and (2, 5, 8). */
T1 = LD(&(xi[0]), ivs, &(xi[0]));
T2 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
T6 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Tb = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
T7 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
T8 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
Tc = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
Td = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
Tv = VSUB(T3, T2);
T4 = VADD(T2, T3);
{
V Tl, T9, Tm, Te, Tj, T5;
Tl = VSUB(T7, T8);
T9 = VADD(T7, T8);
Tm = VSUB(Td, Tc);
Te = VADD(Tc, Td);
Tj = VFNMS(LDK(KP500000000), T4, T1);
T5 = VADD(T1, T4);
{
V Tn, Ta, Tk, Tf;
Tn = VFNMS(LDK(KP500000000), T9, T6);
Ta = VADD(T6, T9);
Tk = VFNMS(LDK(KP500000000), Te, Tb);
Tf = VADD(Tb, Te);
{
V Ty, TC, To, TB, Tx, Ts, Tg, Ti;
Ty = VFNMS(LDK(KP726681596), Tl, Tn);
TC = VFMA(LDK(KP968908795), Tn, Tl);
To = VFNMS(LDK(KP586256827), Tn, Tm);
TB = VFNMS(LDK(KP152703644), Tm, Tk);
Tx = VFMA(LDK(KP203604859), Tk, Tm);
Ts = VFNMS(LDK(KP439692620), Tl, Tk);
Tg = VADD(Ta, Tf);
Ti = VMUL(LDK(KP866025403), VSUB(Tf, Ta));
{
V Tz, TI, TF, TD, Tt, Th, Tq, Tp;
Tp = VFNMS(LDK(KP347296355), To, Tl);
Tz = VFMA(LDK(KP898197570), Ty, Tx);
TI = VFNMS(LDK(KP898197570), Ty, Tx);
TF = VFNMS(LDK(KP673648177), TC, TB);
TD = VFMA(LDK(KP673648177), TC, TB);
Tt = VFNMS(LDK(KP420276625), Ts, Tm);
/* Output 0 is the sum of the three group totals (T5 = group 0, Tg = Ta + Tf = groups 1 and 2). */
ST(&(xo[0]), VADD(T5, Tg), ovs, &(xo[0]));
Th = VFNMS(LDK(KP500000000), Tg, T5);
Tq = VFNMS(LDK(KP907603734), Tp, Tk);
{
V TA, TJ, TE, TG, Tu, Tr, TK, TH, Tw;
TA = VFMA(LDK(KP852868531), Tz, Tj);
TJ = VFMA(LDK(KP666666666), TD, TI);
TE = VMUL(LDK(KP984807753), VFNMS(LDK(KP879385241), Tv, TD));
TG = VFNMS(LDK(KP500000000), Tz, TF);
Tu = VFNMS(LDK(KP826351822), Tt, Tn);
/* Remaining outputs are written in VFMAI/VFNMSI pairs at indices k and 9 - k. */
ST(&(xo[WS(os, 6)]), VFNMSI(Ti, Th), ovs, &(xo[0]));
ST(&(xo[WS(os, 3)]), VFMAI(Ti, Th), ovs, &(xo[WS(os, 1)]));
Tr = VFNMS(LDK(KP939692620), Tq, Tj);
TK = VMUL(LDK(KP866025403), VFMA(LDK(KP852868531), TJ, Tv));
ST(&(xo[WS(os, 8)]), VFMAI(TE, TA), ovs, &(xo[0]));
ST(&(xo[WS(os, 1)]), VFNMSI(TE, TA), ovs, &(xo[WS(os, 1)]));
TH = VFMA(LDK(KP852868531), TG, Tj);
Tw = VMUL(LDK(KP984807753), VFMA(LDK(KP879385241), Tv, Tu));
ST(&(xo[WS(os, 4)]), VFMAI(TK, TH), ovs, &(xo[0]));
ST(&(xo[WS(os, 5)]), VFNMSI(TK, TH), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 7)]), VFMAI(Tw, Tr), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 2)]), VFNMSI(Tw, Tr), ovs, &(xo[0]));
}
}
}
}
}
}
}
开发者ID:phillipstanleymarbell,项目名称:sunflower-simulator,代码行数:100,代码来源:n1fv_9.c
示例9: INC
// In-place increment: reads via LD(x), adds y with ADD, writes the sum back
// through STR(x, ...) and returns whatever STR returns.
// NOTE(review): LD/ADD/STR/RETf are defined elsewhere; presumably thin SSE
// load/add/store wrappers operating on 4 packed floats starting at x - confirm.
RETf INC( float &x, const __m128 y ) { return STR(x, ADD(LD(x), y)); }
开发者ID:Claycau,项目名称:bigbrother,代码行数:1,代码来源:sse.hpp
示例10: t1sv_4
/*
 * t1sv_4: in-place SIMD kernel on 4 elements held as separate ri/ii arrays,
 * with elements 1..3 first combined with values loaded from W.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-4
 * split real/imaginary twiddle kernel); confirm before hand-editing. The
 * statement order is presumably scheduled by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri, ii  the two data arrays; both are read and overwritten, and both
 *           advance by (2 * VL) * ms per iteration
 *   W       twiddle table; pre-offset by mb * 6 and advanced by (2 * VL) * 6
 *           per iteration; six entries are read per iteration via LDW
 *   rs      stride passed to WS() to index elements 0..3
 *   mb, me  loop bounds: m runs from mb while m < me, in steps of 2 * VL
 *   ms      per-m stride, also forwarded to LD/ST
 */
static void t1sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
{
INT m;
for (m = mb, W = W + (mb * 6); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 6), MAKE_VOLATILE_STRIDE(rs)) {
V T1, Tv, T3, T6, T5, Ta, Td, Tc, Tg, Tj, Tt, T4, Tf, Ti, Tn;
V Tb, T2, T9;
/* Element 0 (T1, Tv) is used as loaded. */
T1 = LD(&(ri[0]), ms, &(ri[0]));
Tv = LD(&(ii[0]), ms, &(ii[0]));
/* Element 2 (T3, T6) is paired with table entries T2 = W[TWVL * 2], T5 = W[TWVL * 3]. */
T3 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0]));
T6 = LD(&(ii[WS(rs, 2)]), ms, &(ii[0]));
T2 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 3]));
/* Element 1 (Ta, Td) is paired with table entries T9 = W[0], Tc = W[TWVL * 1]. */
Ta = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
Td = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));
T9 = LDW(&(W[0]));
Tc = LDW(&(W[TWVL * 1]));
/* Element 3 (Tg, Tj) is paired with table entries Tf = W[TWVL * 4], Ti = W[TWVL * 5]. */
Tg = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)]));
Tj = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)]));
Tt = VMUL(T2, T6);
T4 = VMUL(T2, T3);
Tf = LDW(&(W[TWVL * 4]));
Ti = LDW(&(W[TWVL * 5]));
Tn = VMUL(T9, Td);
Tb = VMUL(T9, Ta);
{
V Tu, T7, Tp, Th, To, Te;
/* Each VMUL + VFMA/VFNMS pair finishes one two-term product per element; presumably a complex multiply in split form - TODO confirm VFMA/VFNMS operand semantics. */
Tu = VFNMS(T5, T3, Tt);
T7 = VFMA(T5, T6, T4);
Tp = VMUL(Tf, Tj);
Th = VMUL(Tf, Tg);
To = VFNMS(Tc, Ta, Tn);
Te = VFMA(Tc, Td, Tb);
{
V Tw, Tx, T8, Tm, Tq, Tk;
Tw = VADD(Tu, Tv);
Tx = VSUB(Tv, Tu);
T8 = VADD(T1, T7);
Tm = VSUB(T1, T7);
Tq = VFNMS(Ti, Tg, Tp);
Tk = VFMA(Ti, Tj, Th);
{
V Ts, Tr, Tl, Ty;
Ts = VADD(To, Tq);
Tr = VSUB(To, Tq);
Tl = VADD(Te, Tk);
Ty = VSUB(Te, Tk);
/* All loads have completed; write the 4 results back over ri and ii. */
ST(&(ri[WS(rs, 1)]), VADD(Tm, Tr), ms, &(ri[WS(rs, 1)]));
ST(&(ri[WS(rs, 3)]), VSUB(Tm, Tr), ms, &(ri[WS(rs, 1)]));
ST(&(ii[WS(rs, 2)]), VSUB(Tw, Ts), ms, &(ii[0]));
ST(&(ii[0]), VADD(Ts, Tw), ms, &(ii[0]));
ST(&(ii[WS(rs, 3)]), VADD(Ty, Tx), ms, &(ii[WS(rs, 1)]));
ST(&(ii[WS(rs, 1)]), VSUB(Tx, Ty), ms, &(ii[WS(rs, 1)]));
ST(&(ri[0]), VADD(T8, Tl), ms, &(ri[0]));
ST(&(ri[WS(rs, 2)]), VSUB(T8, Tl), ms, &(ri[0]));
}
}
}
}
}
/* VLEAVE is defined elsewhere; presumably a SIMD epilogue hook - TODO confirm. */
VLEAVE();
}
开发者ID:dstuck,项目名称:tinker_integrated_PIMC,代码行数:62,代码来源:t1sv_4.c
示例11: STDU
SprxPatch emulator_api_patches[] =
{
// Read umd patches
{ psp_read, STDU(SP, 0xFF90, SP), &condition_psp_iso },
{ psp_read+4, MFLR(R0), &condition_psp_iso },
{ psp_read+8, STD(R0, 0x80, SP), &condition_psp_iso },
{ psp_read+0x0C, MR(R8, R7), &condition_psp_iso },
{ psp_read+0x10, MR(R7, R6), &condition_psp_iso },
{ psp_read+0x14, MR(R6, R5), &condition_psp_iso },
{ psp_read+0x18, MR(R5, R4), &condition_psp_iso },
{ psp_read+0x1C, MR(R4, R3), &condition_psp_iso },
{ psp_read+0x20, LI(R3, SYSCALL8_OPCODE_READ_PSP_UMD), &condition_psp_iso },
{ psp_read+0x24, LI(R11, 8), &condition_psp_iso },
{ psp_read+0x28, SC, &condition_psp_iso },
{ psp_read+0x2C, LD(R0, 0x80, SP), &condition_psp_iso },
{ psp_read+0x30, MTLR(R0), &condition_psp_iso },
{ psp_read+0x34, ADDI(SP, SP, 0x70), &condition_psp_iso },
{ psp_read+0x38, BLR, &condition_psp_iso },
// Read header patches
{ psp_read+0x3C, STDU(SP, 0xFF90, SP), &condition_psp_iso },
{ psp_read+0x40, MFLR(R0), &condition_psp_iso },
{ psp_read+0x44, STD(R0, 0x80, SP), &condition_psp_iso },
{ psp_read+0x48, MR(R7, R6), &condition_psp_iso },
{ psp_read+0x4C, MR(R6, R5), &condition_psp_iso },
{ psp_read+0x50, MR(R5, R4), &condition_psp_iso },
{ psp_read+0x54, MR(R4, R3), &condition_psp_iso },
{ psp_read+0x58, LI(R3, SYSCALL8_OPCODE_READ_PSP_HEADER), &condition_psp_iso },
{ psp_read+0x5C, LI(R11, 8), &condition_psp_iso },
{ psp_read+0x60, SC, &condition_psp_iso },
开发者ID:Joonie86,项目名称:COBRA-7.3,代码行数:30,代码来源:modulespatch.c
示例12: t2sv_4
/*
 * t2sv_4: in-place SIMD kernel on 4 elements held as separate ri/ii arrays.
 * It reads only four table entries from W per iteration and computes the
 * factor pair applied to element 2 from them.
 *
 * NOTE(review): this looks like a machine-generated FFTW codelet (size-4
 * split real/imaginary twiddle kernel with a compressed twiddle table);
 * confirm before hand-editing. The statement order is presumably scheduled
 * by the generator.
 *
 * Parameters (as used by the visible code):
 *   ri, ii  the two data arrays; both are read and overwritten, and both
 *           advance by (2 * VL) * ms per iteration
 *   W       twiddle table; pre-offset by mb * 4 and advanced by (2 * VL) * 4
 *           per iteration
 *   rs      stride passed to WS() to index elements 0..3
 *   mb, me  loop bounds: m runs from mb while m < me, in steps of 2 * VL
 *   ms      per-m stride, also forwarded to LD/ST
 */
static void t2sv_4(R *ri, R *ii, const R *W, stride rs, INT mb, INT me, INT ms)
{
{
INT m;
for (m = mb, W = W + (mb * 4); m < me; m = m + (2 * VL), ri = ri + ((2 * VL) * ms), ii = ii + ((2 * VL) * ms), W = W + ((2 * VL) * 4), MAKE_VOLATILE_STRIDE(8, rs)) {
V T2, T6, T3, T5, T1, Tx, T8, Tc, Tf, Ta, T4, Th, Tj, Tl;
/* Table entries: (T2, T5) = W[0], W[TWVL * 1] are applied to element 1; (T3, T6) = W[TWVL * 2], W[TWVL * 3] to element 3. */
T2 = LDW(&(W[0]));
T6 = LDW(&(W[TWVL * 3]));
T3 = LDW(&(W[TWVL * 2]));
T5 = LDW(&(W[TWVL * 1]));
T1 = LD(&(ri[0]), ms, &(ri[0]));
Tx = LD(&(ii[0]), ms, &(ii[0]));
T8 = LD(&(ri[WS(rs, 2)]), ms, &(ri[0]));
Tc = LD(&(ii[WS(rs, 2)]), ms, &(ii[0]));
Tf = LD(&(ri[WS(rs, 1)]), ms, &(ri[WS(rs, 1)]));
/* Ta and T4 start the derived pair (T7, Tb), which is built from the four loaded entries and applied to element 2 below. */
Ta = VMUL(T2, T6);
T4 = VMUL(T2, T3);
Th = LD(&(ii[WS(rs, 1)]), ms, &(ii[WS(rs, 1)]));
Tj = LD(&(ri[WS(rs, 3)]), ms, &(ri[WS(rs, 1)]));
Tl = LD(&(ii[WS(rs, 3)]), ms, &(ii[WS(rs, 1)]));
{
V Tg, Tb, T7, Tp, Tk, Tr, Ti;
Tg = VMUL(T2, Tf);
Tb = VFNMS(T5, T3, Ta);
T7 = VFMA(T5, T6, T4);
Tp = VMUL(T2, Th);
Tk = VMUL(T3, Tj);
Tr = VMUL(T3, Tl);
Ti = VFMA(T5, Th, Tg);
{
V Tv, T9, Tq, Tm, Ts, Tw, Td;
/* Element 2 (T8, Tc) combined with the derived pair (T7, Tb). */
Tv = VMUL(T7, Tc);
T9 = VMUL(T7, T8);
Tq = VFNMS(T5, Tf, Tp);
Tm = VFMA(T6, Tl, Tk);
Ts = VFNMS(T6, Tj, Tr);
Tw = VFNMS(Tb, T8, Tv);
Td = VFMA(Tb, Tc, T9);
{
V Tn, TA, Tu, Tt;
Tn = VADD(Ti, Tm);
TA = VSUB(Ti, Tm);
Tu = VADD(Tq, Ts);
Tt = VSUB(Tq, Ts);
{
V Ty, Tz, Te, To;
Ty = VADD(Tw, Tx);
Tz = VSUB(Tx, Tw);
Te = VADD(T1, Td);
To = VSUB(T1, Td);
/* All loads have completed; write the 4 results back over ri and ii. */
ST(&(ii[WS(rs, 3)]), VADD(TA, Tz), ms, &(ii[WS(rs, 1)]));
ST(&(ii[WS(rs, 1)]), VSUB(Tz, TA), ms, &(ii[WS(rs, 1)]));
ST(&(ii[WS(rs, 2)]), VSUB(Ty, Tu), ms, &(ii[0]));
ST(&(ii[0]), VADD(Tu, Ty), ms, &(ii[0]));
ST(&(ri[WS(rs, 1)]), VADD(To, Tt), ms, &(ri[WS(rs, 1)]));
ST(&(ri[WS(rs, 3)]), VSUB(To, Tt), ms, &(ri[WS(rs, 1)]));
ST(&(ri[0]), VADD(Te, Tn), ms, &(ri[0]));
ST(&(ri[WS(rs, 2)]), VSUB(Te, Tn), ms, &(ri[0]));
}
}
}
}
}
}
/* VLEAVE is defined elsewhere; presumably a SIMD epilogue hook - TODO confirm. */
VLEAVE();
}
开发者ID:SKA-ScienceDataProcessor,项目名称:FastImaging,代码行数:66,代码来源:t2sv_4.c
示例13: n2fv_13
static void n2fv_13(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
{
DVK(KP2_000000000, +2.000000000000000000000000000000000000000000000);
DVK(KP083333333, +0.083333333333333333333333333333333333333333333);
DVK(KP075902986, +0.075902986037193865983102897245103540356428373);
DVK(KP251768516, +0.251768516431883313623436926934233488546674281);
DVK(KP132983124, +0.132983124607418643793760531921092974399165133);
DVK(KP258260390, +0.258260390311744861420450644284508567852516811);
DVK(KP1_732050807, +1.732050807568877293527446341505872366942805254);
DVK(KP300238635, +0.300238635966332641462884626667381504676006424);
DVK(KP011599105, +0.011599105605768290721655456654083252189827041);
DVK(KP156891391, +0.156891391051584611046832726756003269660212636);
DVK(KP256247671, +0.256247671582936600958684654061725059144125175);
DVK(KP174138601, +0.174138601152135905005660794929264742616964676);
DVK(KP575140729, +0.575140729474003121368385547455453388461001608);
DVK(KP503537032, +0.503537032863766627246873853868466977093348562);
DVK(KP113854479, +0.113854479055790798974654345867655310534642560);
DVK(KP265966249, +0.265966249214837287587521063842185948798330267);
DVK(KP387390585, +0.387390585467617292130675966426762851778775217);
DVK(KP300462606, +0.300462606288665774426601772289207995520941381);
DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
int i;
const R *xi;
R *xo;
xi = ri;
xo = ro;
BEGIN_SIMD();
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs)) {
V TW, Tb, Tm, Tu, TC, TR, TX, TK, TU, Tz, TB, TN, TT;
TW = LD(&(xi[0]), ivs, &(xi[0]));
{
V T3, TH, Tl, Tw, Tp, Tg, Tv, To, T6, Tr, T9, Ts, Ta, TI, T1;
V T2, Tq, Tt;
T1 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
T2 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
T3 = VSUB(T1, T2);
TH = VADD(T1, T2);
{
V Th, Ti, Tj, Tk;
Th = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
Ti = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
Tj = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
Tk = VADD(Ti, Tj);
Tl = VADD(Th, Tk);
Tw = VSUB(Ti, Tj);
Tp = VFNMS(LDK(KP500000000), Tk, Th);
}
{
V Tc, Td, Te, Tf;
Tc = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
Td = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
Te = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
Tf = VADD(Td, Te);
Tg = VADD(Tc, Tf);
Tv = VSUB(Td, Te);
To = VFNMS(LDK(KP500000000), Tf, Tc);
}
{
V T4, T5, T7, T8;
T4 = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
T5 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
T6 = VSUB(T4, T5);
Tr = VADD(T4, T5);
T7 = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
T8 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
T9 = VSUB(T7, T8);
Ts = VADD(T7, T8);
}
Ta = VADD(T6, T9);
TI = VADD(Tr, Ts);
Tb = VADD(T3, Ta);
Tm = VSUB(Tg, Tl);
Tq = VSUB(To, Tp);
Tt = VMUL(LDK(KP866025403), VSUB(Tr, Ts));
Tu = VADD(Tq, Tt);
TC = VSUB(Tq, Tt);
{
V TP, TQ, TG, TJ;
TP = VADD(Tg, Tl);
TQ = VADD(TH, TI);
TR = VMUL(LDK(KP300462606), VSUB(TP, TQ));
TX = VADD(TP, TQ);
TG = VADD(To, Tp);
TJ = VFNMS(LDK(KP500000000), TI, TH);
TK = VSUB(TG, TJ);
TU = VADD(TG, TJ);
}
{
V Tx, Ty, TL, TM;
Tx = VMUL(LDK(KP866025403), VSUB(Tv, Tw));
Ty = VFNMS(LDK(KP500000000), Ta, T3);
Tz = VSUB(Tx, Ty);
TB = VADD(Tx, Ty);
TL = VADD(Tv, Tw);
TM = VSUB(T6, T9);
TN = VSUB(TL, TM);
TT = VADD(TL, TM);
}
}
//.........这里部分代码省略.........
开发者ID:abrahamneben,项目名称:orbcomm_beam_mapping,代码行数:101,代码来源:n2fv_13.c
示例14: DEC
// In-place decrement: reads via LD(x), subtracts y with SUB, writes the
// difference back through STR(x, ...) and returns whatever STR returns.
// NOTE(review): LD/SUB/STR/RETf are defined elsewhere; presumably thin SSE
// load/subtract/store wrappers operating on 4 packed floats starting at x - confirm.
RETf DEC( float &x, const __m128 y ) { return STR(x, SUB(LD(x), y)); }
开发者ID:Claycau,项目名称:bigbrother,代码行数:1,代码来源:sse.hpp
示例15: n1bv_7
static void n1bv_7(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, INT v, INT ivs, INT ovs)
{
DVK(KP900968867, +0.900968867902419126236102319507445051165919162);
DVK(KP692021471, +0.692021471630095869627814897002069140197260599);
DVK(KP801937735, +0.801937735804838252472204639014890102331838324);
DVK(KP974927912, +0.974927912181823607018131682993931217232785801);
DVK(KP356895867, +0.356895867892209443894399510021300583399127187);
DVK(KP554958132, +0.554958132087371191422194871006410481067288862);
{
INT i;
const R *xi;
R *xo;
xi = ii;
xo = io;
for (i = v; i > 0; i = i - VL, xi = xi + (VL * ivs), xo = xo + (VL * ovs), MAKE_VOLATILE_STRIDE(is), MAKE_VOLATILE_STRIDE(os)) {
V T1, T2, T3, T8, T9, T5, T6;
T1 = LD(&(xi[0]), ivs, &(xi[0]));
T2 = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
T3 = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
T8 = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
T9 = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
T5 = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
T6 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
{
V Tg, T4, Te, Ta, Tf, T7;
Tg = VSUB(T2, T3);
T4 = VADD(T2, T3);
Te = VSUB(T8, T9);
Ta = VADD(T8, T9);
Tf = VSUB(T5, T6);
T7 = VADD(T5, T6);
{
V Tr, Tj, Tm, Th, To, Tb;
Tr = VFMA(LDK(KP554958132), Te, Tg);
Tj = VFNMS(LDK(KP356895867), T4, Ta);
Tm = VFMA(LDK(KP554958132), Tf, Te);
Th = VFNMS(LDK(KP554958132), Tg, Tf);
ST(&(xo[0]), VADD(T1, VADD(T4, VADD(T7, Ta))), ovs, &(xo[0]));
To = VFNMS(LDK(KP356895867), T7, T4);
Tb = VFNMS(LDK(KP356895867), Ta, T7);
{
V Ts, Tk, Tn, Ti;
Ts = VMUL(LDK(KP974927912), VFMA(LDK(KP801937735), Tr, Tf));
Tk = VFNMS(LDK(KP692021471), Tj, T7);
Tn = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Tm, Tg));
Ti = VMUL(LDK(KP974927912), VFNMS(LDK(KP801937735), Th, Te));
{
V Tp, Tc, Tl, Tq, Td;
Tp = VFNMS(LDK(KP692021471), To, Ta);
Tc = VFNMS(LDK(KP692021471), Tb, T4);
Tl = VFNMS(LDK(KP900968867), Tk, T1);
Tq = VFNMS(LDK(KP900968867), Tp, T1);
Td = VFNMS(LDK(KP900968867), Tc, T1);
ST(&(xo[WS(os, 5)]), VFNMSI(Tn, Tl), ovs, &(xo[WS(os, 1)]));
ST(&(xo[WS(os, 2)]), VFMAI(Tn, Tl), ovs, &(xo[0]));
|
请发表评论