/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16*e1eccf28SAndroid Build Coastguard Worker
17*e1eccf28SAndroid Build Coastguard Worker
18*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuIntrinsic.h"
19*e1eccf28SAndroid Build Coastguard Worker #include "rsCpuIntrinsicInlines.h"
20*e1eccf28SAndroid Build Coastguard Worker
21*e1eccf28SAndroid Build Coastguard Worker namespace android {
22*e1eccf28SAndroid Build Coastguard Worker namespace renderscript {
23*e1eccf28SAndroid Build Coastguard Worker
24*e1eccf28SAndroid Build Coastguard Worker
25*e1eccf28SAndroid Build Coastguard Worker class RsdCpuScriptIntrinsic3DLUT : public RsdCpuScriptIntrinsic {
26*e1eccf28SAndroid Build Coastguard Worker public:
27*e1eccf28SAndroid Build Coastguard Worker void populateScript(Script *) override;
28*e1eccf28SAndroid Build Coastguard Worker void invokeFreeChildren() override;
29*e1eccf28SAndroid Build Coastguard Worker
30*e1eccf28SAndroid Build Coastguard Worker void setGlobalObj(uint32_t slot, ObjectBase *data) override;
31*e1eccf28SAndroid Build Coastguard Worker
32*e1eccf28SAndroid Build Coastguard Worker ~RsdCpuScriptIntrinsic3DLUT() override;
33*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
34*e1eccf28SAndroid Build Coastguard Worker
35*e1eccf28SAndroid Build Coastguard Worker protected:
36*e1eccf28SAndroid Build Coastguard Worker ObjectBaseRef<Allocation> mLUT;
37*e1eccf28SAndroid Build Coastguard Worker
38*e1eccf28SAndroid Build Coastguard Worker static void kernel(const RsExpandKernelDriverInfo *info,
39*e1eccf28SAndroid Build Coastguard Worker uint32_t xstart, uint32_t xend,
40*e1eccf28SAndroid Build Coastguard Worker uint32_t outstep);
41*e1eccf28SAndroid Build Coastguard Worker };
42*e1eccf28SAndroid Build Coastguard Worker
setGlobalObj(uint32_t slot,ObjectBase * data)43*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsic3DLUT::setGlobalObj(uint32_t slot, ObjectBase *data) {
44*e1eccf28SAndroid Build Coastguard Worker rsAssert(slot == 0);
45*e1eccf28SAndroid Build Coastguard Worker mLUT.set(static_cast<Allocation *>(data));
46*e1eccf28SAndroid Build Coastguard Worker }
47*e1eccf28SAndroid Build Coastguard Worker
/**
 * Accelerated 3D LUT row routine, defined outside this file (C linkage).
 *
 * Argument meaning as used by the call in kernel():
 *   dst, in          output and input pixel rows
 *   count            number of pixels to process
 *   lut              base address of the LUT data
 *   pitchy, pitchz   byte strides between LUT rows (y) and slices (z)
 *   dimx, dimy, dimz LUT dimensions minus one, per axis
 */
extern "C" void rsdIntrinsic3DLUT_K(void *dst, void const *in, size_t count,
                                    void const *lut,
                                    int32_t pitchy, int32_t pitchz,
                                    int dimx, int dimy, int dimz);
52*e1eccf28SAndroid Build Coastguard Worker
53*e1eccf28SAndroid Build Coastguard Worker
kernel(const RsExpandKernelDriverInfo * info,uint32_t xstart,uint32_t xend,uint32_t outstep)54*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info,
55*e1eccf28SAndroid Build Coastguard Worker uint32_t xstart, uint32_t xend,
56*e1eccf28SAndroid Build Coastguard Worker uint32_t outstep) {
57*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr;
58*e1eccf28SAndroid Build Coastguard Worker
59*e1eccf28SAndroid Build Coastguard Worker uchar4 *out = (uchar4 *)info->outPtr[0];
60*e1eccf28SAndroid Build Coastguard Worker uchar4 *in = (uchar4 *)info->inPtr[0];
61*e1eccf28SAndroid Build Coastguard Worker uint32_t x1 = xstart;
62*e1eccf28SAndroid Build Coastguard Worker uint32_t x2 = xend;
63*e1eccf28SAndroid Build Coastguard Worker
64*e1eccf28SAndroid Build Coastguard Worker const uchar *bp = (const uchar *)cp->mLUT->mHal.drvState.lod[0].mallocPtr;
65*e1eccf28SAndroid Build Coastguard Worker
66*e1eccf28SAndroid Build Coastguard Worker int4 dims = {
67*e1eccf28SAndroid Build Coastguard Worker static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimX - 1),
68*e1eccf28SAndroid Build Coastguard Worker static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimY - 1),
69*e1eccf28SAndroid Build Coastguard Worker static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimZ - 1),
70*e1eccf28SAndroid Build Coastguard Worker -1
71*e1eccf28SAndroid Build Coastguard Worker };
72*e1eccf28SAndroid Build Coastguard Worker const float4 m = (float4)(1.f / 255.f) * convert_float4(dims);
73*e1eccf28SAndroid Build Coastguard Worker const int4 coordMul = convert_int4(m * (float4)0x8000);
74*e1eccf28SAndroid Build Coastguard Worker const size_t stride_y = cp->mLUT->mHal.drvState.lod[0].stride;
75*e1eccf28SAndroid Build Coastguard Worker const size_t stride_z = stride_y * cp->mLUT->mHal.drvState.lod[0].dimY;
76*e1eccf28SAndroid Build Coastguard Worker
77*e1eccf28SAndroid Build Coastguard Worker //ALOGE("strides %zu %zu", stride_y, stride_z);
78*e1eccf28SAndroid Build Coastguard Worker
79*e1eccf28SAndroid Build Coastguard Worker #if defined(ARCH_ARM_USE_INTRINSICS)
80*e1eccf28SAndroid Build Coastguard Worker if (gArchUseSIMD) {
81*e1eccf28SAndroid Build Coastguard Worker int32_t len = x2 - x1;
82*e1eccf28SAndroid Build Coastguard Worker if(len > 0) {
83*e1eccf28SAndroid Build Coastguard Worker rsdIntrinsic3DLUT_K(out, in, len,
84*e1eccf28SAndroid Build Coastguard Worker bp, stride_y, stride_z,
85*e1eccf28SAndroid Build Coastguard Worker dims.x, dims.y, dims.z);
86*e1eccf28SAndroid Build Coastguard Worker x1 += len;
87*e1eccf28SAndroid Build Coastguard Worker out += len;
88*e1eccf28SAndroid Build Coastguard Worker in += len;
89*e1eccf28SAndroid Build Coastguard Worker }
90*e1eccf28SAndroid Build Coastguard Worker }
91*e1eccf28SAndroid Build Coastguard Worker #endif
92*e1eccf28SAndroid Build Coastguard Worker
93*e1eccf28SAndroid Build Coastguard Worker while (x1 < x2) {
94*e1eccf28SAndroid Build Coastguard Worker int4 baseCoord = convert_int4(*in) * coordMul;
95*e1eccf28SAndroid Build Coastguard Worker int4 coord1 = baseCoord >> (int4)15;
96*e1eccf28SAndroid Build Coastguard Worker //int4 coord2 = min(coord1 + 1, gDims - 1);
97*e1eccf28SAndroid Build Coastguard Worker
98*e1eccf28SAndroid Build Coastguard Worker int4 weight2 = baseCoord & 0x7fff;
99*e1eccf28SAndroid Build Coastguard Worker int4 weight1 = (int4)0x8000 - weight2;
100*e1eccf28SAndroid Build Coastguard Worker
101*e1eccf28SAndroid Build Coastguard Worker //ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
102*e1eccf28SAndroid Build Coastguard Worker const uchar *bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z);
103*e1eccf28SAndroid Build Coastguard Worker const uchar4 *pt_00 = (const uchar4 *)&bp2[0];
104*e1eccf28SAndroid Build Coastguard Worker const uchar4 *pt_10 = (const uchar4 *)&bp2[stride_y];
105*e1eccf28SAndroid Build Coastguard Worker const uchar4 *pt_01 = (const uchar4 *)&bp2[stride_z];
106*e1eccf28SAndroid Build Coastguard Worker const uchar4 *pt_11 = (const uchar4 *)&bp2[stride_y + stride_z];
107*e1eccf28SAndroid Build Coastguard Worker
108*e1eccf28SAndroid Build Coastguard Worker uint4 v000 = convert_uint4(pt_00[0]);
109*e1eccf28SAndroid Build Coastguard Worker uint4 v100 = convert_uint4(pt_00[1]);
110*e1eccf28SAndroid Build Coastguard Worker uint4 v010 = convert_uint4(pt_10[0]);
111*e1eccf28SAndroid Build Coastguard Worker uint4 v110 = convert_uint4(pt_10[1]);
112*e1eccf28SAndroid Build Coastguard Worker uint4 v001 = convert_uint4(pt_01[0]);
113*e1eccf28SAndroid Build Coastguard Worker uint4 v101 = convert_uint4(pt_01[1]);
114*e1eccf28SAndroid Build Coastguard Worker uint4 v011 = convert_uint4(pt_11[0]);
115*e1eccf28SAndroid Build Coastguard Worker uint4 v111 = convert_uint4(pt_11[1]);
116*e1eccf28SAndroid Build Coastguard Worker
117*e1eccf28SAndroid Build Coastguard Worker uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7;
118*e1eccf28SAndroid Build Coastguard Worker uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7;
119*e1eccf28SAndroid Build Coastguard Worker uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7;
120*e1eccf28SAndroid Build Coastguard Worker uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7;
121*e1eccf28SAndroid Build Coastguard Worker
122*e1eccf28SAndroid Build Coastguard Worker uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15;
123*e1eccf28SAndroid Build Coastguard Worker uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15;
124*e1eccf28SAndroid Build Coastguard Worker
125*e1eccf28SAndroid Build Coastguard Worker uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15;
126*e1eccf28SAndroid Build Coastguard Worker uint4 v2 = (v + 0x7f) >> (int4)8;
127*e1eccf28SAndroid Build Coastguard Worker
128*e1eccf28SAndroid Build Coastguard Worker uchar4 ret = convert_uchar4(v2);
129*e1eccf28SAndroid Build Coastguard Worker ret.w = in->w;
130*e1eccf28SAndroid Build Coastguard Worker
131*e1eccf28SAndroid Build Coastguard Worker #if 0
132*e1eccf28SAndroid Build Coastguard Worker if (!x1) {
133*e1eccf28SAndroid Build Coastguard Worker ALOGE("in %08x %08x %08x %08x", in->r, in->g, in->b, in->a);
134*e1eccf28SAndroid Build Coastguard Worker ALOGE("baseCoord %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, baseCoord.w);
135*e1eccf28SAndroid Build Coastguard Worker ALOGE("coord1 %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
136*e1eccf28SAndroid Build Coastguard Worker ALOGE("weight1 %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w);
137*e1eccf28SAndroid Build Coastguard Worker ALOGE("weight2 %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w);
138*e1eccf28SAndroid Build Coastguard Worker
139*e1eccf28SAndroid Build Coastguard Worker ALOGE("v000 %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w);
140*e1eccf28SAndroid Build Coastguard Worker ALOGE("v100 %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w);
141*e1eccf28SAndroid Build Coastguard Worker ALOGE("yz00 %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w);
142*e1eccf28SAndroid Build Coastguard Worker ALOGE("z0 %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w);
143*e1eccf28SAndroid Build Coastguard Worker
144*e1eccf28SAndroid Build Coastguard Worker ALOGE("v %08x %08x %08x %08x", v.x, v.y, v.z, v.w);
145*e1eccf28SAndroid Build Coastguard Worker ALOGE("v2 %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w);
146*e1eccf28SAndroid Build Coastguard Worker }
147*e1eccf28SAndroid Build Coastguard Worker #endif
148*e1eccf28SAndroid Build Coastguard Worker *out = ret;
149*e1eccf28SAndroid Build Coastguard Worker
150*e1eccf28SAndroid Build Coastguard Worker
151*e1eccf28SAndroid Build Coastguard Worker in++;
152*e1eccf28SAndroid Build Coastguard Worker out++;
153*e1eccf28SAndroid Build Coastguard Worker x1++;
154*e1eccf28SAndroid Build Coastguard Worker }
155*e1eccf28SAndroid Build Coastguard Worker }
156*e1eccf28SAndroid Build Coastguard Worker
RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)157*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(
158*e1eccf28SAndroid Build Coastguard Worker RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) :
159*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
160*e1eccf28SAndroid Build Coastguard Worker
161*e1eccf28SAndroid Build Coastguard Worker mRootPtr = &kernel;
162*e1eccf28SAndroid Build Coastguard Worker }
163*e1eccf28SAndroid Build Coastguard Worker
~RsdCpuScriptIntrinsic3DLUT()164*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptIntrinsic3DLUT::~RsdCpuScriptIntrinsic3DLUT() {
165*e1eccf28SAndroid Build Coastguard Worker }
166*e1eccf28SAndroid Build Coastguard Worker
populateScript(Script * s)167*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsic3DLUT::populateScript(Script *s) {
168*e1eccf28SAndroid Build Coastguard Worker s->mHal.info.exportedVariableCount = 1;
169*e1eccf28SAndroid Build Coastguard Worker }
170*e1eccf28SAndroid Build Coastguard Worker
invokeFreeChildren()171*e1eccf28SAndroid Build Coastguard Worker void RsdCpuScriptIntrinsic3DLUT::invokeFreeChildren() {
172*e1eccf28SAndroid Build Coastguard Worker mLUT.clear();
173*e1eccf28SAndroid Build Coastguard Worker }
174*e1eccf28SAndroid Build Coastguard Worker
rsdIntrinsic_3DLUT(RsdCpuReferenceImpl * ctx,const Script * s,const Element * e)175*e1eccf28SAndroid Build Coastguard Worker RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
176*e1eccf28SAndroid Build Coastguard Worker const Script *s, const Element *e) {
177*e1eccf28SAndroid Build Coastguard Worker
178*e1eccf28SAndroid Build Coastguard Worker return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e);
179*e1eccf28SAndroid Build Coastguard Worker }
180*e1eccf28SAndroid Build Coastguard Worker
181*e1eccf28SAndroid Build Coastguard Worker } // namespace renderscript
182*e1eccf28SAndroid Build Coastguard Worker } // namespace android
183