#define ABSDIF(x, y) ((x <= y) ? y-x : x-y)
int error(char xx[16][16], char yy[16][16]){
char *across;
char *cacross;
// int localDiff;
int diff=0;
int y;
for (y=0;y<16;y++) {
across = &xx[y][0];
cacross = &yy[y][0];
diff += ABSDIF(across[0], cacross[0]);
diff += ABSDIF(across[1], cacross[1]);
diff += ABSDIF(across[2], cacross[2]);
diff += ABSDIF(across[3], cacross[3]);
diff += ABSDIF(across[4], cacross[4]);
diff += ABSDIF(across[5], cacross[5]);
diff += ABSDIF(across[6], cacross[6]);
diff += ABSDIF(across[7], cacross[7]);
diff += ABSDIF(across[8], cacross[8]);
diff += ABSDIF(across[9], cacross[9]);
diff += ABSDIF(across[10], cacross[10]);
diff += ABSDIF(across[11], cacross[11]);
diff += ABSDIF(across[12], cacross[12]);
diff += ABSDIF(across[13], cacross[13]);
diff += ABSDIF(across[14], cacross[14]);
diff += ABSDIF(across[15], cacross[15]);
};
return diff;
}
Note: SIR (S-expression Intermediate Representation) is similar to LIR, but operations required for SIMD optimization are added. Most of them will be adopted in the future LIR.
In the case of the SIMD code generation, the data for a pattern match is used. It is created based on the instruction description file which strictly describes the meaning of SIMD instructions in detail. For example, in the above-mentioned loop, the pattern based on the PDIST instruction (it accumulates absolute differences of 8 pairs of 8-bit numbers) of UltraSPARC processors is matched.
PDIST instruction description in SIR is as follows:
;; PDIST
(DEFINST ("pdist ?1f, ?2f, ?3f" "pdist ?1f, ?2f, ?3f")
(PARALLEL
(SET (SUBREG I32 (REG F64 (HOLE 3 FREG)) 0)
(ADD I32 (ADD I32 (ADD I32 (ADD I32 (ADD I32 (ADD I32 (ADD I32
(ADD I32
(SUBREG I32 (REG F64 (HOLE 3 FREG)) 0)
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 0))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 0)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 0))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 0))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 0))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 0)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 0))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 0)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 1))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 1)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 1))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 1))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 1))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 1)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 1))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 1)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 2))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 2)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 2))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 2))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 2))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 2)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 2))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 2)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 3))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 3)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 3))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 3))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 3))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 3)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 3))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 3)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 4))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 4)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 4))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 4))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 4))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 4)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 4))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 4)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 5))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 5)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 5))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 5))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 5))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 5)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 5))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 5)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 6))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 6)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 6))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 6))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 6))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 6)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 6))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 6)))))))
(BOR I32
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 7))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 7)))
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 7))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 7))))
(BAND I32
(SUB I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 7))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 7)))
(BNOT I32
(TSTLES I32 (CONVZX I32 (SUBREG I8 (REG F64 (HOLE 1 FREG)) 7))
(CONVZX I32 (SUBREG I8 (REG F64 (HOLE 2 FREG)) 7)))))))
)
))
.file "testsimd/testerror-1.c"
.section ".text"
.align 4
.global error
.type error,#function
.proc 04
error:
!#PROLOGUE# 0
save %sp, -8, %sp
!#PROLOGUE# 1
_lab1:
mov (0), %l0
mov (0), %l1
_lab5:
cmp %l1, (16)
bl _lab6
nop
ba _lab4
nop
_lab6:
mov %i0, %l2
smul %l1, (16), %l3
add %l2, %l3, %l4
mov %i1, %l5
add %l5, %l3, %l6
ldd [%l4], %f0
ldd [%l6], %f2
st %l0, [%i6-(4)]
st %g0, [%i6-(8)]
ldd [%i6-(8)], %f4
pdist %f0, %f2, %f4
ldd [%l4+(8)], %f0
ldd [%l6+(8)], %f2
pdist %f0, %f2, %f4
std %f4, [%i6-(8)]
ld [%i6-(4)], %l0
add %l1, (1), %l1
ba _lab5
nop
_lab4:
mov %l0,%i0
ret
restore
| Compiling method | Execution time (second) |
|---|---|
| GCC | 223 |
| GCC -O2 | 45 |
| COINS | 327 |
| SIMD | 13 |
| SIMD+manual | 8 |