arm-asm: Add vmla, vmls, vnmls, vnmla, vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp, vcmpe

This commit is contained in:
Danny Milosavljevic 2021-01-22 01:20:07 +01:00
parent 104037a4c5
commit b82e52a497
3 changed files with 343 additions and 2 deletions

290
arm-asm.c
View File

@ -1465,7 +1465,6 @@ static int asm_parse_vfp_regvar(int t, int double_precision)
return -1;
}
static void asm_floating_point_single_data_transfer_opcode(TCCState *s1, int token)
{
Operand ops[3];
@ -1648,6 +1647,264 @@ static void asm_floating_point_block_data_transfer_opcode(TCCState *s1, int toke
else
asm_emit_coprocessor_data_transfer(condition_code_of_token(token), coprocessor, first_regset_register, &ops[0], &offset, 0, preincrement, op0_exclam, extra_register_bit, load);
}
/*
 * Not standalone: second half of asm_floating_point_data_processing_opcode.
 *
 * The caller has already parsed the first register (passed in as CRd) and
 * stopped on a '#' or '$' token; the immediate is parsed here starting at
 * that token.  Only the compare-with-zero forms (vcmp / vcmpe) are known,
 * and the immediate has to be 0.
 *
 * s1           compiler state, forwarded to parse_operand
 * token        instruction token (the condition code is derived from it)
 * coprocessor  CP_SINGLE_PRECISION_FLOAT or the double precision coprocessor
 * CRd          number of the register that was parsed by the caller
 */
static void asm_floating_point_immediate_data_processing_opcode_tail(TCCState *s1, int token, uint8_t coprocessor, uint8_t CRd) {
    Operand imm;
    uint8_t op1 = 11; // "Other" instruction
    uint8_t op2;
    uint8_t crd = CRd;
    uint8_t crn = 5; // both compare-with-immediate forms use this fixed value
    uint8_t crm = 0;

    parse_operand(s1, &imm);
    if (!(imm.type == OP_IM8 || imm.type == OP_IM8N)) {
        expect("Immediate value");
        return;
    }

    switch (ARM_INSTRUCTION_GROUP(token)) {
    case TOK_ASM_vcmpeq_f32:
    case TOK_ASM_vcmpeq_f64:
        op2 = 2;
        break;
    case TOK_ASM_vcmpeeq_f32:
    case TOK_ASM_vcmpeeq_f64:
        op2 = 6;
        break;
    default:
        expect("known floating point with immediate instruction");
        return;
    }

    /* Every instruction accepted above only takes the immediate 0. */
    if (imm.e.v) {
        expect("Immediate value 0");
        return;
    }

    if (coprocessor == CP_SINGLE_PRECISION_FLOAT) {
        /* The lowest bit of a single precision register number is carried
           in opcode1 (value 4); the remaining bits form the register field. */
        if (crd & 1)
            op1 |= 4;
        crd >>= 1;
    }

    asm_emit_coprocessor_opcode(condition_code_of_token(token), coprocessor, op1, crd, crn, crm, op2, 0);
}
/*
 * Assemble one VFP data-processing instruction: vmla, vmls, vnmls, vnmla,
 * vmul, vnmul, vadd, vsub, vdiv, vneg, vabs, vsqrt, vcmp or vcmpe, in the
 * .f32 or .f64 variant selected by the token.
 *
 * Operand forms:
 *   Fd, Fn, Fm   three registers
 *   Fd, Fm       two registers; the first one is reused as second source
 *                (for vneg/vabs/vsqrt/vcmp/vcmpe this is the only register
 *                form, and the middle field holds a fixed selector instead)
 *   Fd, #0       handed over to
 *                asm_floating_point_immediate_data_processing_opcode_tail
 *
 * All registers must be 's<number>' for .f32 and 'd<number>' for .f64.
 *
 * s1     compiler state
 * token  instruction token, including condition code and type suffix
 */
static void asm_floating_point_data_processing_opcode(TCCState *s1, int token) {
    uint8_t coprocessor = CP_SINGLE_PRECISION_FLOAT;
    uint8_t opcode1 = 0;
    uint8_t opcode2 = 0; // (0 || 2) | register selection
    uint8_t operands[3] = {0, 0, 0};
    uint8_t nb_operands = 0;
    uint8_t nb_explicit_operands; // registers actually written by the user
    int operand_1_register = 1; // cleared when operands[1] is a fixed selector
    int double_precision;
    int reg;

    /* TODO:
       Instruction    opcode opcode2  Reason
       =============================================================
       -              1?00   ?1?      Undefined
       VFNMS          1?01   ?0?      Must be unconditional
       VFNMA          1?01   ?1?      Must be unconditional
       VFMA           1?10   ?0?      Must be unconditional
       VFMS           1?10   ?1?      Must be unconditional

       VCVT*

       VMOV Fd, Fm
       VMOV Sn, Rd
       VMOV Rd, Sn
       VMOV Sn, Sm, Rd, Rn
       VMOV Rd, Rn, Sn, Sm
       VMOV Dm, Rd, Rn
       VMOV Rd, Rn, Dm
       VMOV Dn[0], Rd
       VMOV Rd, Dn[0]
       VMOV Dn[1], Rd
       VMOV Rd, Dn[1]

       VMSR <sysreg>, Rd
       VMRS Rd, <sysreg>
       VMRS APSR_nzcv, FPSCR
    */

    /* The type suffix alone decides the precision; .f32 is the default. */
    switch (ARM_INSTRUCTION_GROUP(token)) {
    case TOK_ASM_vmlaeq_f64:
    case TOK_ASM_vmlseq_f64:
    case TOK_ASM_vnmlseq_f64:
    case TOK_ASM_vnmlaeq_f64:
    case TOK_ASM_vmuleq_f64:
    case TOK_ASM_vnmuleq_f64:
    case TOK_ASM_vaddeq_f64:
    case TOK_ASM_vsubeq_f64:
    case TOK_ASM_vdiveq_f64:
    case TOK_ASM_vnegeq_f64:
    case TOK_ASM_vabseq_f64:
    case TOK_ASM_vsqrteq_f64:
    case TOK_ASM_vcmpeq_f64:
    case TOK_ASM_vcmpeeq_f64:
        coprocessor = CP_DOUBLE_PRECISION_FLOAT;
        break;
    default:
        break;
    }
    double_precision = (coprocessor == CP_DOUBLE_PRECISION_FLOAT);

    /* Parse up to three comma separated registers. */
    for (nb_operands = 0; nb_operands < 3; ) {
        if (nb_operands == 1 && (tok == '#' || tok == '$')) {
            /* "Fd, #imm" form: the tail parses the immediate and emits. */
            asm_floating_point_immediate_data_processing_opcode_tail(s1, token, coprocessor, operands[0]);
            return;
        }

        reg = asm_parse_vfp_regvar(tok, double_precision);
        if (reg == -1) {
            expect(double_precision ? "'d<number>'" : "'s<number>'");
            return;
        }
        operands[nb_operands] = reg;
        next();
        ++nb_operands;

        /* Only swallow a comma when another operand may follow.  A comma
           after the third operand stays in the token stream so that it is
           not silently accepted here (NOTE(review): this relies on the
           caller complaining about leftover tokens -- confirm). */
        if (nb_operands < 3 && tok == ',')
            next();
        else
            break;
    }

    nb_explicit_operands = nb_operands;
    if (nb_operands == 2) { // implicit
        operands[2] = operands[1];
        operands[1] = operands[0];
        nb_operands = 3;
    }
    if (nb_operands < 3) {
        tcc_error("Not enough operands for '%s' (%u)", get_tok_str(token, NULL), (unsigned)nb_operands);
        return;
    }

    switch (ARM_INSTRUCTION_GROUP(token)) {
    case TOK_ASM_vmlaeq_f32:
    case TOK_ASM_vmlaeq_f64:
        opcode1 = 0;
        opcode2 = 0;
        break;
    case TOK_ASM_vmlseq_f32:
    case TOK_ASM_vmlseq_f64:
        opcode1 = 0;
        opcode2 = 2;
        break;
    case TOK_ASM_vnmlseq_f32:
    case TOK_ASM_vnmlseq_f64:
        opcode1 = 1;
        opcode2 = 0;
        break;
    case TOK_ASM_vnmlaeq_f32:
    case TOK_ASM_vnmlaeq_f64:
        opcode1 = 1;
        opcode2 = 2;
        break;
    case TOK_ASM_vmuleq_f32:
    case TOK_ASM_vmuleq_f64:
        opcode1 = 2;
        opcode2 = 0;
        break;
    case TOK_ASM_vnmuleq_f32:
    case TOK_ASM_vnmuleq_f64:
        opcode1 = 2;
        opcode2 = 2;
        break;
    case TOK_ASM_vaddeq_f32:
    case TOK_ASM_vaddeq_f64:
        opcode1 = 3;
        opcode2 = 0;
        break;
    case TOK_ASM_vsubeq_f32:
    case TOK_ASM_vsubeq_f64:
        opcode1 = 3;
        opcode2 = 2;
        break;
    case TOK_ASM_vdiveq_f32:
    case TOK_ASM_vdiveq_f64:
        opcode1 = 8;
        opcode2 = 0;
        break;
    case TOK_ASM_vnegeq_f32:
    case TOK_ASM_vnegeq_f64:
        opcode1 = 11; // "Other" instruction
        opcode2 = 2;
        operands[1] = 1;
        operand_1_register = 0;
        break;
    case TOK_ASM_vabseq_f32:
    case TOK_ASM_vabseq_f64:
        opcode1 = 11; // "Other" instruction
        opcode2 = 6;
        operands[1] = 0;
        operand_1_register = 0;
        break;
    case TOK_ASM_vsqrteq_f32:
    case TOK_ASM_vsqrteq_f64:
        opcode1 = 11; // "Other" instruction
        opcode2 = 6;
        operands[1] = 1;
        operand_1_register = 0;
        break;
    case TOK_ASM_vcmpeq_f32:
    case TOK_ASM_vcmpeq_f64:
        opcode1 = 11; // "Other" instruction
        opcode2 = 2;
        operands[1] = 4;
        operand_1_register = 0;
        break;
    case TOK_ASM_vcmpeeq_f32:
    case TOK_ASM_vcmpeeq_f64:
        opcode1 = 11; // "Other" instruction
        opcode2 = 6;
        operands[1] = 4;
        operand_1_register = 0;
        break;
    // TODO: vcvt; vcvtr
    default:
        expect("known floating point instruction");
        return;
    }

    /* The "Other" instructions above overwrite operands[1] with a fixed
       selector, so they only have room for two registers.  Reject a third
       one instead of silently dropping the middle register. */
    if (!operand_1_register && nb_explicit_operands > 2) {
        tcc_error("Too many operands for '%s' (%u)", get_tok_str(token, NULL), (unsigned)nb_explicit_operands);
        return;
    }

    if (coprocessor == CP_SINGLE_PRECISION_FLOAT) {
        /* For single precision registers the lowest bit of each register
           number is moved into an opcode bit and the remaining bits are
           kept as the register field. */
        if (operands[2] & 1)
            opcode2 |= 1;
        operands[2] >>= 1;

        if (operand_1_register) {
            if (operands[1] & 1)
                opcode2 |= 4;
            operands[1] >>= 1;
        }

        if (operands[0] & 1)
            opcode1 |= 4;
        operands[0] >>= 1;
    }

    asm_emit_coprocessor_opcode(condition_code_of_token(token), coprocessor, opcode1, operands[0], operands[1], operands[2], opcode2, 0);
}
#endif
static void asm_misc_single_data_transfer_opcode(TCCState *s1, int token)
@ -2013,6 +2270,37 @@ ST_FUNC void asm_opcode(TCCState *s1, int token)
asm_floating_point_single_data_transfer_opcode(s1, token);
return;
case TOK_ASM_vmlaeq_f32:
case TOK_ASM_vmlseq_f32:
case TOK_ASM_vnmlseq_f32:
case TOK_ASM_vnmlaeq_f32:
case TOK_ASM_vmuleq_f32:
case TOK_ASM_vnmuleq_f32:
case TOK_ASM_vaddeq_f32:
case TOK_ASM_vsubeq_f32:
case TOK_ASM_vdiveq_f32:
case TOK_ASM_vnegeq_f32:
case TOK_ASM_vabseq_f32:
case TOK_ASM_vsqrteq_f32:
case TOK_ASM_vcmpeq_f32:
case TOK_ASM_vcmpeeq_f32:
case TOK_ASM_vmlaeq_f64:
case TOK_ASM_vmlseq_f64:
case TOK_ASM_vnmlseq_f64:
case TOK_ASM_vnmlaeq_f64:
case TOK_ASM_vmuleq_f64:
case TOK_ASM_vnmuleq_f64:
case TOK_ASM_vaddeq_f64:
case TOK_ASM_vsubeq_f64:
case TOK_ASM_vdiveq_f64:
case TOK_ASM_vnegeq_f64:
case TOK_ASM_vabseq_f64:
case TOK_ASM_vsqrteq_f64:
case TOK_ASM_vcmpeq_f64:
case TOK_ASM_vcmpeeq_f64:
asm_floating_point_data_processing_opcode(s1, token);
return;
case TOK_ASM_vpusheq:
case TOK_ASM_vpopeq:
case TOK_ASM_vldmeq:

View File

@ -153,6 +153,29 @@
DEF(TOK_ASM_ ## x, #x) \
DEF(TOK_ASM_ ## x ## rsvd, #x "rsvd")
/* Note: condition code is 4 bits */
/* Declares the whole family of tokens for mnemonic x with type suffix y. */
/* There is one token per condition suffix (eq, ne, cs, cc, mi, pl, vs, vc, hi, ls, ge, lt, gt, le), */
/* followed by the form without condition suffix and by a "rsvd" form: 16 tokens in total. */
/* Token identifier: TOK_ASM_<x><cond>_<y>; token spelling: "<x><cond>.<y>", for example "vaddeq.f32". */
/* NOTE(review): the order of the entries presumably has to stay as it is, so that the position of a */
/* token inside its group of 16 gives the condition code -- confirm against condition_code_of_token. */
#define DEF_ASM_CONDED_WITH_SUFFIX(x, y) \
DEF(TOK_ASM_ ## x ## eq ## _ ## y, #x "eq." #y) \
DEF(TOK_ASM_ ## x ## ne ## _ ## y, #x "ne." #y) \
DEF(TOK_ASM_ ## x ## cs ## _ ## y, #x "cs." #y) \
DEF(TOK_ASM_ ## x ## cc ## _ ## y, #x "cc." #y) \
DEF(TOK_ASM_ ## x ## mi ## _ ## y, #x "mi." #y) \
DEF(TOK_ASM_ ## x ## pl ## _ ## y, #x "pl." #y) \
DEF(TOK_ASM_ ## x ## vs ## _ ## y, #x "vs." #y) \
DEF(TOK_ASM_ ## x ## vc ## _ ## y, #x "vc." #y) \
DEF(TOK_ASM_ ## x ## hi ## _ ## y, #x "hi." #y) \
DEF(TOK_ASM_ ## x ## ls ## _ ## y, #x "ls." #y) \
DEF(TOK_ASM_ ## x ## ge ## _ ## y, #x "ge." #y) \
DEF(TOK_ASM_ ## x ## lt ## _ ## y, #x "lt." #y) \
DEF(TOK_ASM_ ## x ## gt ## _ ## y, #x "gt." #y) \
DEF(TOK_ASM_ ## x ## le ## _ ## y, #x "le." #y) \
DEF(TOK_ASM_ ## x ## _ ## y, #x "." #y) \
DEF(TOK_ASM_ ## x ## rsvd ## _ ## y, #x "rsvd." #y)
/* Declares the conditional token family of VFP mnemonic x twice: */
/* once with the type suffix f32 and once with the type suffix f64 (all f32 tokens come first). */
#define DEF_ASM_CONDED_VFP_F32_F64(x) \
DEF_ASM_CONDED_WITH_SUFFIX(x, f32) \
DEF_ASM_CONDED_WITH_SUFFIX(x, f64)
/* Note: add new tokens after nop (MUST always use DEF_ASM_CONDED) */
DEF_ASM_CONDED(nop)
@ -286,6 +309,21 @@
DEF_ASM_CONDED(vldr)
DEF_ASM_CONDED(vstr)
DEF_ASM_CONDED_VFP_F32_F64(vmla)
DEF_ASM_CONDED_VFP_F32_F64(vmls)
DEF_ASM_CONDED_VFP_F32_F64(vnmls)
DEF_ASM_CONDED_VFP_F32_F64(vnmla)
DEF_ASM_CONDED_VFP_F32_F64(vmul)
DEF_ASM_CONDED_VFP_F32_F64(vnmul)
DEF_ASM_CONDED_VFP_F32_F64(vadd)
DEF_ASM_CONDED_VFP_F32_F64(vsub)
DEF_ASM_CONDED_VFP_F32_F64(vdiv)
DEF_ASM_CONDED_VFP_F32_F64(vneg)
DEF_ASM_CONDED_VFP_F32_F64(vabs)
DEF_ASM_CONDED_VFP_F32_F64(vsqrt)
DEF_ASM_CONDED_VFP_F32_F64(vcmp)
DEF_ASM_CONDED_VFP_F32_F64(vcmpe)
DEF_ASM_CONDED(vpush)
DEF_ASM_CONDED(vpop)
DEF_ASM_CONDED(vldm)

View File

@ -5,7 +5,16 @@ set -e
# Note: "{r3}" is definitely different--but would complicate the assembler.
state="`mktemp -d`"
cat ../arm-tok.h |grep DEF_ASM |grep -v 'not useful' |grep -v '#define' |grep -v '/[*]' |sed -e 's;^[ ]*DEF_ASM[^(]*(\(.*\)).*$;\1;' | egrep -v '^((r|c|p|s|d)[0-9]+|fp|ip|sp|lr|pc|asl)$' | while read s
cat ../arm-tok.h | \
grep DEF_ASM | \
grep -v 'not useful' | \
grep -v '#define' | \
grep -v '/[*]' | \
grep -v 'DEF_ASM_CONDED_WITH_SUFFIX(x' | \
sed -e 's;^[ ]*DEF_ASM_CONDED_VFP_F32_F64[^(]*(\(.*\)).*$; DEF_ASM_CONDED(\1.f32)\
DEF_ASM_CONDED(\1.f64);g' | \
sed -e 's;^[ ]*DEF_ASM[^(]*(\(.*\)).*$;\1;g' | \
egrep -v '^((r|c|p|s|d)[0-9]+|fp|ip|sp|lr|pc|asl)$' | while read s
do
as_opts=""
if [ "${s#v}" != "${s}" ]
@ -132,6 +141,12 @@ do
"{d4}" \
"{s4-s31}" \
"{s4}" \
"s2, s3, s4" \
"s2, s3" \
"d2, d3, d4" \
"d2, d3" \
"s2, #0" \
"d2, #0" \
""
do
#echo ".syntax unified" > a.s