From c9940681750d581254492801893301717d80317a Mon Sep 17 00:00:00 2001
From: Ekaitz Zarraga <ekaitz@elenq.tech>
Date: Tue, 23 Apr 2024 12:05:05 +0200
Subject: [PATCH] riscv: asm: Add load-reserved and store-conditional

Add the atomic instructions `lr` (load-reserved) and `sc`
(store-conditional) in their 32-bit and 64-bit versions.
---
 riscv64-asm.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++-
 riscv64-tok.h |  26 +++++++++++
 2 files changed, 144 insertions(+), 1 deletion(-)

diff --git a/riscv64-asm.c b/riscv64-asm.c
index 88af663b..26288605 100644
--- a/riscv64-asm.c
+++ b/riscv64-asm.c
@@ -51,6 +51,7 @@ typedef struct Operand {
 static void asm_binary_opcode(TCCState* s1, int token);
 ST_FUNC void asm_clobber(uint8_t *clobber_regs, const char *str);
 ST_FUNC void asm_compute_constraints(ASMOperand *operands, int nb_operands, int nb_outputs, const uint8_t *clobber_regs, int *pout_reg);
+static void asm_emit_a(int token, uint32_t opcode, const Operand *rd1, const Operand *rs2, const Operand *rs1, int aq, int rl); /* operands in assembly order */
 static void asm_emit_b(int token, uint32_t opcode, const Operand *rs1, const Operand *rs2, const Operand *imm);
 static void asm_emit_i(int token, uint32_t opcode, const Operand *rd, const Operand *rs1, const Operand *rs2);
 static void asm_emit_j(int token, uint32_t opcode, const Operand *rd, const Operand *rs2);
@@ -1044,6 +1045,102 @@ static void asm_ternary_opcode(TCCState *s1, int token)
     }
 }
 
+static void asm_atomic_opcode(TCCState *s1, int token)
+{
+    static const Operand zero = {.type = OP_REG}; /* register operand; other fields zero-initialized */
+    Operand ops[3];
+    /* Parses `lr.* rd, (rs1)` or `sc.* rd, rs2, (rs1)` into ops[] = {rd, rs2, rs1} and emits it. */
+    parse_operand(s1, &ops[0]);
+    if ( tok == ',') next(); else expect("','");
+    /* lr.* has no rs2 in its syntax: fill the slot with `zero` instead of parsing one. */
+    if ( token <= TOK_ASM_lr_d_aqrl && token >= TOK_ASM_lr_w ) {
+        ops[1] = zero;
+    } else {
+        parse_operand(s1, &ops[1]);
+        if ( tok == ',') next(); else expect("','");
+    }
+    /* The address register is written in parentheses. */
+    if ( tok == '(') next(); else expect("'('");
+    parse_operand(s1, &ops[2]);
+    if ( tok == ')') next(); else expect("')'");
+    /* opcode: 0x2F | funct3<<12 (0x2 for .w, 0x3 for .d) | funct5<<27 (0x2 for lr, 0x3 for sc); last two args: aq, rl */
+    switch(token){
+        case TOK_ASM_lr_w:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 0);
+            break;
+        case TOK_ASM_lr_w_aq:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 0);
+            break;
+        case TOK_ASM_lr_w_rl:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 1);
+            break;
+        case TOK_ASM_lr_w_aqrl:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 1);
+            break;
+
+        case TOK_ASM_lr_d:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 0);
+            break;
+        case TOK_ASM_lr_d_aq:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 0);
+            break;
+        case TOK_ASM_lr_d_rl:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 0, 1);
+            break;
+        case TOK_ASM_lr_d_aqrl:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x2<<27, &ops[0], &ops[1], &ops[2], 1, 1);
+            break;
+
+        case TOK_ASM_sc_w:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 0);
+            break;
+        case TOK_ASM_sc_w_aq:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 0);
+            break;
+        case TOK_ASM_sc_w_rl:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 1);
+            break;
+        case TOK_ASM_sc_w_aqrl:
+            asm_emit_a(token, 0x2F | 0x2<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 1);
+            break;
+
+        case TOK_ASM_sc_d:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 0);
+            break;
+        case TOK_ASM_sc_d_aq:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 0);
+            break;
+        case TOK_ASM_sc_d_rl:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 0, 1);
+            break;
+        case TOK_ASM_sc_d_aqrl:
+            asm_emit_a(token, 0x2F | 0x3<<12 | 0x3<<27, &ops[0], &ops[1], &ops[2], 1, 1);
+            break;
+    }
+}
+
+/* caller: Add funct3 and funct5 to opcode; operands are passed in assembly order: rd, rs2, (rs1) */
+static void asm_emit_a(int token, uint32_t opcode, const Operand *rd1, const Operand *rs2, const Operand *rs1, int aq, int rl)
+{
+    if (rd1->type != OP_REG)
+        tcc_error("'%s': Expected first destination operand that is a register", get_tok_str(token, NULL));
+    if (rs2->type != OP_REG)
+        tcc_error("'%s': Expected second source operand that is a register", get_tok_str(token, NULL));
+    if (rs1->type != OP_REG)
+        tcc_error("'%s': Expected third source operand that is a register", get_tok_str(token, NULL));
+    /* A-type instruction:
+            31...27 funct5
+            26      aq
+            25      rl
+            24...20 rs2
+            19...15 rs1
+            14...12 funct3
+            11...7  rd
+            6...0   opcode
+       opcode always fixed pos. */
+    gen_le32(opcode | ENCODE_RS1(rs1->reg) | ENCODE_RS2(rs2->reg) | ENCODE_RD(rd1->reg) | aq << 26 | rl << 25);
+}
+
 /* caller: Add funct3 to opcode */
 static void asm_emit_s(int token, uint32_t opcode, const Operand* rs1, const Operand* rs2, const Operand* imm)
 {
@@ -1109,7 +1206,7 @@ ST_FUNC void asm_opcode(TCCState *s1, int token)
     switch (token) {
     case TOK_ASM_ebreak:
     case TOK_ASM_ecall:
-    case TOK_ASM_fence:
+    case TOK_ASM_fence: // XXX: it's missing iorw for pred and succ
     case TOK_ASM_fence_i:
     case TOK_ASM_hrts:
     case TOK_ASM_mrth:
@@ -1306,6 +1403,26 @@ ST_FUNC void asm_opcode(TCCState *s1, int token)
         asm_ternary_opcode(s1, token);
         return;
 
+    /* Atomic operations */
+    case TOK_ASM_lr_w:
+    case TOK_ASM_lr_w_aq:
+    case TOK_ASM_lr_w_rl:
+    case TOK_ASM_lr_w_aqrl:
+    case TOK_ASM_lr_d:
+    case TOK_ASM_lr_d_aq:
+    case TOK_ASM_lr_d_rl:
+    case TOK_ASM_lr_d_aqrl:
+    case TOK_ASM_sc_w:
+    case TOK_ASM_sc_w_aq:
+    case TOK_ASM_sc_w_rl:
+    case TOK_ASM_sc_w_aqrl:
+    case TOK_ASM_sc_d:
+    case TOK_ASM_sc_d_aq:
+    case TOK_ASM_sc_d_rl:
+    case TOK_ASM_sc_d_aqrl:
+        asm_atomic_opcode(s1, token);
+        break;
+
     default:
         expect("known instruction");
     }
diff --git a/riscv64-tok.h b/riscv64-tok.h
index 25c95cee..67fc91a3 100644
--- a/riscv64-tok.h
+++ b/riscv64-tok.h
@@ -8,6 +8,9 @@
 #define DEF_ASM_WITH_SUFFIX(x, y) \
   DEF(TOK_ASM_ ## x ## _ ## y, #x "." #y)
 
+#define DEF_ASM_WITH_SUFFIXES(x, y, z) \
+  DEF(TOK_ASM_ ## x ## _ ## y ## _ ## z, #x "." #y "." #z) /* (lr, w, aq) -> TOK_ASM_lr_w_aq, "lr.w.aq" */
+
 /* register */
  /* integer */
  DEF_ASM(x0)
@@ -422,4 +425,27 @@
  DEF_ASM(push)
  DEF_ASM(pop)
 
+/* “A” Standard Extension for Atomic Instructions, Version 2.1 */
+ /* XXX: Atomic memory operations (amo*) are not defined yet; only lr/sc follow */
+ DEF_ASM_WITH_SUFFIX(lr, w)
+ DEF_ASM_WITH_SUFFIXES(lr, w, aq)
+ DEF_ASM_WITH_SUFFIXES(lr, w, rl)
+ DEF_ASM_WITH_SUFFIXES(lr, w, aqrl)
+
+ DEF_ASM_WITH_SUFFIX(lr, d)
+ DEF_ASM_WITH_SUFFIXES(lr, d, aq)
+ DEF_ASM_WITH_SUFFIXES(lr, d, rl)
+ DEF_ASM_WITH_SUFFIXES(lr, d, aqrl)
+ /* asm_atomic_opcode() range-tests TOK_ASM_lr_w..TOK_ASM_lr_d_aqrl; presumably DEF() numbers tokens in order, so keep the lr.* group contiguous */
+
+ DEF_ASM_WITH_SUFFIX(sc, w)
+ DEF_ASM_WITH_SUFFIXES(sc, w, aq)
+ DEF_ASM_WITH_SUFFIXES(sc, w, rl)
+ DEF_ASM_WITH_SUFFIXES(sc, w, aqrl)
+
+ DEF_ASM_WITH_SUFFIX(sc, d)
+ DEF_ASM_WITH_SUFFIXES(sc, d, aq)
+ DEF_ASM_WITH_SUFFIXES(sc, d, rl)
+ DEF_ASM_WITH_SUFFIXES(sc, d, aqrl)
+
 #undef DEF_ASM_WITH_SUFFIX