diff -uNr a/ffa/libffa/ffa.gpr b/ffa/libffa/ffa.gpr
--- a/ffa/libffa/ffa.gpr 92d0220a48f6753fb699220db8eba867b4f4354291c3e4ebd6b3048982524e93f82356d40b8bf2371f7c6768545fe2ee0e531ebd70144cbd8213a6d8c84e727e
+++ b/ffa/libffa/ffa.gpr 9ddfc20107335ae2ca3f5b44ed95de89b6127a56c29429f652bd3f3632f8b70433a8110023d66cf000ba8214108428c2ebb3bf5c9eba346cbcc99889cf60124a
@@ -24,7 +24,7 @@
type Mode_Type is ("debug", "release");
Mode : Mode_Type := external ("mode", "release");
- for Languages use ("Ada");
+ for Languages use ("Ada", "Asm");
for Source_Dirs use (".");
for Library_Dir use "lib";
for Library_Name use "FFA";
diff -uNr a/ffa/libffa/fz_mul.adb b/ffa/libffa/fz_mul.adb
--- a/ffa/libffa/fz_mul.adb 5cb9ecb938f842b7c34ca7edf99fb92502986d7f660b223659c9b19c6008a24c3be6de8135db37e29e7ff278a451b68c61f6b57d8e404372dcd3fd6d4320ea0a
+++ b/ffa/libffa/fz_mul.adb bfad12cbd645042ea8d0121994c38af1a8945423cfd535af80a2762d506bef3d30ed8ecc7a6988eca6788789a6d36c25e3fe505c49f478afe227ab8d9d51821b
@@ -29,6 +29,32 @@
Y : in FZ;
XY_Lo : out FZ;
XY_Hi : out FZ) is
+
+ -- Words in each multiplicand
+ L : constant Word_Index := X'Length;
+
+ -- Length of Product, i.e. double the length of either multiplicand
+ LP : constant Word_Index := 2 * L;
+
+ -- Register holding Product; indexed from zero
+ XY : FZ(0 .. LP - 1);
+
+ procedure Asm_Comba(X : in FZ;
+ Y : in FZ;
+ XY : out FZ;
+ X_Size : in Word_Count);
+ pragma Import (C, Asm_Comba, "x86_64_comba");
+ begin
+ Asm_Comba(X, Y, XY, L);
+ XY_Lo := XY(0 .. L - 1);
+ XY_Hi := XY(L .. XY'Last);
+ end FZ_Mul_Comba;
+
+ -- Comba's multiplier.
+ procedure FZ_Mul_Comba_C(X : in FZ;
+ Y : in FZ;
+ XY_Lo : out FZ;
+ XY_Hi : out FZ) is
-- Words in each multiplicand
L : constant Word_Index := X'Length;
@@ -119,7 +145,7 @@
XY_Lo := XY(0 .. L - 1);
XY_Hi := XY(L .. XY'Last);
- end FZ_Mul_Comba;
+ end FZ_Mul_Comba_C;
pragma Inline_Always(FZ_Mul_Comba);
end FZ_Mul;
diff -uNr a/ffa/libffa/x86_64_comba.s b/ffa/libffa/x86_64_comba.s
--- a/ffa/libffa/x86_64_comba.s false
+++ b/ffa/libffa/x86_64_comba.s 20e3b23c757f4b95f87c9ea28197c703d3b29fb84862c8629f9063b9e00704b15838a4b8fa711655b8f09fbfa157a50915c54219de49ab75aee72602f7b12063
@@ -0,0 +1,85 @@
+.intel_mnemonic
+.intel_syntax noprefix
+
+# col: sum one column of the Comba product and emit one word of XY.
+# Adds X(J) * Y(N - J) for J = U .. V to the accumulator A2:A1:A0, then
+# stores A0 to XY(N) and shifts the accumulator down by one word.
+# Clobbers RAX, RDX, R11 and the flags.
+#
+# Register allocation
+# R8, R9, R10 = A0, A1, A2 (column accumulator, low word first)
+# R11 = J,U (set to U by the caller, stepped as J inside col)
+# R12 = V (last J of the column, inclusive)
+# RBX = N in col = I in comba
+# RDI = X, RSI = Y, R13 = XY
+# RCX = upper bound of the foreach-column loops; RAX, RDX = (temporary)
+
+col:
+cmp r11, r12 # exit when J > V (signed compare)
+jg col_output # column finished: go emit the word
+lea rdx, [rsi + 8*rbx] # rdx := Y'Address + N*8 (words are 8 bytes)
+lea rax, [8*r11] # rax := J*8 (byte offset of index J)
+sub rdx, rax # rdx := Y'Address + (N - J)*8
+mov rdx, [rdx] # rdx := Y(N - J)
+mov rax, [rdi + 8*r11] # rax := X(J) := *(X'Address + J*8)
+mul rdx # rdx:rax := X(J) * Y(N - J), unsigned 128-bit product
+add r8, rax # A0, C := A0 + rax
+adc r9, rdx # A1, C := A1 + rdx + C
+adc r10, 0 # A2, [C=0] := A2 + 0 + C
+inc r11 # J := J + 1
+jmp col
+col_output:
+mov [r13 + 8*rbx], r8 # XY(N) := A0
+mov r8, r9 # A0 := A1 (shift accumulator down one word)
+mov r9, r10 # A1 := A2
+xor r10, r10 # A2 := 0
+ret
+
+# x86_64_comba(X, Y, XY, L): XY := X * Y, computed one column at a time.
+# Arguments according to SysV ABI
+# RDI: X, RSI: Y, arrays of L words each (the multiplicands)
+# RDX: XY, array of 2*L words (receives the product)
+# RCX: L, the length in words of X and of Y (X_Size in the Ada import).
+.global x86_64_comba
+x86_64_comba:
+push rbx # RBX, R12, R13 are callee-saved under the SysV ABI
+push r12
+push r13
+mov r13, rdx # RDX is used by MUL, move XY to a free register
+
+xor r8, r8 # A0 := 0
+xor r9, r9 # A1 := 0
+xor r10, r10 # A2 := 0
+xor rbx, rbx # I := 0
+
+loop_1: # first half: columns I = 0 .. L-1, rows J = 0 .. I
+cmp rbx, rcx # exit when I >= L
+jge end_loop_1 # (signed compare)
+xor r11, r11 # U := 0
+mov r12, rbx # V := I
+call col # XY(I) := column word; accumulator shifted down
+inc rbx # I := I + 1
+jmp loop_1
+end_loop_1:
+
+# rbx = L after the previous loop (when L >= 0)
+lea r12, [rcx - 1] # V = L - 1
+mov rcx, r12 # RCX := L - 1
+shl rcx, 1 # RCX := (L - 1)*2, the last column handled by loop_2
+loop_2: # second half: columns I = L .. 2*L-2, rows J = I-L+1 .. L-1
+cmp rbx, rcx # exit when I > 2*L-2
+jg end_loop_2 # (signed compare)
+mov r11, rbx # U := I
+sub r11, r12 # U := I - V := I - L + 1
+call col # V already set to L - 1
+inc rbx # I := I + 1
+jmp loop_2
+end_loop_2:
+
+mov [r13 + 8*rbx], r8 # XY(I) := A0; here I = 2*L - 1, the top word
+
+end_comba:
+pop r13 # restore callee-saved registers in reverse push order
+pop r12
+pop rbx
+ret