tree checksum vpatch file split hunks
all signers: asciilifeform bvt diana_coman
antecedents: ffa_ch9_exodus.kv ffa_ch1_genesis.kv
press order:
patch:
(24 . 7)(24 . 7)
10 type Mode_Type is ("debug", "release");
11 Mode : Mode_Type := external ("mode", "release");
12
13 for Languages use ("Ada");
14 for Languages use ("Ada", "Asm");
15 for Source_Dirs use (".");
16 for Library_Dir use "lib";
17 for Library_Name use "FFA";
- 5CB9ECB938F842B7C34CA7EDF99FB92502986D7F660B223659C9B19C6008A24C3BE6DE8135DB37E29E7FF278A451B68C61F6B57D8E404372DCD3FD6D4320EA0A(29 . 6)(29 . 32)-
22 Y : in FZ;
23 XY_Lo : out FZ;
24 XY_Hi : out FZ) is
25
26 -- Words in each multiplicand
27 L : constant Word_Index := X'Length;
28
29 -- Length of Product, i.e. double the length of either multiplicand
30 LP : constant Word_Index := 2 * L;
31
32 -- Register holding Product; indexed from zero
33 XY : FZ(0 .. LP - 1);
34
35 procedure Asm_Comba(X : in FZ;
36 Y : in FZ;
37 XY : out FZ;
38 X_Size : in Word_Count);
39 pragma Import (C, Asm_Comba, "x86_64_comba");
40 begin
41 Asm_Comba(X, Y, XY, L);
42 XY_Lo := XY(0 .. L - 1);
43 XY_Hi := XY(L .. XY'Last);
44 end FZ_Mul_Comba;
45
46 -- Comba's multiplier.
47 procedure FZ_Mul_Comba_C(X : in FZ;
48 Y : in FZ;
49 XY_Lo : out FZ;
50 XY_Hi : out FZ) is
51
52 -- Words in each multiplicand
53 L : constant Word_Index := X'Length;
(119 . 7)(145 . 7)
55 XY_Lo := XY(0 .. L - 1);
56 XY_Hi := XY(L .. XY'Last);
57
58 end FZ_Mul_Comba;
59 end FZ_Mul_Comba_C;
60 pragma Inline_Always(FZ_Mul_Comba);
61
62 end FZ_Mul;
(0 . 0)(1 . 85)
67 .intel_mnemonic
68 .intel_syntax noprefix
69
70 # Register allocation (shared by col and x86_64_comba)
71 # R8 = A0, low word of the three-word column accumulator
72 # R9 = A1, middle word of the accumulator
73 # R10 = A2, high word of the accumulator (receives the carry out of A1)
74 # R11 = J,U: index into X of the current term; x86_64_comba loads the start value U before calling col
75 # R12 = V: last value of J for the current column (inclusive)
76 # RBX = N in col = I in comba: index of the product word (column) being computed
77 # RDI = X
78 # RSI = Y
79 # R13 = XY
80 # RCX = upper bound of the foreach-column loops (L in loop_1, 2*(L-1) in loop_2)
81 # RAX, RDX = (temporary)
82 # col: A2:A1:A0 += X(J)*Y(N-J) for J = R11 .. R12; then XY(N) := A0 and the accumulator shifts down one word. Modifies RAX, RDX, R8-R11, flags.
83 col:
84 cmp r11, r12 # exit when J > V
85 jg col_output # ... (signed comparison)
86 lea rdx, [rsi + 8*rbx] # rdx := Y'Address + N*8
87 lea rax, [8*r11] # rax := j*8 (byte offset of word j)
88 sub rdx, rax # rdx := rdx - j*8 = address of Y(N - j)
89 mov rdx, [rdx] # rdx := *(rdx) = Y(N - j)
90 mov rax, [rdi + 8*r11] # rax := X(j) := *(X'Address + j*8)
91 mul rdx # rdx:rax := rax*rdx (unsigned 64x64 -> 128-bit product)
92 add r8, rax # A0, C := A0 + rax
93 adc r9, rdx # A1, C := A1 + rdx + C
94 adc r10, 0 # A2, [C=0] := A2 + 0 + C
95 inc r11 # J := J + 1
96 jmp col # next term of the same column
97 col_output:
98 mov [r13 + 8*rbx], r8 # XY(N) := A0
99 mov r8, r9 # A0 := A1
100 mov r9, r10 # A1 := A2
101 xor r10, r10 # A2 := 0
102 ret # return to the column loop in x86_64_comba
103
104 # x86_64_comba: Comba multiplication, XY := X * Y, one product word per column. Arguments according to SysV ABI
105 # RDI: X, array of X_Size words
106 # RSI: Y, array of X_Size words
107 # RDX: XY, array of 2*X_Size words (output)
108 # RCX: X_Size, number of words in X and in Y (the Ada caller passes X'Length)
109 .global x86_64_comba
110 x86_64_comba:
111 push rbx # rbx, r12, r13 are callee-saved under the SysV ABI
112 push r12
113 push r13
114 mov r13, rdx # RDX is used by MUL, move XY to a free register
115
116 xor r8, r8 # A0 := 0
117 xor r9, r9 # A1 := 0
118 xor r10, r10 # A2 := 0
119 xor rbx, rbx # I := 0
120
121 loop_1:
122 cmp rbx, rcx # exit when I >= L
123 jge end_loop_1 # ... (signed comparison)
124 xor r11, r11 # U := 0
125 mov r12, rbx # V := I
126 call col # column I: J runs over 0 .. I, writes XY(I)
127 inc rbx # I := I + 1
128 jmp loop_1
129 end_loop_1:
130
131 # rbx = L after the previous loop; the remaining columns are L .. 2*L-2
132 lea r12, [rcx - 1] # V = L - 1
133 mov rcx, r12 # RCX := L - 1
134 shl rcx, 1 # RCX := (L - 1)*2
135 loop_2:
136 cmp rbx, rcx # exit when I > 2*L-2
137 jg end_loop_2 # ... (signed comparison)
138 mov r11, rbx # U := I
139 sub r11, r12 # U := I - V := I - L + 1
140 call col # V already set to L - 1
141 inc rbx # I := I + 1
142 jmp loop_2
143 end_loop_2:
144
145 mov [r13 + 8*rbx], r8 # XY(I) := A0; here I = 2*L - 1, the top product word
146
147 end_comba: # NOTE(review): no jump to this label is visible in this chunk
148 pop r13 # restore callee-saved registers in reverse push order
149 pop r12
150 pop rbx
151 ret