tree checksum vpatch file split hunks

all signers: asciilifeform

antecedents: m_genesis.kv tlb_and_exc_speedup.kv

press order:

m_genesis.kv asciilifeform
errata_slaveirq.kv asciilifeform

patch:

- B93A4289F9DECD7FC48508715AFE4F4237A1EDD72646FEF53EBAB319D50F16ADF546473B023C657C5C6D16A47CF3DFA1ECFDEE1F00E5796126B8A6C727292904
+ D9F1B9E441A1F4E5D314D5C5F43319591CA03904B3AC36E1EDE4D7310B5FF30D2E0C720695F95733F1AC3E2E829FA96700B6E8776ACC974CFD30C6994EDAF4B6
m/MANIFEST.TXT
(1 . 3)(1 . 4)
5 586606 m_genesis "Genesis."
6 586747 errata_slaveirq "Fix of slave IRQ clearing."
7 586983 tlb_and_exc_speedup "Exc. handler fastpaths and TLB caching."
8 587480 simd_tlb_lookup "Experimental variant with SIMDistic TLB."
- 1427E0C04C15C733AED037B1A56252C2F7F8EE7960171AB548B6CE1244320EA1E9B4AA00045D94A4478AAD0E605E9206AA9E8B92581BE252B3E08C893ED372FE
+ 1A9CF2B89DFF48898ABB798D358E65AE567CDB25CA0521F248D4BB1CB388C604F6D9E17B61B25F6F1822AE4304DB1EE0059DB15B2C36858ACA08EC46653CC0A4
m/cpustate.asm
(18 . 7)(18 . 7)
13
14 ;-----------------------------------------------------------------------------
15 ; MIPS Processor State.
16 ; Note: PC, nPC, CP0_Status, CP0_Cause, CP0_Compare, are housed in x86 regs.
17 ; Note: PC, nPC, CP0_Status, CP0_Cause, are housed in x86 regs.
18 struc MCPU
19 .Regs resd 32 ; The standard MIPS Register Set
20 .LO resd 1 ; Multiplication/division results - Low Half
(33 . 20)(33 . 16)
22 .CP0_BadVAddr resd 1 ; Addr. of most recent addr.-caused exception
23 .CP0_ErrorEpc resd 1 ; Program counter at last exception
24 .CP0_PageMask resd 1 ; Control variable page sizes in TLB entries
25 ;; The TLB:
26 .TLB_Entries resd TLB_ENTRIES_COUNT ; TLB entries (without PFN)
27 .TLB_PFN_Even resd TLB_ENTRIES_COUNT ; TLB PFN0
28 .TLB_PFN_Odd resd TLB_ENTRIES_COUNT ; TLB PFN1
29 .CP0_Compare resd 1 ; Timer interrupt control
30 .TLB_PFN resq TLB_ENTRIES_COUNT ; TLB PFN : |ODD|EVEN|
31 endstruc
32 ;-----------------------------------------------------------------------------
33
34 ;-----------------------------------------------------------------------------
35 ; Refer to the N-th TLB Entry:
36 ;-----------------------------------------------------------------------------
37 %define TLB_E(N) dword [M_Base_32 + MCPU.TLB_Entries + 4 * (N)]
38 ; N-th PFN :
39 %define TLB_PFN_E(N) dword [M_Base_32 + MCPU.TLB_PFN_Even + 4 * (N)]
40 %define TLB_PFN_O(N) dword [M_Base_32 + MCPU.TLB_PFN_Odd + 4 * (N)]
41 %define TLB_PFN(N) qword [M_Base_64 + MCPU.TLB_PFN + 8 * (N)]
42 ;-----------------------------------------------------------------------------
43
44 ;-----------------------------------------------------------------------------
(61 . 7)(57 . 7)
46 ; TODO: is it possible to use the upper halves of the 64bit regs for anything?
47 ; ... or entirely worthless from intel's idiocy of 'auto-zero on mov' ?
48 ;-----------------------------------------------------------------------------
49 %define Flag_Reg edi ; Delay, Exception, etc flags
50 %define Flag_Reg edi ; Delay, Exception, etc flags and TLB G
51 %define RAM_Floor rsi ; Physical (x86) address of 1st RAM word
52 %define RAM_Ceiling r8 ; Physical (x86) address of last RAM word
53 %define PC r9d ; Current Program Counter
(69 . 23)(65 . 26)
55 %define CP0_Status r11d ; Processor status and control
56 %define CP0_Cause r12d ; Cause of last general exception
57 %define CP0_Count r13d ; Processor cycle count
58 %define CP0_Compare r14d ; Timer interrupt control
59 %define TLB_Flags r14 ; TLB D1/V1/D0/V0 Flags
60 %define AUX r15d ; Additional TMP for certain ops
61 %define AUX64 r15 ; all 64bits of AUX
62
63 ; TODO: 'Suspend to RAM' routine for all of the above.
64 ;-----------------------------------------------------------------------------
65
66 ;-----------------------------------------------------------------------------
67 ; XMM Regs used for TLB Caching:
68 ; XMM Regs:
69 ;-----------------------------------------------------------------------------
70 %define Rd_E_Last_Tag xmm5 ; Last good Tag on reading Even vAddr
71 %define Rd_E_Last_PFN xmm6 ; Last good PFN on reading Even vAddr
72 %define Rd_O_Last_Tag xmm7 ; Last good Tag on reading Odd vAddr
73 %define Rd_O_Last_PFN xmm8 ; Last good PFN on reading Odd vAddr
74 %define Wr_E_Last_Tag xmm9 ; Last good Tag on writing Even vAddr
75 %define Wr_E_Last_PFN xmm10 ; Last good PFN on writing Even vAddr
76 %define Wr_O_Last_Tag xmm11 ; Last good Tag on writing Odd vAddr
77 %define Wr_O_Last_PFN xmm12 ; Last good PFN on writing Odd vAddr
78 ;; 16 Tags:
79 %define TLB_TAG_BYTE_0 xmm5 ; Byte 0 of Tag
80 %define TLB_TAG_BYTE_1 xmm6 ; Byte 1 of Tag
81 %define TLB_TAG_BYTE_2 xmm7 ; Byte 2 of Tag
82 %define XMM_T0 xmm8 ; Temp
83
84 %define R_TLB_Last_Good_Tag xmm9 ; Last good Tag on reading vAddr (|O|E|)
85 %define W_TLB_Last_Good_Tag xmm10 ; Last good Tag on writing vAddr (|O|E|)
86 %define R_TLB_Last_Good_PFN xmm11 ; Last good PFN on reading vAddr (|O|E|)
87 %define W_TLB_Last_Good_PFN xmm12 ; Last good PFN on writing vAddr (|O|E|)
88 ;-----------------------------------------------------------------------------
89
90 ;-----------------------------------------------------------------------------
(128 . 8)(127 . 8)
92 mov nPC, eax
93 mov CP0_Status, eax
94 mov CP0_Cause, eax
95 mov CP0_Count, eax
96 mov CP0_Compare, eax
97 mov CP0_Count, eax
98 xor TLB_Flags, TLB_Flags
99 ;; Init 'slow' MIPS Regs:
100 mov ecx, 0
101 _init_reg:
(149 . 6)(148 . 7)
103 mov Sr(CP0_Epc), eax
104 mov Sr(CP0_BadVAddr), eax
105 mov Sr(CP0_ErrorEpc), eax
106 mov Sr(CP0_Compare), eax
107 Flg_Clear_All ; Reset all misc Flags to 0
108 bts CP0_Status, CP0St_ERL ; Start in kernel mode w/ unmapped useg
109 ret
- 7DEDB8135F032539DD5F6C0133070AFF4078CADB322FCB7E665A56BDFE7940E13D463B2803EA9C9726E7606579218BA9684D21AE106890C6EF7A285119887364
+ CB16F8AB1A1E89FCE1364577DD83A456E0859379A3A9FA42C883A18BC9962D7B017E5D2C99341C60E1D41B510FD0D588FCE6D55D058E49E62D89701099CC8080
m/flags.asm
(26 . 6)(26 . 7)
114 %define TLB_Rd_Cache_Valid 4
115 %define TLB_Wr_Cache_Valid 5
116 %define Shutdown 6
117 ; Positions 31 .. 16 store TLB's 'G' Flags
118 ;-----------------------------------------------------------------------------
119 ; Set a given Flag:
120 %macro Flg_On 1
- 3B8E7B9CF4B6B37A941B53F534FA000B523941E5C52747F0CCF92397C5E64FDCF74BBDD241E70E51BEF8893954C0CF5F4DB5A89066B68349A3DE4F24F737BDBC
+ E75680EEE6B4D6DAB5E13FD02DB2A86702136633846D4E9D9CA17FFAAE25CE6C1D0D138DB69081802520D9B418B7027A8150271E15E954971BA44D2506F70AD1
m/knobs.asm
(23 . 21)(23 . 6)
125 ;-----------------------------------------------------------------------------
126
127 ;-----------------------------------------------------------------------------
128 ; If TLBWR_CHEAT is enabled, the TLBWR ('Write Random TLB Entry') instruction
129 ; will slide all unwired entries down by one slot and write into the freed
130 ; slot near the top permitted by CP0_Wired, instead of the traditional
131 ; behaviour (where entry indexed by a modulus of the tick counter is killed.)
132 ; No known code (incl. Linux) tries to rely on the absolute position of
133 ; unwired TLB entries after a TLBWR instruction. So this gives faster lookup
134 ; when iterating over TLB, as the newer unwired entries will aggregate near
135 ; the base of the table. Iron MIPSen do not iterate, they look up in parallel,
136 ; ergo the original MIPS designer did not see any reason to attempt to order
137 ; TLB entries by frequency of use.
138 ;-----------------------------------------------------------------------------
139 %define TLBWR_CHEAT 1
140 ;-----------------------------------------------------------------------------
141
142 ;-----------------------------------------------------------------------------
143 ; Alignment Grain
144 ;-----------------------------------------------------------------------------
145 %define GRAIN 32
- F82EA8FEBCEB149C589262C1387C2A99D641219DAC217DE1BA1E3E99ED8B815B5FE4F6B68128CB55FEFED5BD41B4D764C802B6EBCE34BD4580769027CA001CD7
+ FA0643A3865257C9D5F290E90F980F02C7B6DDF6ED9DB24C8A1EA3571A9BAB6C69642EF743FE03BAE3B48BA2B8811CAF69F12606529178DA5724C6E617411C0F
m/mips.asm
(23 . 9)(23 . 9)
150 ;-----------------------------------------------------------------------------
151
152 ;-----------------------------------------------------------------------------
153 ; # of TLB entries. Could have more; but would have to change not only here.
154 ; # of TLB entries.
155 ;-----------------------------------------------------------------------------
156 %define TLB_ENTRIES_COUNT 16 ; in principle could have more.
157 %define TLB_ENTRIES_COUNT 16
158 ;-----------------------------------------------------------------------------
159
160 ;-----------------------------------------------------------------------------
(95 . 23)(95 . 3)
162 %define CP0St_EXL 1 ; Exception Level (0: Normal, 1: Kernel)
163 %define CP0St_IE 0 ; Interrupt Enable
164 ;-----------------------------------------------------------------------------
165
166 ;-----------------------------------------------------------------------------
167 ; MIPS TLB Entry.
168 ; We don't use C0 and C1 anywhere! and so we can put all of it in 32bits:
169 ;-----------------------------------------------------------------------------
170 ; 11111111111111110000000000000000
171 ; FEDCBA9876543210FEDCBA9876543210
172 ; --------------------------------
173 ; GVVDDAAAAAAAAVVVVVVVVVVVVVVVVVVV
174 ; |1010| ASID || VPN2 |
175 ;-----------------------------------------------------------------------------
176 %define TLB_VPN2_Mask 0x7FFFF ; 19 bits
177 %define TLB_ASID_Mask 0xFF ; 8 bits
178 %define TLB_ASID_Shift 19 ; sits after VPN2 Mask
179 %define TLB_D0 27 ; 27th bit
180 %define TLB_D1 28 ; 28th bit
181 %define TLB_V0 29 ; 29th bit
182 %define TLB_V1 30 ; 30th bit
183 %define TLB_G 31 ; 31st bit (last)
184 ;-----------------------------------------------------------------------------
- 35A5F7D843A515A6301C0D09957D3529F10F0443A50BD54177BCAAECC96054C502D2E14CCF1D5F106247DD2F566839AB49883E97B86CFF1D5AD889652E8F5EAF
+ 12DBA93AA8D5A453DF085F127856212310F14AB1538BE837CE079CE92619CF3C5DBB959E3B38B0B535D9D7FF103CB2DBFA351758FCB131264C2F88D9B2B2EE4F
m/mips_cpu.asm
(35 . 7)(35 . 7)
189 inc CP0_Count ; Timer: CP0_Count := CP0_Count + 1
190
191 ;; Timer Interrupt
192 cmp CP0_Count, CP0_Compare ; Has timer reached limit?
193 cmp CP0_Count, Sr(CP0_Compare) ; Has timer reached limit?
194 jne _cycle_no_mips_timer ; If not, do not invoke interrupt
195 SetIRQ TIMER_IRQ ; Timer reached limit, invoke timer IRQ
196
- 931B5FD9AC59730BBCB95E9A9D3DBA41483BBE6B3FC204AD8194397191795BACF3EF76DF5335F8F17B3479A007DE3A808DF640FCA949A7802B183BC25E7FE0C3
+ 343FF34A3CBC7CD5D51C465B8B91754C546C841055B6E84DFC8E928262E958534E727DC20EC0900B103F82F57895CBFB372D0789FAE1410B593746F76125187A
m/mipsinst/m_instrs.asm
(92 . 25)(92 . 6)
201 _m_tlbwr:
202 ; no fields
203 mov ecx, Sr(CP0_Wired) ; ecx := CP0_Wired
204 %ifdef TLBWR_CHEAT ; 'Cheat' behaviour (see knobs.asm for rationale) :
205 mov AUX, ecx ; save this index in AUX, we will use
206 mov edx, TLB_ENTRIES_COUNT - 1 ; index of last entry
207 ; Move all TLB entries after the Wired entries down by one slot:
208 .tlbwr_slide: ; Start by replacing the last entry with the next-to-last:
209 cmp edx, AUX ; time to stop?
210 je .tlbr_slide_done ; ... then stop.
211 mov ecx, edx ; ecx := edx
212 dec ecx ; ecx := ecx - 1 (prev. TLB index)
213 mov eax, TLB_E(ecx) ; eax := current TLB entry
214 mov TLB_E(edx), eax ; move the current into the next
215 mov eax, TLB_PFN_E(ecx) ; eax := current PFN_EVEN entry
216 mov TLB_PFN_E(edx), eax ; move the current into the next
217 mov eax, TLB_PFN_O(ecx) ; eax := current PFN_ODD entry
218 mov TLB_PFN_O(edx), eax ; move the current into the next
219 dec edx ; move back by one
220 jmp .tlbwr_slide ; Continue the slide.
221 .tlbr_slide_done: ; Now we freed up the top-most non-wired slot in TLB table:
222 %else ; 'Traditional' behaviour per the MIPS Standard:
223 mov ebx, TLB_ENTRIES_COUNT ; ebx := #tlbentries
224 sub ebx, ecx ; ebx := #tlbentries - Wired
225 mov edx, 0 ; edx (upper half of dividend) := 0
(118 . 7)(99 . 6)
227 div ebx ; edx:eax / ebx
228 add edx, ecx ; edx (remainder) := edx + wired
229 mov AUX, edx ; make edx the index for tlb write
230 %endif
231 call _write_tlb_entry ; Write the AUX-indexed TLB entry.
232 jmp _end_cycle
233 ;-----------------------------------------------------------------------------
(136 . 38)(116 . 77)
235 align GRAIN, db 0x90
236 _m_tlbp:
237 ; no fields
238 mov Sr(CP0_Index), 0x80000000 ; CP0_Index := 0x80000000
239 ;; Get the active ASID:
240 mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi
241 mov ecx, edx ; ecx := edx
242 and edx, 0xFF ; edx := edx & 0xFF (get current ASID)
243 ;; Get the desired tag:
244 and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000
245 shr ecx, 13 ; ecx := ecx >> 13 (current Tag)
246 ;; For each slot in table (0 .. 15), attempt lookup
247 xor AUX, AUX ; Start with the 0-th entry in table
248 _m_tlbp_lookup_entry:
249 mov eax, TLB_E(AUX) ; eax := current TLB entry
250 mov ebx, eax ; ebx := eax
251 and ebx, TLB_VPN2_Mask ; get VPN2 of this entry
252 cmp ebx, ecx ; cmp(entry.VPN2, vAddr.tag)
253 jne _m_tlbp_lookup_nope ; if entry.VPN2 != vAddr.tag: no match
254 bt eax, TLB_G ; is entry.G = 1?
255 jc _m_tlbp_lookup_match ; then match.
256 shr eax, TLB_ASID_Shift ; eax := eax >> TLB_ASID_Shift
257 and eax, TLB_ASID_Mask ; eax := entry.ASID
258 cmp eax, edx ; entry.ASID = current ASID ?
259 jne _m_tlbp_lookup_nope ; if neither G=1 nor ASID match.
260 ;; otherwise:
261 _m_tlbp_lookup_match: ; TLB Match:
262 mov Sr(CP0_Index), AUX ; Save the index
263 jmp _end_cycle ; Fin.
264 _m_tlbp_lookup_nope: ; try next one in the table, if any
265 inc AUX ; index := index + 1
266 cmp AUX, TLB_ENTRIES_COUNT ; see if still in range 0 .. n-1
267 jb _m_tlbp_lookup_entry ; if in range, go to next entry
268 ;; if we found nothing, we end up with CP0_Index = 0x80000000
269 jmp _end_cycle ; Fin.
270 mov AUX, Sr(CP0_EntryHi) ; AUX := CP0_EntryHi
271 ;; edx := desired Tag:
272 mov edx, AUX ; edx := CP0_EntryHi
273 and edx, 0xFFFFF000 ; edx := edx & 0xFFFFF000
274 shr edx, 13 ; edx := edx >> 13 (wanted Tag)
275 ; Search for Byte 0 of Tag:
276 mov eax, edx ; eax := edx (wanted Tag)
277 and eax, 0xFF ; Byte 0 (lowest) of wanted Tag
278 ; Fill T0 with 16 copies of Tag Byte 0:
279 movd XMM_T0, eax
280 punpcklbw XMM_T0, XMM_T0
281 punpcklwd XMM_T0, XMM_T0
282 pshufd XMM_T0, XMM_T0, 0
283 ; Now SIMD-compare:
284 pcmpeqb XMM_T0, TLB_TAG_BYTE_0
285 ; Get the result mask of the compare:
286 pmovmskb ecx, XMM_T0 ; i-th bit in ecx = 1 where match B0
287 test ecx, ecx ; if Byte 0 of Tag not found:
288 jz ._m_tlbp_lookup_nope ; ... then go straight to 'not found'
289 ; Search for Byte 1 of Tag:
290 mov eax, edx ; eax := edx (wanted Tag)
291 shr eax, 8 ; Byte 1 (middle) of wanted Tag
292 and eax, 0xFF ; keep only that byte
293 ; Fill T0 with 16 copies of Tag Byte 1:
294 movd XMM_T0, eax
295 punpcklbw XMM_T0, XMM_T0
296 punpcklwd XMM_T0, XMM_T0
297 pshufd XMM_T0, XMM_T0, 0
298 ; Now SIMD-compare:
299 pcmpeqb XMM_T0, TLB_TAG_BYTE_1
300 ; Get the result mask of the compare:
301 pmovmskb eax, XMM_T0 ; i-th bit in eax = 1 where match B1
302 and ecx, eax ; Keep only where B0 also matched
303 test ecx, ecx ; if Bytes 0+1 of Tag not found:
304 jz ._m_tlbp_lookup_nope ; ... then go straight to 'not found'
305 ; Search for Byte 2 of Tag:
306 mov eax, edx ; eax := edx (wanted Tag)
307 shr eax, 16 ; Byte 2 (top) of wanted Tag
308 and eax, 0xFF ; keep only that byte
309 ; Fill T0 with 16 copies of Tag Byte 2:
310 movd XMM_T0, eax
311 punpcklbw XMM_T0, XMM_T0
312 punpcklwd XMM_T0, XMM_T0
313 pshufd XMM_T0, XMM_T0, 0
314 ; Now SIMD-compare:
315 pcmpeqb XMM_T0, TLB_TAG_BYTE_2
316 ; Get the result mask of the compare:
317 pmovmskb eax, XMM_T0 ; i-th bit in eax = 1 where match B2
318 and ecx, eax ; Keep only where B0,B1 also matched
319 test ecx, ecx ; if Bytes 0+1+2 of Tag not found:
320 jz ._m_tlbp_lookup_nope ; ... then go straight to 'not found'
321 ; If we're here, Tag WAS found; so get the TLB index where it lies :
322 bsf ebx, ecx ; ebx := index of found TLB entry
323 mov edx, ebx ; edx := ebx
324 add edx, 16 ; G's start at bit 16 of Flag_Reg
325 bt Flag_Reg, edx ; See whether i-th G bit is set
326 jc ._m_tlbp_lookup_found ; Now if G is set, we've found it!
327 ; Otherwise, index of found Tag is still in bl (0 .. 0xF)
328 ; G was not set, so get the requested ASID and test whether it matches:
329 mov eax, AUX ; eax := CP0_EntryHi (al : our ASID)
330 lea rdx, [TLB_ASID_COPY]; Load address of ASID Copy
331 cmp byte [rdx + rbx], al ; Compare stored ASID to current
332 jne ._m_tlbp_lookup_nope ; ... if not equal, 'not found'
333 ; If we're here, we FOUND a matching TLB entry:
334 ._m_tlbp_lookup_found:
335 mov Sr(CP0_Index), ebx ; Save the index of the found entry.
336 jmp _end_cycle ; Fin!
337 ._m_tlbp_lookup_nope: ; not found:
338 mov Sr(CP0_Index), 0x80000000 ; CP0_Index := 0x80000000
339 ; if we found nothing, we end up with CP0_Index = 0x80000000
340 jmp _end_cycle ; Fin.
341 ;-----------------------------------------------------------------------------
342
343 ;-----------------------------------------------------------------------------
(279 . 7)(298 . 7)
345 _mfc0_r11: ; 0x0b
346 test ecx, ecx ; Sel != 0 ?
347 jnz _mfc0_unknown ; ... then unknown; else:
348 mov TMP, CP0_Compare ; return CP0_Compare ('fast reg')
349 mov TMP, Sr(CP0_Compare) ; return CP0_Compare (read via Sr(); no longer a 'fast reg')
350 jmp _mfc0_writeback ; Done
351 ;-----------------------------------------------------------------------------
352 _mfc0_r12: ; 0x0c
(478 . 7)(497 . 7)
354 test ecx, ecx ; Sel != 0 ?
355 jnz _mtc0_unknown ; ... then unknown; else:
356 ClrIRQ TIMER_IRQ ; Clear MIPS Timer IRQ
357 mov CP0_Compare, ebx ; CP0_Compare := T ('fast reg')
358 mov Sr(CP0_Compare), ebx ; CP0_Compare := T (written via Sr(); no longer a 'fast reg')
359 jmp _end_cycle ; Done
360 ;-----------------------------------------------------------------------------
361 _mtc0_r12: ; 0x0c
- 016C026DBE4230BD120C0FC4269E61BD8A44B82580289EFC90FED0792B5893A5727E069191FBFB0E32C3C40D2700B4A39A5ACB0BE1FDBFC475274C344368626A
+ 19593ABC66AB9FF8A02FA39884524BBA012AAF3AFAB4C0A588272D07B9269BB59140F584519317BA0C2F412FC0B47D31CD4FEA28D3D6754A3F5BBF7BACCA3E78
m/ram.asm
(93 . 6)(93 . 16)
366 ;-----------------------------------------------------------------------------
367
368 ;-----------------------------------------------------------------------------
369 section .bss
370 align 32
371 TLB_TAG_BYTE_0_COPY resb 16 ; Byte-0 of each TLB entry Tag
372 TLB_TAG_BYTE_1_COPY resb 16 ; Byte-1 of each TLB entry Tag
373 TLB_TAG_BYTE_2_COPY resb 16 ; Byte-2 of each TLB entry Tag
374 TLB_ASID_COPY resb 16 ; ASID of each TLB entry
375 section .text
376 ;-----------------------------------------------------------------------------
377
378 ;-----------------------------------------------------------------------------
379 ; Virt2Phys Read : virtual address in eax; output (physical addr) in eax
380 ;-----------------------------------------------------------------------------
381 align GRAIN, db 0x90
(122 . 99)(132 . 147)
383 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
384 ;; Save ebx, ecx, edx, AUX, to xmm ;;
385 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
386 movd xmm0, ebx
387 movd xmm1, ecx
388 movd xmm2, edx
389 movd xmm3, AUX
390 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
391 mov ecx, eax ; ecx := eax (vAddr)
392 and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000
393 shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag)
394 movd xmm0, ebx
395 movd xmm1, ecx
396 movd xmm2, edx
397 movd xmm3, AUX
398 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
399 ;; Find out whether we actually must do the lookup, or can use cached:
400 Flg_Get TLB_Rd_Cache_Valid ; Is Read TLB Cache valid?
401 jnc .Lookup_Must ; If Read TLB Cache invalid -- must!
402 bt eax, 12 ; Test odd/even junior bit
403 jc .Rd_Cached_Odd ; If odd, look at last Odd vAddr Tag
404 .Rd_Cached_Even: ; If even, look at last Even vAddr Tag
405 movd edx, Rd_E_Last_Tag ; edx := last Even vAddr's Tag
406 cmp ecx, edx ; is the current vAddr's Tag equal?
407 jne .Lookup_Must ; ... if not, must do the lookup dance;
408 ;; ... Otherwise, we have an Even cache hit:
409 movd ebx, Rd_E_Last_PFN ; ebx := last good Even PFN
410 jmp .Cache_Hit ; apply the PFN and wrap up.
411 .Rd_Cached_Odd:
412 movd edx, Rd_O_Last_Tag ; edx := last Odd vAddr's Tag
413 cmp ecx, edx ; is the current vAddr's Tag equal?
414 jne .Lookup_Must ; ... if not, must do the lookup dance;
415 ;; ... Otherwise, we have an Odd cache hit:
416 movd ebx, Rd_O_Last_PFN ; ebx := last good Odd PFN
417 jmp .Cache_Hit ; apply the PFN and wrap up.
418 mov ecx, eax ; ecx := eax (vAddr)
419 and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000
420 shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag)
421 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
422 ;; Failing the above, we must actually walk the TLB:
423 ; Find out whether we actually must do the lookup, or can use cached:
424 Flg_Get TLB_Rd_Cache_Valid ; Is Read TLB Cache valid?
425 jnc .Lookup_Must ; If Read TLB Cache invalid -- must!
426 ; If cache is valid, lookup:
427 mov AUX, ecx ; AUX := tag
428 xor ecx, ecx ; ecx := 0
429 bt eax, 12 ; Test vAddr's odd/even junior bit
430 setc cl ; ecx := {1 if a-odd, 0 if a-even}
431 shl rcx, 6 ; rcx := {64 if a-odd, 0 if a-even} -- NOTE(review): a 64-bit shift by cl masks the count to 6 bits, so 64 acts as 0; .Match uses 32 (shl edx, 5) to select the odd half -- confirm
432 ; get the last-good-Tags:
433 movq rbx, R_TLB_Last_Good_Tag ; Get last good R-Tag pair
434 shr rbx, cl ; if arity is odd, get top half
435 cmp ebx, AUX ; is current Tag == to last-good ?
436 jne .Lookup_Must ; ... if not, go to Lookup_Must
437 ; given Tag matched last-good. So get last-good PFN and wrap up:
438 movq rbx, R_TLB_Last_Good_PFN ; Get last good PFN pair
439 shr rbx, cl ; if arity is odd, get top half
440 jmp .PFN_And_Done ; use ebx as the PFN and wrap up.
441 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
442 .Lookup_Must:
443 movd xmm4, ecx ; xmm4 := current vAddr's Tag
444 ;; Get the active ASID:
445 mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi
446 and edx, 0xFF ; edx := edx & 0xFF (get current ASID)
447 ;; For each slot in table (0 .. 15), attempt lookup
448 xor AUX, AUX ; Start with the 0-th entry in table
449 .Lookup_TLB_E:
450 movd ecx, xmm4 ; ecx := current vAddr's Tag
451 mov ebx, TLB_E(AUX) ; ebx := current TLB entry
452 and ebx, TLB_VPN2_Mask ; get VPN2 of this entry
453 cmp ebx, ecx ; cmp(entry.VPN2, vAddr.tag)
454 jne .Lookup_TLB_E_Not_Here ; if entry.VPN2 != vAddr.tag: no match
455 mov ebx, TLB_E(AUX) ; ebx := current TLB entry
456 bt ebx, TLB_G ; is entry.G = 1?
457 jc .Lookup_TLB_E_Match ; then match.
458 shr ebx, TLB_ASID_Shift ; ebx := ebx >> TLB_ASID_Shift
459 and ebx, TLB_ASID_Mask ; ebx := entry.ASID
460 cmp ebx, edx ; entry.ASID = current ASID ?
461 jne .Lookup_TLB_E_Not_Here ; if neither G=1 nor ASID match.
462 mov ebx, TLB_E(AUX) ; ebx := current TLB entry
463 .Lookup_TLB_E_Match: ; TLB Match:
464 bt eax, 12 ; Test odd/even junior bit
465 jc .Lookup_TLB_E_Match_Odd ; If odd: test V1, D1
466 .Lookup_TLB_E_Match_Even: ; If even: test V0, D0
467 bt ebx, TLB_V0 ; Is entry.V0=1 ?
468 jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID
469 lea ecx, TLB_PFN_E(AUX) ; prepare to load even PFN entry
470 mov ebx, dword [ecx] ; Actually load the current PFN entry
471 movd Rd_E_Last_PFN, ebx ; Save the current PFN as last Even
472 movd ecx, xmm4 ; ecx := the current Tag
473 movd Rd_E_Last_Tag, ecx ; Save the current Tag as last Even
474 jmp .Lookup_TLB_E_Match_Yes ; Since we're reading: go to Match Yes
475 .Lookup_TLB_E_Match_Odd: ; Odd bit:
476 bt ebx, TLB_V1 ; Is entry.V1=1 ?
477 jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID
478 lea ecx, TLB_PFN_O(AUX) ; prepare to load odd PFN entry
479 mov ebx, dword [ecx] ; Actually load the current PFN entry
480 movd Rd_O_Last_PFN, ebx ; Save the current PFN as last Odd
481 movd ecx, xmm4 ; ecx := the current Tag
482 movd Rd_O_Last_Tag, ecx ; Save the current Tag as last Odd
483 .Lookup_TLB_E_Match_Yes: ; This is the 'success' case
484 Flg_On TLB_Rd_Cache_Valid
485 ; Upon next TLB lookup, if cache is valid, and Tag remains same
486 ; as before, we can use the same PFN as was obtained last time
487 ; for the respective 12th bit arity of the vAddr!
488 .Cache_Hit:
489 and eax, 0xFFF ; vAddr := vAddr & 0xFFF
490 or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit]
491 jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done.
492 .Lookup_TLB_E_Not_Here: ; try next one in the table, if any
493 inc AUX ; index := index + 1
494 cmp AUX, TLB_ENTRIES_COUNT ; see if still in range 0 .. n-1
495 jb .Lookup_TLB_E ; if in range, go to next entry
496 ;; ... else:
497 add rsp, 16 ; squelch return to _Virt_xxx and its caller
498 push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler.
499 jmp _Lookup_TLB_E_WriteExtr ; Wrap up
500 .Lookup_TLB_E_Invalid:
501 SetEXC EXC_TLBL ; Set the EXC_TLBL Exception
502 add rsp, 16 ; squelch return to _Virt_xxx and its caller
503 push _Handle_Exception_Other ; 'return' straight to handler.
504 jmp _Lookup_TLB_E_WriteExtr ; Go to the common epilogue.
505 movd xmm4, ecx ; xmm4 := copy of Tag
506 ;; Search for B0, B1, B2 of Tag, accumulate result in ebx ;;
507 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
508 ; Search for Byte 0 of Tag:
509 mov edx, ecx ; edx := ecx (wanted Tag)
510 and edx, 0xFF ; Byte 0 (lowest) of wanted Tag
511 ; Fill T0 with 16 copies of Tag Byte 0:
512 movd XMM_T0, edx
513 punpcklbw XMM_T0, XMM_T0
514 punpcklwd XMM_T0, XMM_T0
515 pshufd XMM_T0, XMM_T0, 0
516 ; Now SIMD-compare:
517 pcmpeqb XMM_T0, TLB_TAG_BYTE_0
518 ; Get the result mask of the compare:
519 pmovmskb ebx, XMM_T0 ; i-th bit in ebx = 1 where match B0
520 test ebx, ebx ; if Byte 0 of Tag not found:
521 jz .Not_Found ; ... then go straight to 'not found'
522 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
523 ; Search for Byte 1 of Tag:
524 mov edx, ecx ; edx := ecx (wanted Tag)
525 shr edx, 8 ; Byte 1 (middle) of wanted Tag
526 and edx, 0xFF
527 ; Fill T0 with 16 copies of Tag Byte 1:
528 movd XMM_T0, edx
529 punpcklbw XMM_T0, XMM_T0
530 punpcklwd XMM_T0, XMM_T0
531 pshufd XMM_T0, XMM_T0, 0
532 ; Now SIMD-compare:
533 pcmpeqb XMM_T0, TLB_TAG_BYTE_1
534 ; Get the result mask of the compare:
535 pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B1
536 and ebx, edx ; Keep only where B0 also matched
537 test ebx, ebx ; if Bytes 0+1 of Tag not found:
538 jz .Not_Found ; ... then go straight to 'not found'
539 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
540 ; Search for Byte 2 of Tag:
541 mov edx, ecx ; edx := ecx (wanted Tag)
542 shr edx, 16 ; Byte 2 (top) of wanted Tag
543 and edx, 0xFF ; keep only that byte
544 ; Fill T0 with 16 copies of Tag Byte 2:
545 movd XMM_T0, edx
546 punpcklbw XMM_T0, XMM_T0
547 punpcklwd XMM_T0, XMM_T0
548 pshufd XMM_T0, XMM_T0, 0
549 ; Now SIMD-compare:
550 pcmpeqb XMM_T0, TLB_TAG_BYTE_2
551 ; Get the result mask of the compare:
552 pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B2
553 and ebx, edx ; Keep only where B0,B1 also matched
554 test ebx, ebx ; if Bytes 0+1+2 of Tag not found:
555 jz .Not_Found ; ... then go straight to 'not found'
556 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
557 ; If we're here, Tag WAS found; so get the TLB index where it lies :
558 bsf AUX, ebx ; AUX := index of found TLB entry
559 mov edx, AUX ; edx := AUX
560 ; Now see whether the corresponding G flag is set:
561 add edx, 16 ; G's start at bit 16 of Flag_Reg
562 bt Flag_Reg, edx ; See whether i-th G bit is set
563 jc .Match ; Now if G is set, we've found it!
564 ; G was not set, so get the requested ASID and test whether it matches:
565 ; Get the active ASID:
566 mov ebx, Sr(CP0_EntryHi); ebx := CP0_EntryHi
567 and ebx, 0xFF ; ebx := ebx & 0xFF (get current ASID)
568 ; Determine whether it matches the found Tag entry's :
569 mov ecx, AUX ; ecx := AUX (index of found entry)
570 lea rdx, [TLB_ASID_COPY]; Load address of ASID Copy
571 cmp byte [rdx + rcx], bl ; Compare stored ASID to current
572 jne .Not_Found ; ... if not equal, 'not found'
573 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
574 .Match:
575 ; If we're here, we have a Match. AUX is index of matching entry;
576 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
577 xor edx, edx ; edx := 0
578 ; Get arity of desired entry:
579 bt eax, 12 ; Test vAddr's odd/even junior bit
580 setc dl ; edx := {1 if a-odd, 0 if a-even}
581 shl edx, 5 ; edx := {32 if a-odd, 0 if a-even}
582 mov ecx, edx ; ecx := edx (copy of above; cl is the shift count used below)
583 ; Now we know which bit of TLB_Flags is this entry's V. Test it:
584 add edx, AUX ; add the corresponding index
585 bt TLB_Flags, rdx ; test if V(Index) is set
586 jnc .Invalid_R ; ... V == 0, then go to Invalid
587 ; Now let's load the PFN:
588 mov rbx, TLB_PFN(AUX64) ; load the PFN pair to rbx
589 ; rbx is now the PFN pair. Before wrapping up, update the TLB read cache :
590 movq R_TLB_Last_Good_PFN, rbx ; Set last good PFN to this PFN pair. NOTE(review): both PFN halves are replaced here but only one Tag half is refreshed below -- confirm the other half cannot then hit with a stale Tag
591 ; now leave only the correct half of PFN, at bottom of rbx:
592 shr rbx, cl ; if arity is odd, get upper 32bit
593 ; set correct half of R_TLB_Last_Good_Tag to the found Tag:
594 mov rdx, 0xFFFFFFFF00000000 ; rdx := 0xFFFFFFFF00000000
595 shr rdx, cl ; if arity is odd, keep bottom
596 movq AUX64, R_TLB_Last_Good_Tag ; get last good Tag
597 and AUX64, rdx ; zap correct half of last good tag
598 movq rdx, xmm4 ; get the Tag again :
599 shl rdx, cl ; if arity is odd, slide into pos:
600 or AUX64, rdx ; now or it into place
601 movq R_TLB_Last_Good_Tag, AUX64 ; update last good Tag.
602 .PFN_And_Done:
603 and eax, 0xFFF ; vAddr := vAddr & 0xFFF
604 or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit]
605 jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done.
606 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
607 .Not_Found: ; Not Found in Table:
608 add rsp, 16 ; squelch return to _Virt_xxx and its caller
609 push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler.
610 jmp _Lookup_TLB_E_WriteExtr ; Wrap up
611 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
612 .Invalid_R:
613 SetEXC EXC_TLBL ; Set the EXC_TLBL Exception
614 add rsp, 16 ; squelch return to _Virt_xxx and its caller
615 push _Handle_Exception_Other ; 'return' straight to handler.
616 jmp _Lookup_TLB_E_WriteExtr ; Go to the common epilogue.
617 ;-----------------------------------------------------------------------------
618
619 ;-----------------------------------------------------------------------------
(247 . 111)(305 . 158)
621 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
622 ;; Save ebx, ecx, edx, AUX, to xmm ;;
623 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
624 movd xmm0, ebx
625 movd xmm1, ecx
626 movd xmm2, edx
627 movd xmm3, AUX
628 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
629 mov ecx, eax ; ecx := eax (vAddr)
630 and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000
631 shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag)
632 movd xmm0, ebx
633 movd xmm1, ecx
634 movd xmm2, edx
635 movd xmm3, AUX
636 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
637 ;; Find out whether we actually must do the lookup, or can use cached:
638 Flg_Get TLB_Wr_Cache_Valid ; Is Write TLB Cache valid?
639 jnc .Lookup_Must ; If Write TLB Cache invalid -- must!
640 bt eax, 12 ; Test odd/even junior bit
641 jc .Wr_Cached_Odd ; If odd, look at last Odd vAddr Tag
642 .Wr_Cached_Even: ; If even, look at last Even vAddr Tag
643 movd edx, Wr_E_Last_Tag ; edx := last Even vAddr's Tag
644 cmp ecx, edx ; is the current vAddr's Tag equal?
645 jne .Lookup_Must ; ... if not, must do the lookup dance;
646 ;; ... Otherwise, we have an Even cache hit:
647 movd ebx, Wr_E_Last_PFN ; ebx := last good Even PFN
648 jmp .Cache_Hit ; apply the PFN and wrap up.
649 .Wr_Cached_Odd:
650 movd edx, Wr_O_Last_Tag ; edx := last Odd vAddr's Tag
651 cmp ecx, edx ; is the current vAddr's Tag equal?
652 jne .Lookup_Must ; ... if not, must do the lookup dance;
653 ;; ... Otherwise, we have an Odd cache hit:
654 movd ebx, Wr_O_Last_PFN ; ebx := last good Odd PFN
655 jmp .Cache_Hit ; apply the PFN and wrap up.
656 mov ecx, eax ; ecx := eax (vAddr)
657 and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000
658 shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag)
659 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
660 ;; Failing the above, we must actually walk the TLB:
661 ; Find out whether we actually must do the lookup, or can use cached:
662 Flg_Get TLB_Wr_Cache_Valid ; Is Write TLB Cache valid?
663 jnc .Lookup_Must ; If Write TLB Cache invalid -- must!
664 ; If cache is valid, lookup:
665 mov AUX, ecx ; AUX := tag
666 xor ecx, ecx ; ecx := 0
667 bt eax, 12 ; Test vAddr's odd/even junior bit
668 setc cl ; ecx := {1 if a-odd, 0 if a-even}
669 shl rcx, 6 ; rcx := {64 if a-odd, 0 if a-even} -- NOTE(review): x86 masks a 64-bit shift count to 6 bits, so cl=64 shifts by 0; .Match uses 32 (shl 5) -- confirm
670 ; get the last-good-Tags:
671 movq rbx, W_TLB_Last_Good_Tag ; Get last good W-Tag pair
672 shr rbx, cl ; if arity is odd, get top half
673 cmp ebx, AUX ; is current Tag == to last-good ?
674 jne .Lookup_Must ; ... if not, go to Lookup_Must
675 ; given Tag matched last-good. So get last-good PFN and wrap up:
676 movq rbx, W_TLB_Last_Good_PFN ; Get last good PFN pair
677 shr rbx, cl ; if arity is odd, get top half
678 jmp .PFN_And_Done ; use ebx as the PFN and wrap up.
679 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
680 .Lookup_Must:
681 movd xmm4, ecx ; xmm4 := current vAddr's Tag
682 ;; Get the active ASID:
683 mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi
684 and edx, 0xFF ; edx := edx & 0xFF (get current ASID)
685 ;; For each slot in table (0 .. 15), attempt lookup
686 xor AUX, AUX ; Start with the 0-th entry in table
687 .Lookup_TLB_E:
688 movd ecx, xmm4 ; ecx := current vAddr's Tag
689 mov ebx, TLB_E(AUX) ; ebx := current TLB entry
690 and ebx, TLB_VPN2_Mask ; get VPN2 of this entry
691 cmp ebx, ecx ; cmp(entry.VPN2, vAddr.tag)
692 jne .Lookup_TLB_E_Not_Here ; if entry.VPN2 != vAddr.tag: no match
693 mov ebx, TLB_E(AUX) ; ebx := current TLB entry
694 bt ebx, TLB_G ; is entry.G = 1?
695 jc .Lookup_TLB_E_Match ; then match.
696 shr ebx, TLB_ASID_Shift ; ebx := ebx >> TLB_ASID_Shift
697 and ebx, TLB_ASID_Mask ; ebx := entry.ASID
698 cmp ebx, edx ; entry.ASID = current ASID ?
699 jne .Lookup_TLB_E_Not_Here ; if neither G=1 nor ASID match.
700 mov ebx, TLB_E(AUX) ; ebx := current TLB entry
701 .Lookup_TLB_E_Match: ; TLB Match:
702 bt eax, 12 ; Test odd/even junior bit
703 jc .Lookup_TLB_E_Match_Odd ; If odd: test V1, D1
704 .Lookup_TLB_E_Match_Even: ; If even: test V0, D0
705 bt ebx, TLB_V0 ; Is entry.V0=1 ?
706 jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID
707 bt ebx, TLB_D0 ; Is entry.D0=1 ?
708 jnc .Lookup_TLB_E_Dirty ; If not, go to 'Dirty'
709 ;; Not invalid or dirty:
710 lea ecx, TLB_PFN_E(AUX) ; prepare to load even PFN entry
711 mov ebx, dword [ecx] ; Actually load the current PFN entry
712 movd Wr_E_Last_PFN, ebx ; Save the current PFN as last Even
713 movd ecx, xmm4 ; ecx := the current Tag
714 movd Wr_E_Last_Tag, ecx ; Save the current Tag as last Even
715 jmp .Lookup_TLB_E_Match_Yes ; ;; Proceed to 'match' :
716 .Lookup_TLB_E_Match_Odd: ; Odd bit:
717 bt ebx, TLB_V1 ; Is entry.V1=1 ?
718 jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID
719 bt ebx, TLB_D1 ; Is entry.D1=1 ?
720 jnc .Lookup_TLB_E_Dirty ; If not, go to 'Dirty'
721 ;; Not invalid or dirty:
722 lea ecx, TLB_PFN_O(AUX) ; prepare to load odd PFN entry
723 mov ebx, dword [ecx] ; Actually load the current PFN entry
724 movd Wr_O_Last_PFN, ebx ; Save the current PFN as last Odd
725 movd ecx, xmm4 ; ecx := the current Tag
726 movd Wr_O_Last_Tag, ecx ; Save the current Tag as last Odd
727 ;; Proceed to 'match' :
728 .Lookup_TLB_E_Match_Yes: ; This is the 'success' case
729 Flg_On TLB_Wr_Cache_Valid
730 ; Upon next TLB lookup, if cache is valid, and Tag remains same
731 ; as before, we can use the same PFN as was obtained last time
732 ; for the respective 12th bit arity of the vAddr!
733 .Cache_Hit:
734 and eax, 0xFFF ; vAddr := vAddr & 0xFFF
735 or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit]
736 jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done.
737 .Lookup_TLB_E_Not_Here: ; try next one in the table, if any
738 inc AUX ; index := index + 1
739 cmp AUX, TLB_ENTRIES_COUNT ; see if still in range 0 .. n-1
740 jb .Lookup_TLB_E ; if in range, go to next entry
741 ;; ... else:
742 add rsp, 16 ; squelch return to _Virt_xxx and its caller
743 push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler.
744 jmp _Lookup_TLB_E_WriteExtr ; Wrap up
745 .Lookup_TLB_E_Dirty: ; ... else, Dirty:
746 SetEXC EXC_Mod ; Set the EXC_Mod Exception
747 add rsp, 16 ; squelch return to _Virt_xxx and its caller
748 push _Handle_Exception_Other ; 'return' straight to handler.
749 jmp _Lookup_TLB_E_WriteExtr ; Write the 'extra data' and finish.
750 .Lookup_TLB_E_Invalid: ; Invalid Write:
751 SetEXC EXC_TLBS ; Set the EXC_TLBS Exception
752 add rsp, 16 ; squelch return to _Virt_xxx and its caller
753 push _Handle_Exception_Other ; 'return' straight to handler.
754 ;; then drop down to _Lookup_TLB_E_WriteExtr
755 movd xmm4, ecx ; xmm4 := copy of Tag
756 ;; Search for B0, B1, B2 of Tag, accumulate result in ebx ;;
757 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
758 ; Search for Byte 0 of Tag:
759 mov edx, ecx ; edx := ecx (wanted Tag)
760 and edx, 0xFF ; Byte 0 (lowest) of wanted Tag
761 ; Fill T0 with 16 copies of Tag Byte 0:
762 movd XMM_T0, edx
763 punpcklbw XMM_T0, XMM_T0
764 punpcklwd XMM_T0, XMM_T0
765 pshufd XMM_T0, XMM_T0, 0
766 ; Now SIMD-compare:
767 pcmpeqb XMM_T0, TLB_TAG_BYTE_0
768 ; Get the result mask of the compare:
769 pmovmskb ebx, XMM_T0 ; i-th bit in ebx = 1 where match B0
770 test ebx, ebx ; if Byte 0 of Tag not found:
771 jz .Not_Found ; ... then go straight to 'not found'
772 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
773 ; Search for Byte 1 of Tag:
774 mov edx, ecx ; edx := ecx (wanted Tag)
775 shr edx, 8 ; Byte 1 (middle) of wanted Tag
776 and edx, 0xFF
777 ; Fill T0 with 16 copies of Tag Byte 1:
778 movd XMM_T0, edx
779 punpcklbw XMM_T0, XMM_T0
780 punpcklwd XMM_T0, XMM_T0
781 pshufd XMM_T0, XMM_T0, 0
782 ; Now SIMD-compare:
783 pcmpeqb XMM_T0, TLB_TAG_BYTE_1
784 ; Get the result mask of the compare:
785 pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B1
786 and ebx, edx ; Keep only where B0 also matched
787 test ebx, ebx ; if Bytes 0+1 of Tag not found:
788 jz .Not_Found ; ... then go straight to 'not found'
789 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
790 ; Search for Byte 2 of Tag:
791 mov edx, ecx ; edx := ecx (wanted Tag)
792 shr edx, 16 ; Byte 2 (top) of wanted Tag
793 and edx, 0xFF
794 ; Fill T0 with 16 copies of Tag Byte 2:
795 movd XMM_T0, edx
796 punpcklbw XMM_T0, XMM_T0
797 punpcklwd XMM_T0, XMM_T0
798 pshufd XMM_T0, XMM_T0, 0
799 ; Now SIMD-compare:
800 pcmpeqb XMM_T0, TLB_TAG_BYTE_2
801 ; Get the result mask of the compare:
802 pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B2
803 and ebx, edx ; Keep only where B0,B1 also matched
804 test ebx, ebx ; if Bytes 0+1+2 of Tag not found:
805 jz .Not_Found ; ... then go straight to 'not found'
806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
807 ; If we're here, Tag WAS found; so get the TLB index where it lies :
808 bsf AUX, ebx ; AUX := index of found TLB entry
809 mov edx, AUX ; edx := AUX
810 ; Now see whether the corresponding G flag is set:
811 add edx, 16 ; G's start at bit 16 of Flag_Reg
812 bt Flag_Reg, edx ; See whether i-th G bit is set
813 jc .Match ; Now if G is set, we've found it!
814 ; G was not set, so get the requested ASID and test whether it matches:
815 ; Get the active ASID:
816 mov ebx, Sr(CP0_EntryHi); ebx := CP0_EntryHi
817 and ebx, 0xFF ; ebx := ebx & 0xFF (get current ASID)
818 ; Determine whether it matches the found Tag entry's :
819 mov ecx, AUX ; ecx := AUX (index of found entry)
820 lea rdx, [TLB_ASID_COPY]; Load address of ASID Copy
821 cmp byte [rdx + rcx], bl ; Compare stored ASID to current
822 jne .Not_Found ; ... if not equal, 'not found'
823 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
824 .Match:
825 ; If we're here, we have a Match. AUX is index of matching entry;
826 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
827 xor edx, edx ; edx := 0
828 ; Get arity of desired entry:
829 bt eax, 12 ; Test vAddr's odd/even junior bit
830 setc dl ; edx := {1 if a-odd, 0 if a-even}
831 shl edx, 5 ; edx := {32 if a-odd, 0 if a-even}
832 mov ecx, edx ; ecx := edx (copy of above)
833 ; Now we know which bit of TLB_Flags is this entry's V. Test it:
834 add edx, AUX ; add the corresponding index
835 bt TLB_Flags, rdx ; test if V(Index) is set
836 jnc .Invalid_W ; ... if V == 0, then go to Invalid
837 ; Now, since we're writing, test this entry's D, at pos(V) + 16:
838 add edx, 16
839 bt TLB_Flags, rdx ; test if D(Index) is set
840 jnc .Dirty_W ; ... if D == 0, then go to Dirty
841 ; Now let's load the correct odd or even PFN:
842 mov rbx, TLB_PFN(AUX64) ; load the PFN pair to rbx
843 ; rbx now holds the PFN pair. Before wrapping up, update the TLB write cache :
844 movq W_TLB_Last_Good_PFN, rbx ; Set last good PFN to this PFN:
845 ; now leave only the correct half of PFN, at bottom of rbx:
846 shr rbx, cl ; if arity is odd, get upper 32bit
847 ; set correct half of W_TLB_Last_Good_Tag to the found Tag:
848 mov rdx, 0xFFFFFFFF00000000 ; rdx := 0xFFFFFFFF00000000
849 shr rdx, cl ; if arity is odd, keep bottom
850 movq AUX64, W_TLB_Last_Good_Tag ; get last good Tag
851 and AUX64, rdx ; zap correct half of last good tag
852 movq rdx, xmm4 ; get the Tag again :
853 shl rdx, cl ; if arity is odd, slide into pos:
854 or AUX64, rdx ; now or it into place
855 movq W_TLB_Last_Good_Tag, AUX64 ; update last good Tag.
856 .PFN_And_Done:
857 and eax, 0xFFF ; vAddr := vAddr & 0xFFF
858 or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit]
859 jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done.
860 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
861 .Not_Found: ; Not Found in Table:
862 add rsp, 16 ; squelch return to _Virt_xxx and its caller
863 push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler.
864 jmp _Lookup_TLB_E_WriteExtr ; Wrap up
865 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
866 .Invalid_W:
867 SetEXC EXC_TLBS ; Set the EXC_TLBS Exception
868 add rsp, 16 ; squelch return to _Virt_xxx and its caller
869 push _Handle_Exception_Other ; 'return' straight to handler.
870 jmp _Lookup_TLB_E_WriteExtr ; Go to the common epilogue.
871 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
872 .Dirty_W:
873 SetEXC EXC_Mod ; Set the EXC_Mod Exception
874 add rsp, 16 ; squelch return to _Virt_xxx and its caller
875 push _Handle_Exception_Other ; 'return' straight to handler.
876 ; jmp _Lookup_TLB_E_WriteExtr ; Write the 'extra data' and finish.
877 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
878
879 ;-----------------------------------------------------------------------------
880 ; Epilogue common to _Virt_To_Phys_Read and _Virt_To_Phys_Write:
(526 . 56)(631 . 93)
882 ; Kills eax, ebx, ecx, edx.
883 ;-----------------------------------------------------------------------------
884 _write_tlb_entry: ; Store CP0_EntryHi / CP0_EntryLo0 / CP0_EntryLo1 into the TLB slot indexed by AUX (low 4 bits)
885 mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi
886 mov ecx, edx ; ecx := edx
887 shr ecx, 13 ; ecx := ecx >> 13 to get VPN2
888 and edx, TLB_ASID_Mask ; edx := edx & 0xFF to get ASID
889 shl edx, TLB_ASID_Shift ; edx := edx << 19 to put ASID in place
890 or ecx, edx ; now we have VPN2 and ASID in ecx
891 ;; done with edx, can reuse
892 mov edx, Sr(CP0_EntryLo0) ; edx := CP0_EntryLo0
893 mov ebx, Sr(CP0_EntryLo1) ; ebx := CP0_EntryLo1
894 ;; get G:
895 mov eax, edx ; eax := CP0_EntryLo0
896 and eax, ebx ; eax := eax & CP0_EntryLo1
897 and eax, 0x1 ; eax := eax & 1 to get G
898 shl eax, TLB_G ; move G bit into position
899 or ecx, eax ; ecx := ecx | eax to put in G bit
900 ;; now ecx contains VPN2, ASID, G
901 ;; Get V0 from CP0_EntryLo0 and put in ecx where belongs:
902 mov eax, edx ; eax := CP0_EntryLo0
903 and eax, 0x2 ; eax := eax & 0x2 to get V0 bit
904 shl eax, (TLB_V0 - 1) ; put V0 bit in position
905 or ecx, eax ; ecx := ecx | eax to put in V0 bit
906 ;; Get D0 from CP0_EntryLo0 and put in ecx where belongs:
907 mov eax, edx ; eax := CP0_EntryLo0
908 and eax, 0x4 ; eax := eax & 0x4 to get D0 bit
909 shl eax, (TLB_D0 - 2) ; put D0 bit in position
910 or ecx, eax ; ecx := ecx | eax to put in D0 bit
911 ;; Get V1 from CP0_EntryLo1 and put in ecx where belongs:
912 mov eax, ebx ; eax := CP0_EntryLo1
913 and eax, 0x2 ; eax := eax & 0x2 to get V1 bit
914 shl eax, (TLB_V1 - 1) ; put V1 bit in position
915 or ecx, eax ; ecx := ecx | eax to put in V1 bit
916 ;; Get D1 from CP0_EntryLo1 and put in ecx where belongs:
917 mov eax, ebx ; eax := CP0_EntryLo1
918 and eax, 0x4 ; eax := eax & 0x4 to get D1 bit
919 shl eax, (TLB_D1 - 2) ; put D1 bit in position
920 or ecx, eax ; ecx := ecx | eax to put in D1 bit
921 ;; Write the TLB entry to the given index (in AUX) :
922 and AUX, 0xF ; Index of TLB entry is bottom 4 bits
923 mov TLB_E(AUX), ecx ; Write TLB entry in ecx to n-th slot.
924 ;; Transform CP0_EntryLo0 (edx) into PFN0:
925 shr edx, 6 ; drop the low 6 bits of CP0_EntryLo0
926 and edx, 0xFFFFF ; keep 20 bits
927 shl edx, 12 ; move them to bits 31..12
928 ;; Transform CP0_EntryLo1 (ebx) into PFN1:
929 shr ebx, 6 ; drop the low 6 bits of CP0_EntryLo1
930 and ebx, 0xFFFFF ; keep 20 bits
931 shl ebx, 12 ; move them to bits 31..12
932 ;; Store PFN:
933 mov TLB_PFN_E(AUX), edx ; Store PFN0
934 mov TLB_PFN_O(AUX), ebx ; Store PFN1
935 Invalidate_TLB_Cache ; Invalidate both R and W TLB Caches
936 ret ; Done. NOTE(review): the lines below redo the store via TLB_Flags / Flag_Reg / TLB_TAG_BYTE_n / TLB_PFN -- presumably the replacement body of this hunk; confirm
937 and AUX, 0xF ; constrain Index range (0 .. 15)
938 mov ecx, AUX ; ecx := Index
939 ; Clear old Flags: V0, D0, V1, D1 :
940 mov rax, ~(1 | (1 << 16) | (1 << 32) | (1 << 48)) ; zap mask
941 rol rax, cl ; Rotate into indexed position
942 and TLB_Flags, rax ; Clear old V0, D0, V1, D1.
943 ; Clear old G:
944 add ecx, 16 ; G starts at 16th bit;
945 btr Flag_Reg, ecx ; Clear old G.
946 ; Now, set the new values:
947 mov ecx, AUX ; ecx := Index
948 mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi
949 ; First, get this entry's Tag (VPN2) from CP0_EntryHi :
950 mov eax, edx ; eax := edx (CP0_EntryHi)
951 shr eax, 13 ; eax := eax >> 13 to get the Tag
952 ; Now, save this entry's Tag to the selected Index:
953 ; Write Byte 0 of Tag to TLB_TAG_BYTE_0_COPY and TLB_TAG_BYTE_0 :
954 lea rbx, [TLB_TAG_BYTE_0_COPY] ; Load address of B0 Copy
955 mov byte [rbx + rcx], al ; Change the indexed byte
956 movdqa TLB_TAG_BYTE_0, [rbx] ; Update XMM reg with B0 Copy
957 ; Write Byte 1 of Tag to TLB_TAG_BYTE_1_COPY and TLB_TAG_BYTE_1 :
958 lea rbx, [TLB_TAG_BYTE_1_COPY] ; Load address of B1 Copy
959 mov byte [rbx + rcx], ah ; Change the indexed byte
960 movdqa TLB_TAG_BYTE_1, [rbx] ; Update XMM reg with B1 Copy
961 ; Write Byte 2 of Tag to TLB_TAG_BYTE_2_COPY and TLB_TAG_BYTE_2 :
962 shr eax, 16 ; eax := eax >> 16, so that al = Byte 2 of Tag
963 lea rbx, [TLB_TAG_BYTE_2_COPY] ; Load address of B2 Copy
964 mov byte [rbx + rcx], al ; Change the indexed byte
965 movdqa TLB_TAG_BYTE_2, [rbx] ; Update XMM reg with B2 Copy
966 ; Done with Tag. Now, get this entry's ASID from CP0_EntryHi :
967 mov eax, edx ; eax := edx
968 and eax, 0xFF ; Get ASID from CP0_EntryHi
969 ; Store this entry's ASID to the selected Index:
970 lea rbx, [TLB_ASID_COPY] ; Load address of ASID Copy
971 mov byte [rbx + rcx], al ; Change the indexed byte
972 ; Done with contents of CP0_EntryHi. Now, get G, V0, D0, V1, D1 :
973 mov edx, Sr(CP0_EntryLo0) ; edx := CP0_EntryLo0
974 mov ebx, Sr(CP0_EntryLo1) ; ebx := CP0_EntryLo1
975 ; Get G using CP0_EntryLo0 and CP0_EntryLo1 :
976 mov eax, edx ; eax := CP0_EntryLo0
977 and eax, ebx ; eax := eax & CP0_EntryLo1
978 and eax, 0x1 ; eax := eax & 1 to get G
979 ; Write the new G(Index) to indexed pos of upper 16 bits of Flag_Reg :
980 add ecx, 16 ; Position of all G's in Flag_Reg
981 shl eax, cl ; Slide new G into final position
982 or Flag_Reg, eax ; Set the new value G(Index)
983 ; Get V0 from CP0_EntryLo0 and write to TLB_Flags :
984 mov ecx, AUX ; ecx := Index
985 mov eax, edx ; eax := CP0_EntryLo0
986 and eax, 0x2 ; eax := eax & 0x2 to get V0 bit
987 shr eax, 1 ; Put V0 into bottom-most pos
988 shl eax, cl ; Slide V0 into final position
989 or TLB_Flags, rax ; Put the new value in V0(Index)
990 ; Get D0 from CP0_EntryLo0 and write to TLB_Flags :
991 add ecx, 16 ; Position where D0 lives:
992 mov eax, edx ; eax := CP0_EntryLo0
993 and eax, 0x4 ; eax := eax & 0x4 to get D0 bit
994 shr eax, 2 ; Put D0 into bottom-most pos
995 shl eax, cl ; Slide D0 into final position
996 or TLB_Flags, rax ; Put the new value in D0(Index)
997 ; Get V1 from CP0_EntryLo1 and write to TLB_Flags :
998 add ecx, 16 ; V1 starts at 32-nd bit
999 mov eax, ebx ; eax := CP0_EntryLo1
1000 and eax, 0x2 ; eax := eax & 0x2 to get V1 bit
1001 shr eax, 1 ; Put V1 into bottom-most pos
1002 shl rax, cl ; Slide V1 into final position
1003 or TLB_Flags, rax ; Put the new value in V1(Index)
1004 ; Get D1 from CP0_EntryLo1 and write to TLB_Flags :
1005 add ecx, 16 ; D1 starts at 48-th bit
1006 mov eax, ebx ; eax := CP0_EntryLo1
1007 and eax, 0x4 ; eax := eax & 0x4 to get D1 bit
1008 shr eax, 2 ; Put D1 into bottom-most pos
1009 shl rax, cl ; Slide D1 into final position
1010 or TLB_Flags, rax ; Put the new value in D1(Index)
1011 ; Transform CP0_EntryLo0 (edx) into PFN0:
1012 shr edx, 6 ; drop the low 6 bits of CP0_EntryLo0
1013 and edx, 0xFFFFF ; keep 20 bits
1014 shl edx, 12 ; move them to bits 31..12
1015 ; Transform CP0_EntryLo1 (ebx) into PFN1:
1016 shr ebx, 6 ; drop the low 6 bits of CP0_EntryLo1
1017 and ebx, 0xFFFFF ; keep 20 bits
1018 shl ebx, 12 ; move them to bits 31..12
1019 ; Store PFN:
1020 shl rbx, 32 ; rbx := rbx << 32 (Odd)
1021 and rdx, 0xFFFFFFFF ; rdx := rdx & 0xFFFFFFFF (Even); edx was last written by 32-bit ops, so the top half of rdx is already zero
1022 or rbx, rdx ; rbx := rbx | rdx (combined PFN)
1023 mov TLB_PFN(AUX64), rbx ; Store PFN
1024 ; Fin.
1025 ret
1026 ;-----------------------------------------------------------------------------