summaryrefslogtreecommitdiffhomepage
path: root/zlib/contrib/masmx86
diff options
context:
space:
mode:
Diffstat (limited to 'zlib/contrib/masmx86')
-rw-r--r--zlib/contrib/masmx86/bld_ml32.bat2
-rw-r--r--zlib/contrib/masmx86/inffas32.asm1080
-rw-r--r--zlib/contrib/masmx86/inffas32.lst1224
-rw-r--r--zlib/contrib/masmx86/match686.asm479
-rw-r--r--zlib/contrib/masmx86/match686.lst624
-rw-r--r--zlib/contrib/masmx86/readme.txt27
6 files changed, 3436 insertions, 0 deletions
diff --git a/zlib/contrib/masmx86/bld_ml32.bat b/zlib/contrib/masmx86/bld_ml32.bat
new file mode 100644
index 0000000..67e6a6a
--- /dev/null
+++ b/zlib/contrib/masmx86/bld_ml32.bat
@@ -0,0 +1,2 @@
+ml /safeseh /coff /Zi /c /Flmatch686.lst match686.asm
+ml /safeseh /coff /Zi /c /Flinffas32.lst inffas32.asm
diff --git a/zlib/contrib/masmx86/inffas32.asm b/zlib/contrib/masmx86/inffas32.asm
new file mode 100644
index 0000000..cb37a81
--- /dev/null
+++ b/zlib/contrib/masmx86/inffas32.asm
@@ -0,0 +1,1080 @@
+;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
+; *
+; * inffas32.asm is derived from inffas86.c, with translation of assembly code
+; *
+; * Copyright (C) 1995-2003 Mark Adler
+; * For conditions of distribution and use, see copyright notice in zlib.h
+; *
+; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
+; * Please use the copyright conditions above.
+; *
+; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
+; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
+; * the moment. I have successfully compiled and tested this code with gcc2.96,
+; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
+; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
+; * enabled. I will attempt to merge the MMX code into this version. Newer
+; * versions of this and inffast.S can be found at
+; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
+; *
+; * 2005 : modification by Gilles Vollant
+; */
+; For Visual C++ 4.x and higher and ML 6.x and higher
+; ml.exe is in directory \MASM611C of Win95 DDK
+; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
+; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
+;
+;
+; compile with command line option
+; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
+
+; if you define NO_GUNZIP (see inflate.h), compile with
+; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
+
+
+; zlib1222sup is 0 for zlib 1.2.2.1 and lower
+; zlib1222sup is 8 for zlib 1.2.2.2 and later (which added dmax and head
+; to inflate_state in inflate.h)
+zlib1222sup equ 8 ; added to every inflate_state field offset except mode_state
+; INFLATE_MODE_TYPE is stored into state->mode when an end-of-block code is decoded,
+; INFLATE_MODE_BAD when invalid data is found. NOTE(review): presumably inflate.h's TYPE/BAD - confirm.
+IFDEF GUNZIP ; GUNZIP defined
+ INFLATE_MODE_TYPE equ 11
+ INFLATE_MODE_BAD equ 26
+ELSE
+ IFNDEF NO_GUNZIP ; neither GUNZIP nor NO_GUNZIP defined: same values as the GUNZIP case
+ INFLATE_MODE_TYPE equ 11
+ INFLATE_MODE_BAD equ 26
+ ELSE
+ INFLATE_MODE_TYPE equ 3 ; only NO_GUNZIP defined
+ INFLATE_MODE_BAD equ 17 ; only NO_GUNZIP defined
+ ENDIF
+ENDIF
+
+
+; 75 "inffast.S"
+;FILE "inffast.S"
+
+;;;GLOBAL _inflate_fast
+
+;;;SECTION .text
+
+
+
+ .586p ; the code below uses cpuid
+ .mmx ; the MMX loop below uses movd/movq/psrlq/psllq/pand/por/pxor/emms
+
+ name inflate_fast_x86
+ .MODEL FLAT
+
+_DATA segment ; NOTE(review): no matching "_DATA ends" appears before "_TEXT segment" opens
+inflate_fast_use_mmx: ; cached CPU decision read at L_check_mmx: below 2 = not yet probed, 2 = MMX loop, 3 = scalar loop
+ dd 1 ; initial value 1, so the first call runs the CPU probe
+
+
+_TEXT segment ; the message strings and the mask table below are emitted in this segment, ahead of the code
+
+
+
+ALIGN 4
+ db 'Fast decoding Code from Chris Anderson'
+ db 0 ; NUL terminator (this string has no label and is not referenced by the code in this file)
+
+ALIGN 4
+invalid_literal_length_code_msg: ; loaded at L_invalid_literal_length_code
+ db 'invalid literal/length code'
+ db 0 ; NUL terminator
+
+ALIGN 4
+invalid_distance_code_msg: ; loaded at L_invalid_distance_code
+ db 'invalid distance code'
+ db 0 ; NUL terminator
+
+ALIGN 4
+invalid_distance_too_far_msg: ; loaded at L_invalid_distance_too_far
+ db 'invalid distance too far back'
+ db 0 ; NUL terminator
+
+
+ALIGN 4
+inflate_fast_mask: ; 33 dwords; entry n = 2^n - 1 (the low n bits set), n = 0..32
+dd 0 ; n = 0
+dd 1
+dd 3
+dd 7
+dd 15
+dd 31
+dd 63
+dd 127
+dd 255 ; n = 8
+dd 511
+dd 1023
+dd 2047
+dd 4095
+dd 8191
+dd 16383
+dd 32767 ; n = 15, the largest index the MMX code uses (it indexes with "and eax,15")
+dd 65535
+dd 131071
+dd 262143
+dd 524287
+dd 1048575
+dd 2097151
+dd 4194303
+dd 8388607
+dd 16777215
+dd 33554431
+dd 67108863
+dd 134217727
+dd 268435455
+dd 536870911
+dd 1073741823
+dd 2147483647
+dd 4294967295 ; n = 32
+
+
+mode_state equ 0 ;/* state->mode */ all *_state values are byte offsets from the state pointer read at stream+28
+wsize_state equ (32+zlib1222sup) ;/* state->wsize */
+write_state equ (36+4+zlib1222sup) ;/* state->write */ NOTE(review): the extra +4 from here on presumably skips a field after wsize - confirm in inflate.h
+window_state equ (40+4+zlib1222sup) ;/* state->window */
+hold_state equ (44+4+zlib1222sup) ;/* state->hold */ bit accumulator
+bits_state equ (48+4+zlib1222sup) ;/* state->bits */ number of valid bits in hold
+lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ literal/length table (4-byte entries)
+distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ distance table (4-byte entries)
+lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ index width of the first-level lencode lookup
+distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ index width of the first-level distcode lookup
+
+
+;;SECTION .text
+; 205 "inffast.S"
+;GLOBAL inflate_fast_use_mmx
+
+;SECTION .data
+
+
+; GLOBAL inflate_fast_use_mmx:object
+;.size inflate_fast_use_mmx, 4
+; 226 "inffast.S"
+;SECTION .text
+
+ALIGN 4
+_inflate_fast proc near ; fast inflate inner loop; both arguments are read from the stack
+.FPO (16, 4, 0, 0, 1, 0)
+ push edi ; save edi/esi/ebp/ebx and EFLAGS; they are restored in reverse order at L_done
+ push esi
+ push ebp
+ push ebx
+ pushfd
+ sub esp,64 ; 64-byte local frame; 20 saved bytes + return address put the first argument at esp+88
+ cld ; clear DF so lodsw/stosb/movsb advance upward
+; Frame: [esp+0] length-code mask  [esp+4] distance-code mask  [esp+8] lencode  [esp+12] distcode
+;        [esp+16] output limit  [esp+20] input limit  [esp+24] match length  [esp+28..39] scratch input
+;        [esp+40] beg  [esp+44] saved input pointer  [esp+48] write  [esp+52] wsize  [esp+56] window
+;        [esp+60] initial output pointer.  Scalar-loop registers: esi=in edi=out ebp=hold ebx=bits
+ mov esi, [esp+88] ; esi = first argument (the stream structure)
+ mov edi, [esi+28] ; edi = state pointer stored at stream+28 (base for the *_state offsets)
+; NOTE(review): stream offsets +0/+4/+12/+16 are used below as input pointer, input
+; byte count, output pointer and output byte count -- presumably next_in, avail_in,
+; next_out and avail_out of z_stream; that struct is not declared in this file, confirm.
+
+
+
+
+ mov edx, [esi+4] ; edx = input byte count
+ mov eax, [esi+0] ; eax = input pointer
+
+ add edx,eax
+ sub edx,11 ; edx = input pointer + count - 11
+
+ mov [esp+44],eax ; saved input pointer
+ mov [esp+20],edx ; input limit ("last"): the loops continue only while in < last
+
+ mov ebp, [esp+92] ; ebp = second argument
+ mov ecx, [esi+16] ; ecx = output byte count
+ mov ebx, [esi+12] ; ebx = output pointer
+
+ sub ebp,ecx
+ neg ebp
+ add ebp,ebx ; ebp = out - (second argument - output count)
+
+ sub ecx,257
+ add ecx,ebx ; ecx = out + output count - 257
+
+ mov [esp+60],ebx ; initial output pointer
+ mov [esp+40],ebp ; "beg": matches reaching below this address are served from the window
+ mov [esp+16],ecx ; output limit ("end"): the loops continue only while out < end
+; 285 "inffast.S"
+ mov eax, [edi+lencode_state]
+ mov ecx, [edi+distcode_state]
+
+ mov [esp+8],eax ; literal/length table pointer
+ mov [esp+12],ecx ; distance table pointer
+
+ mov eax,1
+ mov ecx, [edi+lenbits_state]
+ shl eax,cl
+ dec eax
+ mov [esp+0],eax ; length-code index mask = (1 << lenbits) - 1
+
+ mov eax,1
+ mov ecx, [edi+distbits_state]
+ shl eax,cl
+ dec eax
+ mov [esp+4],eax ; distance-code index mask = (1 << distbits) - 1
+
+ mov eax, [edi+wsize_state]
+ mov ecx, [edi+write_state]
+ mov edx, [edi+window_state]
+
+ mov [esp+52],eax ; wsize
+ mov [esp+48],ecx ; write
+ mov [esp+56],edx ; window
+
+ mov ebp, [edi+hold_state] ; ebp = bit accumulator
+ mov ebx, [edi+bits_state] ; ebx = number of valid bits in ebp
+; 321 "inffast.S"
+ mov esi, [esp+44] ; esi = input pointer
+ mov ecx, [esp+20] ; ecx = input limit (in + count - 11)
+ cmp ecx,esi
+ ja L_align_long ; limit above in, i.e. more than 11 input bytes: read the caller's buffer directly
+; 11 bytes or fewer: stage the input in the 12-byte frame buffer, zero padded
+ add ecx,11
+ sub ecx,esi ; ecx = input byte count
+ mov eax,12
+ sub eax,ecx ; eax = 12 - count = number of padding bytes
+ lea edi, [esp+28]
+ rep movsb ; copy the input bytes into the frame buffer
+ mov ecx,eax
+ xor eax,eax
+ rep stosb ; zero-fill the rest of the 12 bytes
+ lea esi, [esp+28] ; read input from the frame buffer from now on
+ mov [esp+20],esi ; limit = buffer start; L_update_next_in tests this value to detect the staged case
+ jmp L_is_aligned
+
+
+L_align_long: ; feed single bytes into hold until esi is a multiple of 4
+ test esi,3
+ jz L_is_aligned
+ xor eax,eax
+ mov al, [esi] ; eax = next input byte, zero extended
+ inc esi
+ mov ecx,ebx ; shift count = current bit count
+ add ebx,8 ; bits += 8
+ shl eax,cl
+ or ebp,eax ; hold |= byte << old bit count
+ jmp L_align_long
+
+L_is_aligned:
+ mov edi, [esp+60] ; edi = output pointer (edi was used as a scratch pointer above)
+; 366 "inffast.S"
+L_check_mmx:
+ cmp dword ptr [inflate_fast_use_mmx],2 ; dispatch on the cached decision
+ je L_init_mmx ; 2: use the MMX loop
+ ja L_do_loop ; above 2: use the scalar loop
+; flag below 2: probe the CPU, store 2 or 3 in the flag, then jump back to L_check_mmx
+ push eax ; eax/ebx/ecx/edx are all overwritten by cpuid, so save them
+ push ebx
+ push ecx
+ push edx
+ pushfd
+ mov eax, [esp] ; eax = current EFLAGS image
+ xor dword ptr [esp],0200000h ; flip bit 21 in the stacked copy
+; bit 21 of EFLAGS is the ID flag; being able to change it indicates cpuid is available
+
+
+
+ popfd ; try to load the modified flags
+ pushfd
+ pop edx ; edx = flags as actually accepted by the CPU
+ xor edx,eax
+ jz L_dont_use_mmx ; bit did not change: no cpuid, stay on the scalar loop
+ xor eax,eax
+ cpuid ; leaf 0: vendor string in ebx, edx, ecx
+ cmp ebx,0756e6547h ; bytes 47 65 6E 75 = "Genu"
+ jne L_dont_use_mmx
+ cmp ecx,06c65746eh ; bytes 6E 74 65 6C = "ntel"
+ jne L_dont_use_mmx
+ cmp edx,049656e69h ; bytes 69 6E 65 49 = "ineI"
+ jne L_dont_use_mmx
+ mov eax,1
+ cpuid ; leaf 1: signature in eax, feature bits in edx
+ shr eax,8
+ and eax,15 ; eax = bits 8..11 of the signature (family)
+ cmp eax,6
+ jne L_dont_use_mmx ; MMX loop is only selected for family 6
+ test edx,0800000h ; feature bit 23 = MMX
+ jnz L_use_mmx
+ jmp L_dont_use_mmx
+L_use_mmx:
+ mov dword ptr [inflate_fast_use_mmx],2
+ jmp L_check_mmx_pop
+L_dont_use_mmx:
+ mov dword ptr [inflate_fast_use_mmx],3
+L_check_mmx_pop:
+ pop edx ; restore the registers saved before the probe
+ pop ecx
+ pop ebx
+ pop eax
+ jmp L_check_mmx ; re-dispatch with the flag now 2 or 3
+; 426 "inffast.S"
+ALIGN 4
+L_do_loop: ; scalar main loop: esi=in edi=out ebp=hold bl=bits
+; 437 "inffast.S"
+ cmp bl,15
+ ja L_get_length_code ; more than 15 bits buffered: no refill
+
+ xor eax,eax
+ lodsw ; ax = next 16 input bits, esi += 2
+ mov cl,bl
+ add bl,16 ; bits += 16
+ shl eax,cl
+ or ebp,eax ; hold |= word << old bit count
+
+L_get_length_code:
+ mov edx, [esp+0] ; length-code index mask
+ mov ecx, [esp+8] ; lencode table
+ and edx,ebp
+ mov eax, [ecx+edx*4] ; eax = lencode[hold & mask]
+
+L_dolen: ; eax = table entry: al = op, ah = bit count, upper 16 bits = value
+
+
+
+
+
+
+ mov cl,ah
+ sub bl,ah ; bits -= entry bit count
+ shr ebp,cl ; drop the consumed bits from hold
+
+
+
+
+
+
+ test al,al
+ jnz L_test_for_length_base ; op != 0: not a literal
+
+ shr eax,16 ; al = low byte of the entry value (the literal)
+ stosb ; *out++ = literal
+
+L_while_test: ; continue while out < output limit and in < input limit (unsigned)
+
+
+ cmp [esp+16],edi
+ jbe L_break_loop ; output limit <= out
+
+ cmp [esp+20],esi
+ ja L_do_loop ; input limit > in: decode another symbol
+ jmp L_break_loop
+
+L_test_for_length_base: ; entry op != 0; on exit through L_save_len the match length is in [esp+24]
+; 502 "inffast.S"
+ mov edx,eax
+ shr edx,16 ; edx = entry value (length base)
+ mov cl,al ; cl = op
+
+ test al,16
+ jz L_test_for_second_level_length ; op bit 4 clear: not a length base
+ and cl,15 ; cl = number of extra bits
+ jz L_save_len ; no extra bits
+ cmp bl,cl
+ jae L_add_bits_to_len ; enough bits already buffered
+
+ mov ch,cl ; keep the extra-bit count while cl is used as a shift count
+ xor eax,eax
+ lodsw ; refill: ax = next 16 input bits, esi += 2
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax
+ mov cl,ch ; cl = extra-bit count again
+
+L_add_bits_to_len:
+ mov eax,1
+ shl eax,cl
+ dec eax ; eax = (1 << extra) - 1
+ sub bl,cl ; bits -= extra
+ and eax,ebp ; eax = extra-bit value
+ shr ebp,cl ; drop the extra bits from hold
+ add edx,eax ; length = base + extra-bit value
+
+L_save_len:
+ mov [esp+24],edx ; match length slot
+
+
+L_decode_distance: ; decode the distance that follows a length; result in edx
+; 549 "inffast.S"
+ cmp bl,15
+ ja L_get_distance_code ; more than 15 bits buffered: no refill
+
+ xor eax,eax
+ lodsw ; ax = next 16 input bits, esi += 2
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax
+
+L_get_distance_code:
+ mov edx, [esp+4] ; distance-code index mask
+ mov ecx, [esp+12] ; distcode table
+ and edx,ebp
+ mov eax, [ecx+edx*4] ; eax = distcode[hold & mask]
+
+
+L_dodist: ; eax = table entry: al = op, ah = bit count, upper 16 bits = value
+ mov edx,eax
+ shr edx,16 ; edx = entry value (distance base)
+ mov cl,ah
+ sub bl,ah ; bits -= entry bit count
+ shr ebp,cl ; drop the consumed bits from hold
+; 584 "inffast.S"
+ mov cl,al ; cl = op
+
+ test al,16
+ jz L_test_for_second_level_dist ; op bit 4 clear: not a distance base
+ and cl,15 ; cl = number of extra bits
+ jz L_check_dist_one ; no extra bits: test for the distance == 1 special case
+ cmp bl,cl
+ jae L_add_bits_to_dist ; enough bits already buffered
+
+ mov ch,cl ; keep the extra-bit count while cl is used as a shift count
+ xor eax,eax
+ lodsw ; refill: ax = next 16 input bits, esi += 2
+ mov cl,bl
+ add bl,16
+ shl eax,cl
+ or ebp,eax
+ mov cl,ch ; cl = extra-bit count again
+
+L_add_bits_to_dist:
+ mov eax,1
+ shl eax,cl
+ dec eax ; eax = (1 << extra) - 1
+ sub bl,cl ; bits -= extra
+ and eax,ebp ; eax = extra-bit value
+ shr ebp,cl ; drop the extra bits from hold
+ add edx,eax ; distance = base + extra-bit value
+ jmp L_check_window ; target is the next instruction
+
+L_check_window: ; edx = distance, [esp+24] = length; copy the match to out
+; 625 "inffast.S"
+ mov [esp+44],esi ; save the input pointer; esi becomes the copy source
+ mov eax,edi
+ sub eax, [esp+40] ; eax = out - beg
+
+ cmp eax,edx
+ jb L_clip_window ; distance reaches below beg: take bytes from the window
+
+ mov ecx, [esp+24] ; ecx = length
+ mov esi,edi
+ sub esi,edx ; esi = out - distance
+
+ sub ecx,3 ; the first 3 bytes are copied by hand below, so length must be at least 3
+ mov al, [esi]
+ mov [edi],al
+ mov al, [esi+1]
+ mov dl, [esi+2] ; read before [edi+1] is written; distance 1 is normally taken by L_check_dist_one
+ add esi,3
+ mov [edi+1],al
+ mov [edi+2],dl
+ add edi,3
+ rep movsb ; copy the remaining length - 3 bytes forward
+
+ mov esi, [esp+44] ; restore the input pointer
+ jmp L_while_test
+
+ALIGN 4
+L_check_dist_one: ; reached when the distance code has no extra bits; edx = distance
+ cmp edx,1
+ jne L_check_window ; only distance 1 is special-cased
+ cmp [esp+40],edi
+ je L_check_window ; out == beg: no previous output byte here, use the general path
+
+ dec edi
+ mov ecx, [esp+24] ; ecx = length
+ mov al, [edi] ; al = previous output byte
+ sub ecx,3 ; three copies are written by hand below
+
+ mov [edi+1],al
+ mov [edi+2],al
+ mov [edi+3],al
+ add edi,4 ; edi = original out + 3
+ rep stosb ; fill the remaining length - 3 bytes with the same value
+
+ jmp L_while_test
+
+ALIGN 4
+L_test_for_second_level_length: ; op bit 4 clear; al = cl = op, edx = entry value
+
+
+
+
+ test al,64
+ jnz L_test_for_end_of_block ; op bit 6 set: end of block or invalid code
+
+ mov eax,1
+ shl eax,cl ; cl still holds op
+ dec eax ; eax = (1 << op) - 1
+ and eax,ebp ; low op bits of hold
+ add eax,edx ; index = entry value + those bits
+ mov edx, [esp+8] ; lencode table
+ mov eax, [edx+eax*4] ; eax = second-level entry
+ jmp L_dolen ; process it like a first-level entry
+
+ALIGN 4
+L_test_for_second_level_dist: ; op bit 4 clear; al = cl = op, edx = entry value
+
+
+
+
+ test al,64
+ jnz L_invalid_distance_code ; op bit 6 set: invalid distance code
+
+ mov eax,1
+ shl eax,cl ; cl still holds op
+ dec eax ; eax = (1 << op) - 1
+ and eax,ebp ; low op bits of hold
+ add eax,edx ; index = entry value + those bits
+ mov edx, [esp+12] ; distcode table
+ mov eax, [edx+eax*4] ; eax = second-level entry
+ jmp L_dodist ; process it like a first-level entry
+
+ALIGN 4
+L_clip_window: ; entry: eax = out - beg (smaller than distance), edx = distance, [esp+24] = length
+; 721 "inffast.S"
+ mov ecx,eax
+ mov eax, [esp+52] ; eax = wsize
+ neg ecx ; ecx = -(out - beg)
+ mov esi, [esp+56] ; esi = window
+; NOTE(review): the distance is bounded by wsize here; whether all wsize bytes hold valid data is not visible in this file
+ cmp eax,edx
+ jb L_invalid_distance_too_far ; wsize < distance
+
+ add ecx,edx ; ecx = distance - (out - beg) = bytes that must come from the window
+ cmp dword ptr [esp+48],0
+ jne L_wrap_around_window ; write != 0
+; write == 0: the bytes needed are the last ecx bytes of the window
+ sub eax,ecx
+ add esi,eax ; esi = window + wsize - ecx
+; 749 "inffast.S"
+ mov eax, [esp+24] ; eax = length
+ cmp eax,ecx
+ jbe L_do_copy1 ; whole match lies in the window
+
+ sub eax,ecx ; eax = bytes left after the window part
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,edx ; continue from out - distance
+ jmp L_do_copy1
+; NOTE(review): the next 6 instructions repeat the sequence above and are unreachable (unlabelled, after an unconditional jmp)
+ cmp eax,ecx
+ jbe L_do_copy1
+
+ sub eax,ecx
+ rep movsb
+ mov esi,edi
+ sub esi,edx
+ jmp L_do_copy1
+
+L_wrap_around_window: ; write != 0; ecx = bytes needed from the window, esi = window
+; 793 "inffast.S"
+ mov eax, [esp+48] ; eax = write
+ cmp ecx,eax
+ jbe L_contiguous_in_window ; needed bytes all lie below window + write
+
+ add esi, [esp+52]
+ add esi,eax
+ sub esi,ecx ; esi = window + wsize + write - ecx
+ sub ecx,eax ; ecx = bytes between esi and the end of the window
+
+
+ mov eax, [esp+24] ; eax = length
+ cmp eax,ecx
+ jbe L_do_copy1 ; match ends inside this first piece
+
+ sub eax,ecx
+ rep movsb ; copy up to the end of the window
+ mov esi, [esp+56] ; wrap to the start of the window
+ mov ecx, [esp+48] ; up to write bytes are available there
+ cmp eax,ecx
+ jbe L_do_copy1 ; match ends inside the second piece
+
+ sub eax,ecx
+ rep movsb ; copy the second piece
+ mov esi,edi
+ sub esi,edx ; continue from out - distance
+ jmp L_do_copy1
+
+L_contiguous_in_window: ; eax = write, ecx = bytes needed (ecx <= write)
+; 836 "inffast.S"
+ add esi,eax
+ sub esi,ecx ; esi = window + write - ecx
+
+
+ mov eax, [esp+24] ; eax = length
+ cmp eax,ecx
+ jbe L_do_copy1 ; whole match lies in the window
+
+ sub eax,ecx
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,edx ; continue from out - distance
+
+L_do_copy1: ; eax = bytes still to copy from esi
+; 862 "inffast.S"
+ mov ecx,eax
+ rep movsb
+
+ mov esi, [esp+44] ; restore the input pointer saved at L_check_window
+ jmp L_while_test
+; 878 "inffast.S"
+ALIGN 4
+L_init_mmx: ; set up the MMX loop: mm0 = hold, ebp = bits, ebx = lencode table
+ emms
+; mm1 holds the shift still to be applied to mm0; mm3/mm2 keep pristine copies of the
+; length/distance index masks because mm4/mm5 are overwritten by each lookup
+
+
+
+ movd mm0,ebp ; mm0 = hold
+ mov ebp,ebx ; ebp = bit count from here on (L_break_loop copies it back to ebx)
+; 896 "inffast.S"
+ movd mm4,dword ptr [esp+0] ; mm4 = length-code index mask
+ movq mm3,mm4 ; mm3 = copy used to reload mm4
+ movd mm5,dword ptr [esp+4] ; mm5 = distance-code index mask
+ movq mm2,mm5 ; mm2 = copy used to reload mm5
+ pxor mm1,mm1 ; no pending shift yet
+ mov ebx, [esp+8] ; ebx = lencode table
+ jmp L_do_loop_mmx
+
+ALIGN 4
+L_do_loop_mmx: ; MMX main loop: esi=in edi=out mm0=hold ebp=bits ebx=lencode table
+ psrlq mm0,mm1 ; apply the shift left pending by the previous step
+
+ cmp ebp,32
+ ja L_get_length_code_mmx ; more than 32 bits buffered: no refill
+
+ movd mm6,ebp ; shift count = current bit count
+ movd mm7,dword ptr [esi] ; next 32 input bits
+ add esi,4
+ psllq mm7,mm6
+ add ebp,32 ; bits += 32
+ por mm0,mm7 ; hold |= dword << old bit count
+
+L_get_length_code_mmx:
+ pand mm4,mm0
+ movd eax,mm4 ; eax = hold & length-code mask
+ movq mm4,mm3 ; reload the mask for the next lookup
+ mov eax, [ebx+eax*4] ; eax = lencode entry
+
+L_dolen_mmx: ; eax = table entry: al = op, ah = bit count, upper 16 bits = value
+ movzx ecx,ah
+ movd mm1,ecx ; pending shift = entry bit count (applied by a later psrlq)
+ sub ebp,ecx ; bits -= entry bit count
+
+ test al,al
+ jnz L_test_for_length_base_mmx ; op != 0: not a literal
+
+ shr eax,16 ; al = low byte of the entry value (the literal)
+ stosb ; *out++ = literal
+
+L_while_test_mmx: ; continue while out < output limit and in < input limit (unsigned)
+
+
+ cmp [esp+16],edi
+ jbe L_break_loop ; output limit <= out
+
+ cmp [esp+20],esi
+ ja L_do_loop_mmx ; input limit > in: decode another symbol
+ jmp L_break_loop
+
+L_test_for_length_base_mmx: ; entry op != 0; the match length is built in edx
+
+ mov edx,eax
+ shr edx,16 ; edx = entry value (length base)
+
+ test al,16
+ jz L_test_for_second_level_length_mmx ; op bit 4 clear: not a length base
+ and eax,15 ; eax = number of extra bits
+ jz L_decode_distance_mmx ; none: the code-bit shift stays pending in mm1
+
+ psrlq mm0,mm1 ; drop the code bits
+ movd mm1,eax ; pending shift = extra-bit count
+ movd ecx,mm0 ; low 32 bits of hold
+ sub ebp,eax ; bits -= extra
+ and ecx, [inflate_fast_mask+eax*4] ; ecx = extra-bit value
+ add edx,ecx ; length = base + extra-bit value
+
+L_decode_distance_mmx:
+ psrlq mm0,mm1 ; apply the pending shift
+
+ cmp ebp,32
+ ja L_get_dist_code_mmx ; more than 32 bits buffered: no refill
+
+ movd mm6,ebp
+ movd mm7,dword ptr [esi] ; next 32 input bits
+ add esi,4
+ psllq mm7,mm6
+ add ebp,32
+ por mm0,mm7
+
+L_get_dist_code_mmx:
+ mov ebx, [esp+12] ; ebx = distcode table (lencode is reloaded before returning to the loop)
+ pand mm5,mm0
+ movd eax,mm5 ; eax = hold & distance-code mask
+ movq mm5,mm2 ; reload the mask for the next lookup
+ mov eax, [ebx+eax*4] ; eax = distcode entry
+
+L_dodist_mmx: ; eax = table entry: al = op, ah = bit count, upper 16 bits = value
+
+ movzx ecx,ah
+ mov ebx,eax
+ shr ebx,16 ; ebx = entry value (distance base)
+ sub ebp,ecx ; bits -= entry bit count
+ movd mm1,ecx ; pending shift = entry bit count
+
+ test al,16
+ jz L_test_for_second_level_dist_mmx ; op bit 4 clear: not a distance base
+ and eax,15 ; eax = number of extra bits
+ jz L_check_dist_one_mmx ; no extra bits: test for the distance == 1 special case
+
+L_add_bits_to_dist_mmx:
+ psrlq mm0,mm1 ; drop the code bits
+ movd mm1,eax ; pending shift = extra-bit count
+ movd ecx,mm0 ; low 32 bits of hold
+ sub ebp,eax ; bits -= extra
+ and ecx, [inflate_fast_mask+eax*4] ; ecx = extra-bit value
+ add ebx,ecx ; distance = base + extra-bit value
+
+L_check_window_mmx: ; ebx = distance, edx = length; copy the match to out
+ mov [esp+44],esi ; save the input pointer; esi becomes the copy source
+ mov eax,edi
+ sub eax, [esp+40] ; eax = out - beg
+
+ cmp eax,ebx
+ jb L_clip_window_mmx ; distance reaches below beg: take bytes from the window
+
+ mov ecx,edx ; ecx = length
+ mov esi,edi
+ sub esi,ebx ; esi = out - distance
+
+ sub ecx,3 ; the first 3 bytes are copied by hand below, so length must be at least 3
+ mov al, [esi]
+ mov [edi],al
+ mov al, [esi+1]
+ mov dl, [esi+2] ; overwrites the low byte of edx; the length is already in ecx
+ add esi,3
+ mov [edi+1],al
+ mov [edi+2],dl
+ add edi,3
+ rep movsb ; copy the remaining length - 3 bytes forward
+
+ mov esi, [esp+44] ; restore the input pointer
+ mov ebx, [esp+8] ; restore ebx = lencode table for the main loop
+ jmp L_while_test_mmx
+
+ALIGN 4
+L_check_dist_one_mmx: ; reached when the distance code has no extra bits; ebx = distance
+ cmp ebx,1
+ jne L_check_window_mmx ; only distance 1 is special-cased
+ cmp [esp+40],edi
+ je L_check_window_mmx ; out == beg: no previous output byte here, use the general path
+
+ dec edi
+ mov ecx,edx ; ecx = length
+ mov al, [edi] ; al = previous output byte
+ sub ecx,3 ; three copies are written by hand below
+
+ mov [edi+1],al
+ mov [edi+2],al
+ mov [edi+3],al
+ add edi,4 ; edi = original out + 3
+ rep stosb ; fill the remaining length - 3 bytes with the same value
+
+ mov ebx, [esp+8] ; restore ebx = lencode table for the main loop
+ jmp L_while_test_mmx
+
+ALIGN 4
+L_test_for_second_level_length_mmx: ; op bit 4 clear; al = op, edx = entry value, ebx = lencode table
+ test al,64
+ jnz L_test_for_end_of_block ; op bit 6 set: end of block or invalid code
+
+ and eax,15 ; eax = low 4 bits of op = number of index bits
+ psrlq mm0,mm1 ; drop the first-level code bits
+ movd ecx,mm0
+ and ecx, [inflate_fast_mask+eax*4] ; ecx = that many low bits of hold
+ add ecx,edx ; index = entry value + those bits
+ mov eax, [ebx+ecx*4] ; eax = second-level lencode entry
+ jmp L_dolen_mmx ; L_dolen_mmx replaces mm1 with this entry's bit count
+
+ALIGN 4
+L_test_for_second_level_dist_mmx: ; op bit 4 clear; al = op, ebx = entry value
+ test al,64
+ jnz L_invalid_distance_code ; op bit 6 set: invalid distance code
+
+ and eax,15 ; eax = low 4 bits of op = number of index bits
+ psrlq mm0,mm1 ; drop the first-level code bits
+ movd ecx,mm0
+ and ecx, [inflate_fast_mask+eax*4] ; ecx = that many low bits of hold
+ mov eax, [esp+12] ; eax = distcode table
+ add ecx,ebx ; index = entry value + those bits
+ mov eax, [eax+ecx*4] ; eax = second-level distcode entry
+ jmp L_dodist_mmx ; L_dodist_mmx replaces mm1 with this entry's bit count
+
+ALIGN 4
+L_clip_window_mmx: ; entry: eax = out - beg (smaller than distance), ebx = distance, edx = length
+
+ mov ecx,eax
+ mov eax, [esp+52] ; eax = wsize
+ neg ecx ; ecx = -(out - beg)
+ mov esi, [esp+56] ; esi = window
+
+ cmp eax,ebx
+ jb L_invalid_distance_too_far ; wsize < distance
+
+ add ecx,ebx ; ecx = distance - (out - beg) = bytes that must come from the window
+ cmp dword ptr [esp+48],0
+ jne L_wrap_around_window_mmx ; write != 0
+; write == 0: the bytes needed are the last ecx bytes of the window
+ sub eax,ecx
+ add esi,eax ; esi = window + wsize - ecx
+
+ cmp edx,ecx
+ jbe L_do_copy1_mmx ; whole match lies in the window
+
+ sub edx,ecx ; edx = bytes left after the window part
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,ebx ; continue from out - distance
+ jmp L_do_copy1_mmx
+; NOTE(review): the next 6 instructions repeat the sequence above and are unreachable (unlabelled, after an unconditional jmp)
+ cmp edx,ecx
+ jbe L_do_copy1_mmx
+
+ sub edx,ecx
+ rep movsb
+ mov esi,edi
+ sub esi,ebx
+ jmp L_do_copy1_mmx
+
+L_wrap_around_window_mmx: ; write != 0; ecx = bytes needed from the window, esi = window
+
+ mov eax, [esp+48] ; eax = write
+ cmp ecx,eax
+ jbe L_contiguous_in_window_mmx ; needed bytes all lie below window + write
+
+ add esi, [esp+52]
+ add esi,eax
+ sub esi,ecx ; esi = window + wsize + write - ecx
+ sub ecx,eax ; ecx = bytes between esi and the end of the window
+
+
+ cmp edx,ecx
+ jbe L_do_copy1_mmx ; match ends inside this first piece
+
+ sub edx,ecx
+ rep movsb ; copy up to the end of the window
+ mov esi, [esp+56] ; wrap to the start of the window
+ mov ecx, [esp+48] ; up to write bytes are available there
+ cmp edx,ecx
+ jbe L_do_copy1_mmx ; match ends inside the second piece
+
+ sub edx,ecx
+ rep movsb ; copy the second piece
+ mov esi,edi
+ sub esi,ebx ; continue from out - distance
+ jmp L_do_copy1_mmx
+
+L_contiguous_in_window_mmx: ; eax = write, ecx = bytes needed (ecx <= write)
+
+ add esi,eax
+ sub esi,ecx ; esi = window + write - ecx
+
+
+ cmp edx,ecx
+ jbe L_do_copy1_mmx ; whole match lies in the window
+
+ sub edx,ecx
+ rep movsb ; copy the window part
+ mov esi,edi
+ sub esi,ebx ; continue from out - distance
+
+L_do_copy1_mmx: ; edx = bytes still to copy from esi
+
+
+ mov ecx,edx
+ rep movsb
+
+ mov esi, [esp+44] ; restore the input pointer saved at L_check_window_mmx
+ mov ebx, [esp+8] ; restore ebx = lencode table for the main loop
+ jmp L_while_test_mmx
+; 1174 "inffast.S"
+L_invalid_distance_code: ; exit stubs below set ecx = message (0 for none) and edx = new state->mode
+
+
+
+
+
+ mov ecx, invalid_distance_code_msg
+ mov edx,INFLATE_MODE_BAD
+ jmp L_update_stream_state
+
+L_test_for_end_of_block: ; al = op of a length entry with bit 4 clear and bit 6 set
+
+
+
+
+
+ test al,32
+ jz L_invalid_literal_length_code ; op bit 5 clear: not an end-of-block code
+
+ mov ecx,0 ; end of block: no message
+ mov edx,INFLATE_MODE_TYPE
+ jmp L_update_stream_state
+
+L_invalid_literal_length_code:
+
+
+
+
+
+ mov ecx, invalid_literal_length_code_msg
+ mov edx,INFLATE_MODE_BAD
+ jmp L_update_stream_state
+
+L_invalid_distance_too_far: ; reached from the clip-window code, where esi holds the window pointer
+
+
+
+ mov esi, [esp+44] ; restore the saved input pointer
+ mov ecx, invalid_distance_too_far_msg
+ mov edx,INFLATE_MODE_BAD
+ jmp L_update_stream_state
+
+L_update_stream_state: ; ecx = message or 0, edx = mode value
+
+ mov eax, [esp+88] ; eax = stream (first argument)
+ test ecx,ecx
+ jz L_skip_msg ; no message: leave stream+24 untouched
+ mov [eax+24],ecx ; store the message pointer at stream+24
+L_skip_msg:
+ mov eax, [eax+28] ; eax = state
+ mov [eax+mode_state],edx ; state->mode = edx
+ jmp L_break_loop
+
+ALIGN 4
+L_break_loop: ; common exit: write in/out/hold/bits back to the stream and state, then return
+; 1243 "inffast.S"
+ cmp dword ptr [inflate_fast_use_mmx],2
+ jne L_update_next_in
+; MMX loop keeps the bit count in ebp; move it to ebx, where the code below expects it
+
+
+ mov ebx,ebp
+
+L_update_next_in:
+; 1266 "inffast.S"
+ mov eax, [esp+88] ; eax = stream (first argument)
+ mov ecx,ebx
+ mov edx, [eax+28] ; edx = state
+ shr ecx,3 ; ecx = whole unused bytes still buffered
+ sub esi,ecx ; give those bytes back to the input
+ shl ecx,3
+ sub ebx,ecx ; bits = bits mod 8
+ mov [eax+12],edi ; stream output pointer = out
+ mov [edx+bits_state],ebx ; state->bits
+ mov ecx,ebx ; cl = remaining bit count for the mask below
+
+ lea ebx, [esp+28]
+ cmp [esp+20],ebx
+ jne L_buf_not_used ; input limit is not the frame buffer: input was read in place
+; staged input: translate esi back into the caller's buffer and rebuild the real limit
+ sub esi,ebx ; esi = offset reached inside the frame buffer
+ mov ebx, [eax+0]
+ mov [esp+20],ebx
+ add esi,ebx ; esi = original input pointer + offset
+ mov ebx, [eax+4]
+ sub ebx,11
+ add [esp+20],ebx ; limit = original input pointer + count - 11
+
+L_buf_not_used:
+ mov [eax+0],esi ; stream input pointer = in
+
+ mov ebx,1
+ shl ebx,cl
+ dec ebx ; ebx = (1 << bits) - 1
+
+
+
+
+
+ cmp dword ptr [inflate_fast_use_mmx],2
+ jne L_update_hold
+; MMX loop: apply the pending shift and move hold back into ebp
+
+
+ psrlq mm0,mm1
+ movd ebp,mm0
+
+ emms
+
+L_update_hold:
+
+
+
+ and ebp,ebx ; keep only the remaining valid bits
+ mov [edx+hold_state],ebp ; state->hold
+; input count = limit - in + 11; the two branches keep the subtraction non-negative
+
+
+
+ mov ebx, [esp+20]
+ cmp ebx,esi
+ jbe L_last_is_smaller
+
+ sub ebx,esi
+ add ebx,11
+ mov [eax+4],ebx ; stream input byte count
+ jmp L_fixup_out
+L_last_is_smaller:
+ sub esi,ebx
+ neg esi
+ add esi,11
+ mov [eax+4],esi ; stream input byte count
+; output count = limit - out + 257, computed the same two ways
+
+
+
+L_fixup_out:
+
+ mov ebx, [esp+16]
+ cmp ebx,edi
+ jbe L_end_is_smaller
+
+ sub ebx,edi
+ add ebx,257
+ mov [eax+16],ebx ; stream output byte count
+ jmp L_done
+L_end_is_smaller:
+ sub edi,ebx
+ neg edi
+ add edi,257
+ mov [eax+16],edi ; stream output byte count
+
+
+
+
+
+L_done:
+ add esp,64 ; release the local frame
+ popfd ; restore the caller's EFLAGS, including DF
+ pop ebx
+ pop ebp
+ pop esi
+ pop edi
+ ret ; plain ret: the arguments are left on the stack for the caller
+_inflate_fast endp
+
+_TEXT ends
+end
diff --git a/zlib/contrib/masmx86/inffas32.lst b/zlib/contrib/masmx86/inffas32.lst
new file mode 100644
index 0000000..025627c
--- /dev/null
+++ b/zlib/contrib/masmx86/inffas32.lst
@@ -0,0 +1,1224 @@
+Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23
+inffas32.asm Page 1 - 1
+
+
+ ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
+ ; *
+ ; * inffas32.asm is derivated from inffas86.c, with translation of assembly code
+ ; *
+ ; * Copyright (C) 1995-2003 Mark Adler
+ ; * For conditions of distribution and use, see copyright notice in zlib.h
+ ; *
+ ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
+ ; * Please use the copyright conditions above.
+ ; *
+ ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
+ ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at
+ ; * the moment. I have successfully compiled and tested this code with gcc2.96,
+ ; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S
+ ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
+ ; * enabled. I will attempt to merge the MMX code into this version. Newer
+ ; * versions of this and inffast.S can be found at
+ ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
+ ; *
+ ; * 2005 : modification by Gilles Vollant
+ ; */
+ ; For Visual C++ 4.x and higher and ML 6.x and higher
+ ; ml.exe is in directory \MASM611C of Win95 DDK
+ ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm
+ ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
+ ;
+ ;
+ ; compile with command line option
+ ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm
+
+ ; if you define NO_GZIP (see inflate.h), compile with
+ ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
+
+
+ ; zlib122sup is 0 fort zlib 1.2.2.1 and lower
+ ; zlib122sup is 8 fort zlib 1.2.2.2 and more (with addition of dmax and head
+ ; in inflate_state in inflate.h)
+ = 00000008 zlib1222sup equ 8
+
+
+ IFDEF GUNZIP
+ ELSE
+ IFNDEF NO_GUNZIP
+ = 0000000B INFLATE_MODE_TYPE equ 11
+ = 0000001A INFLATE_MODE_BAD equ 26
+ ELSE
+ ENDIF
+ ENDIF
+
+
+ ; 75 "inffast.S"
+ ;FILE "inffast.S"
+
+ ;;;GLOBAL _inflate_fast
+
+ ;;;SECTION .text
+
+
+
+ .586p
+ .mmx
+
+ name inflate_fast_x86
+ .MODEL FLAT
+
+ 00000000 _DATA segment
+ 00000000 inflate_fast_use_mmx:
+ 00000000 00000001 dd 1
+
+
+ 00000000 _TEXT segment
+
+
+
+ ALIGN 4
+ 00000000 46 61 73 74 20 db 'Fast decoding Code from Chris Anderson'
+ 64 65 63 6F 64
+ 69 6E 67 20 43
+ 6F 64 65 20 66
+ 72 6F 6D 20 43
+ 68 72 69 73 20
+ 41 6E 64 65 72
+ 73 6F 6E
+ 00000026 00 db 0
+
+ ALIGN 4
+ 00000028 invalid_literal_length_code_msg:
+ 00000028 69 6E 76 61 6C db 'invalid literal/length code'
+ 69 64 20 6C 69
+ 74 65 72 61 6C
+ 2F 6C 65 6E 67
+ 74 68 20 63 6F
+ 64 65
+ 00000043 00 db 0
+
+ ALIGN 4
+ 00000044 invalid_distance_code_msg:
+ 00000044 69 6E 76 61 6C db 'invalid distance code'
+ 69 64 20 64 69
+ 73 74 61 6E 63
+ 65 20 63 6F 64
+ 65
+ 00000059 00 db 0
+
+ ALIGN 4
+ 0000005C invalid_distance_too_far_msg:
+ 0000005C 69 6E 76 61 6C db 'invalid distance too far back'
+ 69 64 20 64 69
+ 73 74 61 6E 63
+ 65 20 74 6F 6F
+ 20 66 61 72 20
+ 62 61 63 6B
+ 00000079 00 db 0
+
+
+ ALIGN 4
+ 0000007C inflate_fast_mask:
+ 0000007C 00000000 dd 0
+ 00000080 00000001 dd 1
+ 00000084 00000003 dd 3
+ 00000088 00000007 dd 7
+ 0000008C 0000000F dd 15
+ 00000090 0000001F dd 31
+ 00000094 0000003F dd 63
+ 00000098 0000007F dd 127
+ 0000009C 000000FF dd 255
+ 000000A0 000001FF dd 511
+ 000000A4 000003FF dd 1023
+ 000000A8 000007FF dd 2047
+ 000000AC 00000FFF dd 4095
+ 000000B0 00001FFF dd 8191
+ 000000B4 00003FFF dd 16383
+ 000000B8 00007FFF dd 32767
+ 000000BC 0000FFFF dd 65535
+ 000000C0 0001FFFF dd 131071
+ 000000C4 0003FFFF dd 262143
+ 000000C8 0007FFFF dd 524287
+ 000000CC 000FFFFF dd 1048575
+ 000000D0 001FFFFF dd 2097151
+ 000000D4 003FFFFF dd 4194303
+ 000000D8 007FFFFF dd 8388607
+ 000000DC 00FFFFFF dd 16777215
+ 000000E0 01FFFFFF dd 33554431
+ 000000E4 03FFFFFF dd 67108863
+ 000000E8 07FFFFFF dd 134217727
+ 000000EC 0FFFFFFF dd 268435455
+ 000000F0 1FFFFFFF dd 536870911
+ 000000F4 3FFFFFFF dd 1073741823
+ 000000F8 7FFFFFFF dd 2147483647
+ 000000FC FFFFFFFF dd 4294967295
+
+
+ = 00000000 mode_state equ 0 ;/* state->mode */
+ = 00000028 wsize_state equ (32+zlib1222sup) ;/* state->wsize */
+ = 00000030 write_state equ (36+4+zlib1222sup) ;/* state->write */
+ = 00000034 window_state equ (40+4+zlib1222sup) ;/* state->window */
+ = 00000038 hold_state equ (44+4+zlib1222sup) ;/* state->hold */
+ = 0000003C bits_state equ (48+4+zlib1222sup) ;/* state->bits */
+ = 0000004C lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */
+ = 00000050 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */
+ = 00000054 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */
+ = 00000058 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */
+
+
+ ;;SECTION .text
+ ; 205 "inffast.S"
+ ;GLOBAL inflate_fast_use_mmx
+
+ ;SECTION .data
+
+
+ ; GLOBAL inflate_fast_use_mmx:object
+ ;.size inflate_fast_use_mmx, 4
+ ; 226 "inffast.S"
+ ;SECTION .text
+
+ ALIGN 4
+ 00000100 _inflate_fast proc near
+ 00000100 .FPO (16, 4, 0, 0, 1, 0)
+ 00000100 57 push edi
+ 00000101 56 push esi
+ 00000102 55 push ebp
+ 00000103 53 push ebx
+ 00000104 9C pushfd
+ 00000105 83 EC 40 sub esp,64
+ 00000108 FC cld
+
+
+
+
+ 00000109 8B 74 24 58 mov esi, [esp+88]
+ 0000010D 8B 7E 1C mov edi, [esi+28]
+
+
+
+
+
+
+
+ 00000110 8B 56 04 mov edx, [esi+4]
+ 00000113 8B 06 mov eax, [esi+0]
+
+ 00000115 03 D0 add edx,eax
+ 00000117 83 EA 0B sub edx,11
+
+ 0000011A 89 44 24 2C mov [esp+44],eax
+ 0000011E 89 54 24 14 mov [esp+20],edx
+
+ 00000122 8B 6C 24 5C mov ebp, [esp+92]
+ 00000126 8B 4E 10 mov ecx, [esi+16]
+ 00000129 8B 5E 0C mov ebx, [esi+12]
+
+ 0000012C 2B E9 sub ebp,ecx
+ 0000012E F7 DD neg ebp
+ 00000130 03 EB add ebp,ebx
+
+ 00000132 81 E9 00000101 sub ecx,257
+ 00000138 03 CB add ecx,ebx
+
+ 0000013A 89 5C 24 3C mov [esp+60],ebx
+ 0000013E 89 6C 24 28 mov [esp+40],ebp
+ 00000142 89 4C 24 10 mov [esp+16],ecx
+ ; 285 "inffast.S"
+ 00000146 8B 47 4C mov eax, [edi+lencode_state]
+ 00000149 8B 4F 50 mov ecx, [edi+distcode_state]
+
+ 0000014C 89 44 24 08 mov [esp+8],eax
+ 00000150 89 4C 24 0C mov [esp+12],ecx
+
+ 00000154 B8 00000001 mov eax,1
+ 00000159 8B 4F 54 mov ecx, [edi+lenbits_state]
+ 0000015C D3 E0 shl eax,cl
+ 0000015E 48 dec eax
+ 0000015F 89 04 24 mov [esp+0],eax
+
+ 00000162 B8 00000001 mov eax,1
+ 00000167 8B 4F 58 mov ecx, [edi+distbits_state]
+ 0000016A D3 E0 shl eax,cl
+ 0000016C 48 dec eax
+ 0000016D 89 44 24 04 mov [esp+4],eax
+
+ 00000171 8B 47 28 mov eax, [edi+wsize_state]
+ 00000174 8B 4F 30 mov ecx, [edi+write_state]
+ 00000177 8B 57 34 mov edx, [edi+window_state]
+
+ 0000017A 89 44 24 34 mov [esp+52],eax
+ 0000017E 89 4C 24 30 mov [esp+48],ecx
+ 00000182 89 54 24 38 mov [esp+56],edx
+
+ 00000186 8B 6F 38 mov ebp, [edi+hold_state]
+ 00000189 8B 5F 3C mov ebx, [edi+bits_state]
+ ; 321 "inffast.S"
+ 0000018C 8B 74 24 2C mov esi, [esp+44]
+ 00000190 8B 4C 24 14 mov ecx, [esp+20]
+ 00000194 3B CE cmp ecx,esi
+ 00000196 77 22 ja L_align_long
+
+ 00000198 83 C1 0B add ecx,11
+ 0000019B 2B CE sub ecx,esi
+ 0000019D B8 0000000C mov eax,12
+ 000001A2 2B C1 sub eax,ecx
+ 000001A4 8D 7C 24 1C lea edi, [esp+28]
+ 000001A8 F3/ A4 rep movsb
+ 000001AA 8B C8 mov ecx,eax
+ 000001AC 33 C0 xor eax,eax
+ 000001AE F3/ AA rep stosb
+ 000001B0 8D 74 24 1C lea esi, [esp+28]
+ 000001B4 89 74 24 14 mov [esp+20],esi
+ 000001B8 EB 18 jmp L_is_aligned
+
+
+ 000001BA L_align_long:
+ 000001BA F7 C6 00000003 test esi,3
+ 000001C0 74 10 jz L_is_aligned
+ 000001C2 33 C0 xor eax,eax
+ 000001C4 8A 06 mov al, [esi]
+ 000001C6 46 inc esi
+ 000001C7 8B CB mov ecx,ebx
+ 000001C9 83 C3 08 add ebx,8
+ 000001CC D3 E0 shl eax,cl
+ 000001CE 0B E8 or ebp,eax
+ 000001D0 EB E8 jmp L_align_long
+
+ 000001D2 L_is_aligned:
+ 000001D2 8B 7C 24 3C mov edi, [esp+60]
+ ; 366 "inffast.S"
+ 000001D6 L_check_mmx:
+ 000001D6 83 3D 00000000 R cmp dword ptr [inflate_fast_use_mmx],2
+ 02
+ 000001DD 0F 84 00000289 je L_init_mmx
+ 000001E3 77 6B ja L_do_loop
+
+ 000001E5 50 push eax
+ 000001E6 53 push ebx
+ 000001E7 51 push ecx
+ 000001E8 52 push edx
+ 000001E9 9C pushfd
+ 000001EA 8B 04 24 mov eax, [esp]
+ 000001ED 81 34 24 xor dword ptr [esp],0200000h
+ 00200000
+
+
+
+
+ 000001F4 9D popfd
+ 000001F5 9C pushfd
+ 000001F6 5A pop edx
+ 000001F7 33 D0 xor edx,eax
+ 000001F9 74 44 jz L_dont_use_mmx
+ 000001FB 33 C0 xor eax,eax
+ 000001FD 0F A2 cpuid
+ 000001FF 81 FB 756E6547 cmp ebx,0756e6547h
+ 00000205 75 38 jne L_dont_use_mmx
+ 00000207 81 F9 6C65746E cmp ecx,06c65746eh
+ 0000020D 75 30 jne L_dont_use_mmx
+ 0000020F 81 FA 49656E69 cmp edx,049656e69h
+ 00000215 75 28 jne L_dont_use_mmx
+ 00000217 B8 00000001 mov eax,1
+ 0000021C 0F A2 cpuid
+ 0000021E C1 E8 08 shr eax,8
+ 00000221 83 E0 0F and eax,15
+ 00000224 83 F8 06 cmp eax,6
+ 00000227 75 16 jne L_dont_use_mmx
+ 00000229 F7 C2 00800000 test edx,0800000h
+ 0000022F 75 02 jnz L_use_mmx
+ 00000231 EB 0C jmp L_dont_use_mmx
+ 00000233 L_use_mmx:
+ 00000233 C7 05 00000000 R mov dword ptr [inflate_fast_use_mmx],2
+ 00000002
+ 0000023D EB 0A jmp L_check_mmx_pop
+ 0000023F L_dont_use_mmx:
+ 0000023F C7 05 00000000 R mov dword ptr [inflate_fast_use_mmx],3
+ 00000003
+ 00000249 L_check_mmx_pop:
+ 00000249 5A pop edx
+ 0000024A 59 pop ecx
+ 0000024B 5B pop ebx
+ 0000024C 58 pop eax
+ 0000024D EB 87 jmp L_check_mmx
+ ; 426 "inffast.S"
+ ALIGN 4
+ 00000250 L_do_loop:
+ ; 437 "inffast.S"
+ 00000250 80 FB 0F cmp bl,15
+ 00000253 77 0D ja L_get_length_code
+
+ 00000255 33 C0 xor eax,eax
+ 00000257 66| AD lodsw
+ 00000259 8A CB mov cl,bl
+ 0000025B 80 C3 10 add bl,16
+ 0000025E D3 E0 shl eax,cl
+ 00000260 0B E8 or ebp,eax
+
+ 00000262 L_get_length_code:
+ 00000262 8B 14 24 mov edx, [esp+0]
+ 00000265 8B 4C 24 08 mov ecx, [esp+8]
+ 00000269 23 D5 and edx,ebp
+ 0000026B 8B 04 91 mov eax, [ecx+edx*4]
+
+ 0000026E L_dolen:
+
+
+
+
+
+
+ 0000026E 8A CC mov cl,ah
+ 00000270 2A DC sub bl,ah
+ 00000272 D3 ED shr ebp,cl
+
+
+
+
+
+
+ 00000274 84 C0 test al,al
+ 00000276 75 19 jnz L_test_for_length_base
+
+ 00000278 C1 E8 10 shr eax,16
+ 0000027B AA stosb
+
+ 0000027C L_while_test:
+
+
+ 0000027C 39 7C 24 10 cmp [esp+16],edi
+ 00000280 0F 86 00000462 jbe L_break_loop
+
+ 00000286 39 74 24 14 cmp [esp+20],esi
+ 0000028A 77 C4 ja L_do_loop
+ 0000028C E9 00000457 jmp L_break_loop
+
+ 00000291 L_test_for_length_base:
+ ; 502 "inffast.S"
+ 00000291 8B D0 mov edx,eax
+ 00000293 C1 EA 10 shr edx,16
+ 00000296 8A C8 mov cl,al
+
+ 00000298 A8 10 test al,16
+ 0000029A 0F 84 000000F4 jz L_test_for_second_level_length
+ 000002A0 80 E1 0F and cl,15
+ 000002A3 74 25 jz L_save_len
+ 000002A5 3A D9 cmp bl,cl
+ 000002A7 73 11 jae L_add_bits_to_len
+
+ 000002A9 8A E9 mov ch,cl
+ 000002AB 33 C0 xor eax,eax
+ 000002AD 66| AD lodsw
+ 000002AF 8A CB mov cl,bl
+ 000002B1 80 C3 10 add bl,16
+ 000002B4 D3 E0 shl eax,cl
+ 000002B6 0B E8 or ebp,eax
+ 000002B8 8A CD mov cl,ch
+
+ 000002BA L_add_bits_to_len:
+ 000002BA B8 00000001 mov eax,1
+ 000002BF D3 E0 shl eax,cl
+ 000002C1 48 dec eax
+ 000002C2 2A D9 sub bl,cl
+ 000002C4 23 C5 and eax,ebp
+ 000002C6 D3 ED shr ebp,cl
+ 000002C8 03 D0 add edx,eax
+
+ 000002CA L_save_len:
+ 000002CA 89 54 24 18 mov [esp+24],edx
+
+
+ 000002CE L_decode_distance:
+ ; 549 "inffast.S"
+ 000002CE 80 FB 0F cmp bl,15
+ 000002D1 77 0D ja L_get_distance_code
+
+ 000002D3 33 C0 xor eax,eax
+ 000002D5 66| AD lodsw
+ 000002D7 8A CB mov cl,bl
+ 000002D9 80 C3 10 add bl,16
+ 000002DC D3 E0 shl eax,cl
+ 000002DE 0B E8 or ebp,eax
+
+ 000002E0 L_get_distance_code:
+ 000002E0 8B 54 24 04 mov edx, [esp+4]
+ 000002E4 8B 4C 24 0C mov ecx, [esp+12]
+ 000002E8 23 D5 and edx,ebp
+ 000002EA 8B 04 91 mov eax, [ecx+edx*4]
+
+
+ 000002ED L_dodist:
+ 000002ED 8B D0 mov edx,eax
+ 000002EF C1 EA 10 shr edx,16
+ 000002F2 8A CC mov cl,ah
+ 000002F4 2A DC sub bl,ah
+ 000002F6 D3 ED shr ebp,cl
+ ; 584 "inffast.S"
+ 000002F8 8A C8 mov cl,al
+
+ 000002FA A8 10 test al,16
+ 000002FC 0F 84 000000B2 jz L_test_for_second_level_dist
+ 00000302 80 E1 0F and cl,15
+ 00000305 74 65 jz L_check_dist_one
+ 00000307 3A D9 cmp bl,cl
+ 00000309 73 11 jae L_add_bits_to_dist
+
+ 0000030B 8A E9 mov ch,cl
+ 0000030D 33 C0 xor eax,eax
+ 0000030F 66| AD lodsw
+ 00000311 8A CB mov cl,bl
+ 00000313 80 C3 10 add bl,16
+ 00000316 D3 E0 shl eax,cl
+ 00000318 0B E8 or ebp,eax
+ 0000031A 8A CD mov cl,ch
+
+ 0000031C L_add_bits_to_dist:
+ 0000031C B8 00000001 mov eax,1
+ 00000321 D3 E0 shl eax,cl
+ 00000323 48 dec eax
+ 00000324 2A D9 sub bl,cl
+ 00000326 23 C5 and eax,ebp
+ 00000328 D3 ED shr ebp,cl
+ 0000032A 03 D0 add edx,eax
+ 0000032C EB 00 jmp L_check_window
+
+ 0000032E L_check_window:
+ ; 625 "inffast.S"
+ 0000032E 89 74 24 2C mov [esp+44],esi
+ 00000332 8B C7 mov eax,edi
+ 00000334 2B 44 24 28 sub eax, [esp+40]
+
+ 00000338 3B C2 cmp eax,edx
+ 0000033A 0F 82 00000094 jb L_clip_window
+
+ 00000340 8B 4C 24 18 mov ecx, [esp+24]
+ 00000344 8B F7 mov esi,edi
+ 00000346 2B F2 sub esi,edx
+
+ 00000348 83 E9 03 sub ecx,3
+ 0000034B 8A 06 mov al, [esi]
+ 0000034D 88 07 mov [edi],al
+ 0000034F 8A 46 01 mov al, [esi+1]
+ 00000352 8A 56 02 mov dl, [esi+2]
+ 00000355 83 C6 03 add esi,3
+ 00000358 88 47 01 mov [edi+1],al
+ 0000035B 88 57 02 mov [edi+2],dl
+ 0000035E 83 C7 03 add edi,3
+ 00000361 F3/ A4 rep movsb
+
+ 00000363 8B 74 24 2C mov esi, [esp+44]
+ 00000367 E9 FFFFFF10 jmp L_while_test
+
+ ALIGN 4
+ 0000036C L_check_dist_one:
+ 0000036C 83 FA 01 cmp edx,1
+ 0000036F 75 BD jne L_check_window
+ 00000371 39 7C 24 28 cmp [esp+40],edi
+ 00000375 74 B7 je L_check_window
+
+ 00000377 4F dec edi
+ 00000378 8B 4C 24 18 mov ecx, [esp+24]
+ 0000037C 8A 07 mov al, [edi]
+ 0000037E 83 E9 03 sub ecx,3
+
+ 00000381 88 47 01 mov [edi+1],al
+ 00000384 88 47 02 mov [edi+2],al
+ 00000387 88 47 03 mov [edi+3],al
+ 0000038A 83 C7 04 add edi,4
+ 0000038D F3/ AA rep stosb
+
+ 0000038F E9 FFFFFEE8 jmp L_while_test
+
+ ALIGN 4
+ 00000394 L_test_for_second_level_length:
+
+
+
+
+ 00000394 A8 40 test al,64
+ 00000396 0F 85 0000030E jnz L_test_for_end_of_block
+
+ 0000039C B8 00000001 mov eax,1
+ 000003A1 D3 E0 shl eax,cl
+ 000003A3 48 dec eax
+ 000003A4 23 C5 and eax,ebp
+ 000003A6 03 C2 add eax,edx
+ 000003A8 8B 54 24 08 mov edx, [esp+8]
+ 000003AC 8B 04 82 mov eax, [edx+eax*4]
+ 000003AF E9 FFFFFEBA jmp L_dolen
+
+ ALIGN 4
+ 000003B4 L_test_for_second_level_dist:
+
+
+
+
+ 000003B4 A8 40 test al,64
+ 000003B6 0F 85 000002E2 jnz L_invalid_distance_code
+
+ 000003BC B8 00000001 mov eax,1
+ 000003C1 D3 E0 shl eax,cl
+ 000003C3 48 dec eax
+ 000003C4 23 C5 and eax,ebp
+ 000003C6 03 C2 add eax,edx
+ 000003C8 8B 54 24 0C mov edx, [esp+12]
+ 000003CC 8B 04 82 mov eax, [edx+eax*4]
+ 000003CF E9 FFFFFF19 jmp L_dodist
+
+ ALIGN 4
+ 000003D4 L_clip_window:
+ ; 721 "inffast.S"
+ 000003D4 8B C8 mov ecx,eax
+ 000003D6 8B 44 24 34 mov eax, [esp+52]
+ 000003DA F7 D9 neg ecx
+ 000003DC 8B 74 24 38 mov esi, [esp+56]
+
+ 000003E0 3B C2 cmp eax,edx
+ 000003E2 0F 82 000002DE jb L_invalid_distance_too_far
+
+ 000003E8 03 CA add ecx,edx
+ 000003EA 83 7C 24 30 00 cmp dword ptr [esp+48],0
+ 000003EF 75 24 jne L_wrap_around_window
+
+ 000003F1 2B C1 sub eax,ecx
+ 000003F3 03 F0 add esi,eax
+ ; 749 "inffast.S"
+ 000003F5 8B 44 24 18 mov eax, [esp+24]
+ 000003F9 3B C1 cmp eax,ecx
+ 000003FB 76 60 jbe L_do_copy1
+
+ 000003FD 2B C1 sub eax,ecx
+ 000003FF F3/ A4 rep movsb
+ 00000401 8B F7 mov esi,edi
+ 00000403 2B F2 sub esi,edx
+ 00000405 EB 56 jmp L_do_copy1
+
+ 00000407 3B C1 cmp eax,ecx
+ 00000409 76 52 jbe L_do_copy1
+
+ 0000040B 2B C1 sub eax,ecx
+ 0000040D F3/ A4 rep movsb
+ 0000040F 8B F7 mov esi,edi
+ 00000411 2B F2 sub esi,edx
+ 00000413 EB 48 jmp L_do_copy1
+
+ 00000415 L_wrap_around_window:
+ ; 793 "inffast.S"
+ 00000415 8B 44 24 30 mov eax, [esp+48]
+ 00000419 3B C8 cmp ecx,eax
+ 0000041B 76 2C jbe L_contiguous_in_window
+
+ 0000041D 03 74 24 34 add esi, [esp+52]
+ 00000421 03 F0 add esi,eax
+ 00000423 2B F1 sub esi,ecx
+ 00000425 2B C8 sub ecx,eax
+
+
+ 00000427 8B 44 24 18 mov eax, [esp+24]
+ 0000042B 3B C1 cmp eax,ecx
+ 0000042D 76 2E jbe L_do_copy1
+
+ 0000042F 2B C1 sub eax,ecx
+ 00000431 F3/ A4 rep movsb
+ 00000433 8B 74 24 38 mov esi, [esp+56]
+ 00000437 8B 4C 24 30 mov ecx, [esp+48]
+ 0000043B 3B C1 cmp eax,ecx
+ 0000043D 76 1E jbe L_do_copy1
+
+ 0000043F 2B C1 sub eax,ecx
+ 00000441 F3/ A4 rep movsb
+ 00000443 8B F7 mov esi,edi
+ 00000445 2B F2 sub esi,edx
+ 00000447 EB 14 jmp L_do_copy1
+
+ 00000449 L_contiguous_in_window:
+ ; 836 "inffast.S"
+ 00000449 03 F0 add esi,eax
+ 0000044B 2B F1 sub esi,ecx
+
+
+ 0000044D 8B 44 24 18 mov eax, [esp+24]
+ 00000451 3B C1 cmp eax,ecx
+ 00000453 76 08 jbe L_do_copy1
+
+ 00000455 2B C1 sub eax,ecx
+ 00000457 F3/ A4 rep movsb
+ 00000459 8B F7 mov esi,edi
+ 0000045B 2B F2 sub esi,edx
+
+ 0000045D L_do_copy1:
+ ; 862 "inffast.S"
+ 0000045D 8B C8 mov ecx,eax
+ 0000045F F3/ A4 rep movsb
+
+ 00000461 8B 74 24 2C mov esi, [esp+44]
+ 00000465 E9 FFFFFE12 jmp L_while_test
+ ; 878 "inffast.S"
+ ALIGN 4
+ 0000046C L_init_mmx:
+ 0000046C 0F 77 emms
+
+
+
+
+
+ 0000046E 0F 6E C5 movd mm0,ebp
+ 00000471 8B EB mov ebp,ebx
+ ; 896 "inffast.S"
+ 00000473 0F 6E 24 24 movd mm4,dword ptr [esp+0]
+ 00000477 0F 7F E3 movq mm3,mm4
+ 0000047A 0F 6E 6C 24 04 movd mm5,dword ptr [esp+4]
+ 0000047F 0F 7F EA movq mm2,mm5
+ 00000482 0F EF C9 pxor mm1,mm1
+ 00000485 8B 5C 24 08 mov ebx, [esp+8]
+ 00000489 EB 01 jmp L_do_loop_mmx
+
+ ALIGN 4
+ 0000048C L_do_loop_mmx:
+ 0000048C 0F D3 C1 psrlq mm0,mm1
+
+ 0000048F 83 FD 20 cmp ebp,32
+ 00000492 77 12 ja L_get_length_code_mmx
+
+ 00000494 0F 6E F5 movd mm6,ebp
+ 00000497 0F 6E 3E movd mm7,dword ptr [esi]
+ 0000049A 83 C6 04 add esi,4
+ 0000049D 0F F3 FE psllq mm7,mm6
+ 000004A0 83 C5 20 add ebp,32
+ 000004A3 0F EB C7 por mm0,mm7
+
+ 000004A6 L_get_length_code_mmx:
+ 000004A6 0F DB E0 pand mm4,mm0
+ 000004A9 0F 7E E0 movd eax,mm4
+ 000004AC 0F 7F DC movq mm4,mm3
+ 000004AF 8B 04 83 mov eax, [ebx+eax*4]
+
+ 000004B2 L_dolen_mmx:
+ 000004B2 0F B6 CC movzx ecx,ah
+ 000004B5 0F 6E C9 movd mm1,ecx
+ 000004B8 2B E9 sub ebp,ecx
+
+ 000004BA 84 C0 test al,al
+ 000004BC 75 19 jnz L_test_for_length_base_mmx
+
+ 000004BE C1 E8 10 shr eax,16
+ 000004C1 AA stosb
+
+ 000004C2 L_while_test_mmx:
+
+
+ 000004C2 39 7C 24 10 cmp [esp+16],edi
+ 000004C6 0F 86 0000021C jbe L_break_loop
+
+ 000004CC 39 74 24 14 cmp [esp+20],esi
+ 000004D0 77 BA ja L_do_loop_mmx
+ 000004D2 E9 00000211 jmp L_break_loop
+
+ 000004D7 L_test_for_length_base_mmx:
+
+ 000004D7 8B D0 mov edx,eax
+ 000004D9 C1 EA 10 shr edx,16
+
+ 000004DC A8 10 test al,16
+ 000004DE 0F 84 000000E0 jz L_test_for_second_level_length_mmx
+ 000004E4 83 E0 0F and eax,15
+ 000004E7 74 14 jz L_decode_distance_mmx
+
+ 000004E9 0F D3 C1 psrlq mm0,mm1
+ 000004EC 0F 6E C8 movd mm1,eax
+ 000004EF 0F 7E C1 movd ecx,mm0
+ 000004F2 2B E8 sub ebp,eax
+ 000004F4 23 0C 85 and ecx, [inflate_fast_mask+eax*4]
+ 0000007C R
+ 000004FB 03 D1 add edx,ecx
+
+ 000004FD L_decode_distance_mmx:
+ 000004FD 0F D3 C1 psrlq mm0,mm1
+
+ 00000500 83 FD 20 cmp ebp,32
+ 00000503 77 12 ja L_get_dist_code_mmx
+
+ 00000505 0F 6E F5 movd mm6,ebp
+ 00000508 0F 6E 3E movd mm7,dword ptr [esi]
+ 0000050B 83 C6 04 add esi,4
+ 0000050E 0F F3 FE psllq mm7,mm6
+ 00000511 83 C5 20 add ebp,32
+ 00000514 0F EB C7 por mm0,mm7
+
+ 00000517 L_get_dist_code_mmx:
+ 00000517 8B 5C 24 0C mov ebx, [esp+12]
+ 0000051B 0F DB E8 pand mm5,mm0
+ 0000051E 0F 7E E8 movd eax,mm5
+ 00000521 0F 7F D5 movq mm5,mm2
+ 00000524 8B 04 83 mov eax, [ebx+eax*4]
+
+ 00000527 L_dodist_mmx:
+
+ 00000527 0F B6 CC movzx ecx,ah
+ 0000052A 8B D8 mov ebx,eax
+ 0000052C C1 EB 10 shr ebx,16
+ 0000052F 2B E9 sub ebp,ecx
+ 00000531 0F 6E C9 movd mm1,ecx
+
+ 00000534 A8 10 test al,16
+ 00000536 0F 84 000000AC jz L_test_for_second_level_dist_mmx
+ 0000053C 83 E0 0F and eax,15
+ 0000053F 74 57 jz L_check_dist_one_mmx
+
+ 00000541 L_add_bits_to_dist_mmx:
+ 00000541 0F D3 C1 psrlq mm0,mm1
+ 00000544 0F 6E C8 movd mm1,eax
+ 00000547 0F 7E C1 movd ecx,mm0
+ 0000054A 2B E8 sub ebp,eax
+ 0000054C 23 0C 85 and ecx, [inflate_fast_mask+eax*4]
+ 0000007C R
+ 00000553 03 D9 add ebx,ecx
+
+ 00000555 L_check_window_mmx:
+ 00000555 89 74 24 2C mov [esp+44],esi
+ 00000559 8B C7 mov eax,edi
+ 0000055B 2B 44 24 28 sub eax, [esp+40]
+
+ 0000055F 3B C3 cmp eax,ebx
+ 00000561 0F 82 000000A9 jb L_clip_window_mmx
+
+ 00000567 8B CA mov ecx,edx
+ 00000569 8B F7 mov esi,edi
+ 0000056B 2B F3 sub esi,ebx
+
+ 0000056D 83 E9 03 sub ecx,3
+ 00000570 8A 06 mov al, [esi]
+ 00000572 88 07 mov [edi],al
+ 00000574 8A 46 01 mov al, [esi+1]
+ 00000577 8A 56 02 mov dl, [esi+2]
+ 0000057A 83 C6 03 add esi,3
+ 0000057D 88 47 01 mov [edi+1],al
+ 00000580 88 57 02 mov [edi+2],dl
+ 00000583 83 C7 03 add edi,3
+ 00000586 F3/ A4 rep movsb
+
+ 00000588 8B 74 24 2C mov esi, [esp+44]
+ 0000058C 8B 5C 24 08 mov ebx, [esp+8]
+ 00000590 E9 FFFFFF2D jmp L_while_test_mmx
+
+ ALIGN 4
+ 00000598 L_check_dist_one_mmx:
+ 00000598 83 FB 01 cmp ebx,1
+ 0000059B 75 B8 jne L_check_window_mmx
+ 0000059D 39 7C 24 28 cmp [esp+40],edi
+ 000005A1 74 B2 je L_check_window_mmx
+
+ 000005A3 4F dec edi
+ 000005A4 8B CA mov ecx,edx
+ 000005A6 8A 07 mov al, [edi]
+ 000005A8 83 E9 03 sub ecx,3
+
+ 000005AB 88 47 01 mov [edi+1],al
+ 000005AE 88 47 02 mov [edi+2],al
+ 000005B1 88 47 03 mov [edi+3],al
+ 000005B4 83 C7 04 add edi,4
+ 000005B7 F3/ AA rep stosb
+
+ 000005B9 8B 5C 24 08 mov ebx, [esp+8]
+ 000005BD E9 FFFFFF00 jmp L_while_test_mmx
+
+ ALIGN 4
+ 000005C4 L_test_for_second_level_length_mmx:
+ 000005C4 A8 40 test al,64
+ 000005C6 0F 85 000000DE jnz L_test_for_end_of_block
+
+ 000005CC 83 E0 0F and eax,15
+ 000005CF 0F D3 C1 psrlq mm0,mm1
+ 000005D2 0F 7E C1 movd ecx,mm0
+ 000005D5 23 0C 85 and ecx, [inflate_fast_mask+eax*4]
+ 0000007C R
+ 000005DC 03 CA add ecx,edx
+ 000005DE 8B 04 8B mov eax, [ebx+ecx*4]
+ 000005E1 E9 FFFFFECC jmp L_dolen_mmx
+
+ ALIGN 4
+ 000005E8 L_test_for_second_level_dist_mmx:
+ 000005E8 A8 40 test al,64
+ 000005EA 0F 85 000000AE jnz L_invalid_distance_code
+
+ 000005F0 83 E0 0F and eax,15
+ 000005F3 0F D3 C1 psrlq mm0,mm1
+ 000005F6 0F 7E C1 movd ecx,mm0
+ 000005F9 23 0C 85 and ecx, [inflate_fast_mask+eax*4]
+ 0000007C R
+ 00000600 8B 44 24 0C mov eax, [esp+12]
+ 00000604 03 CB add ecx,ebx
+ 00000606 8B 04 88 mov eax, [eax+ecx*4]
+ 00000609 E9 FFFFFF19 jmp L_dodist_mmx
+
+ ALIGN 4
+ 00000610 L_clip_window_mmx:
+
+ 00000610 8B C8 mov ecx,eax
+ 00000612 8B 44 24 34 mov eax, [esp+52]
+ 00000616 F7 D9 neg ecx
+ 00000618 8B 74 24 38 mov esi, [esp+56]
+
+ 0000061C 3B C3 cmp eax,ebx
+ 0000061E 0F 82 000000A2 jb L_invalid_distance_too_far
+
+ 00000624 03 CB add ecx,ebx
+ 00000626 83 7C 24 30 00 cmp dword ptr [esp+48],0
+ 0000062B 75 20 jne L_wrap_around_window_mmx
+
+ 0000062D 2B C1 sub eax,ecx
+ 0000062F 03 F0 add esi,eax
+
+ 00000631 3B D1 cmp edx,ecx
+ 00000633 76 58 jbe L_do_copy1_mmx
+
+ 00000635 2B D1 sub edx,ecx
+ 00000637 F3/ A4 rep movsb
+ 00000639 8B F7 mov esi,edi
+ 0000063B 2B F3 sub esi,ebx
+ 0000063D EB 4E jmp L_do_copy1_mmx
+
+ 0000063F 3B D1 cmp edx,ecx
+ 00000641 76 4A jbe L_do_copy1_mmx
+
+ 00000643 2B D1 sub edx,ecx
+ 00000645 F3/ A4 rep movsb
+ 00000647 8B F7 mov esi,edi
+ 00000649 2B F3 sub esi,ebx
+ 0000064B EB 40 jmp L_do_copy1_mmx
+
+ 0000064D L_wrap_around_window_mmx:
+
+ 0000064D 8B 44 24 30 mov eax, [esp+48]
+ 00000651 3B C8 cmp ecx,eax
+ 00000653 76 28 jbe L_contiguous_in_window_mmx
+
+ 00000655 03 74 24 34 add esi, [esp+52]
+ 00000659 03 F0 add esi,eax
+ 0000065B 2B F1 sub esi,ecx
+ 0000065D 2B C8 sub ecx,eax
+
+
+ 0000065F 3B D1 cmp edx,ecx
+ 00000661 76 2A jbe L_do_copy1_mmx
+
+ 00000663 2B D1 sub edx,ecx
+ 00000665 F3/ A4 rep movsb
+ 00000667 8B 74 24 38 mov esi, [esp+56]
+ 0000066B 8B 4C 24 30 mov ecx, [esp+48]
+ 0000066F 3B D1 cmp edx,ecx
+ 00000671 76 1A jbe L_do_copy1_mmx
+
+ 00000673 2B D1 sub edx,ecx
+ 00000675 F3/ A4 rep movsb
+ 00000677 8B F7 mov esi,edi
+ 00000679 2B F3 sub esi,ebx
+ 0000067B EB 10 jmp L_do_copy1_mmx
+
+ 0000067D L_contiguous_in_window_mmx:
+
+ 0000067D 03 F0 add esi,eax
+ 0000067F 2B F1 sub esi,ecx
+
+
+ 00000681 3B D1 cmp edx,ecx
+ 00000683 76 08 jbe L_do_copy1_mmx
+
+ 00000685 2B D1 sub edx,ecx
+ 00000687 F3/ A4 rep movsb
+ 00000689 8B F7 mov esi,edi
+ 0000068B 2B F3 sub esi,ebx
+
+ 0000068D L_do_copy1_mmx:
+
+
+ 0000068D 8B CA mov ecx,edx
+ 0000068F F3/ A4 rep movsb
+
+ 00000691 8B 74 24 2C mov esi, [esp+44]
+ 00000695 8B 5C 24 08 mov ebx, [esp+8]
+ 00000699 E9 FFFFFE24 jmp L_while_test_mmx
+ ; 1174 "inffast.S"
+ 0000069E L_invalid_distance_code:
+
+
+
+
+
+ 0000069E B9 00000044 R mov ecx, invalid_distance_code_msg
+ 000006A3 BA 0000001A mov edx,INFLATE_MODE_BAD
+ 000006A8 EB 2C jmp L_update_stream_state
+
+ 000006AA L_test_for_end_of_block:
+
+
+
+
+
+ 000006AA A8 20 test al,32
+ 000006AC 74 0C jz L_invalid_literal_length_code
+
+ 000006AE B9 00000000 mov ecx,0
+ 000006B3 BA 0000000B mov edx,INFLATE_MODE_TYPE
+ 000006B8 EB 1C jmp L_update_stream_state
+
+ 000006BA L_invalid_literal_length_code:
+
+
+
+
+
+ 000006BA B9 00000028 R mov ecx, invalid_literal_length_code_msg
+ 000006BF BA 0000001A mov edx,INFLATE_MODE_BAD
+ 000006C4 EB 10 jmp L_update_stream_state
+
+ 000006C6 L_invalid_distance_too_far:
+
+
+
+ 000006C6 8B 74 24 2C mov esi, [esp+44]
+ 000006CA B9 0000005C R mov ecx, invalid_distance_too_far_msg
+ 000006CF BA 0000001A mov edx,INFLATE_MODE_BAD
+ 000006D4 EB 00 jmp L_update_stream_state
+
+ 000006D6 L_update_stream_state:
+
+ 000006D6 8B 44 24 58 mov eax, [esp+88]
+ 000006DA 85 C9 test ecx,ecx
+ 000006DC 74 03 jz L_skip_msg
+ 000006DE 89 48 18 mov [eax+24],ecx
+ 000006E1 L_skip_msg:
+ 000006E1 8B 40 1C mov eax, [eax+28]
+ 000006E4 89 10 mov [eax+mode_state],edx
+ 000006E6 EB 00 jmp L_break_loop
+
+ ALIGN 4
+ 000006E8 L_break_loop:
+ ; 1243 "inffast.S"
+ 000006E8 83 3D 00000000 R cmp dword ptr [inflate_fast_use_mmx],2
+ 02
+ 000006EF 75 02 jne L_update_next_in
+
+
+
+ 000006F1 8B DD mov ebx,ebp
+
+ 000006F3 L_update_next_in:
+ ; 1266 "inffast.S"
+ 000006F3 8B 44 24 58 mov eax, [esp+88]
+ 000006F7 8B CB mov ecx,ebx
+ 000006F9 8B 50 1C mov edx, [eax+28]
+ 000006FC C1 E9 03 shr ecx,3
+ 000006FF 2B F1 sub esi,ecx
+ 00000701 C1 E1 03 shl ecx,3
+ 00000704 2B D9 sub ebx,ecx
+ 00000706 89 78 0C mov [eax+12],edi
+ 00000709 89 5A 3C mov [edx+bits_state],ebx
+ 0000070C 8B CB mov ecx,ebx
+
+ 0000070E 8D 5C 24 1C lea ebx, [esp+28]
+ 00000712 39 5C 24 14 cmp [esp+20],ebx
+ 00000716 75 14 jne L_buf_not_used
+
+ 00000718 2B F3 sub esi,ebx
+ 0000071A 8B 18 mov ebx, [eax+0]
+ 0000071C 89 5C 24 14 mov [esp+20],ebx
+ 00000720 03 F3 add esi,ebx
+ 00000722 8B 58 04 mov ebx, [eax+4]
+ 00000725 83 EB 0B sub ebx,11
+ 00000728 01 5C 24 14 add [esp+20],ebx
+
+ 0000072C L_buf_not_used:
+ 0000072C 89 30 mov [eax+0],esi
+
+ 0000072E BB 00000001 mov ebx,1
+ 00000733 D3 E3 shl ebx,cl
+ 00000735 4B dec ebx
+
+
+
+
+
+ 00000736 83 3D 00000000 R cmp dword ptr [inflate_fast_use_mmx],2
+ 02
+ 0000073D 75 08 jne L_update_hold
+
+
+
+ 0000073F 0F D3 C1 psrlq mm0,mm1
+ 00000742 0F 7E C5 movd ebp,mm0
+
+ 00000745 0F 77 emms
+
+ 00000747 L_update_hold:
+
+
+
+ 00000747 23 EB and ebp,ebx
+ 00000749 89 6A 38 mov [edx+hold_state],ebp
+
+
+
+
+ 0000074C 8B 5C 24 14 mov ebx, [esp+20]
+ 00000750 3B DE cmp ebx,esi
+ 00000752 76 0A jbe L_last_is_smaller
+
+ 00000754 2B DE sub ebx,esi
+ 00000756 83 C3 0B add ebx,11
+ 00000759 89 58 04 mov [eax+4],ebx
+ 0000075C EB 0A jmp L_fixup_out
+ 0000075E L_last_is_smaller:
+ 0000075E 2B F3 sub esi,ebx
+ 00000760 F7 DE neg esi
+ 00000762 83 C6 0B add esi,11
+ 00000765 89 70 04 mov [eax+4],esi
+
+
+
+
+ 00000768 L_fixup_out:
+
+ 00000768 8B 5C 24 10 mov ebx, [esp+16]
+ 0000076C 3B DF cmp ebx,edi
+ 0000076E 76 0D jbe L_end_is_smaller
+
+ 00000770 2B DF sub ebx,edi
+ 00000772 81 C3 00000101 add ebx,257
+ 00000778 89 58 10 mov [eax+16],ebx
+ 0000077B EB 0D jmp L_done
+ 0000077D L_end_is_smaller:
+ 0000077D 2B FB sub edi,ebx
+ 0000077F F7 DF neg edi
+ 00000781 81 C7 00000101 add edi,257
+ 00000787 89 78 10 mov [eax+16],edi
+
+
+
+
+
+ 0000078A L_done:
+ 0000078A 83 C4 40 add esp,64
+ 0000078D 9D popfd
+ 0000078E 5B pop ebx
+ 0000078F 5D pop ebp
+ 00000790 5E pop esi
+ 00000791 5F pop edi
+ 00000792 C3 ret
+ 00000793 _inflate_fast endp
+
+ 00000004 _TEXT ends
+ end
+ Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23
+inffas32.asm Symbols 2 - 1
+
+
+
+
+Segments and Groups:
+
+ N a m e Size Length Align Combine Class
+
+FLAT . . . . . . . . . . . . . . GROUP
+_DATA . . . . . . . . . . . . . 32 Bit 00000004 Para Public 'DATA'
+_TEXT . . . . . . . . . . . . . 32 Bit 00000793 Para Public 'CODE'
+
+
+Procedures, parameters, and locals:
+
+ N a m e Type Value Attr
+
+_inflate_fast . . . . . . . . . P Near 00000100 _TEXT Length= 00000693 Public
+ L_align_long . . . . . . . . . L Near 000001BA _TEXT
+ L_is_aligned . . . . . . . . . L Near 000001D2 _TEXT
+ L_check_mmx . . . . . . . . . L Near 000001D6 _TEXT
+ L_use_mmx . . . . . . . . . . L Near 00000233 _TEXT
+ L_dont_use_mmx . . . . . . . . L Near 0000023F _TEXT
+ L_check_mmx_pop . . . . . . . L Near 00000249 _TEXT
+ L_do_loop . . . . . . . . . . L Near 00000250 _TEXT
+ L_get_length_code . . . . . . L Near 00000262 _TEXT
+ L_dolen . . . . . . . . . . . L Near 0000026E _TEXT
+ L_while_test . . . . . . . . . L Near 0000027C _TEXT
+ L_test_for_length_base . . . . L Near 00000291 _TEXT
+ L_add_bits_to_len . . . . . . L Near 000002BA _TEXT
+ L_save_len . . . . . . . . . . L Near 000002CA _TEXT
+ L_decode_distance . . . . . . L Near 000002CE _TEXT
+ L_get_distance_code . . . . . L Near 000002E0 _TEXT
+ L_dodist . . . . . . . . . . . L Near 000002ED _TEXT
+ L_add_bits_to_dist . . . . . . L Near 0000031C _TEXT
+ L_check_window . . . . . . . . L Near 0000032E _TEXT
+ L_check_dist_one . . . . . . . L Near 0000036C _TEXT
+ L_test_for_second_level_length . L Near 00000394 _TEXT
+ L_test_for_second_level_dist . L Near 000003B4 _TEXT
+ L_clip_window . . . . . . . . L Near 000003D4 _TEXT
+ L_wrap_around_window . . . . . L Near 00000415 _TEXT
+ L_contiguous_in_window . . . . L Near 00000449 _TEXT
+ L_do_copy1 . . . . . . . . . . L Near 0000045D _TEXT
+ L_init_mmx . . . . . . . . . . L Near 0000046C _TEXT
+ L_do_loop_mmx . . . . . . . . L Near 0000048C _TEXT
+ L_get_length_code_mmx . . . . L Near 000004A6 _TEXT
+ L_dolen_mmx . . . . . . . . . L Near 000004B2 _TEXT
+ L_while_test_mmx . . . . . . . L Near 000004C2 _TEXT
+ L_test_for_length_base_mmx . . L Near 000004D7 _TEXT
+ L_decode_distance_mmx . . . . L Near 000004FD _TEXT
+ L_get_dist_code_mmx . . . . . L Near 00000517 _TEXT
+ L_dodist_mmx . . . . . . . . . L Near 00000527 _TEXT
+ L_add_bits_to_dist_mmx . . . . L Near 00000541 _TEXT
+ L_check_window_mmx . . . . . . L Near 00000555 _TEXT
+ L_check_dist_one_mmx . . . . . L Near 00000598 _TEXT
+ L_test_for_second_level_length_mmx . L Near 000005C4 _TEXT
+ L_test_for_second_level_dist_mmx . L Near 000005E8 _TEXT
+ L_clip_window_mmx . . . . . . L Near 00000610 _TEXT
+ L_wrap_around_window_mmx . . . L Near 0000064D _TEXT
+ L_contiguous_in_window_mmx . . L Near 0000067D _TEXT
+ L_do_copy1_mmx . . . . . . . . L Near 0000068D _TEXT
+ L_invalid_distance_code . . . L Near 0000069E _TEXT
+ L_test_for_end_of_block . . . L Near 000006AA _TEXT
+ L_invalid_literal_length_code L Near 000006BA _TEXT
+ L_invalid_distance_too_far . . L Near 000006C6 _TEXT
+ L_update_stream_state . . . . L Near 000006D6 _TEXT
+ L_skip_msg . . . . . . . . . . L Near 000006E1 _TEXT
+ L_break_loop . . . . . . . . . L Near 000006E8 _TEXT
+ L_update_next_in . . . . . . . L Near 000006F3 _TEXT
+ L_buf_not_used . . . . . . . . L Near 0000072C _TEXT
+ L_update_hold . . . . . . . . L Near 00000747 _TEXT
+ L_last_is_smaller . . . . . . L Near 0000075E _TEXT
+ L_fixup_out . . . . . . . . . L Near 00000768 _TEXT
+ L_end_is_smaller . . . . . . . L Near 0000077D _TEXT
+ L_done . . . . . . . . . . . . L Near 0000078A _TEXT
+
+
+Symbols:
+
+ N a m e Type Value Attr
+
+@CodeSize . . . . . . . . . . . Number 00000000h
+@DataSize . . . . . . . . . . . Number 00000000h
+@Interface . . . . . . . . . . . Number 00000000h
+@Model . . . . . . . . . . . . . Number 00000007h
+@code . . . . . . . . . . . . . Text _TEXT
+@data . . . . . . . . . . . . . Text FLAT
+@fardata? . . . . . . . . . . . Text FLAT
+@fardata . . . . . . . . . . . . Text FLAT
+@stack . . . . . . . . . . . . . Text FLAT
+INFLATE_MODE_BAD . . . . . . . . Number 0000001Ah
+INFLATE_MODE_TYPE . . . . . . . Number 0000000Bh
+bits_state . . . . . . . . . . . Number 0000003Ch
+distbits_state . . . . . . . . . Number 00000058h
+distcode_state . . . . . . . . . Number 00000050h
+hold_state . . . . . . . . . . . Number 00000038h
+inflate_fast_mask . . . . . . . L Near 0000007C _TEXT
+inflate_fast_use_mmx . . . . . . L Near 00000000 _DATA
+invalid_distance_code_msg . . . L Near 00000044 _TEXT
+invalid_distance_too_far_msg . . L Near 0000005C _TEXT
+invalid_literal_length_code_msg L Near 00000028 _TEXT
+lenbits_state . . . . . . . . . Number 00000054h
+lencode_state . . . . . . . . . Number 0000004Ch
+mode_state . . . . . . . . . . . Number 00000000h
+window_state . . . . . . . . . . Number 00000034h
+write_state . . . . . . . . . . Number 00000030h
+wsize_state . . . . . . . . . . Number 00000028h
+zlib1222sup . . . . . . . . . . Number 00000008h
+
+ 0 Warnings
+ 0 Errors
diff --git a/zlib/contrib/masmx86/match686.asm b/zlib/contrib/masmx86/match686.asm
new file mode 100644
index 0000000..69e0eed
--- /dev/null
+++ b/zlib/contrib/masmx86/match686.asm
@@ -0,0 +1,479 @@
+; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86
+; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
+; File written by Gilles Vollant, by converting match686.S from Brian Raiter
+; for MASM. This is an assembly version of longest_match
+; from Jean-loup Gailly in deflate.c
+;
+; http://www.zlib.net
+; http://www.winimage.com/zLibDll
+; http://www.muppetlabs.com/~breadbox/software/assembly.html
+;
+; For Visual C++ 4.x and higher and ML 6.x and higher
+; ml.exe is distributed in
+; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
+;
+; this file contains a single implementation of longest_match (and an empty match_init)
+;
+; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro
+; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom)
+;
+; for using an assembly version of longest_match, you need define ASMV in project
+;
+; compile the asm file running
+; ml /coff /Zi /c /Flmatch686.lst match686.asm
+; and include match686.obj in your project
+;
+; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
+; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor
+; with autoselect (with cpu detection code)
+; if you want to support the old Pentium optimization, you can still use those versions
+;
+; this file is not optimized for the old Pentium, but it is compatible with all 32-bit x86
+; processors (starting with the 80386)
+;
+;
+; see below : zlib1222add must be adjusted if you use a zlib version < 1.2.2.2
+
+;uInt longest_match(s, cur_match)
+; deflate_state *s;
+; IPos cur_match; /* current match */
+
+ NbStack equ 76 ; base used by every esp-relative name in this group
+ cur_match equ dword ptr[esp+NbStack-0]
+ str_s equ dword ptr[esp+NbStack-4]
+; 5 dwords on top (ret,ebp,esi,edi,ebx)
+ adrret equ dword ptr[esp+NbStack-8]
+ pushebp equ dword ptr[esp+NbStack-12]
+ pushedi equ dword ptr[esp+NbStack-16]
+ pushesi equ dword ptr[esp+NbStack-20]
+ pushebx equ dword ptr[esp+NbStack-24]
+; NOTE: apart from window and scan, none of the names in this group is referenced by the code in this file.
+ chain_length equ dword ptr [esp+NbStack-28]
+ limit equ dword ptr [esp+NbStack-32]
+ best_len equ dword ptr [esp+NbStack-36]
+ window equ dword ptr [esp+NbStack-40]
+ prev equ dword ptr [esp+NbStack-44]
+ scan_start equ word ptr [esp+NbStack-48]
+ wmask equ dword ptr [esp+NbStack-52]
+ match_start_ptr equ dword ptr [esp+NbStack-56]
+ nice_match equ dword ptr [esp+NbStack-60]
+ scan equ dword ptr [esp+NbStack-64]
+; window and scan are given new definitions under "stack frame offsets" further down, before longest_match uses them.
+ windowlen equ dword ptr [esp+NbStack-68]
+ match_start equ dword ptr [esp+NbStack-72]
+ strend equ dword ptr [esp+NbStack-76]
+ NbStackAdd equ (NbStack-24)
+
+ .386p
+
+ name gvmatch
+ .MODEL FLAT
+
+
+
+; all the +zlib1222add offsets are due to the addition of fields
+; in zlib in the deflate_state structure since the asm code was first written
+; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
+; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
+; (if you compile with zlib 1.2.2.2 or later, use "zlib1222add equ 8").
+
+ zlib1222add equ 8 ; added to every deflate_state offset defined in this file (dep_* and ds*)
+; The dep_* names below are not referenced by the code in this file; longest_match uses the ds* offsets.
+; Note : these values are good with a structure packed on an 8-byte boundary
+ dep_chain_length equ 74h+zlib1222add
+ dep_window equ 30h+zlib1222add
+ dep_strstart equ 64h+zlib1222add
+ dep_prev_length equ 70h+zlib1222add
+ dep_nice_match equ 88h+zlib1222add
+ dep_w_size equ 24h+zlib1222add
+ dep_prev equ 38h+zlib1222add
+ dep_w_mask equ 2ch+zlib1222add
+ dep_good_match equ 84h+zlib1222add
+ dep_match_start equ 68h+zlib1222add
+ dep_lookahead equ 6ch+zlib1222add
+
+
+_TEXT segment
+; Export both entry points; defining NOUNDERLINE selects the names without the leading underscore.
+IFDEF NOUNDERLINE
+ public longest_match
+ public match_init
+ELSE
+ public _longest_match
+ public _match_init
+ENDIF
+
+; Match-length constants used by longest_match.
+ MAX_MATCH equ 258
+ MIN_MATCH equ 3
+ MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
+
+; MAX_MATCH rounded up to a multiple of 8: (258 + 7) AND 0FFF8h = 264 = 0108h.
+; longest_match hard-codes this value as 0108h (and its negation 0fffffef8h)
+; when it biases esi/edi/edx for the compare loop, so the mask must clear
+; only the low three bits; a mask of 0FFF0h would give 256 instead.
+MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF8h)
+
+
+;;; stack frame offsets
+
+chainlenwmask equ esp + 0 ; high word: current chain len
+ ; low word: s->w_mask
+window equ esp + 4 ; local copy of s->window
+windowbestlen equ esp + 8 ; s->window + bestlen
+scanstart equ esp + 16 ; first two bytes of string
+scanend equ esp + 12 ; the two bytes at scan + bestlen - 1
+scanalign equ esp + 20 ; dword-misalignment of string
+nicematch equ esp + 24 ; a good enough match size
+bestlen equ esp + 28 ; size of best match so far
+scan equ esp + 32 ; ptr to string wanting match
+
+LocalVarsSize equ 36 ; bytes of locals reserved by "sub esp" in the prologue
+; saved ebx at esp + 36 (pushed last)
+; saved esi at esp + 40
+; saved edi at esp + 44
+; saved ebp at esp + 48 (pushed first)
+; return address at esp + 52
+deflatestate equ esp + 56 ; the function arguments
+curmatch equ esp + 60
+
+;;; Offsets for fields in the deflate_state structure. These numbers
+;;; are calculated from the definition of deflate_state, with the
+;;; assumption that the compiler will dword-align the fields. (Thus,
+;;; changing the definition of deflate_state could easily cause this
+;;; program to crash horribly, without so much as a warning at
+;;; compile time. Sigh.)
+
+dsWSize equ 36+zlib1222add ; read when computing limit (MAX_DIST)
+dsWMask equ 44+zlib1222add ; s->w_mask
+dsWindow equ 48+zlib1222add ; s->window
+dsPrev equ 56+zlib1222add ; s->prev
+dsMatchLen equ 88+zlib1222add ; not referenced in this file
+dsPrevMatch equ 92+zlib1222add ; not referenced in this file
+dsStrStart equ 100+zlib1222add ; s->strstart
+dsMatchStart equ 104+zlib1222add ; s->match_start
+dsLookahead equ 108+zlib1222add ; s->lookahead
+dsPrevLen equ 112+zlib1222add ; s->prev_length
+dsMaxChainLen equ 116+zlib1222add ; s->max_chain_length
+dsGoodMatch equ 132+zlib1222add ; s->good_match
+dsNiceMatch equ 136+zlib1222add ; s->nice_match
+
+
+;;; match686.asm -- Pentium-Pro-optimized version of longest_match()
+;;; Written for zlib 1.1.2
+;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
+;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
+;;;
+;;
+;; This software is provided 'as-is', without any express or implied
+;; warranty. In no event will the authors be held liable for any damages
+;; arising from the use of this software.
+;;
+;; Permission is granted to anyone to use this software for any purpose,
+;; including commercial applications, and to alter it and redistribute it
+;; freely, subject to the following restrictions:
+;;
+;; 1. The origin of this software must not be misrepresented; you must not
+;; claim that you wrote the original software. If you use this software
+;; in a product, an acknowledgment in the product documentation would be
+;; appreciated but is not required.
+;; 2. Altered source versions must be plainly marked as such, and must not be
+;; misrepresented as being the original software
+;; 3. This notice may not be removed or altered from any source distribution.
+;;
+
+;GLOBAL _longest_match, _match_init
+
+
+;SECTION .text
+
+;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
+;;; In: two stack arguments, read as [deflatestate] and [curmatch]. Out: eax = the smaller of bestlen and s->lookahead. ebx, esi, edi, ebp are saved and restored; ecx and edx are overwritten.
+;_longest_match:
+ IFDEF NOUNDERLINE
+ longest_match proc near
+ ELSE
+ _longest_match proc near
+ ENDIF
+.FPO (9, 4, 0, 0, 1, 0)
+
+;;; Save registers that the compiler may be using, and adjust esp to
+;;; make room for our stack frame.
+
+ push ebp
+ push edi
+ push esi
+ push ebx
+ sub esp, LocalVarsSize
+
+;;; Retrieve the function arguments. ecx will hold cur_match
+;;; throughout the entire function. edx will hold the pointer to the
+;;; deflate_state structure during the function's setup (before
+;;; entering the main loop).
+
+ mov edx, [deflatestate]
+ mov ecx, [curmatch]
+
+;;; uInt wmask = s->w_mask;
+;;; unsigned chain_length = s->max_chain_length;
+;;; if (s->prev_length >= s->good_match) {
+;;; chain_length >>= 2;
+;;; }
+
+ mov eax, [edx + dsPrevLen]
+ mov ebx, [edx + dsGoodMatch]
+ cmp eax, ebx
+ mov eax, [edx + dsWMask] ; mov leaves the flags from the cmp intact
+ mov ebx, [edx + dsMaxChainLen]
+ jl LastMatchGood ; prev_length < good_match: keep the full chain length
+ shr ebx, 2
+LastMatchGood:
+
+;;; chainlen is decremented once beforehand so that the function can
+;;; use the sign flag instead of the zero flag for the exit test.
+;;; It is then shifted into the high word, to make room for the wmask
+;;; value, which it will always accompany.
+
+ dec ebx
+ shl ebx, 16
+ or ebx, eax
+ mov [chainlenwmask], ebx
+
+;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ mov eax, [edx + dsNiceMatch]
+ mov ebx, [edx + dsLookahead]
+ cmp ebx, eax
+ jl LookaheadLess ; lookahead < nice_match: store lookahead
+ mov ebx, eax
+LookaheadLess: mov [nicematch], ebx
+
+;;; register Bytef *scan = s->window + s->strstart;
+
+ mov esi, [edx + dsWindow]
+ mov [window], esi
+ mov ebp, [edx + dsStrStart]
+ lea edi, [esi + ebp]
+ mov [scan], edi
+
+;;; Determine how many bytes the scan ptr is off from being
+;;; dword-aligned.
+
+ mov eax, edi
+ neg eax
+ and eax, 3 ; eax = (-scan) & 3
+ mov [scanalign], eax
+
+;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
+
+ mov eax, [edx + dsWSize]
+ sub eax, MIN_LOOKAHEAD
+ sub ebp, eax ; ebp = strstart - (w_size - MIN_LOOKAHEAD)
+ jg LimitPositive
+ xor ebp, ebp ; result was <= 0: limit = 0
+LimitPositive:
+
+;;; int best_len = s->prev_length;
+
+ mov eax, [edx + dsPrevLen]
+ mov [bestlen], eax
+
+;;; Store the sum of s->window + best_len in windowbestlen locally, and in esi.
+
+ add esi, eax
+ mov [windowbestlen], esi
+
+;;; register ush scan_start = *(ushf*)scan;
+;;; register ush scan_end = *(ushf*)(scan+best_len-1);
+;;; Posf *prev = s->prev;
+
+ movzx ebx, word ptr [edi]
+ mov [scanstart], ebx
+ movzx ebx, word ptr [edi + eax - 1]
+ mov [scanend], ebx
+ mov edi, [edx + dsPrev]
+
+;;; Jump into the main loop.
+
+ mov edx, [chainlenwmask]
+ jmp short LoopEntry
+
+align 4
+
+;;; do {
+;;; match = s->window + cur_match;
+;;; if (*(ushf*)(match+best_len-1) != scan_end ||
+;;; *(ushf*)match != scan_start) continue;
+;;; [...]
+;;; } while ((cur_match = prev[cur_match & wmask]) > limit
+;;; && --chain_length != 0);
+;;;
+;;; Here is the inner loop of the function. The function will spend the
+;;; majority of its time in this loop, and majority of that time will
+;;; be spent in the first ten instructions.
+;;;
+;;; Within this loop:
+;;; ebx = scanend
+;;; ecx = curmatch
+;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
+;;; esi = windowbestlen - i.e., (window + bestlen)
+;;; edi = prev
+;;; ebp = limit
+
+LookupLoop:
+ and ecx, edx ; cur_match & wmask; NOTE(review): the chainlen bits in the high word are ANDed too - presumably harmless because cur_match fits in 16 bits, confirm
+ movzx ecx, word ptr [edi + ecx*2] ; cur_match = prev[...]
+ cmp ecx, ebp
+ jbe LeaveNow ; cur_match <= limit: stop
+ sub edx, 00010000h ; decrement the chain length kept in the high word
+ js LeaveNow ; chain length exhausted
+LoopEntry: movzx eax, word ptr [esi + ecx - 1]
+ cmp eax, ebx
+ jnz LookupLoop ; word at match+bestlen-1 differs from scanend
+ mov eax, [window]
+ movzx eax, word ptr [eax + ecx]
+ cmp eax, [scanstart]
+ jnz LookupLoop ; first word of match differs from scanstart
+
+;;; Store the current value of chainlen.
+
+ mov [chainlenwmask], edx
+
+;;; Point edi to the string under scrutiny, and esi to the string we
+;;; are hoping to match it up with. In actuality, esi and edi are
+;;; both pointed (MAX_MATCH_8 + scanalign) bytes ahead, and edx is
+;;; initialized to -MAX_MATCH_8 (MAX_MATCH_8 = 0108h).
+
+ mov esi, [window]
+ mov edi, [scan]
+ add esi, ecx
+ mov eax, [scanalign]
+ mov edx, 0fffffef8h; -(MAX_MATCH_8)
+ lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
+ lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
+
+;;; Test the strings for equality, 8 bytes at a time. At the end,
+;;; adjust edx so that it is offset to the exact byte that mismatched.
+;;;
+;;; We already know at this point that the first three bytes of the
+;;; strings match each other, and they can be safely passed over before
+;;; starting the compare loop. So what this code does is skip over 0-3
+;;; bytes, as much as necessary in order to dword-align the edi
+;;; pointer. (esi will still be misaligned three times out of four.)
+;;;
+;;; It should be confessed that this loop usually does not represent
+;;; much of the total running time. Replacing it with a more
+;;; straightforward "rep cmpsb" would not drastically degrade
+;;; performance.
+
+LoopCmps:
+ mov eax, [esi + edx]
+ xor eax, [edi + edx] ; nonzero bits mark differing bytes
+ jnz LeaveLoopCmps
+ mov eax, [esi + edx + 4]
+ xor eax, [edi + edx + 4]
+ jnz LeaveLoopCmps4
+ add edx, 8
+ jnz LoopCmps ; edx reaches 0 after MAX_MATCH_8 bytes compared equal
+ jmp short LenMaximum
+LeaveLoopCmps4: add edx, 4
+LeaveLoopCmps: test eax, 0000FFFFh
+ jnz LenLower ; difference is within the low two bytes
+ add edx, 2
+ shr eax, 16
+LenLower: sub al, 1 ; carry is set only when al was 0, i.e. the low byte matched
+ adc edx, 0 ; ...in which case the mismatch is one byte further on
+
+;;; Calculate the length of the match. If it is longer than MAX_MATCH,
+;;; then automatically accept it as the best possible match and leave.
+
+ lea eax, [edi + edx] ; address of the first mismatching byte in the scan string
+ mov edi, [scan]
+ sub eax, edi ; eax = match length
+ cmp eax, MAX_MATCH
+ jge LenMaximum
+
+;;; If the length of the match is not longer than the best match we
+;;; have so far, then forget it and return to the lookup loop.
+
+ mov edx, [deflatestate]
+ mov ebx, [bestlen]
+ cmp eax, ebx
+ jg LongerMatch
+ mov esi, [windowbestlen] ; reload the registers the lookup loop expects
+ mov edi, [edx + dsPrev]
+ mov ebx, [scanend]
+ mov edx, [chainlenwmask]
+ jmp LookupLoop
+
+;;; s->match_start = cur_match;
+;;; best_len = len;
+;;; if (len >= nice_match) break;
+;;; scan_end = *(ushf*)(scan+best_len-1);
+
+LongerMatch: mov ebx, [nicematch]
+ mov [bestlen], eax
+ mov [edx + dsMatchStart], ecx
+ cmp eax, ebx
+ jge LeaveNow
+ mov esi, [window]
+ add esi, eax
+ mov [windowbestlen], esi
+ movzx ebx, word ptr [edi + eax - 1] ; edi still holds scan here
+ mov edi, [edx + dsPrev]
+ mov [scanend], ebx
+ mov edx, [chainlenwmask]
+ jmp LookupLoop
+
+;;; Accept the current string, with the maximum possible length.
+
+LenMaximum: mov edx, [deflatestate]
+ mov dword ptr [bestlen], MAX_MATCH
+ mov [edx + dsMatchStart], ecx
+
+;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+;;; return s->lookahead;
+
+LeaveNow:
+ mov edx, [deflatestate]
+ mov ebx, [bestlen]
+ mov eax, [edx + dsLookahead]
+ cmp ebx, eax
+ jg LookaheadRet ; bestlen > lookahead: return lookahead (already in eax)
+ mov eax, ebx
+LookaheadRet:
+
+;;; Restore the stack and return from whence we came.
+
+ add esp, LocalVarsSize
+ pop ebx
+ pop esi
+ pop edi
+ pop ebp
+
+ ret
+; please don't remove this string !
+; You can freely use match686 in any free or commercial app if you don't remove the string in the binary!
+ db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
+
+
+ IFDEF NOUNDERLINE
+ longest_match endp
+ ELSE
+ _longest_match endp
+ ENDIF
+
+ IFNDEF NOUNDERLINE
+ _match_init proc near ; default: underscore-prefixed export
+ ret ; nothing to initialise: return immediately
+ _match_init endp
+ ELSE
+ match_init proc near ; NOUNDERLINE build: plain export name
+ ret ; nothing to initialise: return immediately
+ match_init endp
+ ENDIF
+
+
+_TEXT ends ; closes the _TEXT segment opened before the public declarations
+end
diff --git a/zlib/contrib/masmx86/match686.lst b/zlib/contrib/masmx86/match686.lst
new file mode 100644
index 0000000..315ad87
--- /dev/null
+++ b/zlib/contrib/masmx86/match686.lst
@@ -0,0 +1,624 @@
+Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23
+match686.asm Page 1 - 1
+
+
+ ; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86
+ ; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
+ ; File written by Gilles Vollant, by converting match686.S from Brian Raiter
+ ; for MASM. This is as assembly version of longest_match
+ ; from Jean-loup Gailly in deflate.c
+ ;
+ ; http://www.zlib.net
+ ; http://www.winimage.com/zLibDll
+ ; http://www.muppetlabs.com/~breadbox/software/assembly.html
+ ;
+ ; For Visual C++ 4.x and higher and ML 6.x and higher
+ ; ml.exe is distributed in
+ ; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
+ ;
+ ; this file contain two implementation of longest_match
+ ;
+ ; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro
+ ; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom)
+ ;
+ ; for using an assembly version of longest_match, you need define ASMV in project
+ ;
+ ; compile the asm file running
+ ; ml /coff /Zi /c /Flmatch686.lst match686.asm
+ ; and do not include match686.obj in your project
+ ;
+ ; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
+ ; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor
+ ; with autoselect (with cpu detection code)
+ ; if you want support the old pentium optimization, you can still use these version
+ ;
+ ; this file is not optimized for old pentium, but it compatible with all x86 32 bits
+ ; processor (starting 80386)
+ ;
+ ;
+ ; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2
+
+ ;uInt longest_match(s, cur_match)
+ ; deflate_state *s;
+ ; IPos cur_match; /* current match */
+
+ = 0000004C NbStack equ 76
+ = dword ptr[esp+NbStack-0] cur_match equ dword ptr[esp+NbStack-0]
+ = dword ptr[esp+NbStack-4] str_s equ dword ptr[esp+NbStack-4]
+ ; 5 dword on top (ret,ebp,esi,edi,ebx)
+ = dword ptr[esp+NbStack-8] adrret equ dword ptr[esp+NbStack-8]
+ = dword ptr[esp+NbStack-12 pushebp equ dword ptr[esp+NbStack-12]
+ ]
+ = dword ptr[esp+NbStack-16 pushedi equ dword ptr[esp+NbStack-16]
+ ]
+ = dword ptr[esp+NbStack-20 pushesi equ dword ptr[esp+NbStack-20]
+ ]
+ = dword ptr[esp+NbStack-24 pushebx equ dword ptr[esp+NbStack-24]
+ ]
+
+ = dword ptr [esp+NbStack-2 chain_length equ dword ptr [esp+NbStack-28]
+ 8]
+ = dword ptr [esp+NbStack-3 limit equ dword ptr [esp+NbStack-32]
+ 2]
+ = dword ptr [esp+NbStack-3 best_len equ dword ptr [esp+NbStack-36]
+ 6]
+ = dword ptr [esp+NbStack-4 window equ dword ptr [esp+NbStack-40]
+ 0]
+ = dword ptr [esp+NbStack-4 prev equ dword ptr [esp+NbStack-44]
+ 4]
+ = word ptr [esp+NbStack-48 scan_start equ word ptr [esp+NbStack-48]
+ ]
+ = dword ptr [esp+NbStack-5 wmask equ dword ptr [esp+NbStack-52]
+ 2]
+ = dword ptr [esp+NbStack-5 match_start_ptr equ dword ptr [esp+NbStack-56]
+ 6]
+ = dword ptr [esp+NbStack-6 nice_match equ dword ptr [esp+NbStack-60]
+ 0]
+ = dword ptr [esp+NbStack-6 scan equ dword ptr [esp+NbStack-64]
+ 4]
+
+ = dword ptr [esp+NbStack-6 windowlen equ dword ptr [esp+NbStack-68]
+ 8]
+ = dword ptr [esp+NbStack-7 match_start equ dword ptr [esp+NbStack-72]
+ 2]
+ = dword ptr [esp+NbStack-7 strend equ dword ptr [esp+NbStack-76]
+ 6]
+ = 00000034 NbStackAdd equ (NbStack-24)
+
+ .386p
+
+ name gvmatch
+ .MODEL FLAT
+
+
+
+ ; all the +zlib1222add offsets are due to the addition of fields
+ ; in zlib in the deflate_state structure since the asm code was first written
+ ; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
+ ; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
+ ; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
+
+ = 00000008 zlib1222add equ 8
+
+ ; Note : these value are good with a 8 bytes boundary pack structure
+ = 0000007C dep_chain_length equ 74h+zlib1222add
+ = 00000038 dep_window equ 30h+zlib1222add
+ = 0000006C dep_strstart equ 64h+zlib1222add
+ = 00000078 dep_prev_length equ 70h+zlib1222add
+ = 00000090 dep_nice_match equ 88h+zlib1222add
+ = 0000002C dep_w_size equ 24h+zlib1222add
+ = 00000040 dep_prev equ 38h+zlib1222add
+ = 00000034 dep_w_mask equ 2ch+zlib1222add
+ = 0000008C dep_good_match equ 84h+zlib1222add
+ = 00000070 dep_match_start equ 68h+zlib1222add
+ = 00000074 dep_lookahead equ 6ch+zlib1222add
+
+
+ 00000000 _TEXT segment
+
+ IFDEF NOUNDERLINE
+ ELSE
+ public _longest_match
+ public _match_init
+ ENDIF
+
+ = 00000102 MAX_MATCH equ 258
+ = 00000003 MIN_MATCH equ 3
+ = 00000106 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1)
+
+
+
+ = 00000102 MAX_MATCH equ 258
+ = 00000003 MIN_MATCH equ 3
+ = 00000106 MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1)
+ = 00000100 MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h)
+
+
+ ;;; stack frame offsets
+
+ = esp + 0 chainlenwmask equ esp + 0 ; high word: current chain len
+ ; low word: s->wmask
+ = esp + 4 window equ esp + 4 ; local copy of s->window
+ = esp + 8 windowbestlen equ esp + 8 ; s->window + bestlen
+ = esp + 16 scanstart equ esp + 16 ; first two bytes of string
+ = esp + 12 scanend equ esp + 12 ; last two bytes of string
+ = esp + 20 scanalign equ esp + 20 ; dword-misalignment of string
+ = esp + 24 nicematch equ esp + 24 ; a good enough match size
+ = esp + 28 bestlen equ esp + 28 ; size of best match so far
+ = esp + 32 scan equ esp + 32 ; ptr to string wanting match
+
+ = 00000024 LocalVarsSize equ 36
+ ; saved ebx byte esp + 36
+ ; saved edi byte esp + 40
+ ; saved esi byte esp + 44
+ ; saved ebp byte esp + 48
+ ; return address byte esp + 52
+ = esp + 56 deflatestate equ esp + 56 ; the function arguments
+ = esp + 60 curmatch equ esp + 60
+
+ ;;; Offsets for fields in the deflate_state structure. These numbers
+ ;;; are calculated from the definition of deflate_state, with the
+ ;;; assumption that the compiler will dword-align the fields. (Thus,
+ ;;; changing the definition of deflate_state could easily cause this
+ ;;; program to crash horribly, without so much as a warning at
+ ;;; compile time. Sigh.)
+
+ = 0000002C dsWSize equ 36+zlib1222add
+ = 00000034 dsWMask equ 44+zlib1222add
+ = 00000038 dsWindow equ 48+zlib1222add
+ = 00000040 dsPrev equ 56+zlib1222add
+ = 00000060 dsMatchLen equ 88+zlib1222add
+ = 00000064 dsPrevMatch equ 92+zlib1222add
+ = 0000006C dsStrStart equ 100+zlib1222add
+ = 00000070 dsMatchStart equ 104+zlib1222add
+ = 00000074 dsLookahead equ 108+zlib1222add
+ = 00000078 dsPrevLen equ 112+zlib1222add
+ = 0000007C dsMaxChainLen equ 116+zlib1222add
+ = 0000008C dsGoodMatch equ 132+zlib1222add
+ = 00000090 dsNiceMatch equ 136+zlib1222add
+
+
+ ;;; match686.asm -- Pentium-Pro-optimized version of longest_match()
+ ;;; Written for zlib 1.1.2
+ ;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com>
+ ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
+ ;;;
+ ;;
+ ;; This software is provided 'as-is', without any express or implied
+ ;; warranty. In no event will the authors be held liable for any damages
+ ;; arising from the use of this software.
+ ;;
+ ;; Permission is granted to anyone to use this software for any purpose,
+ ;; including commercial applications, and to alter it and redistribute it
+ ;; freely, subject to the following restrictions:
+ ;;
+ ;; 1. The origin of this software must not be misrepresented; you must not
+ ;; claim that you wrote the original software. If you use this software
+ ;; in a product, an acknowledgment in the product documentation would be
+ ;; appreciated but is not required.
+ ;; 2. Altered source versions must be plainly marked as such, and must not be
+ ;; misrepresented as being the original software
+ ;; 3. This notice may not be removed or altered from any source distribution.
+ ;;
+
+ ;GLOBAL _longest_match, _match_init
+
+
+ ;SECTION .text
+
+ ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
+
+ ;_longest_match:
+ IFDEF NOUNDERLINE
+ ELSE
+ 00000000 _longest_match proc near
+ ENDIF
+ 00000000 .FPO (9, 4, 0, 0, 1, 0)
+
+ ;;; Save registers that the compiler may be using, and adjust esp to
+ ;;; make room for our stack frame.
+
+ 00000000 55 push ebp
+ 00000001 57 push edi
+ 00000002 56 push esi
+ 00000003 53 push ebx
+ 00000004 83 EC 24 sub esp, LocalVarsSize
+
+ ;;; Retrieve the function arguments. ecx will hold cur_match
+ ;;; throughout the entire function. edx will hold the pointer to the
+ ;;; deflate_state structure during the function's setup (before
+ ;;; entering the main loop.
+
+ 00000007 8B 54 24 38 mov edx, [deflatestate]
+ 0000000B 8B 4C 24 3C mov ecx, [curmatch]
+
+ ;;; uInt wmask = s->w_mask;
+ ;;; unsigned chain_length = s->max_chain_length;
+ ;;; if (s->prev_length >= s->good_match) {
+ ;;; chain_length >>= 2;
+ ;;; }
+
+ 0000000F 8B 42 78 mov eax, [edx + dsPrevLen]
+ 00000012 8B 9A 0000008C mov ebx, [edx + dsGoodMatch]
+ 00000018 3B C3 cmp eax, ebx
+ 0000001A 8B 42 34 mov eax, [edx + dsWMask]
+ 0000001D 8B 5A 7C mov ebx, [edx + dsMaxChainLen]
+ 00000020 7C 03 jl LastMatchGood
+ 00000022 C1 EB 02 shr ebx, 2
+ 00000025 LastMatchGood:
+
+ ;;; chainlen is decremented once beforehand so that the function can
+ ;;; use the sign flag instead of the zero flag for the exit test.
+ ;;; It is then shifted into the high word, to make room for the wmask
+ ;;; value, which it will always accompany.
+
+ 00000025 4B dec ebx
+ 00000026 C1 E3 10 shl ebx, 16
+ 00000029 0B D8 or ebx, eax
+ 0000002B 89 1C 24 mov [chainlenwmask], ebx
+
+ ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+
+ 0000002E 8B 82 00000090 mov eax, [edx + dsNiceMatch]
+ 00000034 8B 5A 74 mov ebx, [edx + dsLookahead]
+ 00000037 3B D8 cmp ebx, eax
+ 00000039 7C 02 jl LookaheadLess
+ 0000003B 8B D8 mov ebx, eax
+ 0000003D 89 5C 24 18 LookaheadLess: mov [nicematch], ebx
+
+ ;;; register Bytef *scan = s->window + s->strstart;
+
+ 00000041 8B 72 38 mov esi, [edx + dsWindow]
+ 00000044 89 74 24 04 mov [window], esi
+ 00000048 8B 6A 6C mov ebp, [edx + dsStrStart]
+ 0000004B 8D 3C 2E lea edi, [esi + ebp]
+ 0000004E 89 7C 24 20 mov [scan], edi
+
+ ;;; Determine how many bytes the scan ptr is off from being
+ ;;; dword-aligned.
+
+ 00000052 8B C7 mov eax, edi
+ 00000054 F7 D8 neg eax
+ 00000056 83 E0 03 and eax, 3
+ 00000059 89 44 24 14 mov [scanalign], eax
+
+ ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
+ ;;; s->strstart - (IPos)MAX_DIST(s) : NIL;
+
+ 0000005D 8B 42 2C mov eax, [edx + dsWSize]
+ 00000060 2D 00000106 sub eax, MIN_LOOKAHEAD
+ 00000065 2B E8 sub ebp, eax
+ 00000067 7F 02 jg LimitPositive
+ 00000069 33 ED xor ebp, ebp
+ 0000006B LimitPositive:
+
+ ;;; int best_len = s->prev_length;
+
+ 0000006B 8B 42 78 mov eax, [edx + dsPrevLen]
+ 0000006E 89 44 24 1C mov [bestlen], eax
+
+ ;;; Store the sum of s->window + best_len in esi locally, and in esi.
+
+ 00000072 03 F0 add esi, eax
+ 00000074 89 74 24 08 mov [windowbestlen], esi
+
+ ;;; register ush scan_start = *(ushf*)scan;
+ ;;; register ush scan_end = *(ushf*)(scan+best_len-1);
+ ;;; Posf *prev = s->prev;
+
+ 00000078 0F B7 1F movzx ebx, word ptr [edi]
+ 0000007B 89 5C 24 10 mov [scanstart], ebx
+ 0000007F 0F B7 5C 07 FF movzx ebx, word ptr [edi + eax - 1]
+ 00000084 89 5C 24 0C mov [scanend], ebx
+ 00000088 8B 7A 40 mov edi, [edx + dsPrev]
+
+ ;;; Jump into the main loop.
+
+ 0000008B 8B 14 24 mov edx, [chainlenwmask]
+ 0000008E EB 1A jmp short LoopEntry
+
+ align 4
+
+ ;;; do {
+ ;;; match = s->window + cur_match;
+ ;;; if (*(ushf*)(match+best_len-1) != scan_end ||
+ ;;; *(ushf*)match != scan_start) continue;
+ ;;; [...]
+ ;;; } while ((cur_match = prev[cur_match & wmask]) > limit
+ ;;; && --chain_length != 0);
+ ;;;
+ ;;; Here is the inner loop of the function. The function will spend the
+ ;;; majority of its time in this loop, and majority of that time will
+ ;;; be spent in the first ten instructions.
+ ;;;
+ ;;; Within this loop:
+ ;;; ebx = scanend
+ ;;; ecx = curmatch
+ ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
+ ;;; esi = windowbestlen - i.e., (window + bestlen)
+ ;;; edi = prev
+ ;;; ebp = limit
+
+ 00000090 LookupLoop:
+ 00000090 23 CA and ecx, edx
+ 00000092 0F B7 0C 4F movzx ecx, word ptr [edi + ecx*2]
+ 00000096 3B CD cmp ecx, ebp
+ 00000098 0F 86 000000E0 jbe LeaveNow
+ 0000009E 81 EA 00010000 sub edx, 00010000h
+ 000000A4 0F 88 000000D4 js LeaveNow
+ 000000AA 0F B7 44 0E FF LoopEntry: movzx eax, word ptr [esi + ecx - 1]
+ 000000AF 3B C3 cmp eax, ebx
+ 000000B1 75 DD jnz LookupLoop
+ 000000B3 8B 44 24 04 mov eax, [window]
+ 000000B7 0F B7 04 08 movzx eax, word ptr [eax + ecx]
+ 000000BB 3B 44 24 10 cmp eax, [scanstart]
+ 000000BF 75 CF jnz LookupLoop
+
+ ;;; Store the current value of chainlen.
+
+ 000000C1 89 14 24 mov [chainlenwmask], edx
+
+ ;;; Point edi to the string under scrutiny, and esi to the string we
+ ;;; are hoping to match it up with. In actuality, esi and edi are
+ ;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is
+ ;;; initialized to -(MAX_MATCH_8 - scanalign).
+
+ 000000C4 8B 74 24 04 mov esi, [window]
+ 000000C8 8B 7C 24 20 mov edi, [scan]
+ 000000CC 03 F1 add esi, ecx
+ 000000CE 8B 44 24 14 mov eax, [scanalign]
+ 000000D2 BA FFFFFEF8 mov edx, 0fffffef8h; -(MAX_MATCH_8)
+ 000000D7 8D BC 07 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
+ 00000108
+ 000000DE 8D B4 06 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
+ 00000108
+
+ ;;; Test the strings for equality, 8 bytes at a time. At the end,
+ ;;; adjust edx so that it is offset to the exact byte that mismatched.
+ ;;;
+ ;;; We already know at this point that the first three bytes of the
+ ;;; strings match each other, and they can be safely passed over before
+ ;;; starting the compare loop. So what this code does is skip over 0-3
+ ;;; bytes, as much as necessary in order to dword-align the edi
+ ;;; pointer. (esi will still be misaligned three times out of four.)
+ ;;;
+ ;;; It should be confessed that this loop usually does not represent
+ ;;; much of the total running time. Replacing it with a more
+ ;;; straightforward "rep cmpsb" would not drastically degrade
+ ;;; performance.
+
+ 000000E5 LoopCmps:
+ 000000E5 8B 04 16 mov eax, [esi + edx]
+ 000000E8 33 04 17 xor eax, [edi + edx]
+ 000000EB 75 14 jnz LeaveLoopCmps
+ 000000ED 8B 44 16 04 mov eax, [esi + edx + 4]
+ 000000F1 33 44 17 04 xor eax, [edi + edx + 4]
+ 000000F5 75 07 jnz LeaveLoopCmps4
+ 000000F7 83 C2 08 add edx, 8
+ 000000FA 75 E9 jnz LoopCmps
+ 000000FC EB 71 jmp short LenMaximum
+ 000000FE 83 C2 04 LeaveLoopCmps4: add edx, 4
+ 00000101 A9 0000FFFF LeaveLoopCmps: test eax, 0000FFFFh
+ 00000106 75 06 jnz LenLower
+ 00000108 83 C2 02 add edx, 2
+ 0000010B C1 E8 10 shr eax, 16
+ 0000010E 2C 01 LenLower: sub al, 1
+ 00000110 83 D2 00 adc edx, 0
+
+ ;;; Calculate the length of the match. If it is longer than MAX_MATCH,
+ ;;; then automatically accept it as the best possible match and leave.
+
+ 00000113 8D 04 17 lea eax, [edi + edx]
+ 00000116 8B 7C 24 20 mov edi, [scan]
+ 0000011A 2B C7 sub eax, edi
+ 0000011C 3D 00000102 cmp eax, MAX_MATCH
+ 00000121 7D 4C jge LenMaximum
+
+ ;;; If the length of the match is not longer than the best match we
+ ;;; have so far, then forget it and return to the lookup loop.
+
+ 00000123 8B 54 24 38 mov edx, [deflatestate]
+ 00000127 8B 5C 24 1C mov ebx, [bestlen]
+ 0000012B 3B C3 cmp eax, ebx
+ 0000012D 7F 13 jg LongerMatch
+ 0000012F 8B 74 24 08 mov esi, [windowbestlen]
+ 00000133 8B 7A 40 mov edi, [edx + dsPrev]
+ 00000136 8B 5C 24 0C mov ebx, [scanend]
+ 0000013A 8B 14 24 mov edx, [chainlenwmask]
+ 0000013D E9 FFFFFF4E jmp LookupLoop
+
+ ;;; s->match_start = cur_match;
+ ;;; best_len = len;
+ ;;; if (len >= nice_match) break;
+ ;;; scan_end = *(ushf*)(scan+best_len-1);
+
+ 00000142 8B 5C 24 18 LongerMatch: mov ebx, [nicematch]
+ 00000146 89 44 24 1C mov [bestlen], eax
+ 0000014A 89 4A 70 mov [edx + dsMatchStart], ecx
+ 0000014D 3B C3 cmp eax, ebx
+ 0000014F 7D 2D jge LeaveNow
+ 00000151 8B 74 24 04 mov esi, [window]
+ 00000155 03 F0 add esi, eax
+ 00000157 89 74 24 08 mov [windowbestlen], esi
+ 0000015B 0F B7 5C 07 FF movzx ebx, word ptr [edi + eax - 1]
+ 00000160 8B 7A 40 mov edi, [edx + dsPrev]
+ 00000163 89 5C 24 0C mov [scanend], ebx
+ 00000167 8B 14 24 mov edx, [chainlenwmask]
+ 0000016A E9 FFFFFF21 jmp LookupLoop
+
+ ;;; Accept the current string, with the maximum possible length.
+
+ 0000016F 8B 54 24 38 LenMaximum: mov edx, [deflatestate]
+ 00000173 C7 44 24 1C mov dword ptr [bestlen], MAX_MATCH
+ 00000102
+ 0000017B 89 4A 70 mov [edx + dsMatchStart], ecx
+
+ ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
+ ;;; return s->lookahead;
+
+ 0000017E LeaveNow:
+ 0000017E 8B 54 24 38 mov edx, [deflatestate]
+ 00000182 8B 5C 24 1C mov ebx, [bestlen]
+ 00000186 8B 42 74 mov eax, [edx + dsLookahead]
+ 00000189 3B D8 cmp ebx, eax
+ 0000018B 7F 02 jg LookaheadRet
+ 0000018D 8B C3 mov eax, ebx
+ 0000018F LookaheadRet:
+
+ ;;; Restore the stack and return from whence we came.
+
+ 0000018F 83 C4 24 add esp, LocalVarsSize
+ 00000192 5B pop ebx
+ 00000193 5E pop esi
+ 00000194 5F pop edi
+ 00000195 5D pop ebp
+
+ 00000196 C3 ret
+ ; please don't remove this string !
+ ; Your can freely use match686 in any free or commercial app if you don't remove the string in the binary!
+ 00000197 0D 0A 61 73 6D db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
+ 36 38 36 20 77
+ 69 74 68 20 6D
+ 61 73 6D 2C 20
+ 6F 70 74 69 6D
+ 69 73 65 64 20
+ 61 73 73 65 6D
+ 62 6C 79 20 63
+ 6F 64 65 20 66
+ 72 6F 6D 20 42
+ 72 69 61 6E 20
+ 52 61 69 74 65
+ 72 2C 20 77 72
+ 69 74 74 65 6E
+ 20 31 39 39 38
+ 0D 0A
+
+
+ IFDEF NOUNDERLINE
+ ELSE
+ 000001E4 _longest_match endp
+ ENDIF
+
+ IFDEF NOUNDERLINE
+ ELSE
+ 000001E4 _match_init proc near
+ 000001E4 C3 ret
+ 000001E5 _match_init endp
+ ENDIF
+
+
+ 000001E5 _TEXT ends
+ end
+ Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23
+match686.asm Symbols 2 - 1
+
+
+
+
+Segments and Groups:
+
+ N a m e Size Length Align Combine Class
+
+FLAT . . . . . . . . . . . . . . GROUP
+_DATA . . . . . . . . . . . . . 32 Bit 00000000 Para Public 'DATA'
+_TEXT . . . . . . . . . . . . . 32 Bit 000001E5 Para Public 'CODE'
+
+
+Procedures, parameters, and locals:
+
+ N a m e Type Value Attr
+
+_longest_match . . . . . . . . . P Near 00000000 _TEXT Length= 000001E4 Public
+ LastMatchGood . . . . . . . . L Near 00000025 _TEXT
+ LookaheadLess . . . . . . . . L Near 0000003D _TEXT
+ LimitPositive . . . . . . . . L Near 0000006B _TEXT
+ LookupLoop . . . . . . . . . . L Near 00000090 _TEXT
+ LoopEntry . . . . . . . . . . L Near 000000AA _TEXT
+ LoopCmps . . . . . . . . . . . L Near 000000E5 _TEXT
+ LeaveLoopCmps4 . . . . . . . . L Near 000000FE _TEXT
+ LeaveLoopCmps . . . . . . . . L Near 00000101 _TEXT
+ LenLower . . . . . . . . . . . L Near 0000010E _TEXT
+ LongerMatch . . . . . . . . . L Near 00000142 _TEXT
+ LenMaximum . . . . . . . . . . L Near 0000016F _TEXT
+ LeaveNow . . . . . . . . . . . L Near 0000017E _TEXT
+ LookaheadRet . . . . . . . . . L Near 0000018F _TEXT
+_match_init . . . . . . . . . . P Near 000001E4 _TEXT Length= 00000001 Public
+
+
+Symbols:
+
+ N a m e Type Value Attr
+
+@CodeSize . . . . . . . . . . . Number 00000000h
+@DataSize . . . . . . . . . . . Number 00000000h
+@Interface . . . . . . . . . . . Number 00000000h
+@Model . . . . . . . . . . . . . Number 00000007h
+@code . . . . . . . . . . . . . Text _TEXT
+@data . . . . . . . . . . . . . Text FLAT
+@fardata? . . . . . . . . . . . Text FLAT
+@fardata . . . . . . . . . . . . Text FLAT
+@stack . . . . . . . . . . . . . Text FLAT
+LocalVarsSize . . . . . . . . . Number 00000024h
+MAX_MATCH_8_ . . . . . . . . . . Number 00000100h
+MAX_MATCH . . . . . . . . . . . Number 00000102h
+MIN_LOOKAHEAD . . . . . . . . . Number 00000106h
+MIN_MATCH . . . . . . . . . . . Number 00000003h
+NbStackAdd . . . . . . . . . . . Number 00000034h
+NbStack . . . . . . . . . . . . Number 0000004Ch
+adrret . . . . . . . . . . . . . Text dword ptr[esp+NbStack-8]
+best_len . . . . . . . . . . . . Text dword ptr [esp+NbStack-36]
+bestlen . . . . . . . . . . . . Text esp + 28
+chain_length . . . . . . . . . . Text dword ptr [esp+NbStack-28]
+chainlenwmask . . . . . . . . . Text esp + 0
+cur_match . . . . . . . . . . . Text dword ptr[esp+NbStack-0]
+curmatch . . . . . . . . . . . . Text esp + 60
+deflatestate . . . . . . . . . . Text esp + 56
+dep_chain_length . . . . . . . . Number 0000007Ch
+dep_good_match . . . . . . . . . Number 0000008Ch
+dep_lookahead . . . . . . . . . Number 00000074h
+dep_match_start . . . . . . . . Number 00000070h
+dep_nice_match . . . . . . . . . Number 00000090h
+dep_prev_length . . . . . . . . Number 00000078h
+dep_prev . . . . . . . . . . . . Number 00000040h
+dep_strstart . . . . . . . . . . Number 0000006Ch
+dep_w_mask . . . . . . . . . . . Number 00000034h
+dep_w_size . . . . . . . . . . . Number 0000002Ch
+dep_window . . . . . . . . . . . Number 00000038h
+dsGoodMatch . . . . . . . . . . Number 0000008Ch
+dsLookahead . . . . . . . . . . Number 00000074h
+dsMatchLen . . . . . . . . . . . Number 00000060h
+dsMatchStart . . . . . . . . . . Number 00000070h
+dsMaxChainLen . . . . . . . . . Number 0000007Ch
+dsNiceMatch . . . . . . . . . . Number 00000090h
+dsPrevLen . . . . . . . . . . . Number 00000078h
+dsPrevMatch . . . . . . . . . . Number 00000064h
+dsPrev . . . . . . . . . . . . . Number 00000040h
+dsStrStart . . . . . . . . . . . Number 0000006Ch
+dsWMask . . . . . . . . . . . . Number 00000034h
+dsWSize . . . . . . . . . . . . Number 0000002Ch
+dsWindow . . . . . . . . . . . . Number 00000038h
+limit . . . . . . . . . . . . . Text dword ptr [esp+NbStack-32]
+match_start_ptr . . . . . . . . Text dword ptr [esp+NbStack-56]
+match_start . . . . . . . . . . Text dword ptr [esp+NbStack-72]
+nice_match . . . . . . . . . . . Text dword ptr [esp+NbStack-60]
+nicematch . . . . . . . . . . . Text esp + 24
+prev . . . . . . . . . . . . . . Text dword ptr [esp+NbStack-44]
+pushebp . . . . . . . . . . . . Text dword ptr[esp+NbStack-12]
+pushebx . . . . . . . . . . . . Text dword ptr[esp+NbStack-24]
+pushedi . . . . . . . . . . . . Text dword ptr[esp+NbStack-16]
+pushesi . . . . . . . . . . . . Text dword ptr[esp+NbStack-20]
+scan_start . . . . . . . . . . . Text word ptr [esp+NbStack-48]
+scanalign . . . . . . . . . . . Text esp + 20
+scanend . . . . . . . . . . . . Text esp + 12
+scanstart . . . . . . . . . . . Text esp + 16
+scan . . . . . . . . . . . . . . Text esp + 32
+str_s . . . . . . . . . . . . . Text dword ptr[esp+NbStack-4]
+strend . . . . . . . . . . . . . Text dword ptr [esp+NbStack-76]
+windowbestlen . . . . . . . . . Text esp + 8
+windowlen . . . . . . . . . . . Text dword ptr [esp+NbStack-68]
+window . . . . . . . . . . . . . Text esp + 4
+wmask . . . . . . . . . . . . . Text dword ptr [esp+NbStack-52]
+zlib1222add . . . . . . . . . . Number 00000008h
+
+ 0 Warnings
+ 0 Errors
diff --git a/zlib/contrib/masmx86/readme.txt b/zlib/contrib/masmx86/readme.txt
new file mode 100644
index 0000000..3f88886
--- /dev/null
+++ b/zlib/contrib/masmx86/readme.txt
@@ -0,0 +1,27 @@
+
+Summary
+-------
+This directory contains ASM implementations of the functions
+longest_match() and inflate_fast().
+
+
+Use instructions
+----------------
+Assemble using MASM, and copy the object files into the zlib source
+directory, then run the appropriate makefile, as suggested below. You can
+download MASM from here:
+
+ http://www.microsoft.com/downloads/details.aspx?displaylang=en&FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
+
+You can also get object files here:
+
+ http://www.winimage.com/zLibDll/zlib124_masm_obj.zip
+
+Build instructions
+------------------
+* With Microsoft C and MASM:
+nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj"
+
+* With Borland C and TASM:
+make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" OBJPA="+match686c.obj+match686.obj+inffas32.obj"
+