diff options
Diffstat (limited to 'zlib/contrib/masmx86')
-rw-r--r-- | zlib/contrib/masmx86/bld_ml32.bat | 2 | ||||
-rw-r--r-- | zlib/contrib/masmx86/inffas32.asm | 1080 | ||||
-rw-r--r-- | zlib/contrib/masmx86/inffas32.lst | 1224 | ||||
-rw-r--r-- | zlib/contrib/masmx86/match686.asm | 479 | ||||
-rw-r--r-- | zlib/contrib/masmx86/match686.lst | 624 | ||||
-rw-r--r-- | zlib/contrib/masmx86/readme.txt | 27 |
6 files changed, 0 insertions, 3436 deletions
diff --git a/zlib/contrib/masmx86/bld_ml32.bat b/zlib/contrib/masmx86/bld_ml32.bat deleted file mode 100644 index 67e6a6a..0000000 --- a/zlib/contrib/masmx86/bld_ml32.bat +++ /dev/null @@ -1,2 +0,0 @@ -ml /safeseh /coff /Zi /c /Flmatch686.lst match686.asm -ml /safeseh /coff /Zi /c /Flinffas32.lst inffas32.asm diff --git a/zlib/contrib/masmx86/inffas32.asm b/zlib/contrib/masmx86/inffas32.asm deleted file mode 100644 index cb37a81..0000000 --- a/zlib/contrib/masmx86/inffas32.asm +++ /dev/null @@ -1,1080 +0,0 @@ -;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding -; * -; * inffas32.asm is derivated from inffas86.c, with translation of assembly code -; * -; * Copyright (C) 1995-2003 Mark Adler -; * For conditions of distribution and use, see copyright notice in zlib.h -; * -; * Copyright (C) 2003 Chris Anderson <christop@charm.net> -; * Please use the copyright conditions above. -; * -; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from -; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at -; * the moment. I have successfully compiled and tested this code with gcc2.96, -; * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S -; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX -; * enabled. I will attempt to merge the MMX code into this version. 
Newer -; * versions of this and inffast.S can be found at -; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ -; * -; * 2005 : modification by Gilles Vollant -; */ -; For Visual C++ 4.x and higher and ML 6.x and higher -; ml.exe is in directory \MASM611C of Win95 DDK -; ml.exe is also distributed in http://www.masm32.com/masmdl.htm -; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ -; -; -; compile with command line option -; ml /coff /Zi /c /Flinffas32.lst inffas32.asm - -; if you define NO_GZIP (see inflate.h), compile with -; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm - - -; zlib122sup is 0 fort zlib 1.2.2.1 and lower -; zlib122sup is 8 fort zlib 1.2.2.2 and more (with addition of dmax and head -; in inflate_state in inflate.h) -zlib1222sup equ 8 - - -IFDEF GUNZIP - INFLATE_MODE_TYPE equ 11 - INFLATE_MODE_BAD equ 26 -ELSE - IFNDEF NO_GUNZIP - INFLATE_MODE_TYPE equ 11 - INFLATE_MODE_BAD equ 26 - ELSE - INFLATE_MODE_TYPE equ 3 - INFLATE_MODE_BAD equ 17 - ENDIF -ENDIF - - -; 75 "inffast.S" -;FILE "inffast.S" - -;;;GLOBAL _inflate_fast - -;;;SECTION .text - - - - .586p - .mmx - - name inflate_fast_x86 - .MODEL FLAT - -_DATA segment -inflate_fast_use_mmx: - dd 1 - - -_TEXT segment - - - -ALIGN 4 - db 'Fast decoding Code from Chris Anderson' - db 0 - -ALIGN 4 -invalid_literal_length_code_msg: - db 'invalid literal/length code' - db 0 - -ALIGN 4 -invalid_distance_code_msg: - db 'invalid distance code' - db 0 - -ALIGN 4 -invalid_distance_too_far_msg: - db 'invalid distance too far back' - db 0 - - -ALIGN 4 -inflate_fast_mask: -dd 0 -dd 1 -dd 3 -dd 7 -dd 15 -dd 31 -dd 63 -dd 127 -dd 255 -dd 511 -dd 1023 -dd 2047 -dd 4095 -dd 8191 -dd 16383 -dd 32767 -dd 65535 -dd 131071 -dd 262143 -dd 524287 -dd 1048575 -dd 2097151 -dd 4194303 -dd 8388607 -dd 16777215 -dd 33554431 -dd 67108863 -dd 134217727 -dd 268435455 -dd 536870911 -dd 1073741823 -dd 2147483647 -dd 4294967295 - - -mode_state equ 0 ;/* state->mode */ 
-wsize_state equ (32+zlib1222sup) ;/* state->wsize */ -write_state equ (36+4+zlib1222sup) ;/* state->write */ -window_state equ (40+4+zlib1222sup) ;/* state->window */ -hold_state equ (44+4+zlib1222sup) ;/* state->hold */ -bits_state equ (48+4+zlib1222sup) ;/* state->bits */ -lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ -distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ -lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ -distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ - - -;;SECTION .text -; 205 "inffast.S" -;GLOBAL inflate_fast_use_mmx - -;SECTION .data - - -; GLOBAL inflate_fast_use_mmx:object -;.size inflate_fast_use_mmx, 4 -; 226 "inffast.S" -;SECTION .text - -ALIGN 4 -_inflate_fast proc near -.FPO (16, 4, 0, 0, 1, 0) - push edi - push esi - push ebp - push ebx - pushfd - sub esp,64 - cld - - - - - mov esi, [esp+88] - mov edi, [esi+28] - - - - - - - - mov edx, [esi+4] - mov eax, [esi+0] - - add edx,eax - sub edx,11 - - mov [esp+44],eax - mov [esp+20],edx - - mov ebp, [esp+92] - mov ecx, [esi+16] - mov ebx, [esi+12] - - sub ebp,ecx - neg ebp - add ebp,ebx - - sub ecx,257 - add ecx,ebx - - mov [esp+60],ebx - mov [esp+40],ebp - mov [esp+16],ecx -; 285 "inffast.S" - mov eax, [edi+lencode_state] - mov ecx, [edi+distcode_state] - - mov [esp+8],eax - mov [esp+12],ecx - - mov eax,1 - mov ecx, [edi+lenbits_state] - shl eax,cl - dec eax - mov [esp+0],eax - - mov eax,1 - mov ecx, [edi+distbits_state] - shl eax,cl - dec eax - mov [esp+4],eax - - mov eax, [edi+wsize_state] - mov ecx, [edi+write_state] - mov edx, [edi+window_state] - - mov [esp+52],eax - mov [esp+48],ecx - mov [esp+56],edx - - mov ebp, [edi+hold_state] - mov ebx, [edi+bits_state] -; 321 "inffast.S" - mov esi, [esp+44] - mov ecx, [esp+20] - cmp ecx,esi - ja L_align_long - - add ecx,11 - sub ecx,esi - mov eax,12 - sub eax,ecx - lea edi, [esp+28] - rep movsb - mov ecx,eax - xor eax,eax - rep stosb - lea esi, [esp+28] - mov [esp+20],esi - jmp L_is_aligned - - 
-L_align_long: - test esi,3 - jz L_is_aligned - xor eax,eax - mov al, [esi] - inc esi - mov ecx,ebx - add ebx,8 - shl eax,cl - or ebp,eax - jmp L_align_long - -L_is_aligned: - mov edi, [esp+60] -; 366 "inffast.S" -L_check_mmx: - cmp dword ptr [inflate_fast_use_mmx],2 - je L_init_mmx - ja L_do_loop - - push eax - push ebx - push ecx - push edx - pushfd - mov eax, [esp] - xor dword ptr [esp],0200000h - - - - - popfd - pushfd - pop edx - xor edx,eax - jz L_dont_use_mmx - xor eax,eax - cpuid - cmp ebx,0756e6547h - jne L_dont_use_mmx - cmp ecx,06c65746eh - jne L_dont_use_mmx - cmp edx,049656e69h - jne L_dont_use_mmx - mov eax,1 - cpuid - shr eax,8 - and eax,15 - cmp eax,6 - jne L_dont_use_mmx - test edx,0800000h - jnz L_use_mmx - jmp L_dont_use_mmx -L_use_mmx: - mov dword ptr [inflate_fast_use_mmx],2 - jmp L_check_mmx_pop -L_dont_use_mmx: - mov dword ptr [inflate_fast_use_mmx],3 -L_check_mmx_pop: - pop edx - pop ecx - pop ebx - pop eax - jmp L_check_mmx -; 426 "inffast.S" -ALIGN 4 -L_do_loop: -; 437 "inffast.S" - cmp bl,15 - ja L_get_length_code - - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - -L_get_length_code: - mov edx, [esp+0] - mov ecx, [esp+8] - and edx,ebp - mov eax, [ecx+edx*4] - -L_dolen: - - - - - - - mov cl,ah - sub bl,ah - shr ebp,cl - - - - - - - test al,al - jnz L_test_for_length_base - - shr eax,16 - stosb - -L_while_test: - - - cmp [esp+16],edi - jbe L_break_loop - - cmp [esp+20],esi - ja L_do_loop - jmp L_break_loop - -L_test_for_length_base: -; 502 "inffast.S" - mov edx,eax - shr edx,16 - mov cl,al - - test al,16 - jz L_test_for_second_level_length - and cl,15 - jz L_save_len - cmp bl,cl - jae L_add_bits_to_len - - mov ch,cl - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - mov cl,ch - -L_add_bits_to_len: - mov eax,1 - shl eax,cl - dec eax - sub bl,cl - and eax,ebp - shr ebp,cl - add edx,eax - -L_save_len: - mov [esp+24],edx - - -L_decode_distance: -; 549 "inffast.S" - cmp bl,15 - ja 
L_get_distance_code - - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - -L_get_distance_code: - mov edx, [esp+4] - mov ecx, [esp+12] - and edx,ebp - mov eax, [ecx+edx*4] - - -L_dodist: - mov edx,eax - shr edx,16 - mov cl,ah - sub bl,ah - shr ebp,cl -; 584 "inffast.S" - mov cl,al - - test al,16 - jz L_test_for_second_level_dist - and cl,15 - jz L_check_dist_one - cmp bl,cl - jae L_add_bits_to_dist - - mov ch,cl - xor eax,eax - lodsw - mov cl,bl - add bl,16 - shl eax,cl - or ebp,eax - mov cl,ch - -L_add_bits_to_dist: - mov eax,1 - shl eax,cl - dec eax - sub bl,cl - and eax,ebp - shr ebp,cl - add edx,eax - jmp L_check_window - -L_check_window: -; 625 "inffast.S" - mov [esp+44],esi - mov eax,edi - sub eax, [esp+40] - - cmp eax,edx - jb L_clip_window - - mov ecx, [esp+24] - mov esi,edi - sub esi,edx - - sub ecx,3 - mov al, [esi] - mov [edi],al - mov al, [esi+1] - mov dl, [esi+2] - add esi,3 - mov [edi+1],al - mov [edi+2],dl - add edi,3 - rep movsb - - mov esi, [esp+44] - jmp L_while_test - -ALIGN 4 -L_check_dist_one: - cmp edx,1 - jne L_check_window - cmp [esp+40],edi - je L_check_window - - dec edi - mov ecx, [esp+24] - mov al, [edi] - sub ecx,3 - - mov [edi+1],al - mov [edi+2],al - mov [edi+3],al - add edi,4 - rep stosb - - jmp L_while_test - -ALIGN 4 -L_test_for_second_level_length: - - - - - test al,64 - jnz L_test_for_end_of_block - - mov eax,1 - shl eax,cl - dec eax - and eax,ebp - add eax,edx - mov edx, [esp+8] - mov eax, [edx+eax*4] - jmp L_dolen - -ALIGN 4 -L_test_for_second_level_dist: - - - - - test al,64 - jnz L_invalid_distance_code - - mov eax,1 - shl eax,cl - dec eax - and eax,ebp - add eax,edx - mov edx, [esp+12] - mov eax, [edx+eax*4] - jmp L_dodist - -ALIGN 4 -L_clip_window: -; 721 "inffast.S" - mov ecx,eax - mov eax, [esp+52] - neg ecx - mov esi, [esp+56] - - cmp eax,edx - jb L_invalid_distance_too_far - - add ecx,edx - cmp dword ptr [esp+48],0 - jne L_wrap_around_window - - sub eax,ecx - add esi,eax -; 749 "inffast.S" - mov 
eax, [esp+24] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - jmp L_do_copy1 - - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - jmp L_do_copy1 - -L_wrap_around_window: -; 793 "inffast.S" - mov eax, [esp+48] - cmp ecx,eax - jbe L_contiguous_in_window - - add esi, [esp+52] - add esi,eax - sub esi,ecx - sub ecx,eax - - - mov eax, [esp+24] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi, [esp+56] - mov ecx, [esp+48] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - jmp L_do_copy1 - -L_contiguous_in_window: -; 836 "inffast.S" - add esi,eax - sub esi,ecx - - - mov eax, [esp+24] - cmp eax,ecx - jbe L_do_copy1 - - sub eax,ecx - rep movsb - mov esi,edi - sub esi,edx - -L_do_copy1: -; 862 "inffast.S" - mov ecx,eax - rep movsb - - mov esi, [esp+44] - jmp L_while_test -; 878 "inffast.S" -ALIGN 4 -L_init_mmx: - emms - - - - - - movd mm0,ebp - mov ebp,ebx -; 896 "inffast.S" - movd mm4,dword ptr [esp+0] - movq mm3,mm4 - movd mm5,dword ptr [esp+4] - movq mm2,mm5 - pxor mm1,mm1 - mov ebx, [esp+8] - jmp L_do_loop_mmx - -ALIGN 4 -L_do_loop_mmx: - psrlq mm0,mm1 - - cmp ebp,32 - ja L_get_length_code_mmx - - movd mm6,ebp - movd mm7,dword ptr [esi] - add esi,4 - psllq mm7,mm6 - add ebp,32 - por mm0,mm7 - -L_get_length_code_mmx: - pand mm4,mm0 - movd eax,mm4 - movq mm4,mm3 - mov eax, [ebx+eax*4] - -L_dolen_mmx: - movzx ecx,ah - movd mm1,ecx - sub ebp,ecx - - test al,al - jnz L_test_for_length_base_mmx - - shr eax,16 - stosb - -L_while_test_mmx: - - - cmp [esp+16],edi - jbe L_break_loop - - cmp [esp+20],esi - ja L_do_loop_mmx - jmp L_break_loop - -L_test_for_length_base_mmx: - - mov edx,eax - shr edx,16 - - test al,16 - jz L_test_for_second_level_length_mmx - and eax,15 - jz L_decode_distance_mmx - - psrlq mm0,mm1 - movd mm1,eax - movd ecx,mm0 - sub ebp,eax - and ecx, [inflate_fast_mask+eax*4] - add edx,ecx - -L_decode_distance_mmx: - psrlq mm0,mm1 - 
- cmp ebp,32 - ja L_get_dist_code_mmx - - movd mm6,ebp - movd mm7,dword ptr [esi] - add esi,4 - psllq mm7,mm6 - add ebp,32 - por mm0,mm7 - -L_get_dist_code_mmx: - mov ebx, [esp+12] - pand mm5,mm0 - movd eax,mm5 - movq mm5,mm2 - mov eax, [ebx+eax*4] - -L_dodist_mmx: - - movzx ecx,ah - mov ebx,eax - shr ebx,16 - sub ebp,ecx - movd mm1,ecx - - test al,16 - jz L_test_for_second_level_dist_mmx - and eax,15 - jz L_check_dist_one_mmx - -L_add_bits_to_dist_mmx: - psrlq mm0,mm1 - movd mm1,eax - movd ecx,mm0 - sub ebp,eax - and ecx, [inflate_fast_mask+eax*4] - add ebx,ecx - -L_check_window_mmx: - mov [esp+44],esi - mov eax,edi - sub eax, [esp+40] - - cmp eax,ebx - jb L_clip_window_mmx - - mov ecx,edx - mov esi,edi - sub esi,ebx - - sub ecx,3 - mov al, [esi] - mov [edi],al - mov al, [esi+1] - mov dl, [esi+2] - add esi,3 - mov [edi+1],al - mov [edi+2],dl - add edi,3 - rep movsb - - mov esi, [esp+44] - mov ebx, [esp+8] - jmp L_while_test_mmx - -ALIGN 4 -L_check_dist_one_mmx: - cmp ebx,1 - jne L_check_window_mmx - cmp [esp+40],edi - je L_check_window_mmx - - dec edi - mov ecx,edx - mov al, [edi] - sub ecx,3 - - mov [edi+1],al - mov [edi+2],al - mov [edi+3],al - add edi,4 - rep stosb - - mov ebx, [esp+8] - jmp L_while_test_mmx - -ALIGN 4 -L_test_for_second_level_length_mmx: - test al,64 - jnz L_test_for_end_of_block - - and eax,15 - psrlq mm0,mm1 - movd ecx,mm0 - and ecx, [inflate_fast_mask+eax*4] - add ecx,edx - mov eax, [ebx+ecx*4] - jmp L_dolen_mmx - -ALIGN 4 -L_test_for_second_level_dist_mmx: - test al,64 - jnz L_invalid_distance_code - - and eax,15 - psrlq mm0,mm1 - movd ecx,mm0 - and ecx, [inflate_fast_mask+eax*4] - mov eax, [esp+12] - add ecx,ebx - mov eax, [eax+ecx*4] - jmp L_dodist_mmx - -ALIGN 4 -L_clip_window_mmx: - - mov ecx,eax - mov eax, [esp+52] - neg ecx - mov esi, [esp+56] - - cmp eax,ebx - jb L_invalid_distance_too_far - - add ecx,ebx - cmp dword ptr [esp+48],0 - jne L_wrap_around_window_mmx - - sub eax,ecx - add esi,eax - - cmp edx,ecx - jbe L_do_copy1_mmx - - 
sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - jmp L_do_copy1_mmx - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - jmp L_do_copy1_mmx - -L_wrap_around_window_mmx: - - mov eax, [esp+48] - cmp ecx,eax - jbe L_contiguous_in_window_mmx - - add esi, [esp+52] - add esi,eax - sub esi,ecx - sub ecx,eax - - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi, [esp+56] - mov ecx, [esp+48] - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - jmp L_do_copy1_mmx - -L_contiguous_in_window_mmx: - - add esi,eax - sub esi,ecx - - - cmp edx,ecx - jbe L_do_copy1_mmx - - sub edx,ecx - rep movsb - mov esi,edi - sub esi,ebx - -L_do_copy1_mmx: - - - mov ecx,edx - rep movsb - - mov esi, [esp+44] - mov ebx, [esp+8] - jmp L_while_test_mmx -; 1174 "inffast.S" -L_invalid_distance_code: - - - - - - mov ecx, invalid_distance_code_msg - mov edx,INFLATE_MODE_BAD - jmp L_update_stream_state - -L_test_for_end_of_block: - - - - - - test al,32 - jz L_invalid_literal_length_code - - mov ecx,0 - mov edx,INFLATE_MODE_TYPE - jmp L_update_stream_state - -L_invalid_literal_length_code: - - - - - - mov ecx, invalid_literal_length_code_msg - mov edx,INFLATE_MODE_BAD - jmp L_update_stream_state - -L_invalid_distance_too_far: - - - - mov esi, [esp+44] - mov ecx, invalid_distance_too_far_msg - mov edx,INFLATE_MODE_BAD - jmp L_update_stream_state - -L_update_stream_state: - - mov eax, [esp+88] - test ecx,ecx - jz L_skip_msg - mov [eax+24],ecx -L_skip_msg: - mov eax, [eax+28] - mov [eax+mode_state],edx - jmp L_break_loop - -ALIGN 4 -L_break_loop: -; 1243 "inffast.S" - cmp dword ptr [inflate_fast_use_mmx],2 - jne L_update_next_in - - - - mov ebx,ebp - -L_update_next_in: -; 1266 "inffast.S" - mov eax, [esp+88] - mov ecx,ebx - mov edx, [eax+28] - shr ecx,3 - sub esi,ecx - shl ecx,3 - sub ebx,ecx - mov [eax+12],edi - mov [edx+bits_state],ebx - mov ecx,ebx - - lea ebx, [esp+28] - cmp [esp+20],ebx - jne 
L_buf_not_used - - sub esi,ebx - mov ebx, [eax+0] - mov [esp+20],ebx - add esi,ebx - mov ebx, [eax+4] - sub ebx,11 - add [esp+20],ebx - -L_buf_not_used: - mov [eax+0],esi - - mov ebx,1 - shl ebx,cl - dec ebx - - - - - - cmp dword ptr [inflate_fast_use_mmx],2 - jne L_update_hold - - - - psrlq mm0,mm1 - movd ebp,mm0 - - emms - -L_update_hold: - - - - and ebp,ebx - mov [edx+hold_state],ebp - - - - - mov ebx, [esp+20] - cmp ebx,esi - jbe L_last_is_smaller - - sub ebx,esi - add ebx,11 - mov [eax+4],ebx - jmp L_fixup_out -L_last_is_smaller: - sub esi,ebx - neg esi - add esi,11 - mov [eax+4],esi - - - - -L_fixup_out: - - mov ebx, [esp+16] - cmp ebx,edi - jbe L_end_is_smaller - - sub ebx,edi - add ebx,257 - mov [eax+16],ebx - jmp L_done -L_end_is_smaller: - sub edi,ebx - neg edi - add edi,257 - mov [eax+16],edi - - - - - -L_done: - add esp,64 - popfd - pop ebx - pop ebp - pop esi - pop edi - ret -_inflate_fast endp - -_TEXT ends -end diff --git a/zlib/contrib/masmx86/inffas32.lst b/zlib/contrib/masmx86/inffas32.lst deleted file mode 100644 index 025627c..0000000 --- a/zlib/contrib/masmx86/inffas32.lst +++ /dev/null @@ -1,1224 +0,0 @@ -Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23 -inffas32.asm Page 1 - 1 - - - ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding - ; * - ; * inffas32.asm is derivated from inffas86.c, with translation of assembly code - ; * - ; * Copyright (C) 1995-2003 Mark Adler - ; * For conditions of distribution and use, see copyright notice in zlib.h - ; * - ; * Copyright (C) 2003 Chris Anderson <christop@charm.net> - ; * Please use the copyright conditions above. - ; * - ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from - ; * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at - ; * the moment. I have successfully compiled and tested this code with gcc2.96, - ; * gcc3.2, icc5.0, msvc6.0. 
It is very close to the speed of inffast.S - ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX - ; * enabled. I will attempt to merge the MMX code into this version. Newer - ; * versions of this and inffast.S can be found at - ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ - ; * - ; * 2005 : modification by Gilles Vollant - ; */ - ; For Visual C++ 4.x and higher and ML 6.x and higher - ; ml.exe is in directory \MASM611C of Win95 DDK - ; ml.exe is also distributed in http://www.masm32.com/masmdl.htm - ; and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/ - ; - ; - ; compile with command line option - ; ml /coff /Zi /c /Flinffas32.lst inffas32.asm - - ; if you define NO_GZIP (see inflate.h), compile with - ; ml /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm - - - ; zlib122sup is 0 fort zlib 1.2.2.1 and lower - ; zlib122sup is 8 fort zlib 1.2.2.2 and more (with addition of dmax and head - ; in inflate_state in inflate.h) - = 00000008 zlib1222sup equ 8 - - - IFDEF GUNZIP - ELSE - IFNDEF NO_GUNZIP - = 0000000B INFLATE_MODE_TYPE equ 11 - = 0000001A INFLATE_MODE_BAD equ 26 - ELSE - ENDIF - ENDIF - - - ; 75 "inffast.S" - ;FILE "inffast.S" - - ;;;GLOBAL _inflate_fast - - ;;;SECTION .text - - - - .586p - .mmx - - name inflate_fast_x86 - .MODEL FLAT - - 00000000 _DATA segment - 00000000 inflate_fast_use_mmx: - 00000000 00000001 dd 1 - - - 00000000 _TEXT segment - - - - ALIGN 4 - 00000000 46 61 73 74 20 db 'Fast decoding Code from Chris Anderson' - 64 65 63 6F 64 - 69 6E 67 20 43 - 6F 64 65 20 66 - 72 6F 6D 20 43 - 68 72 69 73 20 - 41 6E 64 65 72 - 73 6F 6E - 00000026 00 db 0 - - ALIGN 4 - 00000028 invalid_literal_length_code_msg: - 00000028 69 6E 76 61 6C db 'invalid literal/length code' - 69 64 20 6C 69 - 74 65 72 61 6C - 2F 6C 65 6E 67 - 74 68 20 63 6F - 64 65 - 00000043 00 db 0 - - ALIGN 4 - 00000044 invalid_distance_code_msg: - 00000044 69 6E 76 61 6C db 'invalid distance code' - 69 
64 20 64 69 - 73 74 61 6E 63 - 65 20 63 6F 64 - 65 - 00000059 00 db 0 - - ALIGN 4 - 0000005C invalid_distance_too_far_msg: - 0000005C 69 6E 76 61 6C db 'invalid distance too far back' - 69 64 20 64 69 - 73 74 61 6E 63 - 65 20 74 6F 6F - 20 66 61 72 20 - 62 61 63 6B - 00000079 00 db 0 - - - ALIGN 4 - 0000007C inflate_fast_mask: - 0000007C 00000000 dd 0 - 00000080 00000001 dd 1 - 00000084 00000003 dd 3 - 00000088 00000007 dd 7 - 0000008C 0000000F dd 15 - 00000090 0000001F dd 31 - 00000094 0000003F dd 63 - 00000098 0000007F dd 127 - 0000009C 000000FF dd 255 - 000000A0 000001FF dd 511 - 000000A4 000003FF dd 1023 - 000000A8 000007FF dd 2047 - 000000AC 00000FFF dd 4095 - 000000B0 00001FFF dd 8191 - 000000B4 00003FFF dd 16383 - 000000B8 00007FFF dd 32767 - 000000BC 0000FFFF dd 65535 - 000000C0 0001FFFF dd 131071 - 000000C4 0003FFFF dd 262143 - 000000C8 0007FFFF dd 524287 - 000000CC 000FFFFF dd 1048575 - 000000D0 001FFFFF dd 2097151 - 000000D4 003FFFFF dd 4194303 - 000000D8 007FFFFF dd 8388607 - 000000DC 00FFFFFF dd 16777215 - 000000E0 01FFFFFF dd 33554431 - 000000E4 03FFFFFF dd 67108863 - 000000E8 07FFFFFF dd 134217727 - 000000EC 0FFFFFFF dd 268435455 - 000000F0 1FFFFFFF dd 536870911 - 000000F4 3FFFFFFF dd 1073741823 - 000000F8 7FFFFFFF dd 2147483647 - 000000FC FFFFFFFF dd 4294967295 - - - = 00000000 mode_state equ 0 ;/* state->mode */ - = 00000028 wsize_state equ (32+zlib1222sup) ;/* state->wsize */ - = 00000030 write_state equ (36+4+zlib1222sup) ;/* state->write */ - = 00000034 window_state equ (40+4+zlib1222sup) ;/* state->window */ - = 00000038 hold_state equ (44+4+zlib1222sup) ;/* state->hold */ - = 0000003C bits_state equ (48+4+zlib1222sup) ;/* state->bits */ - = 0000004C lencode_state equ (64+4+zlib1222sup) ;/* state->lencode */ - = 00000050 distcode_state equ (68+4+zlib1222sup) ;/* state->distcode */ - = 00000054 lenbits_state equ (72+4+zlib1222sup) ;/* state->lenbits */ - = 00000058 distbits_state equ (76+4+zlib1222sup) ;/* state->distbits */ - - - ;;SECTION 
.text - ; 205 "inffast.S" - ;GLOBAL inflate_fast_use_mmx - - ;SECTION .data - - - ; GLOBAL inflate_fast_use_mmx:object - ;.size inflate_fast_use_mmx, 4 - ; 226 "inffast.S" - ;SECTION .text - - ALIGN 4 - 00000100 _inflate_fast proc near - 00000100 .FPO (16, 4, 0, 0, 1, 0) - 00000100 57 push edi - 00000101 56 push esi - 00000102 55 push ebp - 00000103 53 push ebx - 00000104 9C pushfd - 00000105 83 EC 40 sub esp,64 - 00000108 FC cld - - - - - 00000109 8B 74 24 58 mov esi, [esp+88] - 0000010D 8B 7E 1C mov edi, [esi+28] - - - - - - - - 00000110 8B 56 04 mov edx, [esi+4] - 00000113 8B 06 mov eax, [esi+0] - - 00000115 03 D0 add edx,eax - 00000117 83 EA 0B sub edx,11 - - 0000011A 89 44 24 2C mov [esp+44],eax - 0000011E 89 54 24 14 mov [esp+20],edx - - 00000122 8B 6C 24 5C mov ebp, [esp+92] - 00000126 8B 4E 10 mov ecx, [esi+16] - 00000129 8B 5E 0C mov ebx, [esi+12] - - 0000012C 2B E9 sub ebp,ecx - 0000012E F7 DD neg ebp - 00000130 03 EB add ebp,ebx - - 00000132 81 E9 00000101 sub ecx,257 - 00000138 03 CB add ecx,ebx - - 0000013A 89 5C 24 3C mov [esp+60],ebx - 0000013E 89 6C 24 28 mov [esp+40],ebp - 00000142 89 4C 24 10 mov [esp+16],ecx - ; 285 "inffast.S" - 00000146 8B 47 4C mov eax, [edi+lencode_state] - 00000149 8B 4F 50 mov ecx, [edi+distcode_state] - - 0000014C 89 44 24 08 mov [esp+8],eax - 00000150 89 4C 24 0C mov [esp+12],ecx - - 00000154 B8 00000001 mov eax,1 - 00000159 8B 4F 54 mov ecx, [edi+lenbits_state] - 0000015C D3 E0 shl eax,cl - 0000015E 48 dec eax - 0000015F 89 04 24 mov [esp+0],eax - - 00000162 B8 00000001 mov eax,1 - 00000167 8B 4F 58 mov ecx, [edi+distbits_state] - 0000016A D3 E0 shl eax,cl - 0000016C 48 dec eax - 0000016D 89 44 24 04 mov [esp+4],eax - - 00000171 8B 47 28 mov eax, [edi+wsize_state] - 00000174 8B 4F 30 mov ecx, [edi+write_state] - 00000177 8B 57 34 mov edx, [edi+window_state] - - 0000017A 89 44 24 34 mov [esp+52],eax - 0000017E 89 4C 24 30 mov [esp+48],ecx - 00000182 89 54 24 38 mov [esp+56],edx - - 00000186 8B 6F 38 mov ebp, 
[edi+hold_state] - 00000189 8B 5F 3C mov ebx, [edi+bits_state] - ; 321 "inffast.S" - 0000018C 8B 74 24 2C mov esi, [esp+44] - 00000190 8B 4C 24 14 mov ecx, [esp+20] - 00000194 3B CE cmp ecx,esi - 00000196 77 22 ja L_align_long - - 00000198 83 C1 0B add ecx,11 - 0000019B 2B CE sub ecx,esi - 0000019D B8 0000000C mov eax,12 - 000001A2 2B C1 sub eax,ecx - 000001A4 8D 7C 24 1C lea edi, [esp+28] - 000001A8 F3/ A4 rep movsb - 000001AA 8B C8 mov ecx,eax - 000001AC 33 C0 xor eax,eax - 000001AE F3/ AA rep stosb - 000001B0 8D 74 24 1C lea esi, [esp+28] - 000001B4 89 74 24 14 mov [esp+20],esi - 000001B8 EB 18 jmp L_is_aligned - - - 000001BA L_align_long: - 000001BA F7 C6 00000003 test esi,3 - 000001C0 74 10 jz L_is_aligned - 000001C2 33 C0 xor eax,eax - 000001C4 8A 06 mov al, [esi] - 000001C6 46 inc esi - 000001C7 8B CB mov ecx,ebx - 000001C9 83 C3 08 add ebx,8 - 000001CC D3 E0 shl eax,cl - 000001CE 0B E8 or ebp,eax - 000001D0 EB E8 jmp L_align_long - - 000001D2 L_is_aligned: - 000001D2 8B 7C 24 3C mov edi, [esp+60] - ; 366 "inffast.S" - 000001D6 L_check_mmx: - 000001D6 83 3D 00000000 R cmp dword ptr [inflate_fast_use_mmx],2 - 02 - 000001DD 0F 84 00000289 je L_init_mmx - 000001E3 77 6B ja L_do_loop - - 000001E5 50 push eax - 000001E6 53 push ebx - 000001E7 51 push ecx - 000001E8 52 push edx - 000001E9 9C pushfd - 000001EA 8B 04 24 mov eax, [esp] - 000001ED 81 34 24 xor dword ptr [esp],0200000h - 00200000 - - - - - 000001F4 9D popfd - 000001F5 9C pushfd - 000001F6 5A pop edx - 000001F7 33 D0 xor edx,eax - 000001F9 74 44 jz L_dont_use_mmx - 000001FB 33 C0 xor eax,eax - 000001FD 0F A2 cpuid - 000001FF 81 FB 756E6547 cmp ebx,0756e6547h - 00000205 75 38 jne L_dont_use_mmx - 00000207 81 F9 6C65746E cmp ecx,06c65746eh - 0000020D 75 30 jne L_dont_use_mmx - 0000020F 81 FA 49656E69 cmp edx,049656e69h - 00000215 75 28 jne L_dont_use_mmx - 00000217 B8 00000001 mov eax,1 - 0000021C 0F A2 cpuid - 0000021E C1 E8 08 shr eax,8 - 00000221 83 E0 0F and eax,15 - 00000224 83 F8 06 cmp eax,6 - 
00000227 75 16 jne L_dont_use_mmx - 00000229 F7 C2 00800000 test edx,0800000h - 0000022F 75 02 jnz L_use_mmx - 00000231 EB 0C jmp L_dont_use_mmx - 00000233 L_use_mmx: - 00000233 C7 05 00000000 R mov dword ptr [inflate_fast_use_mmx],2 - 00000002 - 0000023D EB 0A jmp L_check_mmx_pop - 0000023F L_dont_use_mmx: - 0000023F C7 05 00000000 R mov dword ptr [inflate_fast_use_mmx],3 - 00000003 - 00000249 L_check_mmx_pop: - 00000249 5A pop edx - 0000024A 59 pop ecx - 0000024B 5B pop ebx - 0000024C 58 pop eax - 0000024D EB 87 jmp L_check_mmx - ; 426 "inffast.S" - ALIGN 4 - 00000250 L_do_loop: - ; 437 "inffast.S" - 00000250 80 FB 0F cmp bl,15 - 00000253 77 0D ja L_get_length_code - - 00000255 33 C0 xor eax,eax - 00000257 66| AD lodsw - 00000259 8A CB mov cl,bl - 0000025B 80 C3 10 add bl,16 - 0000025E D3 E0 shl eax,cl - 00000260 0B E8 or ebp,eax - - 00000262 L_get_length_code: - 00000262 8B 14 24 mov edx, [esp+0] - 00000265 8B 4C 24 08 mov ecx, [esp+8] - 00000269 23 D5 and edx,ebp - 0000026B 8B 04 91 mov eax, [ecx+edx*4] - - 0000026E L_dolen: - - - - - - - 0000026E 8A CC mov cl,ah - 00000270 2A DC sub bl,ah - 00000272 D3 ED shr ebp,cl - - - - - - - 00000274 84 C0 test al,al - 00000276 75 19 jnz L_test_for_length_base - - 00000278 C1 E8 10 shr eax,16 - 0000027B AA stosb - - 0000027C L_while_test: - - - 0000027C 39 7C 24 10 cmp [esp+16],edi - 00000280 0F 86 00000462 jbe L_break_loop - - 00000286 39 74 24 14 cmp [esp+20],esi - 0000028A 77 C4 ja L_do_loop - 0000028C E9 00000457 jmp L_break_loop - - 00000291 L_test_for_length_base: - ; 502 "inffast.S" - 00000291 8B D0 mov edx,eax - 00000293 C1 EA 10 shr edx,16 - 00000296 8A C8 mov cl,al - - 00000298 A8 10 test al,16 - 0000029A 0F 84 000000F4 jz L_test_for_second_level_length - 000002A0 80 E1 0F and cl,15 - 000002A3 74 25 jz L_save_len - 000002A5 3A D9 cmp bl,cl - 000002A7 73 11 jae L_add_bits_to_len - - 000002A9 8A E9 mov ch,cl - 000002AB 33 C0 xor eax,eax - 000002AD 66| AD lodsw - 000002AF 8A CB mov cl,bl - 000002B1 80 C3 10 add 
bl,16 - 000002B4 D3 E0 shl eax,cl - 000002B6 0B E8 or ebp,eax - 000002B8 8A CD mov cl,ch - - 000002BA L_add_bits_to_len: - 000002BA B8 00000001 mov eax,1 - 000002BF D3 E0 shl eax,cl - 000002C1 48 dec eax - 000002C2 2A D9 sub bl,cl - 000002C4 23 C5 and eax,ebp - 000002C6 D3 ED shr ebp,cl - 000002C8 03 D0 add edx,eax - - 000002CA L_save_len: - 000002CA 89 54 24 18 mov [esp+24],edx - - - 000002CE L_decode_distance: - ; 549 "inffast.S" - 000002CE 80 FB 0F cmp bl,15 - 000002D1 77 0D ja L_get_distance_code - - 000002D3 33 C0 xor eax,eax - 000002D5 66| AD lodsw - 000002D7 8A CB mov cl,bl - 000002D9 80 C3 10 add bl,16 - 000002DC D3 E0 shl eax,cl - 000002DE 0B E8 or ebp,eax - - 000002E0 L_get_distance_code: - 000002E0 8B 54 24 04 mov edx, [esp+4] - 000002E4 8B 4C 24 0C mov ecx, [esp+12] - 000002E8 23 D5 and edx,ebp - 000002EA 8B 04 91 mov eax, [ecx+edx*4] - - - 000002ED L_dodist: - 000002ED 8B D0 mov edx,eax - 000002EF C1 EA 10 shr edx,16 - 000002F2 8A CC mov cl,ah - 000002F4 2A DC sub bl,ah - 000002F6 D3 ED shr ebp,cl - ; 584 "inffast.S" - 000002F8 8A C8 mov cl,al - - 000002FA A8 10 test al,16 - 000002FC 0F 84 000000B2 jz L_test_for_second_level_dist - 00000302 80 E1 0F and cl,15 - 00000305 74 65 jz L_check_dist_one - 00000307 3A D9 cmp bl,cl - 00000309 73 11 jae L_add_bits_to_dist - - 0000030B 8A E9 mov ch,cl - 0000030D 33 C0 xor eax,eax - 0000030F 66| AD lodsw - 00000311 8A CB mov cl,bl - 00000313 80 C3 10 add bl,16 - 00000316 D3 E0 shl eax,cl - 00000318 0B E8 or ebp,eax - 0000031A 8A CD mov cl,ch - - 0000031C L_add_bits_to_dist: - 0000031C B8 00000001 mov eax,1 - 00000321 D3 E0 shl eax,cl - 00000323 48 dec eax - 00000324 2A D9 sub bl,cl - 00000326 23 C5 and eax,ebp - 00000328 D3 ED shr ebp,cl - 0000032A 03 D0 add edx,eax - 0000032C EB 00 jmp L_check_window - - 0000032E L_check_window: - ; 625 "inffast.S" - 0000032E 89 74 24 2C mov [esp+44],esi - 00000332 8B C7 mov eax,edi - 00000334 2B 44 24 28 sub eax, [esp+40] - - 00000338 3B C2 cmp eax,edx - 0000033A 0F 82 00000094 
jb L_clip_window - - 00000340 8B 4C 24 18 mov ecx, [esp+24] - 00000344 8B F7 mov esi,edi - 00000346 2B F2 sub esi,edx - - 00000348 83 E9 03 sub ecx,3 - 0000034B 8A 06 mov al, [esi] - 0000034D 88 07 mov [edi],al - 0000034F 8A 46 01 mov al, [esi+1] - 00000352 8A 56 02 mov dl, [esi+2] - 00000355 83 C6 03 add esi,3 - 00000358 88 47 01 mov [edi+1],al - 0000035B 88 57 02 mov [edi+2],dl - 0000035E 83 C7 03 add edi,3 - 00000361 F3/ A4 rep movsb - - 00000363 8B 74 24 2C mov esi, [esp+44] - 00000367 E9 FFFFFF10 jmp L_while_test - - ALIGN 4 - 0000036C L_check_dist_one: - 0000036C 83 FA 01 cmp edx,1 - 0000036F 75 BD jne L_check_window - 00000371 39 7C 24 28 cmp [esp+40],edi - 00000375 74 B7 je L_check_window - - 00000377 4F dec edi - 00000378 8B 4C 24 18 mov ecx, [esp+24] - 0000037C 8A 07 mov al, [edi] - 0000037E 83 E9 03 sub ecx,3 - - 00000381 88 47 01 mov [edi+1],al - 00000384 88 47 02 mov [edi+2],al - 00000387 88 47 03 mov [edi+3],al - 0000038A 83 C7 04 add edi,4 - 0000038D F3/ AA rep stosb - - 0000038F E9 FFFFFEE8 jmp L_while_test - - ALIGN 4 - 00000394 L_test_for_second_level_length: - - - - - 00000394 A8 40 test al,64 - 00000396 0F 85 0000030E jnz L_test_for_end_of_block - - 0000039C B8 00000001 mov eax,1 - 000003A1 D3 E0 shl eax,cl - 000003A3 48 dec eax - 000003A4 23 C5 and eax,ebp - 000003A6 03 C2 add eax,edx - 000003A8 8B 54 24 08 mov edx, [esp+8] - 000003AC 8B 04 82 mov eax, [edx+eax*4] - 000003AF E9 FFFFFEBA jmp L_dolen - - ALIGN 4 - 000003B4 L_test_for_second_level_dist: - - - - - 000003B4 A8 40 test al,64 - 000003B6 0F 85 000002E2 jnz L_invalid_distance_code - - 000003BC B8 00000001 mov eax,1 - 000003C1 D3 E0 shl eax,cl - 000003C3 48 dec eax - 000003C4 23 C5 and eax,ebp - 000003C6 03 C2 add eax,edx - 000003C8 8B 54 24 0C mov edx, [esp+12] - 000003CC 8B 04 82 mov eax, [edx+eax*4] - 000003CF E9 FFFFFF19 jmp L_dodist - - ALIGN 4 - 000003D4 L_clip_window: - ; 721 "inffast.S" - 000003D4 8B C8 mov ecx,eax - 000003D6 8B 44 24 34 mov eax, [esp+52] - 000003DA F7 D9 neg ecx 
- 000003DC 8B 74 24 38 mov esi, [esp+56] - - 000003E0 3B C2 cmp eax,edx - 000003E2 0F 82 000002DE jb L_invalid_distance_too_far - - 000003E8 03 CA add ecx,edx - 000003EA 83 7C 24 30 00 cmp dword ptr [esp+48],0 - 000003EF 75 24 jne L_wrap_around_window - - 000003F1 2B C1 sub eax,ecx - 000003F3 03 F0 add esi,eax - ; 749 "inffast.S" - 000003F5 8B 44 24 18 mov eax, [esp+24] - 000003F9 3B C1 cmp eax,ecx - 000003FB 76 60 jbe L_do_copy1 - - 000003FD 2B C1 sub eax,ecx - 000003FF F3/ A4 rep movsb - 00000401 8B F7 mov esi,edi - 00000403 2B F2 sub esi,edx - 00000405 EB 56 jmp L_do_copy1 - - 00000407 3B C1 cmp eax,ecx - 00000409 76 52 jbe L_do_copy1 - - 0000040B 2B C1 sub eax,ecx - 0000040D F3/ A4 rep movsb - 0000040F 8B F7 mov esi,edi - 00000411 2B F2 sub esi,edx - 00000413 EB 48 jmp L_do_copy1 - - 00000415 L_wrap_around_window: - ; 793 "inffast.S" - 00000415 8B 44 24 30 mov eax, [esp+48] - 00000419 3B C8 cmp ecx,eax - 0000041B 76 2C jbe L_contiguous_in_window - - 0000041D 03 74 24 34 add esi, [esp+52] - 00000421 03 F0 add esi,eax - 00000423 2B F1 sub esi,ecx - 00000425 2B C8 sub ecx,eax - - - 00000427 8B 44 24 18 mov eax, [esp+24] - 0000042B 3B C1 cmp eax,ecx - 0000042D 76 2E jbe L_do_copy1 - - 0000042F 2B C1 sub eax,ecx - 00000431 F3/ A4 rep movsb - 00000433 8B 74 24 38 mov esi, [esp+56] - 00000437 8B 4C 24 30 mov ecx, [esp+48] - 0000043B 3B C1 cmp eax,ecx - 0000043D 76 1E jbe L_do_copy1 - - 0000043F 2B C1 sub eax,ecx - 00000441 F3/ A4 rep movsb - 00000443 8B F7 mov esi,edi - 00000445 2B F2 sub esi,edx - 00000447 EB 14 jmp L_do_copy1 - - 00000449 L_contiguous_in_window: - ; 836 "inffast.S" - 00000449 03 F0 add esi,eax - 0000044B 2B F1 sub esi,ecx - - - 0000044D 8B 44 24 18 mov eax, [esp+24] - 00000451 3B C1 cmp eax,ecx - 00000453 76 08 jbe L_do_copy1 - - 00000455 2B C1 sub eax,ecx - 00000457 F3/ A4 rep movsb - 00000459 8B F7 mov esi,edi - 0000045B 2B F2 sub esi,edx - - 0000045D L_do_copy1: - ; 862 "inffast.S" - 0000045D 8B C8 mov ecx,eax - 0000045F F3/ A4 rep movsb - - 
00000461 8B 74 24 2C mov esi, [esp+44] - 00000465 E9 FFFFFE12 jmp L_while_test - ; 878 "inffast.S" - ALIGN 4 - 0000046C L_init_mmx: - 0000046C 0F 77 emms - - - - - - 0000046E 0F 6E C5 movd mm0,ebp - 00000471 8B EB mov ebp,ebx - ; 896 "inffast.S" - 00000473 0F 6E 24 24 movd mm4,dword ptr [esp+0] - 00000477 0F 7F E3 movq mm3,mm4 - 0000047A 0F 6E 6C 24 04 movd mm5,dword ptr [esp+4] - 0000047F 0F 7F EA movq mm2,mm5 - 00000482 0F EF C9 pxor mm1,mm1 - 00000485 8B 5C 24 08 mov ebx, [esp+8] - 00000489 EB 01 jmp L_do_loop_mmx - - ALIGN 4 - 0000048C L_do_loop_mmx: - 0000048C 0F D3 C1 psrlq mm0,mm1 - - 0000048F 83 FD 20 cmp ebp,32 - 00000492 77 12 ja L_get_length_code_mmx - - 00000494 0F 6E F5 movd mm6,ebp - 00000497 0F 6E 3E movd mm7,dword ptr [esi] - 0000049A 83 C6 04 add esi,4 - 0000049D 0F F3 FE psllq mm7,mm6 - 000004A0 83 C5 20 add ebp,32 - 000004A3 0F EB C7 por mm0,mm7 - - 000004A6 L_get_length_code_mmx: - 000004A6 0F DB E0 pand mm4,mm0 - 000004A9 0F 7E E0 movd eax,mm4 - 000004AC 0F 7F DC movq mm4,mm3 - 000004AF 8B 04 83 mov eax, [ebx+eax*4] - - 000004B2 L_dolen_mmx: - 000004B2 0F B6 CC movzx ecx,ah - 000004B5 0F 6E C9 movd mm1,ecx - 000004B8 2B E9 sub ebp,ecx - - 000004BA 84 C0 test al,al - 000004BC 75 19 jnz L_test_for_length_base_mmx - - 000004BE C1 E8 10 shr eax,16 - 000004C1 AA stosb - - 000004C2 L_while_test_mmx: - - - 000004C2 39 7C 24 10 cmp [esp+16],edi - 000004C6 0F 86 0000021C jbe L_break_loop - - 000004CC 39 74 24 14 cmp [esp+20],esi - 000004D0 77 BA ja L_do_loop_mmx - 000004D2 E9 00000211 jmp L_break_loop - - 000004D7 L_test_for_length_base_mmx: - - 000004D7 8B D0 mov edx,eax - 000004D9 C1 EA 10 shr edx,16 - - 000004DC A8 10 test al,16 - 000004DE 0F 84 000000E0 jz L_test_for_second_level_length_mmx - 000004E4 83 E0 0F and eax,15 - 000004E7 74 14 jz L_decode_distance_mmx - - 000004E9 0F D3 C1 psrlq mm0,mm1 - 000004EC 0F 6E C8 movd mm1,eax - 000004EF 0F 7E C1 movd ecx,mm0 - 000004F2 2B E8 sub ebp,eax - 000004F4 23 0C 85 and ecx, [inflate_fast_mask+eax*4] - 
0000007C R - 000004FB 03 D1 add edx,ecx - - 000004FD L_decode_distance_mmx: - 000004FD 0F D3 C1 psrlq mm0,mm1 - - 00000500 83 FD 20 cmp ebp,32 - 00000503 77 12 ja L_get_dist_code_mmx - - 00000505 0F 6E F5 movd mm6,ebp - 00000508 0F 6E 3E movd mm7,dword ptr [esi] - 0000050B 83 C6 04 add esi,4 - 0000050E 0F F3 FE psllq mm7,mm6 - 00000511 83 C5 20 add ebp,32 - 00000514 0F EB C7 por mm0,mm7 - - 00000517 L_get_dist_code_mmx: - 00000517 8B 5C 24 0C mov ebx, [esp+12] - 0000051B 0F DB E8 pand mm5,mm0 - 0000051E 0F 7E E8 movd eax,mm5 - 00000521 0F 7F D5 movq mm5,mm2 - 00000524 8B 04 83 mov eax, [ebx+eax*4] - - 00000527 L_dodist_mmx: - - 00000527 0F B6 CC movzx ecx,ah - 0000052A 8B D8 mov ebx,eax - 0000052C C1 EB 10 shr ebx,16 - 0000052F 2B E9 sub ebp,ecx - 00000531 0F 6E C9 movd mm1,ecx - - 00000534 A8 10 test al,16 - 00000536 0F 84 000000AC jz L_test_for_second_level_dist_mmx - 0000053C 83 E0 0F and eax,15 - 0000053F 74 57 jz L_check_dist_one_mmx - - 00000541 L_add_bits_to_dist_mmx: - 00000541 0F D3 C1 psrlq mm0,mm1 - 00000544 0F 6E C8 movd mm1,eax - 00000547 0F 7E C1 movd ecx,mm0 - 0000054A 2B E8 sub ebp,eax - 0000054C 23 0C 85 and ecx, [inflate_fast_mask+eax*4] - 0000007C R - 00000553 03 D9 add ebx,ecx - - 00000555 L_check_window_mmx: - 00000555 89 74 24 2C mov [esp+44],esi - 00000559 8B C7 mov eax,edi - 0000055B 2B 44 24 28 sub eax, [esp+40] - - 0000055F 3B C3 cmp eax,ebx - 00000561 0F 82 000000A9 jb L_clip_window_mmx - - 00000567 8B CA mov ecx,edx - 00000569 8B F7 mov esi,edi - 0000056B 2B F3 sub esi,ebx - - 0000056D 83 E9 03 sub ecx,3 - 00000570 8A 06 mov al, [esi] - 00000572 88 07 mov [edi],al - 00000574 8A 46 01 mov al, [esi+1] - 00000577 8A 56 02 mov dl, [esi+2] - 0000057A 83 C6 03 add esi,3 - 0000057D 88 47 01 mov [edi+1],al - 00000580 88 57 02 mov [edi+2],dl - 00000583 83 C7 03 add edi,3 - 00000586 F3/ A4 rep movsb - - 00000588 8B 74 24 2C mov esi, [esp+44] - 0000058C 8B 5C 24 08 mov ebx, [esp+8] - 00000590 E9 FFFFFF2D jmp L_while_test_mmx - - ALIGN 4 - 00000598 
L_check_dist_one_mmx: - 00000598 83 FB 01 cmp ebx,1 - 0000059B 75 B8 jne L_check_window_mmx - 0000059D 39 7C 24 28 cmp [esp+40],edi - 000005A1 74 B2 je L_check_window_mmx - - 000005A3 4F dec edi - 000005A4 8B CA mov ecx,edx - 000005A6 8A 07 mov al, [edi] - 000005A8 83 E9 03 sub ecx,3 - - 000005AB 88 47 01 mov [edi+1],al - 000005AE 88 47 02 mov [edi+2],al - 000005B1 88 47 03 mov [edi+3],al - 000005B4 83 C7 04 add edi,4 - 000005B7 F3/ AA rep stosb - - 000005B9 8B 5C 24 08 mov ebx, [esp+8] - 000005BD E9 FFFFFF00 jmp L_while_test_mmx - - ALIGN 4 - 000005C4 L_test_for_second_level_length_mmx: - 000005C4 A8 40 test al,64 - 000005C6 0F 85 000000DE jnz L_test_for_end_of_block - - 000005CC 83 E0 0F and eax,15 - 000005CF 0F D3 C1 psrlq mm0,mm1 - 000005D2 0F 7E C1 movd ecx,mm0 - 000005D5 23 0C 85 and ecx, [inflate_fast_mask+eax*4] - 0000007C R - 000005DC 03 CA add ecx,edx - 000005DE 8B 04 8B mov eax, [ebx+ecx*4] - 000005E1 E9 FFFFFECC jmp L_dolen_mmx - - ALIGN 4 - 000005E8 L_test_for_second_level_dist_mmx: - 000005E8 A8 40 test al,64 - 000005EA 0F 85 000000AE jnz L_invalid_distance_code - - 000005F0 83 E0 0F and eax,15 - 000005F3 0F D3 C1 psrlq mm0,mm1 - 000005F6 0F 7E C1 movd ecx,mm0 - 000005F9 23 0C 85 and ecx, [inflate_fast_mask+eax*4] - 0000007C R - 00000600 8B 44 24 0C mov eax, [esp+12] - 00000604 03 CB add ecx,ebx - 00000606 8B 04 88 mov eax, [eax+ecx*4] - 00000609 E9 FFFFFF19 jmp L_dodist_mmx - - ALIGN 4 - 00000610 L_clip_window_mmx: - - 00000610 8B C8 mov ecx,eax - 00000612 8B 44 24 34 mov eax, [esp+52] - 00000616 F7 D9 neg ecx - 00000618 8B 74 24 38 mov esi, [esp+56] - - 0000061C 3B C3 cmp eax,ebx - 0000061E 0F 82 000000A2 jb L_invalid_distance_too_far - - 00000624 03 CB add ecx,ebx - 00000626 83 7C 24 30 00 cmp dword ptr [esp+48],0 - 0000062B 75 20 jne L_wrap_around_window_mmx - - 0000062D 2B C1 sub eax,ecx - 0000062F 03 F0 add esi,eax - - 00000631 3B D1 cmp edx,ecx - 00000633 76 58 jbe L_do_copy1_mmx - - 00000635 2B D1 sub edx,ecx - 00000637 F3/ A4 rep movsb - 
00000639 8B F7 mov esi,edi - 0000063B 2B F3 sub esi,ebx - 0000063D EB 4E jmp L_do_copy1_mmx - - 0000063F 3B D1 cmp edx,ecx - 00000641 76 4A jbe L_do_copy1_mmx - - 00000643 2B D1 sub edx,ecx - 00000645 F3/ A4 rep movsb - 00000647 8B F7 mov esi,edi - 00000649 2B F3 sub esi,ebx - 0000064B EB 40 jmp L_do_copy1_mmx - - 0000064D L_wrap_around_window_mmx: - - 0000064D 8B 44 24 30 mov eax, [esp+48] - 00000651 3B C8 cmp ecx,eax - 00000653 76 28 jbe L_contiguous_in_window_mmx - - 00000655 03 74 24 34 add esi, [esp+52] - 00000659 03 F0 add esi,eax - 0000065B 2B F1 sub esi,ecx - 0000065D 2B C8 sub ecx,eax - - - 0000065F 3B D1 cmp edx,ecx - 00000661 76 2A jbe L_do_copy1_mmx - - 00000663 2B D1 sub edx,ecx - 00000665 F3/ A4 rep movsb - 00000667 8B 74 24 38 mov esi, [esp+56] - 0000066B 8B 4C 24 30 mov ecx, [esp+48] - 0000066F 3B D1 cmp edx,ecx - 00000671 76 1A jbe L_do_copy1_mmx - - 00000673 2B D1 sub edx,ecx - 00000675 F3/ A4 rep movsb - 00000677 8B F7 mov esi,edi - 00000679 2B F3 sub esi,ebx - 0000067B EB 10 jmp L_do_copy1_mmx - - 0000067D L_contiguous_in_window_mmx: - - 0000067D 03 F0 add esi,eax - 0000067F 2B F1 sub esi,ecx - - - 00000681 3B D1 cmp edx,ecx - 00000683 76 08 jbe L_do_copy1_mmx - - 00000685 2B D1 sub edx,ecx - 00000687 F3/ A4 rep movsb - 00000689 8B F7 mov esi,edi - 0000068B 2B F3 sub esi,ebx - - 0000068D L_do_copy1_mmx: - - - 0000068D 8B CA mov ecx,edx - 0000068F F3/ A4 rep movsb - - 00000691 8B 74 24 2C mov esi, [esp+44] - 00000695 8B 5C 24 08 mov ebx, [esp+8] - 00000699 E9 FFFFFE24 jmp L_while_test_mmx - ; 1174 "inffast.S" - 0000069E L_invalid_distance_code: - - - - - - 0000069E B9 00000044 R mov ecx, invalid_distance_code_msg - 000006A3 BA 0000001A mov edx,INFLATE_MODE_BAD - 000006A8 EB 2C jmp L_update_stream_state - - 000006AA L_test_for_end_of_block: - - - - - - 000006AA A8 20 test al,32 - 000006AC 74 0C jz L_invalid_literal_length_code - - 000006AE B9 00000000 mov ecx,0 - 000006B3 BA 0000000B mov edx,INFLATE_MODE_TYPE - 000006B8 EB 1C jmp 
L_update_stream_state - - 000006BA L_invalid_literal_length_code: - - - - - - 000006BA B9 00000028 R mov ecx, invalid_literal_length_code_msg - 000006BF BA 0000001A mov edx,INFLATE_MODE_BAD - 000006C4 EB 10 jmp L_update_stream_state - - 000006C6 L_invalid_distance_too_far: - - - - 000006C6 8B 74 24 2C mov esi, [esp+44] - 000006CA B9 0000005C R mov ecx, invalid_distance_too_far_msg - 000006CF BA 0000001A mov edx,INFLATE_MODE_BAD - 000006D4 EB 00 jmp L_update_stream_state - - 000006D6 L_update_stream_state: - - 000006D6 8B 44 24 58 mov eax, [esp+88] - 000006DA 85 C9 test ecx,ecx - 000006DC 74 03 jz L_skip_msg - 000006DE 89 48 18 mov [eax+24],ecx - 000006E1 L_skip_msg: - 000006E1 8B 40 1C mov eax, [eax+28] - 000006E4 89 10 mov [eax+mode_state],edx - 000006E6 EB 00 jmp L_break_loop - - ALIGN 4 - 000006E8 L_break_loop: - ; 1243 "inffast.S" - 000006E8 83 3D 00000000 R cmp dword ptr [inflate_fast_use_mmx],2 - 02 - 000006EF 75 02 jne L_update_next_in - - - - 000006F1 8B DD mov ebx,ebp - - 000006F3 L_update_next_in: - ; 1266 "inffast.S" - 000006F3 8B 44 24 58 mov eax, [esp+88] - 000006F7 8B CB mov ecx,ebx - 000006F9 8B 50 1C mov edx, [eax+28] - 000006FC C1 E9 03 shr ecx,3 - 000006FF 2B F1 sub esi,ecx - 00000701 C1 E1 03 shl ecx,3 - 00000704 2B D9 sub ebx,ecx - 00000706 89 78 0C mov [eax+12],edi - 00000709 89 5A 3C mov [edx+bits_state],ebx - 0000070C 8B CB mov ecx,ebx - - 0000070E 8D 5C 24 1C lea ebx, [esp+28] - 00000712 39 5C 24 14 cmp [esp+20],ebx - 00000716 75 14 jne L_buf_not_used - - 00000718 2B F3 sub esi,ebx - 0000071A 8B 18 mov ebx, [eax+0] - 0000071C 89 5C 24 14 mov [esp+20],ebx - 00000720 03 F3 add esi,ebx - 00000722 8B 58 04 mov ebx, [eax+4] - 00000725 83 EB 0B sub ebx,11 - 00000728 01 5C 24 14 add [esp+20],ebx - - 0000072C L_buf_not_used: - 0000072C 89 30 mov [eax+0],esi - - 0000072E BB 00000001 mov ebx,1 - 00000733 D3 E3 shl ebx,cl - 00000735 4B dec ebx - - - - - - 00000736 83 3D 00000000 R cmp dword ptr [inflate_fast_use_mmx],2 - 02 - 0000073D 75 08 jne 
L_update_hold - - - - 0000073F 0F D3 C1 psrlq mm0,mm1 - 00000742 0F 7E C5 movd ebp,mm0 - - 00000745 0F 77 emms - - 00000747 L_update_hold: - - - - 00000747 23 EB and ebp,ebx - 00000749 89 6A 38 mov [edx+hold_state],ebp - - - - - 0000074C 8B 5C 24 14 mov ebx, [esp+20] - 00000750 3B DE cmp ebx,esi - 00000752 76 0A jbe L_last_is_smaller - - 00000754 2B DE sub ebx,esi - 00000756 83 C3 0B add ebx,11 - 00000759 89 58 04 mov [eax+4],ebx - 0000075C EB 0A jmp L_fixup_out - 0000075E L_last_is_smaller: - 0000075E 2B F3 sub esi,ebx - 00000760 F7 DE neg esi - 00000762 83 C6 0B add esi,11 - 00000765 89 70 04 mov [eax+4],esi - - - - - 00000768 L_fixup_out: - - 00000768 8B 5C 24 10 mov ebx, [esp+16] - 0000076C 3B DF cmp ebx,edi - 0000076E 76 0D jbe L_end_is_smaller - - 00000770 2B DF sub ebx,edi - 00000772 81 C3 00000101 add ebx,257 - 00000778 89 58 10 mov [eax+16],ebx - 0000077B EB 0D jmp L_done - 0000077D L_end_is_smaller: - 0000077D 2B FB sub edi,ebx - 0000077F F7 DF neg edi - 00000781 81 C7 00000101 add edi,257 - 00000787 89 78 10 mov [eax+16],edi - - - - - - 0000078A L_done: - 0000078A 83 C4 40 add esp,64 - 0000078D 9D popfd - 0000078E 5B pop ebx - 0000078F 5D pop ebp - 00000790 5E pop esi - 00000791 5F pop edi - 00000792 C3 ret - 00000793 _inflate_fast endp - - 00000004 _TEXT ends - end -Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23 -inffas32.asm Symbols 2 - 1 - - - - -Segments and Groups: - - N a m e Size Length Align Combine Class - -FLAT . . . . . . . . . . . . . . GROUP -_DATA . . . . . . . . . . . . . 32 Bit 00000004 Para Public 'DATA' -_TEXT . . . . . . . . . . . . . 32 Bit 00000793 Para Public 'CODE' - - -Procedures, parameters, and locals: - - N a m e Type Value Attr - -_inflate_fast . . . . . . . . . P Near 00000100 _TEXT Length= 00000693 Public - L_align_long . . . . . . . . . L Near 000001BA _TEXT - L_is_aligned . . . . . . . . . L Near 000001D2 _TEXT - L_check_mmx . . . . . . . . . L Near 000001D6 _TEXT - L_use_mmx . . . . . . . . . . 
L Near 00000233 _TEXT - L_dont_use_mmx . . . . . . . . L Near 0000023F _TEXT - L_check_mmx_pop . . . . . . . L Near 00000249 _TEXT - L_do_loop . . . . . . . . . . L Near 00000250 _TEXT - L_get_length_code . . . . . . L Near 00000262 _TEXT - L_dolen . . . . . . . . . . . L Near 0000026E _TEXT - L_while_test . . . . . . . . . L Near 0000027C _TEXT - L_test_for_length_base . . . . L Near 00000291 _TEXT - L_add_bits_to_len . . . . . . L Near 000002BA _TEXT - L_save_len . . . . . . . . . . L Near 000002CA _TEXT - L_decode_distance . . . . . . L Near 000002CE _TEXT - L_get_distance_code . . . . . L Near 000002E0 _TEXT - L_dodist . . . . . . . . . . . L Near 000002ED _TEXT - L_add_bits_to_dist . . . . . . L Near 0000031C _TEXT - L_check_window . . . . . . . . L Near 0000032E _TEXT - L_check_dist_one . . . . . . . L Near 0000036C _TEXT - L_test_for_second_level_length . L Near 00000394 _TEXT - L_test_for_second_level_dist . L Near 000003B4 _TEXT - L_clip_window . . . . . . . . L Near 000003D4 _TEXT - L_wrap_around_window . . . . . L Near 00000415 _TEXT - L_contiguous_in_window . . . . L Near 00000449 _TEXT - L_do_copy1 . . . . . . . . . . L Near 0000045D _TEXT - L_init_mmx . . . . . . . . . . L Near 0000046C _TEXT - L_do_loop_mmx . . . . . . . . L Near 0000048C _TEXT - L_get_length_code_mmx . . . . L Near 000004A6 _TEXT - L_dolen_mmx . . . . . . . . . L Near 000004B2 _TEXT - L_while_test_mmx . . . . . . . L Near 000004C2 _TEXT - L_test_for_length_base_mmx . . L Near 000004D7 _TEXT - L_decode_distance_mmx . . . . L Near 000004FD _TEXT - L_get_dist_code_mmx . . . . . L Near 00000517 _TEXT - L_dodist_mmx . . . . . . . . . L Near 00000527 _TEXT - L_add_bits_to_dist_mmx . . . . L Near 00000541 _TEXT - L_check_window_mmx . . . . . . L Near 00000555 _TEXT - L_check_dist_one_mmx . . . . . L Near 00000598 _TEXT - L_test_for_second_level_length_mmx . L Near 000005C4 _TEXT - L_test_for_second_level_dist_mmx . L Near 000005E8 _TEXT - L_clip_window_mmx . . . . . . 
L Near 00000610 _TEXT - L_wrap_around_window_mmx . . . L Near 0000064D _TEXT - L_contiguous_in_window_mmx . . L Near 0000067D _TEXT - L_do_copy1_mmx . . . . . . . . L Near 0000068D _TEXT - L_invalid_distance_code . . . L Near 0000069E _TEXT - L_test_for_end_of_block . . . L Near 000006AA _TEXT - L_invalid_literal_length_code L Near 000006BA _TEXT - L_invalid_distance_too_far . . L Near 000006C6 _TEXT - L_update_stream_state . . . . L Near 000006D6 _TEXT - L_skip_msg . . . . . . . . . . L Near 000006E1 _TEXT - L_break_loop . . . . . . . . . L Near 000006E8 _TEXT - L_update_next_in . . . . . . . L Near 000006F3 _TEXT - L_buf_not_used . . . . . . . . L Near 0000072C _TEXT - L_update_hold . . . . . . . . L Near 00000747 _TEXT - L_last_is_smaller . . . . . . L Near 0000075E _TEXT - L_fixup_out . . . . . . . . . L Near 00000768 _TEXT - L_end_is_smaller . . . . . . . L Near 0000077D _TEXT - L_done . . . . . . . . . . . . L Near 0000078A _TEXT - - -Symbols: - - N a m e Type Value Attr - -@CodeSize . . . . . . . . . . . Number 00000000h -@DataSize . . . . . . . . . . . Number 00000000h -@Interface . . . . . . . . . . . Number 00000000h -@Model . . . . . . . . . . . . . Number 00000007h -@code . . . . . . . . . . . . . Text _TEXT -@data . . . . . . . . . . . . . Text FLAT -@fardata? . . . . . . . . . . . Text FLAT -@fardata . . . . . . . . . . . . Text FLAT -@stack . . . . . . . . . . . . . Text FLAT -INFLATE_MODE_BAD . . . . . . . . Number 0000001Ah -INFLATE_MODE_TYPE . . . . . . . Number 0000000Bh -bits_state . . . . . . . . . . . Number 0000003Ch -distbits_state . . . . . . . . . Number 00000058h -distcode_state . . . . . . . . . Number 00000050h -hold_state . . . . . . . . . . . Number 00000038h -inflate_fast_mask . . . . . . . L Near 0000007C _TEXT -inflate_fast_use_mmx . . . . . . L Near 00000000 _DATA -invalid_distance_code_msg . . . L Near 00000044 _TEXT -invalid_distance_too_far_msg . . 
L Near 0000005C _TEXT -invalid_literal_length_code_msg L Near 00000028 _TEXT -lenbits_state . . . . . . . . . Number 00000054h -lencode_state . . . . . . . . . Number 0000004Ch -mode_state . . . . . . . . . . . Number 00000000h -window_state . . . . . . . . . . Number 00000034h -write_state . . . . . . . . . . Number 00000030h -wsize_state . . . . . . . . . . Number 00000028h -zlib1222sup . . . . . . . . . . Number 00000008h - - 0 Warnings - 0 Errors diff --git a/zlib/contrib/masmx86/match686.asm b/zlib/contrib/masmx86/match686.asm deleted file mode 100644 index 69e0eed..0000000 --- a/zlib/contrib/masmx86/match686.asm +++ /dev/null @@ -1,479 +0,0 @@ -; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86 -; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant. -; File written by Gilles Vollant, by converting match686.S from Brian Raiter -; for MASM. This is as assembly version of longest_match -; from Jean-loup Gailly in deflate.c -; -; http://www.zlib.net -; http://www.winimage.com/zLibDll -; http://www.muppetlabs.com/~breadbox/software/assembly.html -; -; For Visual C++ 4.x and higher and ML 6.x and higher -; ml.exe is distributed in -; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64 -; -; this file contain two implementation of longest_match -; -; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro -; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom) -; -; for using an assembly version of longest_match, you need define ASMV in project -; -; compile the asm file running -; ml /coff /Zi /c /Flmatch686.lst match686.asm -; and do not include match686.obj in your project -; -; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for -; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor -; with autoselect (with cpu detection code) -; if you want support the old pentium 
optimization, you can still use these version -; -; this file is not optimized for old pentium, but it compatible with all x86 32 bits -; processor (starting 80386) -; -; -; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2 - -;uInt longest_match(s, cur_match) -; deflate_state *s; -; IPos cur_match; /* current match */ - - NbStack equ 76 - cur_match equ dword ptr[esp+NbStack-0] - str_s equ dword ptr[esp+NbStack-4] -; 5 dword on top (ret,ebp,esi,edi,ebx) - adrret equ dword ptr[esp+NbStack-8] - pushebp equ dword ptr[esp+NbStack-12] - pushedi equ dword ptr[esp+NbStack-16] - pushesi equ dword ptr[esp+NbStack-20] - pushebx equ dword ptr[esp+NbStack-24] - - chain_length equ dword ptr [esp+NbStack-28] - limit equ dword ptr [esp+NbStack-32] - best_len equ dword ptr [esp+NbStack-36] - window equ dword ptr [esp+NbStack-40] - prev equ dword ptr [esp+NbStack-44] - scan_start equ word ptr [esp+NbStack-48] - wmask equ dword ptr [esp+NbStack-52] - match_start_ptr equ dword ptr [esp+NbStack-56] - nice_match equ dword ptr [esp+NbStack-60] - scan equ dword ptr [esp+NbStack-64] - - windowlen equ dword ptr [esp+NbStack-68] - match_start equ dword ptr [esp+NbStack-72] - strend equ dword ptr [esp+NbStack-76] - NbStackAdd equ (NbStack-24) - - .386p - - name gvmatch - .MODEL FLAT - - - -; all the +zlib1222add offsets are due to the addition of fields -; in zlib in the deflate_state structure since the asm code was first written -; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). -; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). -; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). 
- - zlib1222add equ 8 - -; Note : these value are good with a 8 bytes boundary pack structure - dep_chain_length equ 74h+zlib1222add - dep_window equ 30h+zlib1222add - dep_strstart equ 64h+zlib1222add - dep_prev_length equ 70h+zlib1222add - dep_nice_match equ 88h+zlib1222add - dep_w_size equ 24h+zlib1222add - dep_prev equ 38h+zlib1222add - dep_w_mask equ 2ch+zlib1222add - dep_good_match equ 84h+zlib1222add - dep_match_start equ 68h+zlib1222add - dep_lookahead equ 6ch+zlib1222add - - -_TEXT segment - -IFDEF NOUNDERLINE - public longest_match - public match_init -ELSE - public _longest_match - public _match_init -ENDIF - - MAX_MATCH equ 258 - MIN_MATCH equ 3 - MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) - - - -MAX_MATCH equ 258 -MIN_MATCH equ 3 -MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) -MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) - - -;;; stack frame offsets - -chainlenwmask equ esp + 0 ; high word: current chain len - ; low word: s->wmask -window equ esp + 4 ; local copy of s->window -windowbestlen equ esp + 8 ; s->window + bestlen -scanstart equ esp + 16 ; first two bytes of string -scanend equ esp + 12 ; last two bytes of string -scanalign equ esp + 20 ; dword-misalignment of string -nicematch equ esp + 24 ; a good enough match size -bestlen equ esp + 28 ; size of best match so far -scan equ esp + 32 ; ptr to string wanting match - -LocalVarsSize equ 36 -; saved ebx byte esp + 36 -; saved edi byte esp + 40 -; saved esi byte esp + 44 -; saved ebp byte esp + 48 -; return address byte esp + 52 -deflatestate equ esp + 56 ; the function arguments -curmatch equ esp + 60 - -;;; Offsets for fields in the deflate_state structure. These numbers -;;; are calculated from the definition of deflate_state, with the -;;; assumption that the compiler will dword-align the fields. (Thus, -;;; changing the definition of deflate_state could easily cause this -;;; program to crash horribly, without so much as a warning at -;;; compile time. Sigh.) 
- -dsWSize equ 36+zlib1222add -dsWMask equ 44+zlib1222add -dsWindow equ 48+zlib1222add -dsPrev equ 56+zlib1222add -dsMatchLen equ 88+zlib1222add -dsPrevMatch equ 92+zlib1222add -dsStrStart equ 100+zlib1222add -dsMatchStart equ 104+zlib1222add -dsLookahead equ 108+zlib1222add -dsPrevLen equ 112+zlib1222add -dsMaxChainLen equ 116+zlib1222add -dsGoodMatch equ 132+zlib1222add -dsNiceMatch equ 136+zlib1222add - - -;;; match686.asm -- Pentium-Pro-optimized version of longest_match() -;;; Written for zlib 1.1.2 -;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> -;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html -;;; -;; -;; This software is provided 'as-is', without any express or implied -;; warranty. In no event will the authors be held liable for any damages -;; arising from the use of this software. -;; -;; Permission is granted to anyone to use this software for any purpose, -;; including commercial applications, and to alter it and redistribute it -;; freely, subject to the following restrictions: -;; -;; 1. The origin of this software must not be misrepresented; you must not -;; claim that you wrote the original software. If you use this software -;; in a product, an acknowledgment in the product documentation would be -;; appreciated but is not required. -;; 2. Altered source versions must be plainly marked as such, and must not be -;; misrepresented as being the original software -;; 3. This notice may not be removed or altered from any source distribution. -;; - -;GLOBAL _longest_match, _match_init - - -;SECTION .text - -;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) - -;_longest_match: - IFDEF NOUNDERLINE - longest_match proc near - ELSE - _longest_match proc near - ENDIF -.FPO (9, 4, 0, 0, 1, 0) - -;;; Save registers that the compiler may be using, and adjust esp to -;;; make room for our stack frame. 
- - push ebp - push edi - push esi - push ebx - sub esp, LocalVarsSize - -;;; Retrieve the function arguments. ecx will hold cur_match -;;; throughout the entire function. edx will hold the pointer to the -;;; deflate_state structure during the function's setup (before -;;; entering the main loop. - - mov edx, [deflatestate] - mov ecx, [curmatch] - -;;; uInt wmask = s->w_mask; -;;; unsigned chain_length = s->max_chain_length; -;;; if (s->prev_length >= s->good_match) { -;;; chain_length >>= 2; -;;; } - - mov eax, [edx + dsPrevLen] - mov ebx, [edx + dsGoodMatch] - cmp eax, ebx - mov eax, [edx + dsWMask] - mov ebx, [edx + dsMaxChainLen] - jl LastMatchGood - shr ebx, 2 -LastMatchGood: - -;;; chainlen is decremented once beforehand so that the function can -;;; use the sign flag instead of the zero flag for the exit test. -;;; It is then shifted into the high word, to make room for the wmask -;;; value, which it will always accompany. - - dec ebx - shl ebx, 16 - or ebx, eax - mov [chainlenwmask], ebx - -;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - mov eax, [edx + dsNiceMatch] - mov ebx, [edx + dsLookahead] - cmp ebx, eax - jl LookaheadLess - mov ebx, eax -LookaheadLess: mov [nicematch], ebx - -;;; register Bytef *scan = s->window + s->strstart; - - mov esi, [edx + dsWindow] - mov [window], esi - mov ebp, [edx + dsStrStart] - lea edi, [esi + ebp] - mov [scan], edi - -;;; Determine how many bytes the scan ptr is off from being -;;; dword-aligned. - - mov eax, edi - neg eax - and eax, 3 - mov [scanalign], eax - -;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? -;;; s->strstart - (IPos)MAX_DIST(s) : NIL; - - mov eax, [edx + dsWSize] - sub eax, MIN_LOOKAHEAD - sub ebp, eax - jg LimitPositive - xor ebp, ebp -LimitPositive: - -;;; int best_len = s->prev_length; - - mov eax, [edx + dsPrevLen] - mov [bestlen], eax - -;;; Store the sum of s->window + best_len in esi locally, and in esi. 
- - add esi, eax - mov [windowbestlen], esi - -;;; register ush scan_start = *(ushf*)scan; -;;; register ush scan_end = *(ushf*)(scan+best_len-1); -;;; Posf *prev = s->prev; - - movzx ebx, word ptr [edi] - mov [scanstart], ebx - movzx ebx, word ptr [edi + eax - 1] - mov [scanend], ebx - mov edi, [edx + dsPrev] - -;;; Jump into the main loop. - - mov edx, [chainlenwmask] - jmp short LoopEntry - -align 4 - -;;; do { -;;; match = s->window + cur_match; -;;; if (*(ushf*)(match+best_len-1) != scan_end || -;;; *(ushf*)match != scan_start) continue; -;;; [...] -;;; } while ((cur_match = prev[cur_match & wmask]) > limit -;;; && --chain_length != 0); -;;; -;;; Here is the inner loop of the function. The function will spend the -;;; majority of its time in this loop, and majority of that time will -;;; be spent in the first ten instructions. -;;; -;;; Within this loop: -;;; ebx = scanend -;;; ecx = curmatch -;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) -;;; esi = windowbestlen - i.e., (window + bestlen) -;;; edi = prev -;;; ebp = limit - -LookupLoop: - and ecx, edx - movzx ecx, word ptr [edi + ecx*2] - cmp ecx, ebp - jbe LeaveNow - sub edx, 00010000h - js LeaveNow -LoopEntry: movzx eax, word ptr [esi + ecx - 1] - cmp eax, ebx - jnz LookupLoop - mov eax, [window] - movzx eax, word ptr [eax + ecx] - cmp eax, [scanstart] - jnz LookupLoop - -;;; Store the current value of chainlen. - - mov [chainlenwmask], edx - -;;; Point edi to the string under scrutiny, and esi to the string we -;;; are hoping to match it up with. In actuality, esi and edi are -;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is -;;; initialized to -(MAX_MATCH_8 - scanalign). - - mov esi, [window] - mov edi, [scan] - add esi, ecx - mov eax, [scanalign] - mov edx, 0fffffef8h; -(MAX_MATCH_8) - lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] - lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] - -;;; Test the strings for equality, 8 bytes at a time. 
At the end, -;;; adjust edx so that it is offset to the exact byte that mismatched. -;;; -;;; We already know at this point that the first three bytes of the -;;; strings match each other, and they can be safely passed over before -;;; starting the compare loop. So what this code does is skip over 0-3 -;;; bytes, as much as necessary in order to dword-align the edi -;;; pointer. (esi will still be misaligned three times out of four.) -;;; -;;; It should be confessed that this loop usually does not represent -;;; much of the total running time. Replacing it with a more -;;; straightforward "rep cmpsb" would not drastically degrade -;;; performance. - -LoopCmps: - mov eax, [esi + edx] - xor eax, [edi + edx] - jnz LeaveLoopCmps - mov eax, [esi + edx + 4] - xor eax, [edi + edx + 4] - jnz LeaveLoopCmps4 - add edx, 8 - jnz LoopCmps - jmp short LenMaximum -LeaveLoopCmps4: add edx, 4 -LeaveLoopCmps: test eax, 0000FFFFh - jnz LenLower - add edx, 2 - shr eax, 16 -LenLower: sub al, 1 - adc edx, 0 - -;;; Calculate the length of the match. If it is longer than MAX_MATCH, -;;; then automatically accept it as the best possible match and leave. - - lea eax, [edi + edx] - mov edi, [scan] - sub eax, edi - cmp eax, MAX_MATCH - jge LenMaximum - -;;; If the length of the match is not longer than the best match we -;;; have so far, then forget it and return to the lookup loop. 
- - mov edx, [deflatestate] - mov ebx, [bestlen] - cmp eax, ebx - jg LongerMatch - mov esi, [windowbestlen] - mov edi, [edx + dsPrev] - mov ebx, [scanend] - mov edx, [chainlenwmask] - jmp LookupLoop - -;;; s->match_start = cur_match; -;;; best_len = len; -;;; if (len >= nice_match) break; -;;; scan_end = *(ushf*)(scan+best_len-1); - -LongerMatch: mov ebx, [nicematch] - mov [bestlen], eax - mov [edx + dsMatchStart], ecx - cmp eax, ebx - jge LeaveNow - mov esi, [window] - add esi, eax - mov [windowbestlen], esi - movzx ebx, word ptr [edi + eax - 1] - mov edi, [edx + dsPrev] - mov [scanend], ebx - mov edx, [chainlenwmask] - jmp LookupLoop - -;;; Accept the current string, with the maximum possible length. - -LenMaximum: mov edx, [deflatestate] - mov dword ptr [bestlen], MAX_MATCH - mov [edx + dsMatchStart], ecx - -;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; -;;; return s->lookahead; - -LeaveNow: - mov edx, [deflatestate] - mov ebx, [bestlen] - mov eax, [edx + dsLookahead] - cmp ebx, eax - jg LookaheadRet - mov eax, ebx -LookaheadRet: - -;;; Restore the stack and return from whence we came. - - add esp, LocalVarsSize - pop ebx - pop esi - pop edi - pop ebp - - ret -; please don't remove this string ! -; Your can freely use match686 in any free or commercial app if you don't remove the string in the binary! 
- db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah - - - IFDEF NOUNDERLINE - longest_match endp - ELSE - _longest_match endp - ENDIF - - IFDEF NOUNDERLINE - match_init proc near - ret - match_init endp - ELSE - _match_init proc near - ret - _match_init endp - ENDIF - - -_TEXT ends -end diff --git a/zlib/contrib/masmx86/match686.lst b/zlib/contrib/masmx86/match686.lst deleted file mode 100644 index 315ad87..0000000 --- a/zlib/contrib/masmx86/match686.lst +++ /dev/null @@ -1,624 +0,0 @@ -Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23 -match686.asm Page 1 - 1 - - - ; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86 - ; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant. - ; File written by Gilles Vollant, by converting match686.S from Brian Raiter - ; for MASM. This is as assembly version of longest_match - ; from Jean-loup Gailly in deflate.c - ; - ; http://www.zlib.net - ; http://www.winimage.com/zLibDll - ; http://www.muppetlabs.com/~breadbox/software/assembly.html - ; - ; For Visual C++ 4.x and higher and ML 6.x and higher - ; ml.exe is distributed in - ; http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64 - ; - ; this file contain two implementation of longest_match - ; - ; this longest_match was written by Brian raiter (1998), optimized for Pentium Pro - ; (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom) - ; - ; for using an assembly version of longest_match, you need define ASMV in project - ; - ; compile the asm file running - ; ml /coff /Zi /c /Flmatch686.lst match686.asm - ; and do not include match686.obj in your project - ; - ; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for - ; Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor - ; with autoselect (with cpu detection code) - ; if you want support the old 
pentium optimization, you can still use these version - ; - ; this file is not optimized for old pentium, but it compatible with all x86 32 bits - ; processor (starting 80386) - ; - ; - ; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2 - - ;uInt longest_match(s, cur_match) - ; deflate_state *s; - ; IPos cur_match; /* current match */ - - = 0000004C NbStack equ 76 - = dword ptr[esp+NbStack-0] cur_match equ dword ptr[esp+NbStack-0] - = dword ptr[esp+NbStack-4] str_s equ dword ptr[esp+NbStack-4] - ; 5 dword on top (ret,ebp,esi,edi,ebx) - = dword ptr[esp+NbStack-8] adrret equ dword ptr[esp+NbStack-8] - = dword ptr[esp+NbStack-12 pushebp equ dword ptr[esp+NbStack-12] - ] - = dword ptr[esp+NbStack-16 pushedi equ dword ptr[esp+NbStack-16] - ] - = dword ptr[esp+NbStack-20 pushesi equ dword ptr[esp+NbStack-20] - ] - = dword ptr[esp+NbStack-24 pushebx equ dword ptr[esp+NbStack-24] - ] - - = dword ptr [esp+NbStack-2 chain_length equ dword ptr [esp+NbStack-28] - 8] - = dword ptr [esp+NbStack-3 limit equ dword ptr [esp+NbStack-32] - 2] - = dword ptr [esp+NbStack-3 best_len equ dword ptr [esp+NbStack-36] - 6] - = dword ptr [esp+NbStack-4 window equ dword ptr [esp+NbStack-40] - 0] - = dword ptr [esp+NbStack-4 prev equ dword ptr [esp+NbStack-44] - 4] - = word ptr [esp+NbStack-48 scan_start equ word ptr [esp+NbStack-48] - ] - = dword ptr [esp+NbStack-5 wmask equ dword ptr [esp+NbStack-52] - 2] - = dword ptr [esp+NbStack-5 match_start_ptr equ dword ptr [esp+NbStack-56] - 6] - = dword ptr [esp+NbStack-6 nice_match equ dword ptr [esp+NbStack-60] - 0] - = dword ptr [esp+NbStack-6 scan equ dword ptr [esp+NbStack-64] - 4] - - = dword ptr [esp+NbStack-6 windowlen equ dword ptr [esp+NbStack-68] - 8] - = dword ptr [esp+NbStack-7 match_start equ dword ptr [esp+NbStack-72] - 2] - = dword ptr [esp+NbStack-7 strend equ dword ptr [esp+NbStack-76] - 6] - = 00000034 NbStackAdd equ (NbStack-24) - - .386p - - name gvmatch - .MODEL FLAT - - - - ; all the +zlib1222add 
offsets are due to the addition of fields - ; in zlib in the deflate_state structure since the asm code was first written - ; (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)"). - ; (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0"). - ; if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8"). - - = 00000008 zlib1222add equ 8 - - ; Note : these value are good with a 8 bytes boundary pack structure - = 0000007C dep_chain_length equ 74h+zlib1222add - = 00000038 dep_window equ 30h+zlib1222add - = 0000006C dep_strstart equ 64h+zlib1222add - = 00000078 dep_prev_length equ 70h+zlib1222add - = 00000090 dep_nice_match equ 88h+zlib1222add - = 0000002C dep_w_size equ 24h+zlib1222add - = 00000040 dep_prev equ 38h+zlib1222add - = 00000034 dep_w_mask equ 2ch+zlib1222add - = 0000008C dep_good_match equ 84h+zlib1222add - = 00000070 dep_match_start equ 68h+zlib1222add - = 00000074 dep_lookahead equ 6ch+zlib1222add - - - 00000000 _TEXT segment - - IFDEF NOUNDERLINE - ELSE - public _longest_match - public _match_init - ENDIF - - = 00000102 MAX_MATCH equ 258 - = 00000003 MIN_MATCH equ 3 - = 00000106 MIN_LOOKAHEAD equ (MAX_MATCH+MIN_MATCH+1) - - - - = 00000102 MAX_MATCH equ 258 - = 00000003 MIN_MATCH equ 3 - = 00000106 MIN_LOOKAHEAD equ (MAX_MATCH + MIN_MATCH + 1) - = 00000100 MAX_MATCH_8_ equ ((MAX_MATCH + 7) AND 0FFF0h) - - - ;;; stack frame offsets - - = esp + 0 chainlenwmask equ esp + 0 ; high word: current chain len - ; low word: s->wmask - = esp + 4 window equ esp + 4 ; local copy of s->window - = esp + 8 windowbestlen equ esp + 8 ; s->window + bestlen - = esp + 16 scanstart equ esp + 16 ; first two bytes of string - = esp + 12 scanend equ esp + 12 ; last two bytes of string - = esp + 20 scanalign equ esp + 20 ; dword-misalignment of string - = esp + 24 nicematch equ esp + 24 ; a good enough match size - = esp + 28 bestlen equ esp + 28 ; size of best match so far - = esp + 32 scan equ esp + 32 ; ptr to string wanting 
match - - = 00000024 LocalVarsSize equ 36 - ; saved ebx byte esp + 36 - ; saved edi byte esp + 40 - ; saved esi byte esp + 44 - ; saved ebp byte esp + 48 - ; return address byte esp + 52 - = esp + 56 deflatestate equ esp + 56 ; the function arguments - = esp + 60 curmatch equ esp + 60 - - ;;; Offsets for fields in the deflate_state structure. These numbers - ;;; are calculated from the definition of deflate_state, with the - ;;; assumption that the compiler will dword-align the fields. (Thus, - ;;; changing the definition of deflate_state could easily cause this - ;;; program to crash horribly, without so much as a warning at - ;;; compile time. Sigh.) - - = 0000002C dsWSize equ 36+zlib1222add - = 00000034 dsWMask equ 44+zlib1222add - = 00000038 dsWindow equ 48+zlib1222add - = 00000040 dsPrev equ 56+zlib1222add - = 00000060 dsMatchLen equ 88+zlib1222add - = 00000064 dsPrevMatch equ 92+zlib1222add - = 0000006C dsStrStart equ 100+zlib1222add - = 00000070 dsMatchStart equ 104+zlib1222add - = 00000074 dsLookahead equ 108+zlib1222add - = 00000078 dsPrevLen equ 112+zlib1222add - = 0000007C dsMaxChainLen equ 116+zlib1222add - = 0000008C dsGoodMatch equ 132+zlib1222add - = 00000090 dsNiceMatch equ 136+zlib1222add - - - ;;; match686.asm -- Pentium-Pro-optimized version of longest_match() - ;;; Written for zlib 1.1.2 - ;;; Copyright (C) 1998 Brian Raiter <breadbox@muppetlabs.com> - ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html - ;;; - ;; - ;; This software is provided 'as-is', without any express or implied - ;; warranty. In no event will the authors be held liable for any damages - ;; arising from the use of this software. - ;; - ;; Permission is granted to anyone to use this software for any purpose, - ;; including commercial applications, and to alter it and redistribute it - ;; freely, subject to the following restrictions: - ;; - ;; 1. 
The origin of this software must not be misrepresented; you must not - ;; claim that you wrote the original software. If you use this software - ;; in a product, an acknowledgment in the product documentation would be - ;; appreciated but is not required. - ;; 2. Altered source versions must be plainly marked as such, and must not be - ;; misrepresented as being the original software - ;; 3. This notice may not be removed or altered from any source distribution. - ;; - - ;GLOBAL _longest_match, _match_init - - - ;SECTION .text - - ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch) - - ;_longest_match: - IFDEF NOUNDERLINE - ELSE - 00000000 _longest_match proc near - ENDIF - 00000000 .FPO (9, 4, 0, 0, 1, 0) - - ;;; Save registers that the compiler may be using, and adjust esp to - ;;; make room for our stack frame. - - 00000000 55 push ebp - 00000001 57 push edi - 00000002 56 push esi - 00000003 53 push ebx - 00000004 83 EC 24 sub esp, LocalVarsSize - - ;;; Retrieve the function arguments. ecx will hold cur_match - ;;; throughout the entire function. edx will hold the pointer to the - ;;; deflate_state structure during the function's setup (before - ;;; entering the main loop. - - 00000007 8B 54 24 38 mov edx, [deflatestate] - 0000000B 8B 4C 24 3C mov ecx, [curmatch] - - ;;; uInt wmask = s->w_mask; - ;;; unsigned chain_length = s->max_chain_length; - ;;; if (s->prev_length >= s->good_match) { - ;;; chain_length >>= 2; - ;;; } - - 0000000F 8B 42 78 mov eax, [edx + dsPrevLen] - 00000012 8B 9A 0000008C mov ebx, [edx + dsGoodMatch] - 00000018 3B C3 cmp eax, ebx - 0000001A 8B 42 34 mov eax, [edx + dsWMask] - 0000001D 8B 5A 7C mov ebx, [edx + dsMaxChainLen] - 00000020 7C 03 jl LastMatchGood - 00000022 C1 EB 02 shr ebx, 2 - 00000025 LastMatchGood: - - ;;; chainlen is decremented once beforehand so that the function can - ;;; use the sign flag instead of the zero flag for the exit test. 
- ;;; It is then shifted into the high word, to make room for the wmask - ;;; value, which it will always accompany. - - 00000025 4B dec ebx - 00000026 C1 E3 10 shl ebx, 16 - 00000029 0B D8 or ebx, eax - 0000002B 89 1C 24 mov [chainlenwmask], ebx - - ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead; - - 0000002E 8B 82 00000090 mov eax, [edx + dsNiceMatch] - 00000034 8B 5A 74 mov ebx, [edx + dsLookahead] - 00000037 3B D8 cmp ebx, eax - 00000039 7C 02 jl LookaheadLess - 0000003B 8B D8 mov ebx, eax - 0000003D 89 5C 24 18 LookaheadLess: mov [nicematch], ebx - - ;;; register Bytef *scan = s->window + s->strstart; - - 00000041 8B 72 38 mov esi, [edx + dsWindow] - 00000044 89 74 24 04 mov [window], esi - 00000048 8B 6A 6C mov ebp, [edx + dsStrStart] - 0000004B 8D 3C 2E lea edi, [esi + ebp] - 0000004E 89 7C 24 20 mov [scan], edi - - ;;; Determine how many bytes the scan ptr is off from being - ;;; dword-aligned. - - 00000052 8B C7 mov eax, edi - 00000054 F7 D8 neg eax - 00000056 83 E0 03 and eax, 3 - 00000059 89 44 24 14 mov [scanalign], eax - - ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ? - ;;; s->strstart - (IPos)MAX_DIST(s) : NIL; - - 0000005D 8B 42 2C mov eax, [edx + dsWSize] - 00000060 2D 00000106 sub eax, MIN_LOOKAHEAD - 00000065 2B E8 sub ebp, eax - 00000067 7F 02 jg LimitPositive - 00000069 33 ED xor ebp, ebp - 0000006B LimitPositive: - - ;;; int best_len = s->prev_length; - - 0000006B 8B 42 78 mov eax, [edx + dsPrevLen] - 0000006E 89 44 24 1C mov [bestlen], eax - - ;;; Store the sum of s->window + best_len in esi locally, and in esi. 
- - 00000072 03 F0 add esi, eax - 00000074 89 74 24 08 mov [windowbestlen], esi - - ;;; register ush scan_start = *(ushf*)scan; - ;;; register ush scan_end = *(ushf*)(scan+best_len-1); - ;;; Posf *prev = s->prev; - - 00000078 0F B7 1F movzx ebx, word ptr [edi] - 0000007B 89 5C 24 10 mov [scanstart], ebx - 0000007F 0F B7 5C 07 FF movzx ebx, word ptr [edi + eax - 1] - 00000084 89 5C 24 0C mov [scanend], ebx - 00000088 8B 7A 40 mov edi, [edx + dsPrev] - - ;;; Jump into the main loop. - - 0000008B 8B 14 24 mov edx, [chainlenwmask] - 0000008E EB 1A jmp short LoopEntry - - align 4 - - ;;; do { - ;;; match = s->window + cur_match; - ;;; if (*(ushf*)(match+best_len-1) != scan_end || - ;;; *(ushf*)match != scan_start) continue; - ;;; [...] - ;;; } while ((cur_match = prev[cur_match & wmask]) > limit - ;;; && --chain_length != 0); - ;;; - ;;; Here is the inner loop of the function. The function will spend the - ;;; majority of its time in this loop, and majority of that time will - ;;; be spent in the first ten instructions. - ;;; - ;;; Within this loop: - ;;; ebx = scanend - ;;; ecx = curmatch - ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask) - ;;; esi = windowbestlen - i.e., (window + bestlen) - ;;; edi = prev - ;;; ebp = limit - - 00000090 LookupLoop: - 00000090 23 CA and ecx, edx - 00000092 0F B7 0C 4F movzx ecx, word ptr [edi + ecx*2] - 00000096 3B CD cmp ecx, ebp - 00000098 0F 86 000000E0 jbe LeaveNow - 0000009E 81 EA 00010000 sub edx, 00010000h - 000000A4 0F 88 000000D4 js LeaveNow - 000000AA 0F B7 44 0E FF LoopEntry: movzx eax, word ptr [esi + ecx - 1] - 000000AF 3B C3 cmp eax, ebx - 000000B1 75 DD jnz LookupLoop - 000000B3 8B 44 24 04 mov eax, [window] - 000000B7 0F B7 04 08 movzx eax, word ptr [eax + ecx] - 000000BB 3B 44 24 10 cmp eax, [scanstart] - 000000BF 75 CF jnz LookupLoop - - ;;; Store the current value of chainlen. 
- - 000000C1 89 14 24 mov [chainlenwmask], edx - - ;;; Point edi to the string under scrutiny, and esi to the string we - ;;; are hoping to match it up with. In actuality, esi and edi are - ;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is - ;;; initialized to -(MAX_MATCH_8 - scanalign). - - 000000C4 8B 74 24 04 mov esi, [window] - 000000C8 8B 7C 24 20 mov edi, [scan] - 000000CC 03 F1 add esi, ecx - 000000CE 8B 44 24 14 mov eax, [scanalign] - 000000D2 BA FFFFFEF8 mov edx, 0fffffef8h; -(MAX_MATCH_8) - 000000D7 8D BC 07 lea edi, [edi + eax + 0108h] ;MAX_MATCH_8] - 00000108 - 000000DE 8D B4 06 lea esi, [esi + eax + 0108h] ;MAX_MATCH_8] - 00000108 - - ;;; Test the strings for equality, 8 bytes at a time. At the end, - ;;; adjust edx so that it is offset to the exact byte that mismatched. - ;;; - ;;; We already know at this point that the first three bytes of the - ;;; strings match each other, and they can be safely passed over before - ;;; starting the compare loop. So what this code does is skip over 0-3 - ;;; bytes, as much as necessary in order to dword-align the edi - ;;; pointer. (esi will still be misaligned three times out of four.) - ;;; - ;;; It should be confessed that this loop usually does not represent - ;;; much of the total running time. Replacing it with a more - ;;; straightforward "rep cmpsb" would not drastically degrade - ;;; performance. 
- - 000000E5 LoopCmps: - 000000E5 8B 04 16 mov eax, [esi + edx] - 000000E8 33 04 17 xor eax, [edi + edx] - 000000EB 75 14 jnz LeaveLoopCmps - 000000ED 8B 44 16 04 mov eax, [esi + edx + 4] - 000000F1 33 44 17 04 xor eax, [edi + edx + 4] - 000000F5 75 07 jnz LeaveLoopCmps4 - 000000F7 83 C2 08 add edx, 8 - 000000FA 75 E9 jnz LoopCmps - 000000FC EB 71 jmp short LenMaximum - 000000FE 83 C2 04 LeaveLoopCmps4: add edx, 4 - 00000101 A9 0000FFFF LeaveLoopCmps: test eax, 0000FFFFh - 00000106 75 06 jnz LenLower - 00000108 83 C2 02 add edx, 2 - 0000010B C1 E8 10 shr eax, 16 - 0000010E 2C 01 LenLower: sub al, 1 - 00000110 83 D2 00 adc edx, 0 - - ;;; Calculate the length of the match. If it is longer than MAX_MATCH, - ;;; then automatically accept it as the best possible match and leave. - - 00000113 8D 04 17 lea eax, [edi + edx] - 00000116 8B 7C 24 20 mov edi, [scan] - 0000011A 2B C7 sub eax, edi - 0000011C 3D 00000102 cmp eax, MAX_MATCH - 00000121 7D 4C jge LenMaximum - - ;;; If the length of the match is not longer than the best match we - ;;; have so far, then forget it and return to the lookup loop. 
- - 00000123 8B 54 24 38 mov edx, [deflatestate] - 00000127 8B 5C 24 1C mov ebx, [bestlen] - 0000012B 3B C3 cmp eax, ebx - 0000012D 7F 13 jg LongerMatch - 0000012F 8B 74 24 08 mov esi, [windowbestlen] - 00000133 8B 7A 40 mov edi, [edx + dsPrev] - 00000136 8B 5C 24 0C mov ebx, [scanend] - 0000013A 8B 14 24 mov edx, [chainlenwmask] - 0000013D E9 FFFFFF4E jmp LookupLoop - - ;;; s->match_start = cur_match; - ;;; best_len = len; - ;;; if (len >= nice_match) break; - ;;; scan_end = *(ushf*)(scan+best_len-1); - - 00000142 8B 5C 24 18 LongerMatch: mov ebx, [nicematch] - 00000146 89 44 24 1C mov [bestlen], eax - 0000014A 89 4A 70 mov [edx + dsMatchStart], ecx - 0000014D 3B C3 cmp eax, ebx - 0000014F 7D 2D jge LeaveNow - 00000151 8B 74 24 04 mov esi, [window] - 00000155 03 F0 add esi, eax - 00000157 89 74 24 08 mov [windowbestlen], esi - 0000015B 0F B7 5C 07 FF movzx ebx, word ptr [edi + eax - 1] - 00000160 8B 7A 40 mov edi, [edx + dsPrev] - 00000163 89 5C 24 0C mov [scanend], ebx - 00000167 8B 14 24 mov edx, [chainlenwmask] - 0000016A E9 FFFFFF21 jmp LookupLoop - - ;;; Accept the current string, with the maximum possible length. - - 0000016F 8B 54 24 38 LenMaximum: mov edx, [deflatestate] - 00000173 C7 44 24 1C mov dword ptr [bestlen], MAX_MATCH - 00000102 - 0000017B 89 4A 70 mov [edx + dsMatchStart], ecx - - ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len; - ;;; return s->lookahead; - - 0000017E LeaveNow: - 0000017E 8B 54 24 38 mov edx, [deflatestate] - 00000182 8B 5C 24 1C mov ebx, [bestlen] - 00000186 8B 42 74 mov eax, [edx + dsLookahead] - 00000189 3B D8 cmp ebx, eax - 0000018B 7F 02 jg LookaheadRet - 0000018D 8B C3 mov eax, ebx - 0000018F LookaheadRet: - - ;;; Restore the stack and return from whence we came. - - 0000018F 83 C4 24 add esp, LocalVarsSize - 00000192 5B pop ebx - 00000193 5E pop esi - 00000194 5F pop edi - 00000195 5D pop ebp - - 00000196 C3 ret - ; please don't remove this string ! 
- ; Your can freely use match686 in any free or commercial app if you don't remove the string in the binary! - 00000197 0D 0A 61 73 6D db 0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah - 36 38 36 20 77 - 69 74 68 20 6D - 61 73 6D 2C 20 - 6F 70 74 69 6D - 69 73 65 64 20 - 61 73 73 65 6D - 62 6C 79 20 63 - 6F 64 65 20 66 - 72 6F 6D 20 42 - 72 69 61 6E 20 - 52 61 69 74 65 - 72 2C 20 77 72 - 69 74 74 65 6E - 20 31 39 39 38 - 0D 0A - - - IFDEF NOUNDERLINE - ELSE - 000001E4 _longest_match endp - ENDIF - - IFDEF NOUNDERLINE - ELSE - 000001E4 _match_init proc near - 000001E4 C3 ret - 000001E5 _match_init endp - ENDIF - - - 000001E5 _TEXT ends - end -Microsoft (R) Macro Assembler Version 14.16.27031.1 09/14/19 11:35:23 -match686.asm Symbols 2 - 1 - - - - -Segments and Groups: - - N a m e Size Length Align Combine Class - -FLAT . . . . . . . . . . . . . . GROUP -_DATA . . . . . . . . . . . . . 32 Bit 00000000 Para Public 'DATA' -_TEXT . . . . . . . . . . . . . 32 Bit 000001E5 Para Public 'CODE' - - -Procedures, parameters, and locals: - - N a m e Type Value Attr - -_longest_match . . . . . . . . . P Near 00000000 _TEXT Length= 000001E4 Public - LastMatchGood . . . . . . . . L Near 00000025 _TEXT - LookaheadLess . . . . . . . . L Near 0000003D _TEXT - LimitPositive . . . . . . . . L Near 0000006B _TEXT - LookupLoop . . . . . . . . . . L Near 00000090 _TEXT - LoopEntry . . . . . . . . . . L Near 000000AA _TEXT - LoopCmps . . . . . . . . . . . L Near 000000E5 _TEXT - LeaveLoopCmps4 . . . . . . . . L Near 000000FE _TEXT - LeaveLoopCmps . . . . . . . . L Near 00000101 _TEXT - LenLower . . . . . . . . . . . L Near 0000010E _TEXT - LongerMatch . . . . . . . . . L Near 00000142 _TEXT - LenMaximum . . . . . . . . . . L Near 0000016F _TEXT - LeaveNow . . . . . . . . . . . L Near 0000017E _TEXT - LookaheadRet . . . . . . . . . L Near 0000018F _TEXT -_match_init . . . . . . . . . . 
P Near 000001E4 _TEXT Length= 00000001 Public - - -Symbols: - - N a m e Type Value Attr - -@CodeSize . . . . . . . . . . . Number 00000000h -@DataSize . . . . . . . . . . . Number 00000000h -@Interface . . . . . . . . . . . Number 00000000h -@Model . . . . . . . . . . . . . Number 00000007h -@code . . . . . . . . . . . . . Text _TEXT -@data . . . . . . . . . . . . . Text FLAT -@fardata? . . . . . . . . . . . Text FLAT -@fardata . . . . . . . . . . . . Text FLAT -@stack . . . . . . . . . . . . . Text FLAT -LocalVarsSize . . . . . . . . . Number 00000024h -MAX_MATCH_8_ . . . . . . . . . . Number 00000100h -MAX_MATCH . . . . . . . . . . . Number 00000102h -MIN_LOOKAHEAD . . . . . . . . . Number 00000106h -MIN_MATCH . . . . . . . . . . . Number 00000003h -NbStackAdd . . . . . . . . . . . Number 00000034h -NbStack . . . . . . . . . . . . Number 0000004Ch -adrret . . . . . . . . . . . . . Text dword ptr[esp+NbStack-8] -best_len . . . . . . . . . . . . Text dword ptr [esp+NbStack-36] -bestlen . . . . . . . . . . . . Text esp + 28 -chain_length . . . . . . . . . . Text dword ptr [esp+NbStack-28] -chainlenwmask . . . . . . . . . Text esp + 0 -cur_match . . . . . . . . . . . Text dword ptr[esp+NbStack-0] -curmatch . . . . . . . . . . . . Text esp + 60 -deflatestate . . . . . . . . . . Text esp + 56 -dep_chain_length . . . . . . . . Number 0000007Ch -dep_good_match . . . . . . . . . Number 0000008Ch -dep_lookahead . . . . . . . . . Number 00000074h -dep_match_start . . . . . . . . Number 00000070h -dep_nice_match . . . . . . . . . Number 00000090h -dep_prev_length . . . . . . . . Number 00000078h -dep_prev . . . . . . . . . . . . Number 00000040h -dep_strstart . . . . . . . . . . Number 0000006Ch -dep_w_mask . . . . . . . . . . . Number 00000034h -dep_w_size . . . . . . . . . . . Number 0000002Ch -dep_window . . . . . . . . . . . Number 00000038h -dsGoodMatch . . . . . . . . . . Number 0000008Ch -dsLookahead . . . . . . . . . . Number 00000074h -dsMatchLen . . . . . . . . . . 
. Number 00000060h -dsMatchStart . . . . . . . . . . Number 00000070h -dsMaxChainLen . . . . . . . . . Number 0000007Ch -dsNiceMatch . . . . . . . . . . Number 00000090h -dsPrevLen . . . . . . . . . . . Number 00000078h -dsPrevMatch . . . . . . . . . . Number 00000064h -dsPrev . . . . . . . . . . . . . Number 00000040h -dsStrStart . . . . . . . . . . . Number 0000006Ch -dsWMask . . . . . . . . . . . . Number 00000034h -dsWSize . . . . . . . . . . . . Number 0000002Ch -dsWindow . . . . . . . . . . . . Number 00000038h -limit . . . . . . . . . . . . . Text dword ptr [esp+NbStack-32] -match_start_ptr . . . . . . . . Text dword ptr [esp+NbStack-56] -match_start . . . . . . . . . . Text dword ptr [esp+NbStack-72] -nice_match . . . . . . . . . . . Text dword ptr [esp+NbStack-60] -nicematch . . . . . . . . . . . Text esp + 24 -prev . . . . . . . . . . . . . . Text dword ptr [esp+NbStack-44] -pushebp . . . . . . . . . . . . Text dword ptr[esp+NbStack-12] -pushebx . . . . . . . . . . . . Text dword ptr[esp+NbStack-24] -pushedi . . . . . . . . . . . . Text dword ptr[esp+NbStack-16] -pushesi . . . . . . . . . . . . Text dword ptr[esp+NbStack-20] -scan_start . . . . . . . . . . . Text word ptr [esp+NbStack-48] -scanalign . . . . . . . . . . . Text esp + 20 -scanend . . . . . . . . . . . . Text esp + 12 -scanstart . . . . . . . . . . . Text esp + 16 -scan . . . . . . . . . . . . . . Text esp + 32 -str_s . . . . . . . . . . . . . Text dword ptr[esp+NbStack-4] -strend . . . . . . . . . . . . . Text dword ptr [esp+NbStack-76] -windowbestlen . . . . . . . . . Text esp + 8 -windowlen . . . . . . . . . . . Text dword ptr [esp+NbStack-68] -window . . . . . . . . . . . . . Text esp + 4 -wmask . . . . . . . . . . . . . Text dword ptr [esp+NbStack-52] -zlib1222add . . . . . . . . . . 
Number 00000008h - - 0 Warnings - 0 Errors diff --git a/zlib/contrib/masmx86/readme.txt b/zlib/contrib/masmx86/readme.txt deleted file mode 100644 index 3f88886..0000000 --- a/zlib/contrib/masmx86/readme.txt +++ /dev/null @@ -1,27 +0,0 @@ - -Summary -------- -This directory contains ASM implementations of the functions -longest_match() and inflate_fast(). - - -Use instructions ----------------- -Assemble using MASM, and copy the object files into the zlib source -directory, then run the appropriate makefile, as suggested below. You can -download MASM from here: - - http://www.microsoft.com/downloads/details.aspx?displaylang=en&FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64 - -You can also get object files here: - - http://www.winimage.com/zLibDll/zlib124_masm_obj.zip - -Build instructions ------------------- -* With Microsoft C and MASM: -nmake -f win32/Makefile.msc LOC="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" - -* With Borland C and TASM: -make -f win32/Makefile.bor LOCAL_ZLIB="-DASMV -DASMINF" OBJA="match686.obj inffas32.obj" OBJPA="+match686c.obj+match686.obj+inffas32.obj" - |