Skip to content

Commit

Permalink
Added the latest 'fast' decompressor
Browse files Browse the repository at this point in the history
(for the old (v1.x) version of the format)
  • Loading branch information
specke authored Dec 10, 2021
1 parent 7ab1554 commit 756109c
Showing 1 changed file with 187 additions and 1 deletion.
188 changes: 187 additions & 1 deletion asm/Z80/unzx0v1_fast.asm
Original file line number Diff line number Diff line change
@@ -1 +1,187 @@
dummy
;
; Speed-optimized ZX0v1 decompressor by spke & uniabis (190 bytes)
;
; ver.00 by spke (27/01-23/03/2021, 191 bytes)
; ver.01 by spke (24/03/2021, 193(+2) bytes, fixed a bug in the initialization)
; ver.02 by uniabis (25-29/03/2021, 191(-2) bytes, +0.5% speed, fixed a bug in the gamma code reader)
; ver.03 by uniabis (16/08/2021, 190(-1) bytes)
; ver.04 by spke (07-08/12/2021, updated info, renamed to reflect the use of the old compression format)
;
; Original ZX0 decompressors were written by Einar Saukas
;
; This decompressor was written on the basis of "Standard" decompressor by
; Einar Saukas and optimized for speed by spke and uniabis. This decompressor is
; about 5% faster than the "Turbo" decompressor, which is 128 bytes long.
; It has about the same speed as the 412 byte version of the "Mega" decompressor.
;
; The decompressor uses AF, AF', BC, DE, HL and IX and relies upon self-modified code.
;
; There are two compressors available for ZX0 format. The official optimal compressors
; by Einar Saukas are available from https://github.com/einar-saukas/ZX0
; They can be invoked as follows:
;
; zx0.exe file_to_be_compressed name_of_compressed_file.zx0 (for compressors ver.1.x), or
; zx0.exe -c file_to_be_compressed name_of_compressed_file.zx0 (for compressors ver.2.x)
;
; Option -c indicates the use of the "old" ver 1.x compression format assumed by this decompressor.
;
; An alternative heuristic compressor "Salvador" has been developed by Emmanuel Marty,
; see https://github.com/emmanuel-marty/salvador
;
; It has fractionally lower compression ratio compared to the official compressor,
; but works much faster, so may be a better fit for the majority of development needs.
; Salvador is invoked by using:
;
; salvador.exe -classic file_to_be_compressed name_of_compressed_file.zx0
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressZX0v1
;
; Of course, ZX0 compression algorithms are (c) 2021 Einar Saukas,
; see https://github.com/einar-saukas/ZX0 for more information
;
; Drop me an email if you have any comments/ideas/suggestions: [email protected]
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.

MACRO RELOAD_BITS
ld a,(hl) : inc hl : rla
ENDM

MACRO INLINE_READ_GAMMA
.ReadGammaBits add a : rl c : add a : jr nc,.ReadGammaBits
ENDM

@DecompressZX0v1: ld ix,CopyMatch1 : scf : exa ; AF' must have flag C switched on
ld bc,#FFFF : ld (PrevOffset),bc ; default offset is -1
inc bc : ld a,#80 : jr RunOfLiterals ; BC is assumed to contains 0 most of the time
ShorterOffsets ; 7-bit offsets allow additional optimizations,
; based on the facts that C==0 and AF' has C ON!
exa : sbc a : ld (PrevOffset+1),a ; the top byte of the offset is always #FF
ld a,(hl) : inc hl
rra : ld (PrevOffset),a ; note that AF' always has flag C ON

jr nc,LongerMatch

CopyMatch2 ; the case of matches with len=2
exa : ld c,2

CopyMatch1 ; the faster match copying code
push hl ; preserve source
PrevOffset EQU $+1 : ld hl,#FFFF ; restore offset (default offset is -1)
add hl,de ; HL = dest - offset
ldir
pop hl ; restore source

; after a match you can have either
; 0 + <elias length> = run of literals, or
; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
AfterMatch1 add a : jr nc,RunOfLiterals

UsualMatch: ; this is the case of usual match+offset
add a : jr nc,LongerOffets : jr nz,ShorterOffsets ; NZ after NC == "confirmed C"
RELOAD_BITS : jr c,ShorterOffsets

LongerOffets inc c : INLINE_READ_GAMMA ; reading gamma requires C=1
call z,ReloadReadGamma

ProcessOffset exa : xor a : sub c
ret z ; end-of-data marker (only checked for longer offsets)

rra : ld (PrevOffset+1),a
ld a,(hl) : inc hl
rra : ld (PrevOffset),a

; lowest bit is the first bit of the gamma code for length
jr c,CopyMatch2

; this wastes 1 t-state for longer matches far away,
; but saves 4 t-states for longer nearby (seems to pay off in testing)
ld c,b
LongerMatch inc c
; doing SCF here ensures that AF' has flag C ON and costs
; cheaper than doing SCF in the ShortestOffsets branch
scf : exa

INLINE_READ_GAMMA
call z,ReloadReadGamma

CopyMatch3 push hl ; preserve source
ld hl,(PrevOffset) ; restore offset
add hl,de ; HL = dest - offset
; because BC>=3-1, we can do 2 x LDI safely
ldi : ldir : inc c : ldi
pop hl ; restore source

; after a match you can have either
; 0 + <elias length> = run of literals, or
; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
AfterMatch3 add a : jr c,UsualMatch

RunOfLiterals: inc c : add a : jr nc,LongerRun : jr nz,CopyLiteral ; NZ after NC == "confirmed C"
RELOAD_BITS : jr c,CopyLiteral

LongerRun INLINE_READ_GAMMA : jr nz,CopyLiterals
RELOAD_BITS
call nc,ReadGammaAligned

CopyLiterals ldi
CopyLiteral ldir

; after a literal run you can have either
; 0 + <elias length> = match using a repeated offset, or
; 1 + <elias offset msb> + [7-bits of offset lsb + 1-bit of length] + <elias length> = another match
add a : jr c,UsualMatch

RepMatch: inc c : add a : jr nc,LongerRepMatch : jr nz,CopyMatch1 ; NZ after NC == "confirmed C"
RELOAD_BITS : jr c,CopyMatch1

LongerRepMatch INLINE_READ_GAMMA
jp nz,CopyMatch1

; this is a crafty equivalent of
; CALL ReloadReadGamma : JP CopyMatch1
push ix

;
; the subroutine for reading the remainder of the partly read Elias gamma code.
; it has two entry points: ReloadReadGamma first refills the bit reservoir in A,
; while ReadGammaAligned assumes that the bit reservoir has just been refilled.

ReloadReadGamma: RELOAD_BITS
ret c

ReadGammaAligned: add a : rl c
add a : ret c

add a : rl c
ReadingLongGammaRLA rla ; this should really be an ADD A, but since flag C
; is always off here, this saves us a byte (see below)

ReadingLongGamma ; this loop does not need unrolling,
; as it does not get much use anyway
ret c
add a : rl c : rl b
add a : jr nz,ReadingLongGamma

ld a,(hl) : inc hl
jr ReadingLongGammaRLA

0 comments on commit 756109c

Please sign in to comment.