From 373dc625f82b47096893add42c4472e4a57ab7eb Mon Sep 17 00:00:00 2001 From: Aki Date: Wed, 9 Feb 2022 22:23:03 +0100 Subject: Moved third-party libraries to a separate subdirectory --- zlib/examples/README.examples | 49 -- zlib/examples/enough.c | 572 ---------------------- zlib/examples/fitblk.c | 233 --------- zlib/examples/gun.c | 702 --------------------------- zlib/examples/gzappend.c | 504 -------------------- zlib/examples/gzjoin.c | 449 ----------------- zlib/examples/gzlog.c | 1059 ----------------------------------------- zlib/examples/gzlog.h | 91 ---- zlib/examples/zlib_how.html | 545 --------------------- zlib/examples/zpipe.c | 205 -------- zlib/examples/zran.c | 409 ---------------- 11 files changed, 4818 deletions(-) delete mode 100644 zlib/examples/README.examples delete mode 100644 zlib/examples/enough.c delete mode 100644 zlib/examples/fitblk.c delete mode 100644 zlib/examples/gun.c delete mode 100644 zlib/examples/gzappend.c delete mode 100644 zlib/examples/gzjoin.c delete mode 100644 zlib/examples/gzlog.c delete mode 100644 zlib/examples/gzlog.h delete mode 100644 zlib/examples/zlib_how.html delete mode 100644 zlib/examples/zpipe.c delete mode 100644 zlib/examples/zran.c (limited to 'zlib/examples') diff --git a/zlib/examples/README.examples b/zlib/examples/README.examples deleted file mode 100644 index 56a3171..0000000 --- a/zlib/examples/README.examples +++ /dev/null @@ -1,49 +0,0 @@ -This directory contains examples of the use of zlib and other relevant -programs and documentation. - -enough.c - calculation and justification of ENOUGH parameter in inftrees.h - - calculates the maximum table space used in inflate tree - construction over all possible Huffman codes - -fitblk.c - compress just enough input to nearly fill a requested output size - - zlib isn't designed to do this, but fitblk does it anyway - -gun.c - uncompress a gzip file - - illustrates the use of inflateBack() for high speed file-to-file - decompression using call-back functions - - is approximately twice as fast as gzip -d - - also provides Unix uncompress functionality, again twice as fast - -gzappend.c - append to a gzip file - - illustrates the use of the Z_BLOCK flush parameter for inflate() - - illustrates the use of deflatePrime() to start at any bit - -gzjoin.c - join gzip files without recalculating the crc or recompressing - - illustrates the use of the Z_BLOCK flush parameter for inflate() - - illustrates the use of crc32_combine() - -gzlog.c -gzlog.h - efficiently and robustly maintain a message log file in gzip format - - illustrates use of raw deflate, Z_PARTIAL_FLUSH, deflatePrime(), - and deflateSetDictionary() - - illustrates use of a gzip header extra field - -zlib_how.html - painfully comprehensive description of zpipe.c (see below) - - describes in excruciating detail the use of deflate() and inflate() - -zpipe.c - reads and writes zlib streams from stdin to stdout - - illustrates the proper use of deflate() and inflate() - - deeply commented in zlib_how.html (see above) - -zran.c - index a zlib or gzip stream and randomly access it - - illustrates the use of Z_BLOCK, inflatePrime(), and - inflateSetDictionary() to provide random access diff --git a/zlib/examples/enough.c b/zlib/examples/enough.c deleted file mode 100644 index b991144..0000000 --- a/zlib/examples/enough.c +++ /dev/null @@ -1,572 +0,0 @@ -/* enough.c -- determine the maximum size of inflate's Huffman code tables over - * all possible valid and complete Huffman codes, subject to a length limit. - * Copyright (C) 2007, 2008, 2012 Mark Adler - * Version 1.4 18 August 2012 Mark Adler - */ - -/* Version history: - 1.0 3 Jan 2007 First version (derived from codecount.c version 1.4) - 1.1 4 Jan 2007 Use faster incremental table usage computation - Prune examine() search on previously visited states - 1.2 5 Jan 2007 Comments clean up - As inflate does, decrease root for short codes - Refuse cases where inflate would increase root - 1.3 17 Feb 2008 Add argument for initial root table size - Fix bug for initial root table size == max - 1 - Use a macro to compute the history index - 1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!) - Clean up comparisons of different types - Clean up code indentation - */ - -/* - Examine all possible Huffman codes for a given number of symbols and a - maximum code length in bits to determine the maximum table size for zilb's - inflate. Only complete Huffman codes are counted. - - Two codes are considered distinct if the vectors of the number of codes per - length are not identical. So permutations of the symbol assignments result - in the same code for the counting, as do permutations of the assignments of - the bit values to the codes (i.e. only canonical codes are counted). - - We build a code from shorter to longer lengths, determining how many symbols - are coded at each length. At each step, we have how many symbols remain to - be coded, what the last code length used was, and how many bit patterns of - that length remain unused. Then we add one to the code length and double the - number of unused patterns to graduate to the next code length. We then - assign all portions of the remaining symbols to that code length that - preserve the properties of a correct and eventually complete code. Those - properties are: we cannot use more bit patterns than are available; and when - all the symbols are used, there are exactly zero possible bit patterns - remaining. - - The inflate Huffman decoding algorithm uses two-level lookup tables for - speed. There is a single first-level table to decode codes up to root bits - in length (root == 9 in the current inflate implementation). The table - has 1 << root entries and is indexed by the next root bits of input. Codes - shorter than root bits have replicated table entries, so that the correct - entry is pointed to regardless of the bits that follow the short code. If - the code is longer than root bits, then the table entry points to a second- - level table. The size of that table is determined by the longest code with - that root-bit prefix. If that longest code has length len, then the table - has size 1 << (len - root), to index the remaining bits in that set of - codes. Each subsequent root-bit prefix then has its own sub-table. The - total number of table entries required by the code is calculated - incrementally as the number of codes at each bit length is populated. When - all of the codes are shorter than root bits, then root is reduced to the - longest code length, resulting in a single, smaller, one-level table. - - The inflate algorithm also provides for small values of root (relative to - the log2 of the number of symbols), where the shortest code has more bits - than root. In that case, root is increased to the length of the shortest - code. This program, by design, does not handle that case, so it is verified - that the number of symbols is less than 2^(root + 1). - - In order to speed up the examination (by about ten orders of magnitude for - the default arguments), the intermediate states in the build-up of a code - are remembered and previously visited branches are pruned. The memory - required for this will increase rapidly with the total number of symbols and - the maximum code length in bits. However this is a very small price to pay - for the vast speedup. - - First, all of the possible Huffman codes are counted, and reachable - intermediate states are noted by a non-zero count in a saved-results array. - Second, the intermediate states that lead to (root + 1) bit or longer codes - are used to look at all sub-codes from those junctures for their inflate - memory usage. (The amount of memory used is not affected by the number of - codes of root bits or less in length.) Third, the visited states in the - construction of those sub-codes and the associated calculation of the table - size is recalled in order to avoid recalculating from the same juncture. - Beginning the code examination at (root + 1) bit codes, which is enabled by - identifying the reachable nodes, accounts for about six of the orders of - magnitude of improvement for the default arguments. About another four - orders of magnitude come from not revisiting previous states. Out of - approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes - need to be examined to cover all of the possible table memory usage cases - for the default arguments of 286 symbols limited to 15-bit codes. - - Note that an unsigned long long type is used for counting. It is quite easy - to exceed the capacity of an eight-byte integer with a large number of - symbols and a large maximum code length, so multiple-precision arithmetic - would need to replace the unsigned long long arithmetic in that case. This - program will abort if an overflow occurs. The big_t type identifies where - the counting takes place. - - An unsigned long long type is also used for calculating the number of - possible codes remaining at the maximum length. This limits the maximum - code length to the number of bits in a long long minus the number of bits - needed to represent the symbols in a flat code. The code_t type identifies - where the bit pattern counting takes place. - */ - -#include -#include -#include -#include - -#define local static - -/* special data types */ -typedef unsigned long long big_t; /* type for code counting */ -typedef unsigned long long code_t; /* type for bit pattern counting */ -struct tab { /* type for been here check */ - size_t len; /* length of bit vector in char's */ - char *vec; /* allocated bit vector */ -}; - -/* The array for saving results, num[], is indexed with this triplet: - - syms: number of symbols remaining to code - left: number of available bit patterns at length len - len: number of bits in the codes currently being assigned - - Those indices are constrained thusly when saving results: - - syms: 3..totsym (totsym == total symbols to code) - left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6) - len: 1..max - 1 (max == maximum code length in bits) - - syms == 2 is not saved since that immediately leads to a single code. left - must be even, since it represents the number of available bit patterns at - the current length, which is double the number at the previous length. - left ends at syms-1 since left == syms immediately results in a single code. - (left > sym is not allowed since that would result in an incomplete code.) - len is less than max, since the code completes immediately when len == max. - - The offset into the array is calculated for the three indices with the - first one (syms) being outermost, and the last one (len) being innermost. - We build the array with length max-1 lists for the len index, with syms-3 - of those for each symbol. There are totsym-2 of those, with each one - varying in length as a function of sym. See the calculation of index in - count() for the index, and the calculation of size in main() for the size - of the array. - - For the deflate example of 286 symbols limited to 15-bit codes, the array - has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than - half of the space allocated for saved results is actually used -- not all - possible triplets are reached in the generation of valid Huffman codes. - */ - -/* The array for tracking visited states, done[], is itself indexed identically - to the num[] array as described above for the (syms, left, len) triplet. - Each element in the array is further indexed by the (mem, rem) doublet, - where mem is the amount of inflate table space used so far, and rem is the - remaining unused entries in the current inflate sub-table. Each indexed - element is simply one bit indicating whether the state has been visited or - not. Since the ranges for mem and rem are not known a priori, each bit - vector is of a variable size, and grows as needed to accommodate the visited - states. mem and rem are used to calculate a single index in a triangular - array. Since the range of mem is expected in the default case to be about - ten times larger than the range of rem, the array is skewed to reduce the - memory usage, with eight times the range for mem than for rem. See the - calculations for offset and bit in beenhere() for the details. - - For the deflate example of 286 symbols limited to 15-bit codes, the bit - vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[] - array itself. - */ - -/* Globals to avoid propagating constants or constant pointers recursively */ -local int max; /* maximum allowed bit length for the codes */ -local int root; /* size of base code table in bits */ -local int large; /* largest code table so far */ -local size_t size; /* number of elements in num and done */ -local int *code; /* number of symbols assigned to each bit length */ -local big_t *num; /* saved results array for code counting */ -local struct tab *done; /* states already evaluated array */ - -/* Index function for num[] and done[] */ -#define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(max-1)+k-1) - -/* Free allocated space. Uses globals code, num, and done. */ -local void cleanup(void) -{ - size_t n; - - if (done != NULL) { - for (n = 0; n < size; n++) - if (done[n].len) - free(done[n].vec); - free(done); - } - if (num != NULL) - free(num); - if (code != NULL) - free(code); -} - -/* Return the number of possible Huffman codes using bit patterns of lengths - len through max inclusive, coding syms symbols, with left bit patterns of - length len unused -- return -1 if there is an overflow in the counting. - Keep a record of previous results in num to prevent repeating the same - calculation. Uses the globals max and num. */ -local big_t count(int syms, int len, int left) -{ - big_t sum; /* number of possible codes from this juncture */ - big_t got; /* value returned from count() */ - int least; /* least number of syms to use at this juncture */ - int most; /* most number of syms to use at this juncture */ - int use; /* number of bit patterns to use in next call */ - size_t index; /* index of this case in *num */ - - /* see if only one possible code */ - if (syms == left) - return 1; - - /* note and verify the expected state */ - assert(syms > left && left > 0 && len < max); - - /* see if we've done this one already */ - index = INDEX(syms, left, len); - got = num[index]; - if (got) - return got; /* we have -- return the saved result */ - - /* we need to use at least this many bit patterns so that the code won't be - incomplete at the next length (more bit patterns than symbols) */ - least = (left << 1) - syms; - if (least < 0) - least = 0; - - /* we can use at most this many bit patterns, lest there not be enough - available for the remaining symbols at the maximum length (if there were - no limit to the code length, this would become: most = left - 1) */ - most = (((code_t)left << (max - len)) - syms) / - (((code_t)1 << (max - len)) - 1); - - /* count all possible codes from this juncture and add them up */ - sum = 0; - for (use = least; use <= most; use++) { - got = count(syms - use, len + 1, (left - use) << 1); - sum += got; - if (got == (big_t)0 - 1 || sum < got) /* overflow */ - return (big_t)0 - 1; - } - - /* verify that all recursive calls are productive */ - assert(sum != 0); - - /* save the result and return it */ - num[index] = sum; - return sum; -} - -/* Return true if we've been here before, set to true if not. Set a bit in a - bit vector to indicate visiting this state. Each (syms,len,left) state - has a variable size bit vector indexed by (mem,rem). The bit vector is - lengthened if needed to allow setting the (mem,rem) bit. */ -local int beenhere(int syms, int len, int left, int mem, int rem) -{ - size_t index; /* index for this state's bit vector */ - size_t offset; /* offset in this state's bit vector */ - int bit; /* mask for this state's bit */ - size_t length; /* length of the bit vector in bytes */ - char *vector; /* new or enlarged bit vector */ - - /* point to vector for (syms,left,len), bit in vector for (mem,rem) */ - index = INDEX(syms, left, len); - mem -= 1 << root; - offset = (mem >> 3) + rem; - offset = ((offset * (offset + 1)) >> 1) + rem; - bit = 1 << (mem & 7); - - /* see if we've been here */ - length = done[index].len; - if (offset < length && (done[index].vec[offset] & bit) != 0) - return 1; /* done this! */ - - /* we haven't been here before -- set the bit to show we have now */ - - /* see if we need to lengthen the vector in order to set the bit */ - if (length <= offset) { - /* if we have one already, enlarge it, zero out the appended space */ - if (length) { - do { - length <<= 1; - } while (length <= offset); - vector = realloc(done[index].vec, length); - if (vector != NULL) - memset(vector + done[index].len, 0, length - done[index].len); - } - - /* otherwise we need to make a new vector and zero it out */ - else { - length = 1 << (len - root); - while (length <= offset) - length <<= 1; - vector = calloc(length, sizeof(char)); - } - - /* in either case, bail if we can't get the memory */ - if (vector == NULL) { - fputs("abort: unable to allocate enough memory\n", stderr); - cleanup(); - exit(1); - } - - /* install the new vector */ - done[index].len = length; - done[index].vec = vector; - } - - /* set the bit */ - done[index].vec[offset] |= bit; - return 0; -} - -/* Examine all possible codes from the given node (syms, len, left). Compute - the amount of memory required to build inflate's decoding tables, where the - number of code structures used so far is mem, and the number remaining in - the current sub-table is rem. Uses the globals max, code, root, large, and - done. */ -local void examine(int syms, int len, int left, int mem, int rem) -{ - int least; /* least number of syms to use at this juncture */ - int most; /* most number of syms to use at this juncture */ - int use; /* number of bit patterns to use in next call */ - - /* see if we have a complete code */ - if (syms == left) { - /* set the last code entry */ - code[len] = left; - - /* complete computation of memory used by this code */ - while (rem < left) { - left -= rem; - rem = 1 << (len - root); - mem += rem; - } - assert(rem == left); - - /* if this is a new maximum, show the entries used and the sub-code */ - if (mem > large) { - large = mem; - printf("max %d: ", mem); - for (use = root + 1; use <= max; use++) - if (code[use]) - printf("%d[%d] ", code[use], use); - putchar('\n'); - fflush(stdout); - } - - /* remove entries as we drop back down in the recursion */ - code[len] = 0; - return; - } - - /* prune the tree if we can */ - if (beenhere(syms, len, left, mem, rem)) - return; - - /* we need to use at least this many bit patterns so that the code won't be - incomplete at the next length (more bit patterns than symbols) */ - least = (left << 1) - syms; - if (least < 0) - least = 0; - - /* we can use at most this many bit patterns, lest there not be enough - available for the remaining symbols at the maximum length (if there were - no limit to the code length, this would become: most = left - 1) */ - most = (((code_t)left << (max - len)) - syms) / - (((code_t)1 << (max - len)) - 1); - - /* occupy least table spaces, creating new sub-tables as needed */ - use = least; - while (rem < use) { - use -= rem; - rem = 1 << (len - root); - mem += rem; - } - rem -= use; - - /* examine codes from here, updating table space as we go */ - for (use = least; use <= most; use++) { - code[len] = use; - examine(syms - use, len + 1, (left - use) << 1, - mem + (rem ? 1 << (len - root) : 0), rem << 1); - if (rem == 0) { - rem = 1 << (len - root); - mem += rem; - } - rem--; - } - - /* remove entries as we drop back down in the recursion */ - code[len] = 0; -} - -/* Look at all sub-codes starting with root + 1 bits. Look at only the valid - intermediate code states (syms, left, len). For each completed code, - calculate the amount of memory required by inflate to build the decoding - tables. Find the maximum amount of memory required and show the code that - requires that maximum. Uses the globals max, root, and num. */ -local void enough(int syms) -{ - int n; /* number of remaing symbols for this node */ - int left; /* number of unused bit patterns at this length */ - size_t index; /* index of this case in *num */ - - /* clear code */ - for (n = 0; n <= max; n++) - code[n] = 0; - - /* look at all (root + 1) bit and longer codes */ - large = 1 << root; /* base table */ - if (root < max) /* otherwise, there's only a base table */ - for (n = 3; n <= syms; n++) - for (left = 2; left < n; left += 2) - { - /* look at all reachable (root + 1) bit nodes, and the - resulting codes (complete at root + 2 or more) */ - index = INDEX(n, left, root + 1); - if (root + 1 < max && num[index]) /* reachable node */ - examine(n, root + 1, left, 1 << root, 0); - - /* also look at root bit codes with completions at root + 1 - bits (not saved in num, since complete), just in case */ - if (num[index - 1] && n <= left << 1) - examine((n - left) << 1, root + 1, (n - left) << 1, - 1 << root, 0); - } - - /* done */ - printf("done: maximum of %d table entries\n", large); -} - -/* - Examine and show the total number of possible Huffman codes for a given - maximum number of symbols, initial root table size, and maximum code length - in bits -- those are the command arguments in that order. The default - values are 286, 9, and 15 respectively, for the deflate literal/length code. - The possible codes are counted for each number of coded symbols from two to - the maximum. The counts for each of those and the total number of codes are - shown. The maximum number of inflate table entires is then calculated - across all possible codes. Each new maximum number of table entries and the - associated sub-code (starting at root + 1 == 10 bits) is shown. - - To count and examine Huffman codes that are not length-limited, provide a - maximum length equal to the number of symbols minus one. - - For the deflate literal/length code, use "enough". For the deflate distance - code, use "enough 30 6". - - This uses the %llu printf format to print big_t numbers, which assumes that - big_t is an unsigned long long. If the big_t type is changed (for example - to a multiple precision type), the method of printing will also need to be - updated. - */ -int main(int argc, char **argv) -{ - int syms; /* total number of symbols to code */ - int n; /* number of symbols to code for this run */ - big_t got; /* return value of count() */ - big_t sum; /* accumulated number of codes over n */ - code_t word; /* for counting bits in code_t */ - - /* set up globals for cleanup() */ - code = NULL; - num = NULL; - done = NULL; - - /* get arguments -- default to the deflate literal/length code */ - syms = 286; - root = 9; - max = 15; - if (argc > 1) { - syms = atoi(argv[1]); - if (argc > 2) { - root = atoi(argv[2]); - if (argc > 3) - max = atoi(argv[3]); - } - } - if (argc > 4 || syms < 2 || root < 1 || max < 1) { - fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n", - stderr); - return 1; - } - - /* if not restricting the code length, the longest is syms - 1 */ - if (max > syms - 1) - max = syms - 1; - - /* determine the number of bits in a code_t */ - for (n = 0, word = 1; word; n++, word <<= 1) - ; - - /* make sure that the calculation of most will not overflow */ - if (max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (max - 1))) { - fputs("abort: code length too long for internal types\n", stderr); - return 1; - } - - /* reject impossible code requests */ - if ((code_t)(syms - 1) > ((code_t)1 << max) - 1) { - fprintf(stderr, "%d symbols cannot be coded in %d bits\n", - syms, max); - return 1; - } - - /* allocate code vector */ - code = calloc(max + 1, sizeof(int)); - if (code == NULL) { - fputs("abort: unable to allocate enough memory\n", stderr); - return 1; - } - - /* determine size of saved results array, checking for overflows, - allocate and clear the array (set all to zero with calloc()) */ - if (syms == 2) /* iff max == 1 */ - num = NULL; /* won't be saving any results */ - else { - size = syms >> 1; - if (size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) || - (size *= n, size > ((size_t)0 - 1) / (n = max - 1)) || - (size *= n, size > ((size_t)0 - 1) / sizeof(big_t)) || - (num = calloc(size, sizeof(big_t))) == NULL) { - fputs("abort: unable to allocate enough memory\n", stderr); - cleanup(); - return 1; - } - } - - /* count possible codes for all numbers of symbols, add up counts */ - sum = 0; - for (n = 2; n <= syms; n++) { - got = count(n, 1, 2); - sum += got; - if (got == (big_t)0 - 1 || sum < got) { /* overflow */ - fputs("abort: can't count that high!\n", stderr); - cleanup(); - return 1; - } - printf("%llu %d-codes\n", got, n); - } - printf("%llu total codes for 2 to %d symbols", sum, syms); - if (max < syms - 1) - printf(" (%d-bit length limit)\n", max); - else - puts(" (no length limit)"); - - /* allocate and clear done array for beenhere() */ - if (syms == 2) - done = NULL; - else if (size > ((size_t)0 - 1) / sizeof(struct tab) || - (done = calloc(size, sizeof(struct tab))) == NULL) { - fputs("abort: unable to allocate enough memory\n", stderr); - cleanup(); - return 1; - } - - /* find and show maximum inflate table usage */ - if (root > max) /* reduce root to max length */ - root = max; - if ((code_t)syms < ((code_t)1 << (root + 1))) - enough(syms); - else - puts("cannot handle minimum code lengths > root"); - - /* done */ - cleanup(); - return 0; -} diff --git a/zlib/examples/fitblk.c b/zlib/examples/fitblk.c deleted file mode 100644 index c61de5c..0000000 --- a/zlib/examples/fitblk.c +++ /dev/null @@ -1,233 +0,0 @@ -/* fitblk.c: example of fitting compressed output to a specified size - Not copyrighted -- provided to the public domain - Version 1.1 25 November 2004 Mark Adler */ - -/* Version history: - 1.0 24 Nov 2004 First version - 1.1 25 Nov 2004 Change deflateInit2() to deflateInit() - Use fixed-size, stack-allocated raw buffers - Simplify code moving compression to subroutines - Use assert() for internal errors - Add detailed description of approach - */ - -/* Approach to just fitting a requested compressed size: - - fitblk performs three compression passes on a portion of the input - data in order to determine how much of that input will compress to - nearly the requested output block size. The first pass generates - enough deflate blocks to produce output to fill the requested - output size plus a specfied excess amount (see the EXCESS define - below). The last deflate block may go quite a bit past that, but - is discarded. The second pass decompresses and recompresses just - the compressed data that fit in the requested plus excess sized - buffer. The deflate process is terminated after that amount of - input, which is less than the amount consumed on the first pass. - The last deflate block of the result will be of a comparable size - to the final product, so that the header for that deflate block and - the compression ratio for that block will be about the same as in - the final product. The third compression pass decompresses the - result of the second step, but only the compressed data up to the - requested size minus an amount to allow the compressed stream to - complete (see the MARGIN define below). That will result in a - final compressed stream whose length is less than or equal to the - requested size. Assuming sufficient input and a requested size - greater than a few hundred bytes, the shortfall will typically be - less than ten bytes. - - If the input is short enough that the first compression completes - before filling the requested output size, then that compressed - stream is return with no recompression. - - EXCESS is chosen to be just greater than the shortfall seen in a - two pass approach similar to the above. That shortfall is due to - the last deflate block compressing more efficiently with a smaller - header on the second pass. EXCESS is set to be large enough so - that there is enough uncompressed data for the second pass to fill - out the requested size, and small enough so that the final deflate - block of the second pass will be close in size to the final deflate - block of the third and final pass. MARGIN is chosen to be just - large enough to assure that the final compression has enough room - to complete in all cases. - */ - -#include -#include -#include -#include "zlib.h" - -#define local static - -/* print nastygram and leave */ -local void quit(char *why) -{ - fprintf(stderr, "fitblk abort: %s\n", why); - exit(1); -} - -#define RAWLEN 4096 /* intermediate uncompressed buffer size */ - -/* compress from file to def until provided buffer is full or end of - input reached; return last deflate() return value, or Z_ERRNO if - there was read error on the file */ -local int partcompress(FILE *in, z_streamp def) -{ - int ret, flush; - unsigned char raw[RAWLEN]; - - flush = Z_NO_FLUSH; - do { - def->avail_in = fread(raw, 1, RAWLEN, in); - if (ferror(in)) - return Z_ERRNO; - def->next_in = raw; - if (feof(in)) - flush = Z_FINISH; - ret = deflate(def, flush); - assert(ret != Z_STREAM_ERROR); - } while (def->avail_out != 0 && flush == Z_NO_FLUSH); - return ret; -} - -/* recompress from inf's input to def's output; the input for inf and - the output for def are set in those structures before calling; - return last deflate() return value, or Z_MEM_ERROR if inflate() - was not able to allocate enough memory when it needed to */ -local int recompress(z_streamp inf, z_streamp def) -{ - int ret, flush; - unsigned char raw[RAWLEN]; - - flush = Z_NO_FLUSH; - do { - /* decompress */ - inf->avail_out = RAWLEN; - inf->next_out = raw; - ret = inflate(inf, Z_NO_FLUSH); - assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR && - ret != Z_NEED_DICT); - if (ret == Z_MEM_ERROR) - return ret; - - /* compress what was decompresed until done or no room */ - def->avail_in = RAWLEN - inf->avail_out; - def->next_in = raw; - if (inf->avail_out != 0) - flush = Z_FINISH; - ret = deflate(def, flush); - assert(ret != Z_STREAM_ERROR); - } while (ret != Z_STREAM_END && def->avail_out != 0); - return ret; -} - -#define EXCESS 256 /* empirically determined stream overage */ -#define MARGIN 8 /* amount to back off for completion */ - -/* compress from stdin to fixed-size block on stdout */ -int main(int argc, char **argv) -{ - int ret; /* return code */ - unsigned size; /* requested fixed output block size */ - unsigned have; /* bytes written by deflate() call */ - unsigned char *blk; /* intermediate and final stream */ - unsigned char *tmp; /* close to desired size stream */ - z_stream def, inf; /* zlib deflate and inflate states */ - - /* get requested output size */ - if (argc != 2) - quit("need one argument: size of output block"); - ret = strtol(argv[1], argv + 1, 10); - if (argv[1][0] != 0) - quit("argument must be a number"); - if (ret < 8) /* 8 is minimum zlib stream size */ - quit("need positive size of 8 or greater"); - size = (unsigned)ret; - - /* allocate memory for buffers and compression engine */ - blk = malloc(size + EXCESS); - def.zalloc = Z_NULL; - def.zfree = Z_NULL; - def.opaque = Z_NULL; - ret = deflateInit(&def, Z_DEFAULT_COMPRESSION); - if (ret != Z_OK || blk == NULL) - quit("out of memory"); - - /* compress from stdin until output full, or no more input */ - def.avail_out = size + EXCESS; - def.next_out = blk; - ret = partcompress(stdin, &def); - if (ret == Z_ERRNO) - quit("error reading input"); - - /* if it all fit, then size was undersubscribed -- done! */ - if (ret == Z_STREAM_END && def.avail_out >= EXCESS) { - /* write block to stdout */ - have = size + EXCESS - def.avail_out; - if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) - quit("error writing output"); - - /* clean up and print results to stderr */ - ret = deflateEnd(&def); - assert(ret != Z_STREAM_ERROR); - free(blk); - fprintf(stderr, - "%u bytes unused out of %u requested (all input)\n", - size - have, size); - return 0; - } - - /* it didn't all fit -- set up for recompression */ - inf.zalloc = Z_NULL; - inf.zfree = Z_NULL; - inf.opaque = Z_NULL; - inf.avail_in = 0; - inf.next_in = Z_NULL; - ret = inflateInit(&inf); - tmp = malloc(size + EXCESS); - if (ret != Z_OK || tmp == NULL) - quit("out of memory"); - ret = deflateReset(&def); - assert(ret != Z_STREAM_ERROR); - - /* do first recompression close to the right amount */ - inf.avail_in = size + EXCESS; - inf.next_in = blk; - def.avail_out = size + EXCESS; - def.next_out = tmp; - ret = recompress(&inf, &def); - if (ret == Z_MEM_ERROR) - quit("out of memory"); - - /* set up for next reocmpression */ - ret = inflateReset(&inf); - assert(ret != Z_STREAM_ERROR); - ret = deflateReset(&def); - assert(ret != Z_STREAM_ERROR); - - /* do second and final recompression (third compression) */ - inf.avail_in = size - MARGIN; /* assure stream will complete */ - inf.next_in = tmp; - def.avail_out = size; - def.next_out = blk; - ret = recompress(&inf, &def); - if (ret == Z_MEM_ERROR) - quit("out of memory"); - assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */ - - /* done -- write block to stdout */ - have = size - def.avail_out; - if (fwrite(blk, 1, have, stdout) != have || ferror(stdout)) - quit("error writing output"); - - /* clean up and print results to stderr */ - free(tmp); - ret = inflateEnd(&inf); - assert(ret != Z_STREAM_ERROR); - ret = deflateEnd(&def); - assert(ret != Z_STREAM_ERROR); - free(blk); - fprintf(stderr, - "%u bytes unused out of %u requested (%lu input)\n", - size - have, size, def.total_in); - return 0; -} diff --git a/zlib/examples/gun.c b/zlib/examples/gun.c deleted file mode 100644 index be44fa5..0000000 --- a/zlib/examples/gun.c +++ /dev/null @@ -1,702 +0,0 @@ -/* gun.c -- simple gunzip to give an example of the use of inflateBack() - * Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - Version 1.7 12 August 2012 Mark Adler */ - -/* Version history: - 1.0 16 Feb 2003 First version for testing of inflateBack() - 1.1 21 Feb 2005 Decompress concatenated gzip streams - Remove use of "this" variable (C++ keyword) - Fix return value for in() - Improve allocation failure checking - Add typecasting for void * structures - Add -h option for command version and usage - Add a bunch of comments - 1.2 20 Mar 2005 Add Unix compress (LZW) decompression - Copy file attributes from input file to output file - 1.3 12 Jun 2005 Add casts for error messages [Oberhumer] - 1.4 8 Dec 2006 LZW decompression speed improvements - 1.5 9 Feb 2008 Avoid warning in latest version of gcc - 1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings - 1.7 12 Aug 2012 Update for z_const usage in zlib 1.2.8 - */ - -/* - gun [ -t ] [ name ... ] - - decompresses the data in the named gzip files. If no arguments are given, - gun will decompress from stdin to stdout. The names must end in .gz, -gz, - .z, -z, _z, or .Z. The uncompressed data will be written to a file name - with the suffix stripped. On success, the original file is deleted. On - failure, the output file is deleted. For most failures, the command will - continue to process the remaining names on the command line. A memory - allocation failure will abort the command. If -t is specified, then the - listed files or stdin will be tested as gzip files for integrity (without - checking for a proper suffix), no output will be written, and no files - will be deleted. - - Like gzip, gun allows concatenated gzip streams and will decompress them, - writing all of the uncompressed data to the output. Unlike gzip, gun allows - an empty file on input, and will produce no error writing an empty output - file. - - gun will also decompress files made by Unix compress, which uses LZW - compression. These files are automatically detected by virtue of their - magic header bytes. Since the end of Unix compress stream is marked by the - end-of-file, they cannot be concantenated. If a Unix compress stream is - encountered in an input file, it is the last stream in that file. - - Like gunzip and uncompress, the file attributes of the original compressed - file are maintained in the final uncompressed file, to the extent that the - user permissions allow it. - - On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version - 1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the - LZW decompression provided by gun is about twice as fast as the standard - Unix uncompress command. - */ - -/* external functions and related types and constants */ -#include /* fprintf() */ -#include /* malloc(), free() */ -#include /* strerror(), strcmp(), strlen(), memcpy() */ -#include /* errno */ -#include /* open() */ -#include /* read(), write(), close(), chown(), unlink() */ -#include -#include /* stat(), chmod() */ -#include /* utime() */ -#include "zlib.h" /* inflateBackInit(), inflateBack(), */ - /* inflateBackEnd(), crc32() */ - -/* function declaration */ -#define local static - -/* buffer constants */ -#define SIZE 32768U /* input and output buffer sizes */ -#define PIECE 16384 /* limits i/o chunks for 16-bit int case */ - -/* structure for infback() to pass to input function in() -- it maintains the - input file and a buffer of size SIZE */ -struct ind { - int infile; - unsigned char *inbuf; -}; - -/* Load input buffer, assumed to be empty, and return bytes loaded and a - pointer to them. read() is called until the buffer is full, or until it - returns end-of-file or error. Return 0 on error. */ -local unsigned in(void *in_desc, z_const unsigned char **buf) -{ - int ret; - unsigned len; - unsigned char *next; - struct ind *me = (struct ind *)in_desc; - - next = me->inbuf; - *buf = next; - len = 0; - do { - ret = PIECE; - if ((unsigned)ret > SIZE - len) - ret = (int)(SIZE - len); - ret = (int)read(me->infile, next, ret); - if (ret == -1) { - len = 0; - break; - } - next += ret; - len += ret; - } while (ret != 0 && len < SIZE); - return len; -} - -/* structure for infback() to pass to output function out() -- it maintains the - output file, a running CRC-32 check on the output and the total number of - bytes output, both for checking against the gzip trailer. (The length in - the gzip trailer is stored modulo 2^32, so it's ok if a long is 32 bits and - the output is greater than 4 GB.) */ -struct outd { - int outfile; - int check; /* true if checking crc and total */ - unsigned long crc; - unsigned long total; -}; - -/* Write output buffer and update the CRC-32 and total bytes written. write() - is called until all of the output is written or an error is encountered. - On success out() returns 0. For a write failure, out() returns 1. If the - output file descriptor is -1, then nothing is written. - */ -local int out(void *out_desc, unsigned char *buf, unsigned len) -{ - int ret; - struct outd *me = (struct outd *)out_desc; - - if (me->check) { - me->crc = crc32(me->crc, buf, len); - me->total += len; - } - if (me->outfile != -1) - do { - ret = PIECE; - if ((unsigned)ret > len) - ret = (int)len; - ret = (int)write(me->outfile, buf, ret); - if (ret == -1) - return 1; - buf += ret; - len -= ret; - } while (len != 0); - return 0; -} - -/* next input byte macro for use inside lunpipe() and gunpipe() */ -#define NEXT() (have ? 0 : (have = in(indp, &next)), \ - last = have ? (have--, (int)(*next++)) : -1) - -/* memory for gunpipe() and lunpipe() -- - the first 256 entries of prefix[] and suffix[] are never used, could - have offset the index, but it's faster to waste the memory */ -unsigned char inbuf[SIZE]; /* input buffer */ -unsigned char outbuf[SIZE]; /* output buffer */ -unsigned short prefix[65536]; /* index to LZW prefix string */ -unsigned char suffix[65536]; /* one-character LZW suffix */ -unsigned char match[65280 + 2]; /* buffer for reversed match or gzip - 32K sliding window */ - -/* throw out what's left in the current bits byte buffer (this is a vestigial - aspect of the compressed data format derived from an implementation that - made use of a special VAX machine instruction!) */ -#define FLUSHCODE() \ - do { \ - left = 0; \ - rem = 0; \ - if (chunk > have) { \ - chunk -= have; \ - have = 0; \ - if (NEXT() == -1) \ - break; \ - chunk--; \ - if (chunk > have) { \ - chunk = have = 0; \ - break; \ - } \ - } \ - have -= chunk; \ - next += chunk; \ - chunk = 0; \ - } while (0) - -/* Decompress a compress (LZW) file from indp to outfile. The compress magic - header (two bytes) has already been read and verified. There are have bytes - of buffered input at next. strm is used for passing error information back - to gunpipe(). - - lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of - file, read error, or write error (a write error indicated by strm->next_in - not equal to Z_NULL), or Z_DATA_ERROR for invalid input. - */ -local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp, - int outfile, z_stream *strm) -{ - int last; /* last byte read by NEXT(), or -1 if EOF */ - unsigned chunk; /* bytes left in current chunk */ - int left; /* bits left in rem */ - unsigned rem; /* unused bits from input */ - int bits; /* current bits per code */ - unsigned code; /* code, table traversal index */ - unsigned mask; /* mask for current bits codes */ - int max; /* maximum bits per code for this stream */ - unsigned flags; /* compress flags, then block compress flag */ - unsigned end; /* last valid entry in prefix/suffix tables */ - unsigned temp; /* current code */ - unsigned prev; /* previous code */ - unsigned final; /* last character written for previous code */ - unsigned stack; /* next position for reversed string */ - unsigned outcnt; /* bytes in output buffer */ - struct outd outd; /* output structure */ - unsigned char *p; - - /* set up output */ - outd.outfile = outfile; - outd.check = 0; - - /* process remainder of compress header -- a flags byte */ - flags = NEXT(); - if (last == -1) - return Z_BUF_ERROR; - if (flags & 0x60) { - strm->msg = (char *)"unknown lzw flags set"; - return Z_DATA_ERROR; - } - max = flags & 0x1f; - if (max < 9 || max > 16) { - strm->msg = (char *)"lzw bits out of range"; - return Z_DATA_ERROR; - } - if (max == 9) /* 9 doesn't really mean 9 */ - max = 10; - flags &= 0x80; /* true if block compress */ - - /* clear table */ - bits = 9; - mask = 0x1ff; - end = flags ? 256 : 255; - - /* set up: get first 9-bit code, which is the first decompressed byte, but - don't create a table entry until the next code */ - if (NEXT() == -1) /* no compressed data is ok */ - return Z_OK; - final = prev = (unsigned)last; /* low 8 bits of code */ - if (NEXT() == -1) /* missing a bit */ - return Z_BUF_ERROR; - if (last & 1) { /* code must be < 256 */ - strm->msg = (char *)"invalid lzw code"; - return Z_DATA_ERROR; - } - rem = (unsigned)last >> 1; /* remaining 7 bits */ - left = 7; - chunk = bits - 2; /* 7 bytes left in this chunk */ - outbuf[0] = (unsigned char)final; /* write first decompressed byte */ - outcnt = 1; - - /* decode codes */ - stack = 0; - for (;;) { - /* if the table will be full after this, increment the code size */ - if (end >= mask && bits < max) { - FLUSHCODE(); - bits++; - mask <<= 1; - mask++; - } - - /* get a code of length bits */ - if (chunk == 0) /* decrement chunk modulo bits */ - chunk = bits; - code = rem; /* low bits of code */ - if (NEXT() == -1) { /* EOF is end of compressed data */ - /* write remaining buffered output */ - if (outcnt && out(&outd, outbuf, outcnt)) { - strm->next_in = outbuf; /* signal write error */ - return Z_BUF_ERROR; - } - return Z_OK; - } - code += (unsigned)last << left; /* middle (or high) bits of code */ - left += 8; - chunk--; - if (bits > left) { /* need more bits */ - if (NEXT() == -1) /* can't end in middle of code */ - return Z_BUF_ERROR; - code += (unsigned)last << left; /* high bits of code */ - left += 8; - chunk--; - } - code &= mask; /* mask to current code length */ - left -= bits; /* number of unused bits */ - rem = (unsigned)last >> (8 - left); /* unused bits from last byte */ - - /* process clear code (256) */ - if (code == 256 && flags) { - FLUSHCODE(); - bits = 9; /* initialize bits and mask */ - mask = 0x1ff; - end = 255; /* empty table */ - continue; /* get next code */ - } - - /* special code to reuse last match */ - temp = code; /* save the current code */ - if (code > end) { - /* Be picky on the allowed code here, and make sure that the code - we drop through (prev) will be a valid index so that random - input does not cause an exception. The code != end + 1 check is - empirically derived, and not checked in the original uncompress - code. If this ever causes a problem, that check could be safely - removed. Leaving this check in greatly improves gun's ability - to detect random or corrupted input after a compress header. - In any case, the prev > end check must be retained. */ - if (code != end + 1 || prev > end) { - strm->msg = (char *)"invalid lzw code"; - return Z_DATA_ERROR; - } - match[stack++] = (unsigned char)final; - code = prev; - } - - /* walk through linked list to generate output in reverse order */ - p = match + stack; - while (code >= 256) { - *p++ = suffix[code]; - code = prefix[code]; - } - stack = p - match; - match[stack++] = (unsigned char)code; - final = code; - - /* link new table entry */ - if (end < mask) { - end++; - prefix[end] = (unsigned short)prev; - suffix[end] = (unsigned char)final; - } - - /* set previous code for next iteration */ - prev = temp; - - /* write output in forward order */ - while (stack > SIZE - outcnt) { - while (outcnt < SIZE) - outbuf[outcnt++] = match[--stack]; - if (out(&outd, outbuf, outcnt)) { - strm->next_in = outbuf; /* signal write error */ - return Z_BUF_ERROR; - } - outcnt = 0; - } - p = match + stack; - do { - outbuf[outcnt++] = *--p; - } while (p > match); - stack = 0; - - /* loop for next code with final and prev as the last match, rem and - left provide the first 0..7 bits of the next code, end is the last - valid table entry */ - } -} - -/* Decompress a gzip file from infile to outfile. strm is assumed to have been - successfully initialized with inflateBackInit(). The input file may consist - of a series of gzip streams, in which case all of them will be decompressed - to the output file. If outfile is -1, then the gzip stream(s) integrity is - checked and nothing is written. - - The return value is a zlib error code: Z_MEM_ERROR if out of memory, - Z_DATA_ERROR if the header or the compressed data is invalid, or if the - trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends - prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip - stream) follows a valid gzip stream. - */ -local int gunpipe(z_stream *strm, int infile, int outfile) -{ - int ret, first, last; - unsigned have, flags, len; - z_const unsigned char *next = NULL; - struct ind ind, *indp; - struct outd outd; - - /* setup input buffer */ - ind.infile = infile; - ind.inbuf = inbuf; - indp = &ind; - - /* decompress concatenated gzip streams */ - have = 0; /* no input data read in yet */ - first = 1; /* looking for first gzip header */ - strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ - for (;;) { - /* look for the two magic header bytes for a gzip stream */ - if (NEXT() == -1) { - ret = Z_OK; - break; /* empty gzip stream is ok */ - } - if (last != 31 || (NEXT() != 139 && last != 157)) { - strm->msg = (char *)"incorrect header check"; - ret = first ? Z_DATA_ERROR : Z_ERRNO; - break; /* not a gzip or compress header */ - } - first = 0; /* next non-header is junk */ - - /* process a compress (LZW) file -- can't be concatenated after this */ - if (last == 157) { - ret = lunpipe(have, next, indp, outfile, strm); - break; - } - - /* process remainder of gzip header */ - ret = Z_BUF_ERROR; - if (NEXT() != 8) { /* only deflate method allowed */ - if (last == -1) break; - strm->msg = (char *)"unknown compression method"; - ret = Z_DATA_ERROR; - break; - } - flags = NEXT(); /* header flags */ - NEXT(); /* discard mod time, xflgs, os */ - NEXT(); - NEXT(); - NEXT(); - NEXT(); - NEXT(); - if (last == -1) break; - if (flags & 0xe0) { - strm->msg = (char *)"unknown header flags set"; - ret = Z_DATA_ERROR; - break; - } - if (flags & 4) { /* extra field */ - len = NEXT(); - len += (unsigned)(NEXT()) << 8; - if (last == -1) break; - while (len > have) { - len -= have; - have = 0; - if (NEXT() == -1) break; - len--; - } - if (last == -1) break; - have -= len; - next += len; - } - if (flags & 8) /* file name */ - while (NEXT() != 0 && last != -1) - ; - if (flags & 16) /* comment */ - while (NEXT() != 0 && last != -1) - ; - if (flags & 2) { /* header crc */ - NEXT(); - NEXT(); - } - if (last == -1) break; - - /* set up output */ - outd.outfile = outfile; - outd.check = 1; - outd.crc = crc32(0L, Z_NULL, 0); - outd.total = 0; - - /* decompress data to output */ - strm->next_in = next; - strm->avail_in = have; - ret = inflateBack(strm, in, indp, out, &outd); - if (ret != Z_STREAM_END) break; - next = strm->next_in; - have = strm->avail_in; - strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */ - - /* check trailer */ - ret = Z_BUF_ERROR; - if (NEXT() != (int)(outd.crc & 0xff) || - NEXT() != (int)((outd.crc >> 8) & 0xff) || - NEXT() != (int)((outd.crc >> 16) & 0xff) || - NEXT() != (int)((outd.crc >> 24) & 0xff)) { - /* crc error */ - if (last != -1) { - strm->msg = (char *)"incorrect data check"; - ret = Z_DATA_ERROR; - } - break; - } - if (NEXT() != (int)(outd.total & 0xff) || - NEXT() != (int)((outd.total >> 8) & 0xff) || - NEXT() != (int)((outd.total >> 16) & 0xff) || - NEXT() != (int)((outd.total >> 24) & 0xff)) { - /* length error */ - if (last != -1) { - strm->msg = (char *)"incorrect length check"; - ret = Z_DATA_ERROR; - } - break; - } - - /* go back and look for another gzip stream */ - } - - /* clean up and return */ - return ret; -} - -/* Copy file attributes, from -> to, as best we can. This is best effort, so - no errors are reported. The mode bits, including suid, sgid, and the sticky - bit are copied (if allowed), the owner's user id and group id are copied - (again if allowed), and the access and modify times are copied. */ -local void copymeta(char *from, char *to) -{ - struct stat was; - struct utimbuf when; - - /* get all of from's Unix meta data, return if not a regular file */ - if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG) - return; - - /* set to's mode bits, ignore errors */ - (void)chmod(to, was.st_mode & 07777); - - /* copy owner's user and group, ignore errors */ - (void)chown(to, was.st_uid, was.st_gid); - - /* copy access and modify times, ignore errors */ - when.actime = was.st_atime; - when.modtime = was.st_mtime; - (void)utime(to, &when); -} - -/* Decompress the file inname to the file outnname, of if test is true, just - decompress without writing and check the gzip trailer for integrity. If - inname is NULL or an empty string, read from stdin. If outname is NULL or - an empty string, write to stdout. strm is a pre-initialized inflateBack - structure. When appropriate, copy the file attributes from inname to - outname. - - gunzip() returns 1 if there is an out-of-memory error or an unexpected - return code from gunpipe(). Otherwise it returns 0. - */ -local int gunzip(z_stream *strm, char *inname, char *outname, int test) -{ - int ret; - int infile, outfile; - - /* open files */ - if (inname == NULL || *inname == 0) { - inname = "-"; - infile = 0; /* stdin */ - } - else { - infile = open(inname, O_RDONLY, 0); - if (infile == -1) { - fprintf(stderr, "gun cannot open %s\n", inname); - return 0; - } - } - if (test) - outfile = -1; - else if (outname == NULL || *outname == 0) { - outname = "-"; - outfile = 1; /* stdout */ - } - else { - outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666); - if (outfile == -1) { - close(infile); - fprintf(stderr, "gun cannot create %s\n", outname); - return 0; - } - } - errno = 0; - - /* decompress */ - ret = gunpipe(strm, infile, outfile); - if (outfile > 2) close(outfile); - if (infile > 2) close(infile); - - /* interpret result */ - switch (ret) { - case Z_OK: - case Z_ERRNO: - if (infile > 2 && outfile > 2) { - copymeta(inname, outname); /* copy attributes */ - unlink(inname); - } - if (ret == Z_ERRNO) - fprintf(stderr, "gun warning: trailing garbage ignored in %s\n", - inname); - break; - case Z_DATA_ERROR: - if (outfile > 2) unlink(outname); - fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg); - break; - case Z_MEM_ERROR: - if (outfile > 2) unlink(outname); - fprintf(stderr, "gun out of memory error--aborting\n"); - return 1; - case Z_BUF_ERROR: - if (outfile > 2) unlink(outname); - if (strm->next_in != Z_NULL) { - fprintf(stderr, "gun write error on %s: %s\n", - outname, strerror(errno)); - } - else if (errno) { - fprintf(stderr, "gun read error on %s: %s\n", - inname, strerror(errno)); - } - else { - fprintf(stderr, "gun unexpected end of file on %s\n", - inname); - } - break; - default: - if (outfile > 2) unlink(outname); - fprintf(stderr, "gun internal error--aborting\n"); - return 1; - } - return 0; -} - -/* Process the gun command line arguments. See the command syntax near the - beginning of this source file. */ -int main(int argc, char **argv) -{ - int ret, len, test; - char *outname; - unsigned char *window; - z_stream strm; - - /* initialize inflateBack state for repeated use */ - window = match; /* reuse LZW match buffer */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - ret = inflateBackInit(&strm, 15, window); - if (ret != Z_OK) { - fprintf(stderr, "gun out of memory error--aborting\n"); - return 1; - } - - /* decompress each file to the same name with the suffix removed */ - argc--; - argv++; - test = 0; - if (argc && strcmp(*argv, "-h") == 0) { - fprintf(stderr, "gun 1.6 (17 Jan 2010)\n"); - fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n"); - fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n"); - return 0; - } - if (argc && strcmp(*argv, "-t") == 0) { - test = 1; - argc--; - argv++; - } - if (argc) - do { - if (test) - outname = NULL; - else { - len = (int)strlen(*argv); - if (strcmp(*argv + len - 3, ".gz") == 0 || - strcmp(*argv + len - 3, "-gz") == 0) - len -= 3; - else if (strcmp(*argv + len - 2, ".z") == 0 || - strcmp(*argv + len - 2, "-z") == 0 || - strcmp(*argv + len - 2, "_z") == 0 || - strcmp(*argv + len - 2, ".Z") == 0) - len -= 2; - else { - fprintf(stderr, "gun error: no gz type on %s--skipping\n", - *argv); - continue; - } - outname = malloc(len + 1); - if (outname == NULL) { - fprintf(stderr, "gun out of memory error--aborting\n"); - ret = 1; - break; - } - memcpy(outname, *argv, len); - outname[len] = 0; - } - ret = gunzip(&strm, *argv, outname, test); - if (outname != NULL) free(outname); - if (ret) break; - } while (argv++, --argc); - else - ret = gunzip(&strm, NULL, NULL, test); - - /* clean up */ - inflateBackEnd(&strm); - return ret; -} diff --git a/zlib/examples/gzappend.c b/zlib/examples/gzappend.c deleted file mode 100644 index 662dec3..0000000 --- a/zlib/examples/gzappend.c +++ /dev/null @@ -1,504 +0,0 @@ -/* gzappend -- command to append to a gzip file - - Copyright (C) 2003, 2012 Mark Adler, all rights reserved - version 1.2, 11 Oct 2012 - - This software is provided 'as-is', without any express or implied - warranty. In no event will the author be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - Mark Adler madler@alumni.caltech.edu - */ - -/* - * Change history: - * - * 1.0 19 Oct 2003 - First version - * 1.1 4 Nov 2003 - Expand and clarify some comments and notes - * - Add version and copyright to help - * - Send help to stdout instead of stderr - * - Add some preemptive typecasts - * - Add L to constants in lseek() calls - * - Remove some debugging information in error messages - * - Use new data_type definition for zlib 1.2.1 - * - Simplfy and unify file operations - * - Finish off gzip file in gztack() - * - Use deflatePrime() instead of adding empty blocks - * - Keep gzip file clean on appended file read errors - * - Use in-place rotate instead of auxiliary buffer - * (Why you ask? Because it was fun to write!) - * 1.2 11 Oct 2012 - Fix for proper z_const usage - * - Check for input buffer malloc failure - */ - -/* - gzappend takes a gzip file and appends to it, compressing files from the - command line or data from stdin. The gzip file is written to directly, to - avoid copying that file, in case it's large. Note that this results in the - unfriendly behavior that if gzappend fails, the gzip file is corrupted. - - This program was written to illustrate the use of the new Z_BLOCK option of - zlib 1.2.x's inflate() function. This option returns from inflate() at each - block boundary to facilitate locating and modifying the last block bit at - the start of the final deflate block. Also whether using Z_BLOCK or not, - another required feature of zlib 1.2.x is that inflate() now provides the - number of unusued bits in the last input byte used. gzappend will not work - with versions of zlib earlier than 1.2.1. - - gzappend first decompresses the gzip file internally, discarding all but - the last 32K of uncompressed data, and noting the location of the last block - bit and the number of unused bits in the last byte of the compressed data. - The gzip trailer containing the CRC-32 and length of the uncompressed data - is verified. This trailer will be later overwritten. - - Then the last block bit is cleared by seeking back in the file and rewriting - the byte that contains it. Seeking forward, the last byte of the compressed - data is saved along with the number of unused bits to initialize deflate. - - A deflate process is initialized, using the last 32K of the uncompressed - data from the gzip file to initialize the dictionary. If the total - uncompressed data was less than 32K, then all of it is used to initialize - the dictionary. The deflate output bit buffer is also initialized with the - last bits from the original deflate stream. From here on, the data to - append is simply compressed using deflate, and written to the gzip file. - When that is complete, the new CRC-32 and uncompressed length are written - as the trailer of the gzip file. - */ - -#include -#include -#include -#include -#include -#include "zlib.h" - -#define local static -#define LGCHUNK 14 -#define CHUNK (1U << LGCHUNK) -#define DSIZE 32768U - -/* print an error message and terminate with extreme prejudice */ -local void bye(char *msg1, char *msg2) -{ - fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2); - exit(1); -} - -/* return the greatest common divisor of a and b using Euclid's algorithm, - modified to be fast when one argument much greater than the other, and - coded to avoid unnecessary swapping */ -local unsigned gcd(unsigned a, unsigned b) -{ - unsigned c; - - while (a && b) - if (a > b) { - c = b; - while (a - c >= c) - c <<= 1; - a -= c; - } - else { - c = a; - while (b - c >= c) - c <<= 1; - b -= c; - } - return a + b; -} - -/* rotate list[0..len-1] left by rot positions, in place */ -local void rotate(unsigned char *list, unsigned len, unsigned rot) -{ - unsigned char tmp; - unsigned cycles; - unsigned char *start, *last, *to, *from; - - /* normalize rot and handle degenerate cases */ - if (len < 2) return; - if (rot >= len) rot %= len; - if (rot == 0) return; - - /* pointer to last entry in list */ - last = list + (len - 1); - - /* do simple left shift by one */ - if (rot == 1) { - tmp = *list; - memcpy(list, list + 1, len - 1); - *last = tmp; - return; - } - - /* do simple right shift by one */ - if (rot == len - 1) { - tmp = *last; - memmove(list + 1, list, len - 1); - *list = tmp; - return; - } - - /* otherwise do rotate as a set of cycles in place */ - cycles = gcd(len, rot); /* number of cycles */ - do { - start = from = list + cycles; /* start index is arbitrary */ - tmp = *from; /* save entry to be overwritten */ - for (;;) { - to = from; /* next step in cycle */ - from += rot; /* go right rot positions */ - if (from > last) from -= len; /* (pointer better not wrap) */ - if (from == start) break; /* all but one shifted */ - *to = *from; /* shift left */ - } - *to = tmp; /* complete the circle */ - } while (--cycles); -} - -/* structure for gzip file read operations */ -typedef struct { - int fd; /* file descriptor */ - int size; /* 1 << size is bytes in buf */ - unsigned left; /* bytes available at next */ - unsigned char *buf; /* buffer */ - z_const unsigned char *next; /* next byte in buffer */ - char *name; /* file name for error messages */ -} file; - -/* reload buffer */ -local int readin(file *in) -{ - int len; - - len = read(in->fd, in->buf, 1 << in->size); - if (len == -1) bye("error reading ", in->name); - in->left = (unsigned)len; - in->next = in->buf; - return len; -} - -/* read from file in, exit if end-of-file */ -local int readmore(file *in) -{ - if (readin(in) == 0) bye("unexpected end of ", in->name); - return 0; -} - -#define read1(in) (in->left == 0 ? readmore(in) : 0, \ - in->left--, *(in->next)++) - -/* skip over n bytes of in */ -local void skip(file *in, unsigned n) -{ - unsigned bypass; - - if (n > in->left) { - n -= in->left; - bypass = n & ~((1U << in->size) - 1); - if (bypass) { - if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1) - bye("seeking ", in->name); - n -= bypass; - } - readmore(in); - if (n > in->left) - bye("unexpected end of ", in->name); - } - in->left -= n; - in->next += n; -} - -/* read a four-byte unsigned integer, little-endian, from in */ -unsigned long read4(file *in) -{ - unsigned long val; - - val = read1(in); - val += (unsigned)read1(in) << 8; - val += (unsigned long)read1(in) << 16; - val += (unsigned long)read1(in) << 24; - return val; -} - -/* skip over gzip header */ -local void gzheader(file *in) -{ - int flags; - unsigned n; - - if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file"); - if (read1(in) != 8) bye("unknown compression method in", in->name); - flags = read1(in); - if (flags & 0xe0) bye("unknown header flags set in", in->name); - skip(in, 6); - if (flags & 4) { - n = read1(in); - n += (unsigned)(read1(in)) << 8; - skip(in, n); - } - if (flags & 8) while (read1(in) != 0) ; - if (flags & 16) while (read1(in) != 0) ; - if (flags & 2) skip(in, 2); -} - -/* decompress gzip file "name", return strm with a deflate stream ready to - continue compression of the data in the gzip file, and return a file - descriptor pointing to where to write the compressed data -- the deflate - stream is initialized to compress using level "level" */ -local int gzscan(char *name, z_stream *strm, int level) -{ - int ret, lastbit, left, full; - unsigned have; - unsigned long crc, tot; - unsigned char *window; - off_t lastoff, end; - file gz; - - /* open gzip file */ - gz.name = name; - gz.fd = open(name, O_RDWR, 0); - if (gz.fd == -1) bye("cannot open ", name); - gz.buf = malloc(CHUNK); - if (gz.buf == NULL) bye("out of memory", ""); - gz.size = LGCHUNK; - gz.left = 0; - - /* skip gzip header */ - gzheader(&gz); - - /* prepare to decompress */ - window = malloc(DSIZE); - if (window == NULL) bye("out of memory", ""); - strm->zalloc = Z_NULL; - strm->zfree = Z_NULL; - strm->opaque = Z_NULL; - ret = inflateInit2(strm, -15); - if (ret != Z_OK) bye("out of memory", " or library mismatch"); - - /* decompress the deflate stream, saving append information */ - lastbit = 0; - lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; - left = 0; - strm->avail_in = gz.left; - strm->next_in = gz.next; - crc = crc32(0L, Z_NULL, 0); - have = full = 0; - do { - /* if needed, get more input */ - if (strm->avail_in == 0) { - readmore(&gz); - strm->avail_in = gz.left; - strm->next_in = gz.next; - } - - /* set up output to next available section of sliding window */ - strm->avail_out = DSIZE - have; - strm->next_out = window + have; - - /* inflate and check for errors */ - ret = inflate(strm, Z_BLOCK); - if (ret == Z_STREAM_ERROR) bye("internal stream error!", ""); - if (ret == Z_MEM_ERROR) bye("out of memory", ""); - if (ret == Z_DATA_ERROR) - bye("invalid compressed data--format violated in", name); - - /* update crc and sliding window pointer */ - crc = crc32(crc, window + have, DSIZE - have - strm->avail_out); - if (strm->avail_out) - have = DSIZE - strm->avail_out; - else { - have = 0; - full = 1; - } - - /* process end of block */ - if (strm->data_type & 128) { - if (strm->data_type & 64) - left = strm->data_type & 0x1f; - else { - lastbit = strm->data_type & 0x1f; - lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in; - } - } - } while (ret != Z_STREAM_END); - inflateEnd(strm); - gz.left = strm->avail_in; - gz.next = strm->next_in; - - /* save the location of the end of the compressed data */ - end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left; - - /* check gzip trailer and save total for deflate */ - if (crc != read4(&gz)) - bye("invalid compressed data--crc mismatch in ", name); - tot = strm->total_out; - if ((tot & 0xffffffffUL) != read4(&gz)) - bye("invalid compressed data--length mismatch in", name); - - /* if not at end of file, warn */ - if (gz.left || readin(&gz)) - fprintf(stderr, - "gzappend warning: junk at end of gzip file overwritten\n"); - - /* clear last block bit */ - lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET); - if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); - *gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7))); - lseek(gz.fd, -1L, SEEK_CUR); - if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name); - - /* if window wrapped, build dictionary from window by rotating */ - if (full) { - rotate(window, DSIZE, have); - have = DSIZE; - } - - /* set up deflate stream with window, crc, total_in, and leftover bits */ - ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY); - if (ret != Z_OK) bye("out of memory", ""); - deflateSetDictionary(strm, window, have); - strm->adler = crc; - strm->total_in = tot; - if (left) { - lseek(gz.fd, --end, SEEK_SET); - if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name); - deflatePrime(strm, 8 - left, *gz.buf); - } - lseek(gz.fd, end, SEEK_SET); - - /* clean up and return */ - free(window); - free(gz.buf); - return gz.fd; -} - -/* append file "name" to gzip file gd using deflate stream strm -- if last - is true, then finish off the deflate stream at the end */ -local void gztack(char *name, int gd, z_stream *strm, int last) -{ - int fd, len, ret; - unsigned left; - unsigned char *in, *out; - - /* open file to compress and append */ - fd = 0; - if (name != NULL) { - fd = open(name, O_RDONLY, 0); - if (fd == -1) - fprintf(stderr, "gzappend warning: %s not found, skipping ...\n", - name); - } - - /* allocate buffers */ - in = malloc(CHUNK); - out = malloc(CHUNK); - if (in == NULL || out == NULL) bye("out of memory", ""); - - /* compress input file and append to gzip file */ - do { - /* get more input */ - len = read(fd, in, CHUNK); - if (len == -1) { - fprintf(stderr, - "gzappend warning: error reading %s, skipping rest ...\n", - name); - len = 0; - } - strm->avail_in = (unsigned)len; - strm->next_in = in; - if (len) strm->adler = crc32(strm->adler, in, (unsigned)len); - - /* compress and write all available output */ - do { - strm->avail_out = CHUNK; - strm->next_out = out; - ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH); - left = CHUNK - strm->avail_out; - while (left) { - len = write(gd, out + CHUNK - strm->avail_out - left, left); - if (len == -1) bye("writing gzip file", ""); - left -= (unsigned)len; - } - } while (strm->avail_out == 0 && ret != Z_STREAM_END); - } while (len != 0); - - /* write trailer after last entry */ - if (last) { - deflateEnd(strm); - out[0] = (unsigned char)(strm->adler); - out[1] = (unsigned char)(strm->adler >> 8); - out[2] = (unsigned char)(strm->adler >> 16); - out[3] = (unsigned char)(strm->adler >> 24); - out[4] = (unsigned char)(strm->total_in); - out[5] = (unsigned char)(strm->total_in >> 8); - out[6] = (unsigned char)(strm->total_in >> 16); - out[7] = (unsigned char)(strm->total_in >> 24); - len = 8; - do { - ret = write(gd, out + 8 - len, len); - if (ret == -1) bye("writing gzip file", ""); - len -= ret; - } while (len); - close(gd); - } - - /* clean up and return */ - free(out); - free(in); - if (fd > 0) close(fd); -} - -/* process the compression level option if present, scan the gzip file, and - append the specified files, or append the data from stdin if no other file - names are provided on the command line -- the gzip file must be writable - and seekable */ -int main(int argc, char **argv) -{ - int gd, level; - z_stream strm; - - /* ignore command name */ - argc--; argv++; - - /* provide usage if no arguments */ - if (*argv == NULL) { - printf( - "gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n" - ); - printf( - "usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n"); - return 0; - } - - /* set compression level */ - level = Z_DEFAULT_COMPRESSION; - if (argv[0][0] == '-') { - if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0) - bye("invalid compression level", ""); - level = argv[0][1] - '0'; - if (*++argv == NULL) bye("no gzip file name after options", ""); - } - - /* prepare to append to gzip file */ - gd = gzscan(*argv++, &strm, level); - - /* append files on command line, or from stdin if none */ - if (*argv == NULL) - gztack(NULL, gd, &strm, 1); - else - do { - gztack(*argv, gd, &strm, argv[1] == NULL); - } while (*++argv != NULL); - return 0; -} diff --git a/zlib/examples/gzjoin.c b/zlib/examples/gzjoin.c deleted file mode 100644 index 89e8098..0000000 --- a/zlib/examples/gzjoin.c +++ /dev/null @@ -1,449 +0,0 @@ -/* gzjoin -- command to join gzip files into one gzip file - - Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved - version 1.2, 14 Aug 2012 - - This software is provided 'as-is', without any express or implied - warranty. In no event will the author be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - Mark Adler madler@alumni.caltech.edu - */ - -/* - * Change history: - * - * 1.0 11 Dec 2004 - First version - * 1.1 12 Jun 2005 - Changed ssize_t to long for portability - * 1.2 14 Aug 2012 - Clean up for z_const usage - */ - -/* - gzjoin takes one or more gzip files on the command line and writes out a - single gzip file that will uncompress to the concatenation of the - uncompressed data from the individual gzip files. gzjoin does this without - having to recompress any of the data and without having to calculate a new - crc32 for the concatenated uncompressed data. gzjoin does however have to - decompress all of the input data in order to find the bits in the compressed - data that need to be modified to concatenate the streams. - - gzjoin does not do an integrity check on the input gzip files other than - checking the gzip header and decompressing the compressed data. They are - otherwise assumed to be complete and correct. - - Each joint between gzip files removes at least 18 bytes of previous trailer - and subsequent header, and inserts an average of about three bytes to the - compressed data in order to connect the streams. The output gzip file - has a minimal ten-byte gzip header with no file name or modification time. - - This program was written to illustrate the use of the Z_BLOCK option of - inflate() and the crc32_combine() function. gzjoin will not compile with - versions of zlib earlier than 1.2.3. - */ - -#include /* fputs(), fprintf(), fwrite(), putc() */ -#include /* exit(), malloc(), free() */ -#include /* open() */ -#include /* close(), read(), lseek() */ -#include "zlib.h" - /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ - -#define local static - -/* exit with an error (return a value to allow use in an expression) */ -local int bail(char *why1, char *why2) -{ - fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); - exit(1); - return 0; -} - -/* -- simple buffered file input with access to the buffer -- */ - -#define CHUNK 32768 /* must be a power of two and fit in unsigned */ - -/* bin buffered input file type */ -typedef struct { - char *name; /* name of file for error messages */ - int fd; /* file descriptor */ - unsigned left; /* bytes remaining at next */ - unsigned char *next; /* next byte to read */ - unsigned char *buf; /* allocated buffer of length CHUNK */ -} bin; - -/* close a buffered file and free allocated memory */ -local void bclose(bin *in) -{ - if (in != NULL) { - if (in->fd != -1) - close(in->fd); - if (in->buf != NULL) - free(in->buf); - free(in); - } -} - -/* open a buffered file for input, return a pointer to type bin, or NULL on - failure */ -local bin *bopen(char *name) -{ - bin *in; - - in = malloc(sizeof(bin)); - if (in == NULL) - return NULL; - in->buf = malloc(CHUNK); - in->fd = open(name, O_RDONLY, 0); - if (in->buf == NULL || in->fd == -1) { - bclose(in); - return NULL; - } - in->left = 0; - in->next = in->buf; - in->name = name; - return in; -} - -/* load buffer from file, return -1 on read error, 0 or 1 on success, with - 1 indicating that end-of-file was reached */ -local int bload(bin *in) -{ - long len; - - if (in == NULL) - return -1; - if (in->left != 0) - return 0; - in->next = in->buf; - do { - len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); - if (len < 0) - return -1; - in->left += (unsigned)len; - } while (len != 0 && in->left < CHUNK); - return len == 0 ? 1 : 0; -} - -/* get a byte from the file, bail if end of file */ -#define bget(in) (in->left ? 0 : bload(in), \ - in->left ? (in->left--, *(in->next)++) : \ - bail("unexpected end of file on ", in->name)) - -/* get a four-byte little-endian unsigned integer from file */ -local unsigned long bget4(bin *in) -{ - unsigned long val; - - val = bget(in); - val += (unsigned long)(bget(in)) << 8; - val += (unsigned long)(bget(in)) << 16; - val += (unsigned long)(bget(in)) << 24; - return val; -} - -/* skip bytes in file */ -local void bskip(bin *in, unsigned skip) -{ - /* check pointer */ - if (in == NULL) - return; - - /* easy case -- skip bytes in buffer */ - if (skip <= in->left) { - in->left -= skip; - in->next += skip; - return; - } - - /* skip what's in buffer, discard buffer contents */ - skip -= in->left; - in->left = 0; - - /* seek past multiples of CHUNK bytes */ - if (skip > CHUNK) { - unsigned left; - - left = skip & (CHUNK - 1); - if (left == 0) { - /* exact number of chunks: seek all the way minus one byte to check - for end-of-file with a read */ - lseek(in->fd, skip - 1, SEEK_CUR); - if (read(in->fd, in->buf, 1) != 1) - bail("unexpected end of file on ", in->name); - return; - } - - /* skip the integral chunks, update skip with remainder */ - lseek(in->fd, skip - left, SEEK_CUR); - skip = left; - } - - /* read more input and skip remainder */ - bload(in); - if (skip > in->left) - bail("unexpected end of file on ", in->name); - in->left -= skip; - in->next += skip; -} - -/* -- end of buffered input functions -- */ - -/* skip the gzip header from file in */ -local void gzhead(bin *in) -{ - int flags; - - /* verify gzip magic header and compression method */ - if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) - bail(in->name, " is not a valid gzip file"); - - /* get and verify flags */ - flags = bget(in); - if ((flags & 0xe0) != 0) - bail("unknown reserved bits set in ", in->name); - - /* skip modification time, extra flags, and os */ - bskip(in, 6); - - /* skip extra field if present */ - if (flags & 4) { - unsigned len; - - len = bget(in); - len += (unsigned)(bget(in)) << 8; - bskip(in, len); - } - - /* skip file name if present */ - if (flags & 8) - while (bget(in) != 0) - ; - - /* skip comment if present */ - if (flags & 16) - while (bget(in) != 0) - ; - - /* skip header crc if present */ - if (flags & 2) - bskip(in, 2); -} - -/* write a four-byte little-endian unsigned integer to out */ -local void put4(unsigned long val, FILE *out) -{ - putc(val & 0xff, out); - putc((val >> 8) & 0xff, out); - putc((val >> 16) & 0xff, out); - putc((val >> 24) & 0xff, out); -} - -/* Load up zlib stream from buffered input, bail if end of file */ -local void zpull(z_streamp strm, bin *in) -{ - if (in->left == 0) - bload(in); - if (in->left == 0) - bail("unexpected end of file on ", in->name); - strm->avail_in = in->left; - strm->next_in = in->next; -} - -/* Write header for gzip file to out and initialize trailer. */ -local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) -{ - fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); - *crc = crc32(0L, Z_NULL, 0); - *tot = 0; -} - -/* Copy the compressed data from name, zeroing the last block bit of the last - block if clr is true, and adding empty blocks as needed to get to a byte - boundary. If clr is false, then the last block becomes the last block of - the output, and the gzip trailer is written. crc and tot maintains the - crc and length (modulo 2^32) of the output for the trailer. The resulting - gzip file is written to out. gzinit() must be called before the first call - of gzcopy() to write the gzip header and to initialize crc and tot. */ -local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, - FILE *out) -{ - int ret; /* return value from zlib functions */ - int pos; /* where the "last block" bit is in byte */ - int last; /* true if processing the last block */ - bin *in; /* buffered input file */ - unsigned char *start; /* start of compressed data in buffer */ - unsigned char *junk; /* buffer for uncompressed data -- discarded */ - z_off_t len; /* length of uncompressed data (support > 4 GB) */ - z_stream strm; /* zlib inflate stream */ - - /* open gzip file and skip header */ - in = bopen(name); - if (in == NULL) - bail("could not open ", name); - gzhead(in); - - /* allocate buffer for uncompressed data and initialize raw inflate - stream */ - junk = malloc(CHUNK); - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = 0; - strm.next_in = Z_NULL; - ret = inflateInit2(&strm, -15); - if (junk == NULL || ret != Z_OK) - bail("out of memory", ""); - - /* inflate and copy compressed data, clear last-block bit if requested */ - len = 0; - zpull(&strm, in); - start = in->next; - last = start[0] & 1; - if (last && clr) - start[0] &= ~1; - strm.avail_out = 0; - for (;;) { - /* if input used and output done, write used input and get more */ - if (strm.avail_in == 0 && strm.avail_out != 0) { - fwrite(start, 1, strm.next_in - start, out); - start = in->buf; - in->left = 0; - zpull(&strm, in); - } - - /* decompress -- return early when end-of-block reached */ - strm.avail_out = CHUNK; - strm.next_out = junk; - ret = inflate(&strm, Z_BLOCK); - switch (ret) { - case Z_MEM_ERROR: - bail("out of memory", ""); - case Z_DATA_ERROR: - bail("invalid compressed data in ", in->name); - } - - /* update length of uncompressed data */ - len += CHUNK - strm.avail_out; - - /* check for block boundary (only get this when block copied out) */ - if (strm.data_type & 128) { - /* if that was the last block, then done */ - if (last) - break; - - /* number of unused bits in last byte */ - pos = strm.data_type & 7; - - /* find the next last-block bit */ - if (pos != 0) { - /* next last-block bit is in last used byte */ - pos = 0x100 >> pos; - last = strm.next_in[-1] & pos; - if (last && clr) - in->buf[strm.next_in - in->buf - 1] &= ~pos; - } - else { - /* next last-block bit is in next unused byte */ - if (strm.avail_in == 0) { - /* don't have that byte yet -- get it */ - fwrite(start, 1, strm.next_in - start, out); - start = in->buf; - in->left = 0; - zpull(&strm, in); - } - last = strm.next_in[0] & 1; - if (last && clr) - in->buf[strm.next_in - in->buf] &= ~1; - } - } - } - - /* update buffer with unused input */ - in->left = strm.avail_in; - in->next = in->buf + (strm.next_in - in->buf); - - /* copy used input, write empty blocks to get to byte boundary */ - pos = strm.data_type & 7; - fwrite(start, 1, in->next - start - 1, out); - last = in->next[-1]; - if (pos == 0 || !clr) - /* already at byte boundary, or last file: write last byte */ - putc(last, out); - else { - /* append empty blocks to last byte */ - last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ - if (pos & 1) { - /* odd -- append an empty stored block */ - putc(last, out); - if (pos == 1) - putc(0, out); /* two more bits in block header */ - fwrite("\0\0\xff\xff", 1, 4, out); - } - else { - /* even -- append 1, 2, or 3 empty fixed blocks */ - switch (pos) { - case 6: - putc(last | 8, out); - last = 0; - case 4: - putc(last | 0x20, out); - last = 0; - case 2: - putc(last | 0x80, out); - putc(0, out); - } - } - } - - /* update crc and tot */ - *crc = crc32_combine(*crc, bget4(in), len); - *tot += (unsigned long)len; - - /* clean up */ - inflateEnd(&strm); - free(junk); - bclose(in); - - /* write trailer if this is the last gzip file */ - if (!clr) { - put4(*crc, out); - put4(*tot, out); - } -} - -/* join the gzip files on the command line, write result to stdout */ -int main(int argc, char **argv) -{ - unsigned long crc, tot; /* running crc and total uncompressed length */ - - /* skip command name */ - argc--; - argv++; - - /* show usage if no arguments */ - if (argc == 0) { - fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", - stderr); - return 0; - } - - /* join gzip files on command line and write to stdout */ - gzinit(&crc, &tot, stdout); - while (argc--) - gzcopy(*argv++, argc, &crc, &tot, stdout); - - /* done */ - return 0; -} diff --git a/zlib/examples/gzlog.c b/zlib/examples/gzlog.c deleted file mode 100644 index b8c2927..0000000 --- a/zlib/examples/gzlog.c +++ /dev/null @@ -1,1059 +0,0 @@ -/* - * gzlog.c - * Copyright (C) 2004, 2008, 2012, 2016 Mark Adler, all rights reserved - * For conditions of distribution and use, see copyright notice in gzlog.h - * version 2.2, 14 Aug 2012 - */ - -/* - gzlog provides a mechanism for frequently appending short strings to a gzip - file that is efficient both in execution time and compression ratio. The - strategy is to write the short strings in an uncompressed form to the end of - the gzip file, only compressing when the amount of uncompressed data has - reached a given threshold. - - gzlog also provides protection against interruptions in the process due to - system crashes. The status of the operation is recorded in an extra field - in the gzip file, and is only updated once the gzip file is brought to a - valid state. The last data to be appended or compressed is saved in an - auxiliary file, so that if the operation is interrupted, it can be completed - the next time an append operation is attempted. - - gzlog maintains another auxiliary file with the last 32K of data from the - compressed portion, which is preloaded for the compression of the subsequent - data. This minimizes the impact to the compression ratio of appending. - */ - -/* - Operations Concept: - - Files (log name "foo"): - foo.gz -- gzip file with the complete log - foo.add -- last message to append or last data to compress - foo.dict -- dictionary of the last 32K of data for next compression - foo.temp -- temporary dictionary file for compression after this one - foo.lock -- lock file for reading and writing the other files - foo.repairs -- log file for log file recovery operations (not compressed) - - gzip file structure: - - fixed-length (no file name) header with extra field (see below) - - compressed data ending initially with empty stored block - - uncompressed data filling out originally empty stored block and - subsequent stored blocks as needed (16K max each) - - gzip trailer - - no junk at end (no other gzip streams) - - When appending data, the information in the first three items above plus the - foo.add file are sufficient to recover an interrupted append operation. The - extra field has the necessary information to restore the start of the last - stored block and determine where to append the data in the foo.add file, as - well as the crc and length of the gzip data before the append operation. - - The foo.add file is created before the gzip file is marked for append, and - deleted after the gzip file is marked as complete. So if the append - operation is interrupted, the data to add will still be there. If due to - some external force, the foo.add file gets deleted between when the append - operation was interrupted and when recovery is attempted, the gzip file will - still be restored, but without the appended data. - - When compressing data, the information in the first two items above plus the - foo.add file are sufficient to recover an interrupted compress operation. - The extra field has the necessary information to find the end of the - compressed data, and contains both the crc and length of just the compressed - data and of the complete set of data including the contents of the foo.add - file. - - Again, the foo.add file is maintained during the compress operation in case - of an interruption. If in the unlikely event the foo.add file with the data - to be compressed is missing due to some external force, a gzip file with - just the previous compressed data will be reconstructed. In this case, all - of the data that was to be compressed is lost (approximately one megabyte). - This will not occur if all that happened was an interruption of the compress - operation. - - The third state that is marked is the replacement of the old dictionary with - the new dictionary after a compress operation. Once compression is - complete, the gzip file is marked as being in the replace state. This - completes the gzip file, so an interrupt after being so marked does not - result in recompression. Then the dictionary file is replaced, and the gzip - file is marked as completed. This state prevents the possibility of - restarting compression with the wrong dictionary file. - - All three operations are wrapped by a lock/unlock procedure. In order to - gain exclusive access to the log files, first a foo.lock file must be - exclusively created. When all operations are complete, the lock is - released by deleting the foo.lock file. If when attempting to create the - lock file, it already exists and the modify time of the lock file is more - than five minutes old (set by the PATIENCE define below), then the old - lock file is considered stale and deleted, and the exclusive creation of - the lock file is retried. To assure that there are no false assessments - of the staleness of the lock file, the operations periodically touch the - lock file to update the modified date. - - Following is the definition of the extra field with all of the information - required to enable the above append and compress operations and their - recovery if interrupted. Multi-byte values are stored little endian - (consistent with the gzip format). File pointers are eight bytes long. - The crc's and lengths for the gzip trailer are four bytes long. (Note that - the length at the end of a gzip file is used for error checking only, and - for large files is actually the length modulo 2^32.) The stored block - length is two bytes long. The gzip extra field two-byte identification is - "ap" for append. It is assumed that writing the extra field to the file is - an "atomic" operation. That is, either all of the extra field is written - to the file, or none of it is, if the operation is interrupted right at the - point of updating the extra field. This is a reasonable assumption, since - the extra field is within the first 52 bytes of the file, which is smaller - than any expected block size for a mass storage device (usually 512 bytes or - larger). - - Extra field (35 bytes): - - Pointer to first stored block length -- this points to the two-byte length - of the first stored block, which is followed by the two-byte, one's - complement of that length. The stored block length is preceded by the - three-bit header of the stored block, which is the actual start of the - stored block in the deflate format. See the bit offset field below. - - Pointer to the last stored block length. This is the same as above, but - for the last stored block of the uncompressed data in the gzip file. - Initially this is the same as the first stored block length pointer. - When the stored block gets to 16K (see the MAX_STORE define), then a new - stored block as added, at which point the last stored block length pointer - is different from the first stored block length pointer. When they are - different, the first bit of the last stored block header is eight bits, or - one byte back from the block length. - - Compressed data crc and length. This is the crc and length of the data - that is in the compressed portion of the deflate stream. These are used - only in the event that the foo.add file containing the data to compress is - lost after a compress operation is interrupted. - - Total data crc and length. This is the crc and length of all of the data - stored in the gzip file, compressed and uncompressed. It is used to - reconstruct the gzip trailer when compressing, as well as when recovering - interrupted operations. - - Final stored block length. This is used to quickly find where to append, - and allows the restoration of the original final stored block state when - an append operation is interrupted. - - First stored block start as the number of bits back from the final stored - block first length byte. This value is in the range of 3..10, and is - stored as the low three bits of the final byte of the extra field after - subtracting three (0..7). This allows the last-block bit of the stored - block header to be updated when a new stored block is added, for the case - when the first stored block and the last stored block are the same. (When - they are different, the numbers of bits back is known to be eight.) This - also allows for new compressed data to be appended to the old compressed - data in the compress operation, overwriting the previous first stored - block, or for the compressed data to be terminated and a valid gzip file - reconstructed on the off chance that a compression operation was - interrupted and the data to compress in the foo.add file was deleted. - - The operation in process. This is the next two bits in the last byte (the - bits under the mask 0x18). The are interpreted as 0: nothing in process, - 1: append in process, 2: compress in process, 3: replace in process. - - The top three bits of the last byte in the extra field are reserved and - are currently set to zero. - - Main procedure: - - Exclusively create the foo.lock file using the O_CREAT and O_EXCL modes of - the system open() call. If the modify time of an existing lock file is - more than PATIENCE seconds old, then the lock file is deleted and the - exclusive create is retried. - - Load the extra field from the foo.gz file, and see if an operation was in - progress but not completed. If so, apply the recovery procedure below. - - Perform the append procedure with the provided data. - - If the uncompressed data in the foo.gz file is 1MB or more, apply the - compress procedure. - - Delete the foo.lock file. - - Append procedure: - - Put what to append in the foo.add file so that the operation can be - restarted if this procedure is interrupted. - - Mark the foo.gz extra field with the append operation in progress. - + Restore the original last-block bit and stored block length of the last - stored block from the information in the extra field, in case a previous - append operation was interrupted. - - Append the provided data to the last stored block, creating new stored - blocks as needed and updating the stored blocks last-block bits and - lengths. - - Update the crc and length with the new data, and write the gzip trailer. - - Write over the extra field (with a single write operation) with the new - pointers, lengths, and crc's, and mark the gzip file as not in process. - Though there is still a foo.add file, it will be ignored since nothing - is in process. If a foo.add file is leftover from a previously - completed operation, it is truncated when writing new data to it. - - Delete the foo.add file. - - Compress and replace procedures: - - Read all of the uncompressed data in the stored blocks in foo.gz and write - it to foo.add. Also write foo.temp with the last 32K of that data to - provide a dictionary for the next invocation of this procedure. - - Rewrite the extra field marking foo.gz with a compression in process. - * If there is no data provided to compress (due to a missing foo.add file - when recovering), reconstruct and truncate the foo.gz file to contain - only the previous compressed data and proceed to the step after the next - one. Otherwise ... - - Compress the data with the dictionary in foo.dict, and write to the - foo.gz file starting at the bit immediately following the last previously - compressed block. If there is no foo.dict, proceed anyway with the - compression at slightly reduced efficiency. (For the foo.dict file to be - missing requires some external failure beyond simply the interruption of - a compress operation.) During this process, the foo.lock file is - periodically touched to assure that that file is not considered stale by - another process before we're done. The deflation is terminated with a - non-last empty static block (10 bits long), that is then located and - written over by a last-bit-set empty stored block. - - Append the crc and length of the data in the gzip file (previously - calculated during the append operations). - - Write over the extra field with the updated stored block offsets, bits - back, crc's, and lengths, and mark foo.gz as in process for a replacement - of the dictionary. - @ Delete the foo.add file. - - Replace foo.dict with foo.temp. - - Write over the extra field, marking foo.gz as complete. - - Recovery procedure: - - If not a replace recovery, read in the foo.add file, and provide that data - to the appropriate recovery below. If there is no foo.add file, provide - a zero data length to the recovery. In that case, the append recovery - restores the foo.gz to the previous compressed + uncompressed data state. - For the the compress recovery, a missing foo.add file results in foo.gz - being restored to the previous compressed-only data state. - - Append recovery: - - Pick up append at + step above - - Compress recovery: - - Pick up compress at * step above - - Replace recovery: - - Pick up compress at @ step above - - Log the repair with a date stamp in foo.repairs - */ - -#include -#include /* rename, fopen, fprintf, fclose */ -#include /* malloc, free */ -#include /* strlen, strrchr, strcpy, strncpy, strcmp */ -#include /* open */ -#include /* lseek, read, write, close, unlink, sleep, */ - /* ftruncate, fsync */ -#include /* errno */ -#include /* time, ctime */ -#include /* stat */ -#include /* utimes */ -#include "zlib.h" /* crc32 */ - -#include "gzlog.h" /* header for external access */ - -#define local static -typedef unsigned int uint; -typedef unsigned long ulong; - -/* Macro for debugging to deterministically force recovery operations */ -#ifdef GZLOG_DEBUG - #include /* longjmp */ - jmp_buf gzlog_jump; /* where to go back to */ - int gzlog_bail = 0; /* which point to bail at (1..8) */ - int gzlog_count = -1; /* number of times through to wait */ -# define BAIL(n) do { if (n == gzlog_bail && gzlog_count-- == 0) \ - longjmp(gzlog_jump, gzlog_bail); } while (0) -#else -# define BAIL(n) -#endif - -/* how old the lock file can be in seconds before considering it stale */ -#define PATIENCE 300 - -/* maximum stored block size in Kbytes -- must be in 1..63 */ -#define MAX_STORE 16 - -/* number of stored Kbytes to trigger compression (must be >= 32 to allow - dictionary construction, and <= 204 * MAX_STORE, in order for >> 10 to - discard the stored block headers contribution of five bytes each) */ -#define TRIGGER 1024 - -/* size of a deflate dictionary (this cannot be changed) */ -#define DICT 32768U - -/* values for the operation (2 bits) */ -#define NO_OP 0 -#define APPEND_OP 1 -#define COMPRESS_OP 2 -#define REPLACE_OP 3 - -/* macros to extract little-endian integers from an unsigned byte buffer */ -#define PULL2(p) ((p)[0]+((uint)((p)[1])<<8)) -#define PULL4(p) (PULL2(p)+((ulong)PULL2(p+2)<<16)) -#define PULL8(p) (PULL4(p)+((off_t)PULL4(p+4)<<32)) - -/* macros to store integers into a byte buffer in little-endian order */ -#define PUT2(p,a) do {(p)[0]=a;(p)[1]=(a)>>8;} while(0) -#define PUT4(p,a) do {PUT2(p,a);PUT2(p+2,a>>16);} while(0) -#define PUT8(p,a) do {PUT4(p,a);PUT4(p+4,a>>32);} while(0) - -/* internal structure for log information */ -#define LOGID "\106\035\172" /* should be three non-zero characters */ -struct log { - char id[4]; /* contains LOGID to detect inadvertent overwrites */ - int fd; /* file descriptor for .gz file, opened read/write */ - char *path; /* allocated path, e.g. "/var/log/foo" or "foo" */ - char *end; /* end of path, for appending suffices such as ".gz" */ - off_t first; /* offset of first stored block first length byte */ - int back; /* location of first block id in bits back from first */ - uint stored; /* bytes currently in last stored block */ - off_t last; /* offset of last stored block first length byte */ - ulong ccrc; /* crc of compressed data */ - ulong clen; /* length (modulo 2^32) of compressed data */ - ulong tcrc; /* crc of total data */ - ulong tlen; /* length (modulo 2^32) of total data */ - time_t lock; /* last modify time of our lock file */ -}; - -/* gzip header for gzlog */ -local unsigned char log_gzhead[] = { - 0x1f, 0x8b, /* magic gzip id */ - 8, /* compression method is deflate */ - 4, /* there is an extra field (no file name) */ - 0, 0, 0, 0, /* no modification time provided */ - 0, 0xff, /* no extra flags, no OS specified */ - 39, 0, 'a', 'p', 35, 0 /* extra field with "ap" subfield */ - /* 35 is EXTRA, 39 is EXTRA + 4 */ -}; - -#define HEAD sizeof(log_gzhead) /* should be 16 */ - -/* initial gzip extra field content (52 == HEAD + EXTRA + 1) */ -local unsigned char log_gzext[] = { - 52, 0, 0, 0, 0, 0, 0, 0, /* offset of first stored block length */ - 52, 0, 0, 0, 0, 0, 0, 0, /* offset of last stored block length */ - 0, 0, 0, 0, 0, 0, 0, 0, /* compressed data crc and length */ - 0, 0, 0, 0, 0, 0, 0, 0, /* total data crc and length */ - 0, 0, /* final stored block data length */ - 5 /* op is NO_OP, last bit 8 bits back */ -}; - -#define EXTRA sizeof(log_gzext) /* should be 35 */ - -/* initial gzip data and trailer */ -local unsigned char log_gzbody[] = { - 1, 0, 0, 0xff, 0xff, /* empty stored block (last) */ - 0, 0, 0, 0, /* crc */ - 0, 0, 0, 0 /* uncompressed length */ -}; - -#define BODY sizeof(log_gzbody) - -/* Exclusively create foo.lock in order to negotiate exclusive access to the - foo.* files. If the modify time of an existing lock file is greater than - PATIENCE seconds in the past, then consider the lock file to have been - abandoned, delete it, and try the exclusive create again. Save the lock - file modify time for verification of ownership. Return 0 on success, or -1 - on failure, usually due to an access restriction or invalid path. Note that - if stat() or unlink() fails, it may be due to another process noticing the - abandoned lock file a smidge sooner and deleting it, so those are not - flagged as an error. */ -local int log_lock(struct log *log) -{ - int fd; - struct stat st; - - strcpy(log->end, ".lock"); - while ((fd = open(log->path, O_CREAT | O_EXCL, 0644)) < 0) { - if (errno != EEXIST) - return -1; - if (stat(log->path, &st) == 0 && time(NULL) - st.st_mtime > PATIENCE) { - unlink(log->path); - continue; - } - sleep(2); /* relinquish the CPU for two seconds while waiting */ - } - close(fd); - if (stat(log->path, &st) == 0) - log->lock = st.st_mtime; - return 0; -} - -/* Update the modify time of the lock file to now, in order to prevent another - task from thinking that the lock is stale. Save the lock file modify time - for verification of ownership. */ -local void log_touch(struct log *log) -{ - struct stat st; - - strcpy(log->end, ".lock"); - utimes(log->path, NULL); - if (stat(log->path, &st) == 0) - log->lock = st.st_mtime; -} - -/* Check the log file modify time against what is expected. Return true if - this is not our lock. If it is our lock, touch it to keep it. */ -local int log_check(struct log *log) -{ - struct stat st; - - strcpy(log->end, ".lock"); - if (stat(log->path, &st) || st.st_mtime != log->lock) - return 1; - log_touch(log); - return 0; -} - -/* Unlock a previously acquired lock, but only if it's ours. */ -local void log_unlock(struct log *log) -{ - if (log_check(log)) - return; - strcpy(log->end, ".lock"); - unlink(log->path); - log->lock = 0; -} - -/* Check the gzip header and read in the extra field, filling in the values in - the log structure. Return op on success or -1 if the gzip header was not as - expected. op is the current operation in progress last written to the extra - field. This assumes that the gzip file has already been opened, with the - file descriptor log->fd. */ -local int log_head(struct log *log) -{ - int op; - unsigned char buf[HEAD + EXTRA]; - - if (lseek(log->fd, 0, SEEK_SET) < 0 || - read(log->fd, buf, HEAD + EXTRA) != HEAD + EXTRA || - memcmp(buf, log_gzhead, HEAD)) { - return -1; - } - log->first = PULL8(buf + HEAD); - log->last = PULL8(buf + HEAD + 8); - log->ccrc = PULL4(buf + HEAD + 16); - log->clen = PULL4(buf + HEAD + 20); - log->tcrc = PULL4(buf + HEAD + 24); - log->tlen = PULL4(buf + HEAD + 28); - log->stored = PULL2(buf + HEAD + 32); - log->back = 3 + (buf[HEAD + 34] & 7); - op = (buf[HEAD + 34] >> 3) & 3; - return op; -} - -/* Write over the extra field contents, marking the operation as op. Use fsync - to assure that the device is written to, and in the requested order. This - operation, and only this operation, is assumed to be atomic in order to - assure that the log is recoverable in the event of an interruption at any - point in the process. Return -1 if the write to foo.gz failed. */ -local int log_mark(struct log *log, int op) -{ - int ret; - unsigned char ext[EXTRA]; - - PUT8(ext, log->first); - PUT8(ext + 8, log->last); - PUT4(ext + 16, log->ccrc); - PUT4(ext + 20, log->clen); - PUT4(ext + 24, log->tcrc); - PUT4(ext + 28, log->tlen); - PUT2(ext + 32, log->stored); - ext[34] = log->back - 3 + (op << 3); - fsync(log->fd); - ret = lseek(log->fd, HEAD, SEEK_SET) < 0 || - write(log->fd, ext, EXTRA) != EXTRA ? -1 : 0; - fsync(log->fd); - return ret; -} - -/* Rewrite the last block header bits and subsequent zero bits to get to a byte - boundary, setting the last block bit if last is true, and then write the - remainder of the stored block header (length and one's complement). Leave - the file pointer after the end of the last stored block data. Return -1 if - there is a read or write failure on the foo.gz file */ -local int log_last(struct log *log, int last) -{ - int back, len, mask; - unsigned char buf[6]; - - /* determine the locations of the bytes and bits to modify */ - back = log->last == log->first ? log->back : 8; - len = back > 8 ? 2 : 1; /* bytes back from log->last */ - mask = 0x80 >> ((back - 1) & 7); /* mask for block last-bit */ - - /* get the byte to modify (one or two back) into buf[0] -- don't need to - read the byte if the last-bit is eight bits back, since in that case - the entire byte will be modified */ - buf[0] = 0; - if (back != 8 && (lseek(log->fd, log->last - len, SEEK_SET) < 0 || - read(log->fd, buf, 1) != 1)) - return -1; - - /* change the last-bit of the last stored block as requested -- note - that all bits above the last-bit are set to zero, per the type bits - of a stored block being 00 and per the convention that the bits to - bring the stream to a byte boundary are also zeros */ - buf[1] = 0; - buf[2 - len] = (*buf & (mask - 1)) + (last ? mask : 0); - - /* write the modified stored block header and lengths, move the file - pointer to after the last stored block data */ - PUT2(buf + 2, log->stored); - PUT2(buf + 4, log->stored ^ 0xffff); - return lseek(log->fd, log->last - len, SEEK_SET) < 0 || - write(log->fd, buf + 2 - len, len + 4) != len + 4 || - lseek(log->fd, log->stored, SEEK_CUR) < 0 ? -1 : 0; -} - -/* Append len bytes from data to the locked and open log file. len may be zero - if recovering and no .add file was found. In that case, the previous state - of the foo.gz file is restored. The data is appended uncompressed in - deflate stored blocks. Return -1 if there was an error reading or writing - the foo.gz file. */ -local int log_append(struct log *log, unsigned char *data, size_t len) -{ - uint put; - off_t end; - unsigned char buf[8]; - - /* set the last block last-bit and length, in case recovering an - interrupted append, then position the file pointer to append to the - block */ - if (log_last(log, 1)) - return -1; - - /* append, adding stored blocks and updating the offset of the last stored - block as needed, and update the total crc and length */ - while (len) { - /* append as much as we can to the last block */ - put = (MAX_STORE << 10) - log->stored; - if (put > len) - put = (uint)len; - if (put) { - if (write(log->fd, data, put) != put) - return -1; - BAIL(1); - log->tcrc = crc32(log->tcrc, data, put); - log->tlen += put; - log->stored += put; - data += put; - len -= put; - } - - /* if we need to, add a new empty stored block */ - if (len) { - /* mark current block as not last */ - if (log_last(log, 0)) - return -1; - - /* point to new, empty stored block */ - log->last += 4 + log->stored + 1; - log->stored = 0; - } - - /* mark last block as last, update its length */ - if (log_last(log, 1)) - return -1; - BAIL(2); - } - - /* write the new crc and length trailer, and truncate just in case (could - be recovering from partial append with a missing foo.add file) */ - PUT4(buf, log->tcrc); - PUT4(buf + 4, log->tlen); - if (write(log->fd, buf, 8) != 8 || - (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) - return -1; - - /* write the extra field, marking the log file as done, delete .add file */ - if (log_mark(log, NO_OP)) - return -1; - strcpy(log->end, ".add"); - unlink(log->path); /* ignore error, since may not exist */ - return 0; -} - -/* Replace the foo.dict file with the foo.temp file. Also delete the foo.add - file, since the compress operation may have been interrupted before that was - done. Returns 1 if memory could not be allocated, or -1 if reading or - writing foo.gz fails, or if the rename fails for some reason other than - foo.temp not existing. foo.temp not existing is a permitted error, since - the replace operation may have been interrupted after the rename is done, - but before foo.gz is marked as complete. */ -local int log_replace(struct log *log) -{ - int ret; - char *dest; - - /* delete foo.add file */ - strcpy(log->end, ".add"); - unlink(log->path); /* ignore error, since may not exist */ - BAIL(3); - - /* rename foo.name to foo.dict, replacing foo.dict if it exists */ - strcpy(log->end, ".dict"); - dest = malloc(strlen(log->path) + 1); - if (dest == NULL) - return -2; - strcpy(dest, log->path); - strcpy(log->end, ".temp"); - ret = rename(log->path, dest); - free(dest); - if (ret && errno != ENOENT) - return -1; - BAIL(4); - - /* mark the foo.gz file as done */ - return log_mark(log, NO_OP); -} - -/* Compress the len bytes at data and append the compressed data to the - foo.gz deflate data immediately after the previous compressed data. This - overwrites the previous uncompressed data, which was stored in foo.add - and is the data provided in data[0..len-1]. If this operation is - interrupted, it picks up at the start of this routine, with the foo.add - file read in again. If there is no data to compress (len == 0), then we - simply terminate the foo.gz file after the previously compressed data, - appending a final empty stored block and the gzip trailer. Return -1 if - reading or writing the log.gz file failed, or -2 if there was a memory - allocation failure. */ -local int log_compress(struct log *log, unsigned char *data, size_t len) -{ - int fd; - uint got, max; - ssize_t dict; - off_t end; - z_stream strm; - unsigned char buf[DICT]; - - /* compress and append compressed data */ - if (len) { - /* set up for deflate, allocating memory */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - if (deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -15, 8, - Z_DEFAULT_STRATEGY) != Z_OK) - return -2; - - /* read in dictionary (last 32K of data that was compressed) */ - strcpy(log->end, ".dict"); - fd = open(log->path, O_RDONLY, 0); - if (fd >= 0) { - dict = read(fd, buf, DICT); - close(fd); - if (dict < 0) { - deflateEnd(&strm); - return -1; - } - if (dict) - deflateSetDictionary(&strm, buf, (uint)dict); - } - log_touch(log); - - /* prime deflate with last bits of previous block, position write - pointer to write those bits and overwrite what follows */ - if (lseek(log->fd, log->first - (log->back > 8 ? 2 : 1), - SEEK_SET) < 0 || - read(log->fd, buf, 1) != 1 || lseek(log->fd, -1, SEEK_CUR) < 0) { - deflateEnd(&strm); - return -1; - } - deflatePrime(&strm, (8 - log->back) & 7, *buf); - - /* compress, finishing with a partial non-last empty static block */ - strm.next_in = data; - max = (((uint)0 - 1) >> 1) + 1; /* in case int smaller than size_t */ - do { - strm.avail_in = len > max ? max : (uint)len; - len -= strm.avail_in; - do { - strm.avail_out = DICT; - strm.next_out = buf; - deflate(&strm, len ? Z_NO_FLUSH : Z_PARTIAL_FLUSH); - got = DICT - strm.avail_out; - if (got && write(log->fd, buf, got) != got) { - deflateEnd(&strm); - return -1; - } - log_touch(log); - } while (strm.avail_out == 0); - } while (len); - deflateEnd(&strm); - BAIL(5); - - /* find start of empty static block -- scanning backwards the first one - bit is the second bit of the block, if the last byte is zero, then - we know the byte before that has a one in the top bit, since an - empty static block is ten bits long */ - if ((log->first = lseek(log->fd, -1, SEEK_CUR)) < 0 || - read(log->fd, buf, 1) != 1) - return -1; - log->first++; - if (*buf) { - log->back = 1; - while ((*buf & ((uint)1 << (8 - log->back++))) == 0) - ; /* guaranteed to terminate, since *buf != 0 */ - } - else - log->back = 10; - - /* update compressed crc and length */ - log->ccrc = log->tcrc; - log->clen = log->tlen; - } - else { - /* no data to compress -- fix up existing gzip stream */ - log->tcrc = log->ccrc; - log->tlen = log->clen; - } - - /* complete and truncate gzip stream */ - log->last = log->first; - log->stored = 0; - PUT4(buf, log->tcrc); - PUT4(buf + 4, log->tlen); - if (log_last(log, 1) || write(log->fd, buf, 8) != 8 || - (end = lseek(log->fd, 0, SEEK_CUR)) < 0 || ftruncate(log->fd, end)) - return -1; - BAIL(6); - - /* mark as being in the replace operation */ - if (log_mark(log, REPLACE_OP)) - return -1; - - /* execute the replace operation and mark the file as done */ - return log_replace(log); -} - -/* log a repair record to the .repairs file */ -local void log_log(struct log *log, int op, char *record) -{ - time_t now; - FILE *rec; - - now = time(NULL); - strcpy(log->end, ".repairs"); - rec = fopen(log->path, "a"); - if (rec == NULL) - return; - fprintf(rec, "%.24s %s recovery: %s\n", ctime(&now), op == APPEND_OP ? - "append" : (op == COMPRESS_OP ? "compress" : "replace"), record); - fclose(rec); - return; -} - -/* Recover the interrupted operation op. First read foo.add for recovering an - append or compress operation. Return -1 if there was an error reading or - writing foo.gz or reading an existing foo.add, or -2 if there was a memory - allocation failure. */ -local int log_recover(struct log *log, int op) -{ - int fd, ret = 0; - unsigned char *data = NULL; - size_t len = 0; - struct stat st; - - /* log recovery */ - log_log(log, op, "start"); - - /* load foo.add file if expected and present */ - if (op == APPEND_OP || op == COMPRESS_OP) { - strcpy(log->end, ".add"); - if (stat(log->path, &st) == 0 && st.st_size) { - len = (size_t)(st.st_size); - if ((off_t)len != st.st_size || - (data = malloc(st.st_size)) == NULL) { - log_log(log, op, "allocation failure"); - return -2; - } - if ((fd = open(log->path, O_RDONLY, 0)) < 0) { - log_log(log, op, ".add file read failure"); - return -1; - } - ret = (size_t)read(fd, data, len) != len; - close(fd); - if (ret) { - log_log(log, op, ".add file read failure"); - return -1; - } - log_log(log, op, "loaded .add file"); - } - else - log_log(log, op, "missing .add file!"); - } - - /* recover the interrupted operation */ - switch (op) { - case APPEND_OP: - ret = log_append(log, data, len); - break; - case COMPRESS_OP: - ret = log_compress(log, data, len); - break; - case REPLACE_OP: - ret = log_replace(log); - } - - /* log status */ - log_log(log, op, ret ? "failure" : "complete"); - - /* clean up */ - if (data != NULL) - free(data); - return ret; -} - -/* Close the foo.gz file (if open) and release the lock. */ -local void log_close(struct log *log) -{ - if (log->fd >= 0) - close(log->fd); - log->fd = -1; - log_unlock(log); -} - -/* Open foo.gz, verify the header, and load the extra field contents, after - first creating the foo.lock file to gain exclusive access to the foo.* - files. If foo.gz does not exist or is empty, then write the initial header, - extra, and body content of an empty foo.gz log file. If there is an error - creating the lock file due to access restrictions, or an error reading or - writing the foo.gz file, or if the foo.gz file is not a proper log file for - this object (e.g. not a gzip file or does not contain the expected extra - field), then return true. If there is an error, the lock is released. - Otherwise, the lock is left in place. */ -local int log_open(struct log *log) -{ - int op; - - /* release open file resource if left over -- can occur if lock lost - between gzlog_open() and gzlog_write() */ - if (log->fd >= 0) - close(log->fd); - log->fd = -1; - - /* negotiate exclusive access */ - if (log_lock(log) < 0) - return -1; - - /* open the log file, foo.gz */ - strcpy(log->end, ".gz"); - log->fd = open(log->path, O_RDWR | O_CREAT, 0644); - if (log->fd < 0) { - log_close(log); - return -1; - } - - /* if new, initialize foo.gz with an empty log, delete old dictionary */ - if (lseek(log->fd, 0, SEEK_END) == 0) { - if (write(log->fd, log_gzhead, HEAD) != HEAD || - write(log->fd, log_gzext, EXTRA) != EXTRA || - write(log->fd, log_gzbody, BODY) != BODY) { - log_close(log); - return -1; - } - strcpy(log->end, ".dict"); - unlink(log->path); - } - - /* verify log file and load extra field information */ - if ((op = log_head(log)) < 0) { - log_close(log); - return -1; - } - - /* check for interrupted process and if so, recover */ - if (op != NO_OP && log_recover(log, op)) { - log_close(log); - return -1; - } - - /* touch the lock file to prevent another process from grabbing it */ - log_touch(log); - return 0; -} - -/* See gzlog.h for the description of the external methods below */ -gzlog *gzlog_open(char *path) -{ - size_t n; - struct log *log; - - /* check arguments */ - if (path == NULL || *path == 0) - return NULL; - - /* allocate and initialize log structure */ - log = malloc(sizeof(struct log)); - if (log == NULL) - return NULL; - strcpy(log->id, LOGID); - log->fd = -1; - - /* save path and end of path for name construction */ - n = strlen(path); - log->path = malloc(n + 9); /* allow for ".repairs" */ - if (log->path == NULL) { - free(log); - return NULL; - } - strcpy(log->path, path); - log->end = log->path + n; - - /* gain exclusive access and verify log file -- may perform a - recovery operation if needed */ - if (log_open(log)) { - free(log->path); - free(log); - return NULL; - } - - /* return pointer to log structure */ - return log; -} - -/* gzlog_compress() return values: - 0: all good - -1: file i/o error (usually access issue) - -2: memory allocation failure - -3: invalid log pointer argument */ -int gzlog_compress(gzlog *logd) -{ - int fd, ret; - uint block; - size_t len, next; - unsigned char *data, buf[5]; - struct log *log = logd; - - /* check arguments */ - if (log == NULL || strcmp(log->id, LOGID)) - return -3; - - /* see if we lost the lock -- if so get it again and reload the extra - field information (it probably changed), recover last operation if - necessary */ - if (log_check(log) && log_open(log)) - return -1; - - /* create space for uncompressed data */ - len = ((size_t)(log->last - log->first) & ~(((size_t)1 << 10) - 1)) + - log->stored; - if ((data = malloc(len)) == NULL) - return -2; - - /* do statement here is just a cheap trick for error handling */ - do { - /* read in the uncompressed data */ - if (lseek(log->fd, log->first - 1, SEEK_SET) < 0) - break; - next = 0; - while (next < len) { - if (read(log->fd, buf, 5) != 5) - break; - block = PULL2(buf + 1); - if (next + block > len || - read(log->fd, (char *)data + next, block) != block) - break; - next += block; - } - if (lseek(log->fd, 0, SEEK_CUR) != log->last + 4 + log->stored) - break; - log_touch(log); - - /* write the uncompressed data to the .add file */ - strcpy(log->end, ".add"); - fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); - if (fd < 0) - break; - ret = (size_t)write(fd, data, len) != len; - if (ret | close(fd)) - break; - log_touch(log); - - /* write the dictionary for the next compress to the .temp file */ - strcpy(log->end, ".temp"); - fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); - if (fd < 0) - break; - next = DICT > len ? len : DICT; - ret = (size_t)write(fd, (char *)data + len - next, next) != next; - if (ret | close(fd)) - break; - log_touch(log); - - /* roll back to compressed data, mark the compress in progress */ - log->last = log->first; - log->stored = 0; - if (log_mark(log, COMPRESS_OP)) - break; - BAIL(7); - - /* compress and append the data (clears mark) */ - ret = log_compress(log, data, len); - free(data); - return ret; - } while (0); - - /* broke out of do above on i/o error */ - free(data); - return -1; -} - -/* gzlog_write() return values: - 0: all good - -1: file i/o error (usually access issue) - -2: memory allocation failure - -3: invalid log pointer argument */ -int gzlog_write(gzlog *logd, void *data, size_t len) -{ - int fd, ret; - struct log *log = logd; - - /* check arguments */ - if (log == NULL || strcmp(log->id, LOGID)) - return -3; - if (data == NULL || len <= 0) - return 0; - - /* see if we lost the lock -- if so get it again and reload the extra - field information (it probably changed), recover last operation if - necessary */ - if (log_check(log) && log_open(log)) - return -1; - - /* create and write .add file */ - strcpy(log->end, ".add"); - fd = open(log->path, O_WRONLY | O_CREAT | O_TRUNC, 0644); - if (fd < 0) - return -1; - ret = (size_t)write(fd, data, len) != len; - if (ret | close(fd)) - return -1; - log_touch(log); - - /* mark log file with append in progress */ - if (log_mark(log, APPEND_OP)) - return -1; - BAIL(8); - - /* append data (clears mark) */ - if (log_append(log, data, len)) - return -1; - - /* check to see if it's time to compress -- if not, then done */ - if (((log->last - log->first) >> 10) + (log->stored >> 10) < TRIGGER) - return 0; - - /* time to compress */ - return gzlog_compress(log); -} - -/* gzlog_close() return values: - 0: ok - -3: invalid log pointer argument */ -int gzlog_close(gzlog *logd) -{ - struct log *log = logd; - - /* check arguments */ - if (log == NULL || strcmp(log->id, LOGID)) - return -3; - - /* close the log file and release the lock */ - log_close(log); - - /* free structure and return */ - if (log->path != NULL) - free(log->path); - strcpy(log->id, "bad"); - free(log); - return 0; -} diff --git a/zlib/examples/gzlog.h b/zlib/examples/gzlog.h deleted file mode 100644 index 86f0cec..0000000 --- a/zlib/examples/gzlog.h +++ /dev/null @@ -1,91 +0,0 @@ -/* gzlog.h - Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved - version 2.2, 14 Aug 2012 - - This software is provided 'as-is', without any express or implied - warranty. In no event will the author be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - Mark Adler madler@alumni.caltech.edu - */ - -/* Version History: - 1.0 26 Nov 2004 First version - 2.0 25 Apr 2008 Complete redesign for recovery of interrupted operations - Interface changed slightly in that now path is a prefix - Compression now occurs as needed during gzlog_write() - gzlog_write() now always leaves the log file as valid gzip - 2.1 8 Jul 2012 Fix argument checks in gzlog_compress() and gzlog_write() - 2.2 14 Aug 2012 Clean up signed comparisons - */ - -/* - The gzlog object allows writing short messages to a gzipped log file, - opening the log file locked for small bursts, and then closing it. The log - object works by appending stored (uncompressed) data to the gzip file until - 1 MB has been accumulated. At that time, the stored data is compressed, and - replaces the uncompressed data in the file. The log file is truncated to - its new size at that time. After each write operation, the log file is a - valid gzip file that can decompressed to recover what was written. - - The gzlog operations can be interupted at any point due to an application or - system crash, and the log file will be recovered the next time the log is - opened with gzlog_open(). - */ - -#ifndef GZLOG_H -#define GZLOG_H - -/* gzlog object type */ -typedef void gzlog; - -/* Open a gzlog object, creating the log file if it does not exist. Return - NULL on error. Note that gzlog_open() could take a while to complete if it - has to wait to verify that a lock is stale (possibly for five minutes), or - if there is significant contention with other instantiations of this object - when locking the resource. path is the prefix of the file names created by - this object. If path is "foo", then the log file will be "foo.gz", and - other auxiliary files will be created and destroyed during the process: - "foo.dict" for a compression dictionary, "foo.temp" for a temporary (next) - dictionary, "foo.add" for data being added or compressed, "foo.lock" for the - lock file, and "foo.repairs" to log recovery operations performed due to - interrupted gzlog operations. A gzlog_open() followed by a gzlog_close() - will recover a previously interrupted operation, if any. */ -gzlog *gzlog_open(char *path); - -/* Write to a gzlog object. Return zero on success, -1 if there is a file i/o - error on any of the gzlog files (this should not happen if gzlog_open() - succeeded, unless the device has run out of space or leftover auxiliary - files have permissions or ownership that prevent their use), -2 if there is - a memory allocation failure, or -3 if the log argument is invalid (e.g. if - it was not created by gzlog_open()). This function will write data to the - file uncompressed, until 1 MB has been accumulated, at which time that data - will be compressed. The log file will be a valid gzip file upon successful - return. */ -int gzlog_write(gzlog *log, void *data, size_t len); - -/* Force compression of any uncompressed data in the log. This should be used - sparingly, if at all. The main application would be when a log file will - not be appended to again. If this is used to compress frequently while - appending, it will both significantly increase the execution time and - reduce the compression ratio. The return codes are the same as for - gzlog_write(). */ -int gzlog_compress(gzlog *log); - -/* Close a gzlog object. Return zero on success, -3 if the log argument is - invalid. The log object is freed, and so cannot be referenced again. */ -int gzlog_close(gzlog *log); - -#endif diff --git a/zlib/examples/zlib_how.html b/zlib/examples/zlib_how.html deleted file mode 100644 index 444ff1c..0000000 --- a/zlib/examples/zlib_how.html +++ /dev/null @@ -1,545 +0,0 @@ - - - - -zlib Usage Example - - - -

zlib Usage Example

-We often get questions about how the deflate() and inflate() functions should be used. -Users wonder when they should provide more input, when they should use more output, -what to do with a Z_BUF_ERROR, how to make sure the process terminates properly, and -so on. So for those who have read zlib.h (a few times), and -would like further edification, below is an annotated example in C of simple routines to compress and decompress -from an input file to an output file using deflate() and inflate() respectively. The -annotations are interspersed between lines of the code. So please read between the lines. -We hope this helps explain some of the intricacies of zlib. -

-Without further adieu, here is the program zpipe.c: -


-/* zpipe.c: example of proper use of zlib's inflate() and deflate()
-   Not copyrighted -- provided to the public domain
-   Version 1.4  11 December 2005  Mark Adler */
-
-/* Version history:
-   1.0  30 Oct 2004  First version
-   1.1   8 Nov 2004  Add void casting for unused return values
-                     Use switch statement for inflate() return values
-   1.2   9 Nov 2004  Add assertions to document zlib guarantees
-   1.3   6 Apr 2005  Remove incorrect assertion in inf()
-   1.4  11 Dec 2005  Add hack to avoid MSDOS end-of-line conversions
-                     Avoid some compiler warnings for input and output buffers
- */
-
-We now include the header files for the required definitions. From -stdio.h we use fopen(), fread(), fwrite(), -feof(), ferror(), and fclose() for file i/o, and -fputs() for error messages. From string.h we use -strcmp() for command line argument processing. -From assert.h we use the assert() macro. -From zlib.h -we use the basic compression functions deflateInit(), -deflate(), and deflateEnd(), and the basic decompression -functions inflateInit(), inflate(), and -inflateEnd(). -

-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include "zlib.h"
-
-This is an ugly hack required to avoid corruption of the input and output data on -Windows/MS-DOS systems. Without this, those systems would assume that the input and output -files are text, and try to convert the end-of-line characters from one standard to -another. That would corrupt binary data, and in particular would render the compressed data unusable. -This sets the input and output to binary which suppresses the end-of-line conversions. -SET_BINARY_MODE() will be used later on stdin and stdout, at the beginning of main(). -

-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
-#  include <fcntl.h>
-#  include <io.h>
-#  define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
-#else
-#  define SET_BINARY_MODE(file)
-#endif
-
-CHUNK is simply the buffer size for feeding data to and pulling data -from the zlib routines. Larger buffer sizes would be more efficient, -especially for inflate(). If the memory is available, buffers sizes -on the order of 128K or 256K bytes should be used. -

-#define CHUNK 16384
-
-The def() routine compresses data from an input file to an output file. The output data -will be in the zlib format, which is different from the gzip or zip -formats. The zlib format has a very small header of only two bytes to identify it as -a zlib stream and to provide decoding information, and a four-byte trailer with a fast -check value to verify the integrity of the uncompressed data after decoding. -

-/* Compress from file source to file dest until EOF on source.
-   def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
-   allocated for processing, Z_STREAM_ERROR if an invalid compression
-   level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
-   version of the library linked do not match, or Z_ERRNO if there is
-   an error reading or writing the files. */
-int def(FILE *source, FILE *dest, int level)
-{
-
-Here are the local variables for def(). ret will be used for zlib -return codes. flush will keep track of the current flushing state for deflate(), -which is either no flushing, or flush to completion after the end of the input file is reached. -have is the amount of data returned from deflate(). The strm structure -is used to pass information to and from the zlib routines, and to maintain the -deflate() state. in and out are the input and output buffers for -deflate(). -

-    int ret, flush;
-    unsigned have;
-    z_stream strm;
-    unsigned char in[CHUNK];
-    unsigned char out[CHUNK];
-
-The first thing we do is to initialize the zlib state for compression using -deflateInit(). This must be done before the first use of deflate(). -The zalloc, zfree, and opaque fields in the strm -structure must be initialized before calling deflateInit(). Here they are -set to the zlib constant Z_NULL to request that zlib use -the default memory allocation routines. An application may also choose to provide -custom memory allocation routines here. deflateInit() will allocate on the -order of 256K bytes for the internal state. -(See zlib Technical Details.) -

-deflateInit() is called with a pointer to the structure to be initialized and -the compression level, which is an integer in the range of -1 to 9. Lower compression -levels result in faster execution, but less compression. Higher levels result in -greater compression, but slower execution. The zlib constant Z_DEFAULT_COMPRESSION, -equal to -1, -provides a good compromise between compression and speed and is equivalent to level 6. -Level 0 actually does no compression at all, and in fact expands the data slightly to produce -the zlib format (it is not a byte-for-byte copy of the input). -More advanced applications of zlib -may use deflateInit2() here instead. Such an application may want to reduce how -much memory will be used, at some price in compression. Or it may need to request a -gzip header and trailer instead of a zlib header and trailer, or raw -encoding with no header or trailer at all. -

-We must check the return value of deflateInit() against the zlib constant -Z_OK to make sure that it was able to -allocate memory for the internal state, and that the provided arguments were valid. -deflateInit() will also check that the version of zlib that the zlib.h -file came from matches the version of zlib actually linked with the program. This -is especially important for environments in which zlib is a shared library. -

-Note that an application can initialize multiple, independent zlib streams, which can -operate in parallel. The state information maintained in the structure allows the zlib -routines to be reentrant. -


-    /* allocate deflate state */
-    strm.zalloc = Z_NULL;
-    strm.zfree = Z_NULL;
-    strm.opaque = Z_NULL;
-    ret = deflateInit(&strm, level);
-    if (ret != Z_OK)
-        return ret;
-
-With the pleasantries out of the way, now we can get down to business. The outer do-loop -reads all of the input file and exits at the bottom of the loop once end-of-file is reached. -This loop contains the only call of deflate(). So we must make sure that all of the -input data has been processed and that all of the output data has been generated and consumed -before we fall out of the loop at the bottom. -

-    /* compress until end of file */
-    do {
-
-We start off by reading data from the input file. The number of bytes read is put directly -into avail_in, and a pointer to those bytes is put into next_in. We also -check to see if end-of-file on the input has been reached. If we are at the end of file, then flush is set to the -zlib constant Z_FINISH, which is later passed to deflate() to -indicate that this is the last chunk of input data to compress. We need to use feof() -to check for end-of-file as opposed to seeing if fewer than CHUNK bytes have been read. The -reason is that if the input file length is an exact multiple of CHUNK, we will miss -the fact that we got to the end-of-file, and not know to tell deflate() to finish -up the compressed stream. If we are not yet at the end of the input, then the zlib -constant Z_NO_FLUSH will be passed to deflate to indicate that we are still -in the middle of the uncompressed data. -

-If there is an error in reading from the input file, the process is aborted with -deflateEnd() being called to free the allocated zlib state before returning -the error. We wouldn't want a memory leak, now would we? deflateEnd() can be called -at any time after the state has been initialized. Once that's done, deflateInit() (or -deflateInit2()) would have to be called to start a new compression process. There is -no point here in checking the deflateEnd() return code. The deallocation can't fail. -


-        strm.avail_in = fread(in, 1, CHUNK, source);
-        if (ferror(source)) {
-            (void)deflateEnd(&strm);
-            return Z_ERRNO;
-        }
-        flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
-        strm.next_in = in;
-
-The inner do-loop passes our chunk of input data to deflate(), and then -keeps calling deflate() until it is done producing output. Once there is no more -new output, deflate() is guaranteed to have consumed all of the input, i.e., -avail_in will be zero. -

-        /* run deflate() on input until output buffer not full, finish
-           compression if all of source has been read in */
-        do {
-
-Output space is provided to deflate() by setting avail_out to the number -of available output bytes and next_out to a pointer to that space. -

-            strm.avail_out = CHUNK;
-            strm.next_out = out;
-
-Now we call the compression engine itself, deflate(). It takes as many of the -avail_in bytes at next_in as it can process, and writes as many as -avail_out bytes to next_out. Those counters and pointers are then -updated past the input data consumed and the output data written. It is the amount of -output space available that may limit how much input is consumed. -Hence the inner loop to make sure that -all of the input is consumed by providing more output space each time. Since avail_in -and next_in are updated by deflate(), we don't have to mess with those -between deflate() calls until it's all used up. -

-The parameters to deflate() are a pointer to the strm structure containing -the input and output information and the internal compression engine state, and a parameter -indicating whether and how to flush data to the output. Normally deflate will consume -several K bytes of input data before producing any output (except for the header), in order -to accumulate statistics on the data for optimum compression. It will then put out a burst of -compressed data, and proceed to consume more input before the next burst. Eventually, -deflate() -must be told to terminate the stream, complete the compression with provided input data, and -write out the trailer check value. deflate() will continue to compress normally as long -as the flush parameter is Z_NO_FLUSH. Once the Z_FINISH parameter is provided, -deflate() will begin to complete the compressed output stream. However depending on how -much output space is provided, deflate() may have to be called several times until it -has provided the complete compressed stream, even after it has consumed all of the input. The flush -parameter must continue to be Z_FINISH for those subsequent calls. -

-There are other values of the flush parameter that are used in more advanced applications. You can -force deflate() to produce a burst of output that encodes all of the input data provided -so far, even if it wouldn't have otherwise, for example to control data latency on a link with -compressed data. You can also ask that deflate() do that as well as erase any history up to -that point so that what follows can be decompressed independently, for example for random access -applications. Both requests will degrade compression by an amount depending on how often such -requests are made. -

-deflate() has a return value that can indicate errors, yet we do not check it here. Why -not? Well, it turns out that deflate() can do no wrong here. Let's go through -deflate()'s return values and dispense with them one by one. The possible values are -Z_OK, Z_STREAM_END, Z_STREAM_ERROR, or Z_BUF_ERROR. Z_OK -is, well, ok. Z_STREAM_END is also ok and will be returned for the last call of -deflate(). This is already guaranteed by calling deflate() with Z_FINISH -until it has no more output. Z_STREAM_ERROR is only possible if the stream is not -initialized properly, but we did initialize it properly. There is no harm in checking for -Z_STREAM_ERROR here, for example to check for the possibility that some -other part of the application inadvertently clobbered the memory containing the zlib state. -Z_BUF_ERROR will be explained further below, but -suffice it to say that this is simply an indication that deflate() could not consume -more input or produce more output. deflate() can be called again with more output space -or more available input, which it will be in this code. -


-            ret = deflate(&strm, flush);    /* no bad return value */
-            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
-
-Now we compute how much output deflate() provided on the last call, which is the -difference between how much space was provided before the call, and how much output space -is still available after the call. Then that data, if any, is written to the output file. -We can then reuse the output buffer for the next call of deflate(). Again if there -is a file i/o error, we call deflateEnd() before returning to avoid a memory leak. -

-            have = CHUNK - strm.avail_out;
-            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
-                (void)deflateEnd(&strm);
-                return Z_ERRNO;
-            }
-
-The inner do-loop is repeated until the last deflate() call fails to fill the -provided output buffer. Then we know that deflate() has done as much as it can with -the provided input, and that all of that input has been consumed. We can then fall out of this -loop and reuse the input buffer. -

-The way we tell that deflate() has no more output is by seeing that it did not fill -the output buffer, leaving avail_out greater than zero. However suppose that -deflate() has no more output, but just so happened to exactly fill the output buffer! -avail_out is zero, and we can't tell that deflate() has done all it can. -As far as we know, deflate() -has more output for us. So we call it again. But now deflate() produces no output -at all, and avail_out remains unchanged as CHUNK. That deflate() call -wasn't able to do anything, either consume input or produce output, and so it returns -Z_BUF_ERROR. (See, I told you I'd cover this later.) However this is not a problem at -all. Now we finally have the desired indication that deflate() is really done, -and so we drop out of the inner loop to provide more input to deflate(). -

-With flush set to Z_FINISH, this final set of deflate() calls will -complete the output stream. Once that is done, subsequent calls of deflate() would return -Z_STREAM_ERROR if the flush parameter is not Z_FINISH, and do no more processing -until the state is reinitialized. -

-Some applications of zlib have two loops that call deflate() -instead of the single inner loop we have here. The first loop would call -without flushing and feed all of the data to deflate(). The second loop would call -deflate() with no more -data and the Z_FINISH parameter to complete the process. As you can see from this -example, that can be avoided by simply keeping track of the current flush state. -


-        } while (strm.avail_out == 0);
-        assert(strm.avail_in == 0);     /* all input will be used */
-
-Now we check to see if we have already processed all of the input file. That information was -saved in the flush variable, so we see if that was set to Z_FINISH. If so, -then we're done and we fall out of the outer loop. We're guaranteed to get Z_STREAM_END -from the last deflate() call, since we ran it until the last chunk of input was -consumed and all of the output was generated. -

-        /* done when last data in file processed */
-    } while (flush != Z_FINISH);
-    assert(ret == Z_STREAM_END);        /* stream will be complete */
-
-The process is complete, but we still need to deallocate the state to avoid a memory leak -(or rather more like a memory hemorrhage if you didn't do this). Then -finally we can return with a happy return value. -

-    /* clean up and return */
-    (void)deflateEnd(&strm);
-    return Z_OK;
-}
-
-Now we do the same thing for decompression in the inf() routine. inf() -decompresses what is hopefully a valid zlib stream from the input file and writes the -uncompressed data to the output file. Much of the discussion above for def() -applies to inf() as well, so the discussion here will focus on the differences between -the two. -

-/* Decompress from file source to file dest until stream ends or EOF.
-   inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
-   allocated for processing, Z_DATA_ERROR if the deflate data is
-   invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
-   the version of the library linked do not match, or Z_ERRNO if there
-   is an error reading or writing the files. */
-int inf(FILE *source, FILE *dest)
-{
-
-The local variables have the same functionality as they do for def(). The -only difference is that there is no flush variable, since inflate() -can tell from the zlib stream itself when the stream is complete. -

-    int ret;
-    unsigned have;
-    z_stream strm;
-    unsigned char in[CHUNK];
-    unsigned char out[CHUNK];
-
-The initialization of the state is the same, except that there is no compression level, -of course, and two more elements of the structure are initialized. avail_in -and next_in must be initialized before calling inflateInit(). This -is because the application has the option to provide the start of the zlib stream in -order for inflateInit() to have access to information about the compression -method to aid in memory allocation. In the current implementation of zlib -(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of -inflate() anyway. However those fields must be initialized since later versions -of zlib that provide more compression methods may take advantage of this interface. -In any case, no decompression is performed by inflateInit(), so the -avail_out and next_out fields do not need to be initialized before calling. -

-Here avail_in is set to zero and next_in is set to Z_NULL to -indicate that no input data is being provided. -


-    /* allocate inflate state */
-    strm.zalloc = Z_NULL;
-    strm.zfree = Z_NULL;
-    strm.opaque = Z_NULL;
-    strm.avail_in = 0;
-    strm.next_in = Z_NULL;
-    ret = inflateInit(&strm);
-    if (ret != Z_OK)
-        return ret;
-
-The outer do-loop decompresses input until inflate() indicates -that it has reached the end of the compressed data and has produced all of the uncompressed -output. This is in contrast to def() which processes all of the input file. -If end-of-file is reached before the compressed data self-terminates, then the compressed -data is incomplete and an error is returned. -

-    /* decompress until deflate stream ends or end of file */
-    do {
-
-We read input data and set the strm structure accordingly. If we've reached the -end of the input file, then we leave the outer loop and report an error, since the -compressed data is incomplete. Note that we may read more data than is eventually consumed -by inflate(), if the input file continues past the zlib stream. -For applications where zlib streams are embedded in other data, this routine would -need to be modified to return the unused data, or at least indicate how much of the input -data was not used, so the application would know where to pick up after the zlib stream. -

-        strm.avail_in = fread(in, 1, CHUNK, source);
-        if (ferror(source)) {
-            (void)inflateEnd(&strm);
-            return Z_ERRNO;
-        }
-        if (strm.avail_in == 0)
-            break;
-        strm.next_in = in;
-
-The inner do-loop has the same function it did in def(), which is to -keep calling inflate() until has generated all of the output it can with the -provided input. -

-        /* run inflate() on input until output buffer not full */
-        do {
-
-Just like in def(), the same output space is provided for each call of inflate(). -

-            strm.avail_out = CHUNK;
-            strm.next_out = out;
-
-Now we run the decompression engine itself. There is no need to adjust the flush parameter, since -the zlib format is self-terminating. The main difference here is that there are -return values that we need to pay attention to. Z_DATA_ERROR -indicates that inflate() detected an error in the zlib compressed data format, -which means that either the data is not a zlib stream to begin with, or that the data was -corrupted somewhere along the way since it was compressed. The other error to be processed is -Z_MEM_ERROR, which can occur since memory allocation is deferred until inflate() -needs it, unlike deflate(), whose memory is allocated at the start by deflateInit(). -

-Advanced applications may use -deflateSetDictionary() to prime deflate() with a set of likely data to improve the -first 32K or so of compression. This is noted in the zlib header, so inflate() -requests that that dictionary be provided before it can start to decompress. Without the dictionary, -correct decompression is not possible. For this routine, we have no idea what the dictionary is, -so the Z_NEED_DICT indication is converted to a Z_DATA_ERROR. -

-inflate() can also return Z_STREAM_ERROR, which should not be possible here, -but could be checked for as noted above for def(). Z_BUF_ERROR does not need to be -checked for here, for the same reasons noted for def(). Z_STREAM_END will be -checked for later. -


-            ret = inflate(&strm, Z_NO_FLUSH);
-            assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
-            switch (ret) {
-            case Z_NEED_DICT:
-                ret = Z_DATA_ERROR;     /* and fall through */
-            case Z_DATA_ERROR:
-            case Z_MEM_ERROR:
-                (void)inflateEnd(&strm);
-                return ret;
-            }
-
-The output of inflate() is handled identically to that of deflate(). -

-            have = CHUNK - strm.avail_out;
-            if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
-                (void)inflateEnd(&strm);
-                return Z_ERRNO;
-            }
-
-The inner do-loop ends when inflate() has no more output as indicated -by not filling the output buffer, just as for deflate(). In this case, we cannot -assert that strm.avail_in will be zero, since the deflate stream may end before the file -does. -

-        } while (strm.avail_out == 0);
-
-The outer do-loop ends when inflate() reports that it has reached the -end of the input zlib stream, has completed the decompression and integrity -check, and has provided all of the output. This is indicated by the inflate() -return value Z_STREAM_END. The inner loop is guaranteed to leave ret -equal to Z_STREAM_END if the last chunk of the input file read contained the end -of the zlib stream. So if the return value is not Z_STREAM_END, the -loop continues to read more input. -

-        /* done when inflate() says it's done */
-    } while (ret != Z_STREAM_END);
-
-At this point, decompression successfully completed, or we broke out of the loop due to no -more data being available from the input file. If the last inflate() return value -is not Z_STREAM_END, then the zlib stream was incomplete and a data error -is returned. Otherwise, we return with a happy return value. Of course, inflateEnd() -is called first to avoid a memory leak. -

-    /* clean up and return */
-    (void)inflateEnd(&strm);
-    return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
-}
-
-That ends the routines that directly use zlib. The following routines make this -a command-line program by running data through the above routines from stdin to -stdout, and handling any errors reported by def() or inf(). -

-zerr() is used to interpret the possible error codes from def() -and inf(), as detailed in their comments above, and print out an error message. -Note that these are only a subset of the possible return values from deflate() -and inflate(). -


-/* report a zlib or i/o error */
-void zerr(int ret)
-{
-    fputs("zpipe: ", stderr);
-    switch (ret) {
-    case Z_ERRNO:
-        if (ferror(stdin))
-            fputs("error reading stdin\n", stderr);
-        if (ferror(stdout))
-            fputs("error writing stdout\n", stderr);
-        break;
-    case Z_STREAM_ERROR:
-        fputs("invalid compression level\n", stderr);
-        break;
-    case Z_DATA_ERROR:
-        fputs("invalid or incomplete deflate data\n", stderr);
-        break;
-    case Z_MEM_ERROR:
-        fputs("out of memory\n", stderr);
-        break;
-    case Z_VERSION_ERROR:
-        fputs("zlib version mismatch!\n", stderr);
-    }
-}
-
-Here is the main() routine used to test def() and inf(). The -zpipe command is simply a compression pipe from stdin to stdout, if -no arguments are given, or it is a decompression pipe if zpipe -d is used. If any other -arguments are provided, no compression or decompression is performed. Instead a usage -message is displayed. Examples are zpipe < foo.txt > foo.txt.z to compress, and -zpipe -d < foo.txt.z > foo.txt to decompress. -

-/* compress or decompress from stdin to stdout */
-int main(int argc, char **argv)
-{
-    int ret;
-
-    /* avoid end-of-line conversions */
-    SET_BINARY_MODE(stdin);
-    SET_BINARY_MODE(stdout);
-
-    /* do compression if no arguments */
-    if (argc == 1) {
-        ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
-        if (ret != Z_OK)
-            zerr(ret);
-        return ret;
-    }
-
-    /* do decompression if -d specified */
-    else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
-        ret = inf(stdin, stdout);
-        if (ret != Z_OK)
-            zerr(ret);
-        return ret;
-    }
-
-    /* otherwise, report usage */
-    else {
-        fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
-        return 1;
-    }
-}
-
-
-Copyright (c) 2004, 2005 by Mark Adler
Last modified 11 December 2005
- - diff --git a/zlib/examples/zpipe.c b/zlib/examples/zpipe.c deleted file mode 100644 index 83535d1..0000000 --- a/zlib/examples/zpipe.c +++ /dev/null @@ -1,205 +0,0 @@ -/* zpipe.c: example of proper use of zlib's inflate() and deflate() - Not copyrighted -- provided to the public domain - Version 1.4 11 December 2005 Mark Adler */ - -/* Version history: - 1.0 30 Oct 2004 First version - 1.1 8 Nov 2004 Add void casting for unused return values - Use switch statement for inflate() return values - 1.2 9 Nov 2004 Add assertions to document zlib guarantees - 1.3 6 Apr 2005 Remove incorrect assertion in inf() - 1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions - Avoid some compiler warnings for input and output buffers - */ - -#include -#include -#include -#include "zlib.h" - -#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) -# include -# include -# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) -#else -# define SET_BINARY_MODE(file) -#endif - -#define CHUNK 16384 - -/* Compress from file source to file dest until EOF on source. - def() returns Z_OK on success, Z_MEM_ERROR if memory could not be - allocated for processing, Z_STREAM_ERROR if an invalid compression - level is supplied, Z_VERSION_ERROR if the version of zlib.h and the - version of the library linked do not match, or Z_ERRNO if there is - an error reading or writing the files. */ -int def(FILE *source, FILE *dest, int level) -{ - int ret, flush; - unsigned have; - z_stream strm; - unsigned char in[CHUNK]; - unsigned char out[CHUNK]; - - /* allocate deflate state */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - ret = deflateInit(&strm, level); - if (ret != Z_OK) - return ret; - - /* compress until end of file */ - do { - strm.avail_in = fread(in, 1, CHUNK, source); - if (ferror(source)) { - (void)deflateEnd(&strm); - return Z_ERRNO; - } - flush = feof(source) ? Z_FINISH : Z_NO_FLUSH; - strm.next_in = in; - - /* run deflate() on input until output buffer not full, finish - compression if all of source has been read in */ - do { - strm.avail_out = CHUNK; - strm.next_out = out; - ret = deflate(&strm, flush); /* no bad return value */ - assert(ret != Z_STREAM_ERROR); /* state not clobbered */ - have = CHUNK - strm.avail_out; - if (fwrite(out, 1, have, dest) != have || ferror(dest)) { - (void)deflateEnd(&strm); - return Z_ERRNO; - } - } while (strm.avail_out == 0); - assert(strm.avail_in == 0); /* all input will be used */ - - /* done when last data in file processed */ - } while (flush != Z_FINISH); - assert(ret == Z_STREAM_END); /* stream will be complete */ - - /* clean up and return */ - (void)deflateEnd(&strm); - return Z_OK; -} - -/* Decompress from file source to file dest until stream ends or EOF. - inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be - allocated for processing, Z_DATA_ERROR if the deflate data is - invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and - the version of the library linked do not match, or Z_ERRNO if there - is an error reading or writing the files. */ -int inf(FILE *source, FILE *dest) -{ - int ret; - unsigned have; - z_stream strm; - unsigned char in[CHUNK]; - unsigned char out[CHUNK]; - - /* allocate inflate state */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = 0; - strm.next_in = Z_NULL; - ret = inflateInit(&strm); - if (ret != Z_OK) - return ret; - - /* decompress until deflate stream ends or end of file */ - do { - strm.avail_in = fread(in, 1, CHUNK, source); - if (ferror(source)) { - (void)inflateEnd(&strm); - return Z_ERRNO; - } - if (strm.avail_in == 0) - break; - strm.next_in = in; - - /* run inflate() on input until output buffer not full */ - do { - strm.avail_out = CHUNK; - strm.next_out = out; - ret = inflate(&strm, Z_NO_FLUSH); - assert(ret != Z_STREAM_ERROR); /* state not clobbered */ - switch (ret) { - case Z_NEED_DICT: - ret = Z_DATA_ERROR; /* and fall through */ - case Z_DATA_ERROR: - case Z_MEM_ERROR: - (void)inflateEnd(&strm); - return ret; - } - have = CHUNK - strm.avail_out; - if (fwrite(out, 1, have, dest) != have || ferror(dest)) { - (void)inflateEnd(&strm); - return Z_ERRNO; - } - } while (strm.avail_out == 0); - - /* done when inflate() says it's done */ - } while (ret != Z_STREAM_END); - - /* clean up and return */ - (void)inflateEnd(&strm); - return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR; -} - -/* report a zlib or i/o error */ -void zerr(int ret) -{ - fputs("zpipe: ", stderr); - switch (ret) { - case Z_ERRNO: - if (ferror(stdin)) - fputs("error reading stdin\n", stderr); - if (ferror(stdout)) - fputs("error writing stdout\n", stderr); - break; - case Z_STREAM_ERROR: - fputs("invalid compression level\n", stderr); - break; - case Z_DATA_ERROR: - fputs("invalid or incomplete deflate data\n", stderr); - break; - case Z_MEM_ERROR: - fputs("out of memory\n", stderr); - break; - case Z_VERSION_ERROR: - fputs("zlib version mismatch!\n", stderr); - } -} - -/* compress or decompress from stdin to stdout */ -int main(int argc, char **argv) -{ - int ret; - - /* avoid end-of-line conversions */ - SET_BINARY_MODE(stdin); - SET_BINARY_MODE(stdout); - - /* do compression if no arguments */ - if (argc == 1) { - ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION); - if (ret != Z_OK) - zerr(ret); - return ret; - } - - /* do decompression if -d specified */ - else if (argc == 2 && strcmp(argv[1], "-d") == 0) { - ret = inf(stdin, stdout); - if (ret != Z_OK) - zerr(ret); - return ret; - } - - /* otherwise, report usage */ - else { - fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr); - return 1; - } -} diff --git a/zlib/examples/zran.c b/zlib/examples/zran.c deleted file mode 100644 index 4fec659..0000000 --- a/zlib/examples/zran.c +++ /dev/null @@ -1,409 +0,0 @@ -/* zran.c -- example of zlib/gzip stream indexing and random access - * Copyright (C) 2005, 2012 Mark Adler - * For conditions of distribution and use, see copyright notice in zlib.h - Version 1.1 29 Sep 2012 Mark Adler */ - -/* Version History: - 1.0 29 May 2005 First version - 1.1 29 Sep 2012 Fix memory reallocation error - */ - -/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary() - for random access of a compressed file. A file containing a zlib or gzip - stream is provided on the command line. The compressed stream is decoded in - its entirety, and an index built with access points about every SPAN bytes - in the uncompressed output. The compressed file is left open, and can then - be read randomly, having to decompress on the average SPAN/2 uncompressed - bytes before getting to the desired block of data. - - An access point can be created at the start of any deflate block, by saving - the starting file offset and bit of that block, and the 32K bytes of - uncompressed data that precede that block. Also the uncompressed offset of - that block is saved to provide a referece for locating a desired starting - point in the uncompressed stream. build_index() works by decompressing the - input zlib or gzip stream a block at a time, and at the end of each block - deciding if enough uncompressed data has gone by to justify the creation of - a new access point. If so, that point is saved in a data structure that - grows as needed to accommodate the points. - - To use the index, an offset in the uncompressed data is provided, for which - the latest access point at or preceding that offset is located in the index. - The input file is positioned to the specified location in the index, and if - necessary the first few bits of the compressed data is read from the file. - inflate is initialized with those bits and the 32K of uncompressed data, and - the decompression then proceeds until the desired offset in the file is - reached. Then the decompression continues to read the desired uncompressed - data from the file. - - Another approach would be to generate the index on demand. In that case, - requests for random access reads from the compressed data would try to use - the index, but if a read far enough past the end of the index is required, - then further index entries would be generated and added. - - There is some fair bit of overhead to starting inflation for the random - access, mainly copying the 32K byte dictionary. So if small pieces of the - file are being accessed, it would make sense to implement a cache to hold - some lookahead and avoid many calls to extract() for small lengths. - - Another way to build an index would be to use inflateCopy(). That would - not be constrained to have access points at block boundaries, but requires - more memory per access point, and also cannot be saved to file due to the - use of pointers in the state. The approach here allows for storage of the - index in a file. - */ - -#include -#include -#include -#include "zlib.h" - -#define local static - -#define SPAN 1048576L /* desired distance between access points */ -#define WINSIZE 32768U /* sliding window size */ -#define CHUNK 16384 /* file input buffer size */ - -/* access point entry */ -struct point { - off_t out; /* corresponding offset in uncompressed data */ - off_t in; /* offset in input file of first full byte */ - int bits; /* number of bits (1-7) from byte at in - 1, or 0 */ - unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */ -}; - -/* access point list */ -struct access { - int have; /* number of list entries filled in */ - int size; /* number of list entries allocated */ - struct point *list; /* allocated list */ -}; - -/* Deallocate an index built by build_index() */ -local void free_index(struct access *index) -{ - if (index != NULL) { - free(index->list); - free(index); - } -} - -/* Add an entry to the access point list. If out of memory, deallocate the - existing list and return NULL. */ -local struct access *addpoint(struct access *index, int bits, - off_t in, off_t out, unsigned left, unsigned char *window) -{ - struct point *next; - - /* if list is empty, create it (start with eight points) */ - if (index == NULL) { - index = malloc(sizeof(struct access)); - if (index == NULL) return NULL; - index->list = malloc(sizeof(struct point) << 3); - if (index->list == NULL) { - free(index); - return NULL; - } - index->size = 8; - index->have = 0; - } - - /* if list is full, make it bigger */ - else if (index->have == index->size) { - index->size <<= 1; - next = realloc(index->list, sizeof(struct point) * index->size); - if (next == NULL) { - free_index(index); - return NULL; - } - index->list = next; - } - - /* fill in entry and increment how many we have */ - next = index->list + index->have; - next->bits = bits; - next->in = in; - next->out = out; - if (left) - memcpy(next->window, window + WINSIZE - left, left); - if (left < WINSIZE) - memcpy(next->window + left, window, WINSIZE - left); - index->have++; - - /* return list, possibly reallocated */ - return index; -} - -/* Make one entire pass through the compressed stream and build an index, with - access points about every span bytes of uncompressed output -- span is - chosen to balance the speed of random access against the memory requirements - of the list, about 32K bytes per access point. Note that data after the end - of the first zlib or gzip stream in the file is ignored. build_index() - returns the number of access points on success (>= 1), Z_MEM_ERROR for out - of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a - file read error. On success, *built points to the resulting index. */ -local int build_index(FILE *in, off_t span, struct access **built) -{ - int ret; - off_t totin, totout; /* our own total counters to avoid 4GB limit */ - off_t last; /* totout value of last access point */ - struct access *index; /* access points being generated */ - z_stream strm; - unsigned char input[CHUNK]; - unsigned char window[WINSIZE]; - - /* initialize inflate */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = 0; - strm.next_in = Z_NULL; - ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */ - if (ret != Z_OK) - return ret; - - /* inflate the input, maintain a sliding window, and build an index -- this - also validates the integrity of the compressed data using the check - information at the end of the gzip or zlib stream */ - totin = totout = last = 0; - index = NULL; /* will be allocated by first addpoint() */ - strm.avail_out = 0; - do { - /* get some compressed data from input file */ - strm.avail_in = fread(input, 1, CHUNK, in); - if (ferror(in)) { - ret = Z_ERRNO; - goto build_index_error; - } - if (strm.avail_in == 0) { - ret = Z_DATA_ERROR; - goto build_index_error; - } - strm.next_in = input; - - /* process all of that, or until end of stream */ - do { - /* reset sliding window if necessary */ - if (strm.avail_out == 0) { - strm.avail_out = WINSIZE; - strm.next_out = window; - } - - /* inflate until out of input, output, or at end of block -- - update the total input and output counters */ - totin += strm.avail_in; - totout += strm.avail_out; - ret = inflate(&strm, Z_BLOCK); /* return at end of block */ - totin -= strm.avail_in; - totout -= strm.avail_out; - if (ret == Z_NEED_DICT) - ret = Z_DATA_ERROR; - if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) - goto build_index_error; - if (ret == Z_STREAM_END) - break; - - /* if at end of block, consider adding an index entry (note that if - data_type indicates an end-of-block, then all of the - uncompressed data from that block has been delivered, and none - of the compressed data after that block has been consumed, - except for up to seven bits) -- the totout == 0 provides an - entry point after the zlib or gzip header, and assures that the - index always has at least one access point; we avoid creating an - access point after the last block by checking bit 6 of data_type - */ - if ((strm.data_type & 128) && !(strm.data_type & 64) && - (totout == 0 || totout - last > span)) { - index = addpoint(index, strm.data_type & 7, totin, - totout, strm.avail_out, window); - if (index == NULL) { - ret = Z_MEM_ERROR; - goto build_index_error; - } - last = totout; - } - } while (strm.avail_in != 0); - } while (ret != Z_STREAM_END); - - /* clean up and return index (release unused entries in list) */ - (void)inflateEnd(&strm); - index->list = realloc(index->list, sizeof(struct point) * index->have); - index->size = index->have; - *built = index; - return index->size; - - /* return error */ - build_index_error: - (void)inflateEnd(&strm); - if (index != NULL) - free_index(index); - return ret; -} - -/* Use the index to read len bytes from offset into buf, return bytes read or - negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past - the end of the uncompressed data, then extract() will return a value less - than len, indicating how much as actually read into buf. This function - should not return a data error unless the file was modified since the index - was generated. extract() may also return Z_ERRNO if there is an error on - reading or seeking the input file. */ -local int extract(FILE *in, struct access *index, off_t offset, - unsigned char *buf, int len) -{ - int ret, skip; - z_stream strm; - struct point *here; - unsigned char input[CHUNK]; - unsigned char discard[WINSIZE]; - - /* proceed only if something reasonable to do */ - if (len < 0) - return 0; - - /* find where in stream to start */ - here = index->list; - ret = index->have; - while (--ret && here[1].out <= offset) - here++; - - /* initialize file and inflate state to start there */ - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - strm.avail_in = 0; - strm.next_in = Z_NULL; - ret = inflateInit2(&strm, -15); /* raw inflate */ - if (ret != Z_OK) - return ret; - ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET); - if (ret == -1) - goto extract_ret; - if (here->bits) { - ret = getc(in); - if (ret == -1) { - ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR; - goto extract_ret; - } - (void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits)); - } - (void)inflateSetDictionary(&strm, here->window, WINSIZE); - - /* skip uncompressed bytes until offset reached, then satisfy request */ - offset -= here->out; - strm.avail_in = 0; - skip = 1; /* while skipping to offset */ - do { - /* define where to put uncompressed data, and how much */ - if (offset == 0 && skip) { /* at offset now */ - strm.avail_out = len; - strm.next_out = buf; - skip = 0; /* only do this once */ - } - if (offset > WINSIZE) { /* skip WINSIZE bytes */ - strm.avail_out = WINSIZE; - strm.next_out = discard; - offset -= WINSIZE; - } - else if (offset != 0) { /* last skip */ - strm.avail_out = (unsigned)offset; - strm.next_out = discard; - offset = 0; - } - - /* uncompress until avail_out filled, or end of stream */ - do { - if (strm.avail_in == 0) { - strm.avail_in = fread(input, 1, CHUNK, in); - if (ferror(in)) { - ret = Z_ERRNO; - goto extract_ret; - } - if (strm.avail_in == 0) { - ret = Z_DATA_ERROR; - goto extract_ret; - } - strm.next_in = input; - } - ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */ - if (ret == Z_NEED_DICT) - ret = Z_DATA_ERROR; - if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR) - goto extract_ret; - if (ret == Z_STREAM_END) - break; - } while (strm.avail_out != 0); - - /* if reach end of stream, then don't keep trying to get more */ - if (ret == Z_STREAM_END) - break; - - /* do until offset reached and requested data read, or stream ends */ - } while (skip); - - /* compute number of uncompressed bytes read after offset */ - ret = skip ? 0 : len - strm.avail_out; - - /* clean up and return bytes read or error */ - extract_ret: - (void)inflateEnd(&strm); - return ret; -} - -/* Demonstrate the use of build_index() and extract() by processing the file - provided on the command line, and the extracting 16K from about 2/3rds of - the way through the uncompressed output, and writing that to stdout. */ -int main(int argc, char **argv) -{ - int len; - off_t offset; - FILE *in; - struct access *index = NULL; - unsigned char buf[CHUNK]; - - /* open input file */ - if (argc != 2) { - fprintf(stderr, "usage: zran file.gz\n"); - return 1; - } - in = fopen(argv[1], "rb"); - if (in == NULL) { - fprintf(stderr, "zran: could not open %s for reading\n", argv[1]); - return 1; - } - - /* build index */ - len = build_index(in, SPAN, &index); - if (len < 0) { - fclose(in); - switch (len) { - case Z_MEM_ERROR: - fprintf(stderr, "zran: out of memory\n"); - break; - case Z_DATA_ERROR: - fprintf(stderr, "zran: compressed data error in %s\n", argv[1]); - break; - case Z_ERRNO: - fprintf(stderr, "zran: read error on %s\n", argv[1]); - break; - default: - fprintf(stderr, "zran: error %d while building index\n", len); - } - return 1; - } - fprintf(stderr, "zran: built index with %d access points\n", len); - - /* use index by reading some bytes from an arbitrary offset */ - offset = (index->list[index->have - 1].out << 1) / 3; - len = extract(in, index, offset, buf, CHUNK); - if (len < 0) - fprintf(stderr, "zran: extraction failed: %s error\n", - len == Z_MEM_ERROR ? "out of memory" : "input corrupted"); - else { - fwrite(buf, 1, len, stdout); - fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset); - } - - /* clean up and exit */ - free_index(index); - fclose(in); - return 0; -} -- cgit v1.1