]>
cygwin.com Git - cygwin-apps/setup.git/blob - compress_xz.cc
2 * Copyright (c) 2008, Charles Wilson
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * A copy of the GNU General Public License can be found at
12 * Written by Charles Wilson <cygwin@cygwin.com>
14 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
15 * archive_read_support_compression_xz.c functions xz_bidder_bid()
16 * and lzma_bidder_bid(), which are under a BSD license (reproduced
20 #include "compress_xz.h"
/*
 * Decode a 32-bit unsigned integer stored in little-endian byte order.
 *
 * pp points at the first (least significant) of four bytes; the buffer
 * does not need any particular alignment because it is read bytewise.
 * Returns the decoded value.
 */
static inline uint32_t
le32dec(const void *pp)
{
  unsigned char const *p = (unsigned char const *)pp;

  /* Widen every byte to uint32_t before shifting.  A bare `p[3] << 24'
   * promotes the byte to (signed) int, so a high byte >= 0x80 would be
   * shifted into the sign bit. */
  return (((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16)
          | ((uint32_t)p[1] << 8) | (uint32_t)p[0]);
}
/*
 * Decode a 64-bit unsigned integer stored in little-endian byte order.
 *
 * pp points at the first (least significant) of eight bytes, which are
 * read one at a time.  Returns the decoded value.
 */
static inline uint64_t
le64dec(const void *pp)
{
  const unsigned char *bytes = (const unsigned char *)pp;
  uint64_t value = 0;

  /* Fold from the most significant byte (offset 7) down to offset 0. */
  for (int idx = 7; idx >= 0; --idx)
    value = (value << 8) | bytes[idx];

  return value;
}
44 * Predicate: the stream is open for read.
46 compress_xz::compress_xz (io_stream
* parent
)
52 compression_type (COMPRESSION_UNKNOWN
)
54 unsigned char * out_block
= NULL
;
55 unsigned char * in_block
= NULL
;
58 if (!parent
|| parent
->error())
65 state
= (struct private_data
*)calloc(sizeof(*state
), 1);
66 out_block
= (unsigned char *)malloc(out_block_size
);
67 in_block
= (unsigned char *)malloc(in_block_size
);
68 if (state
== NULL
|| out_block
== NULL
|| in_block
== NULL
)
77 memset(&(state
->stream
), 0x00, sizeof(state
->stream
));
78 state
->out_block_size
= out_block_size
;
79 state
->out_block
= out_block
;
80 state
->in_block_size
= in_block_size
;
81 state
->in_block
= in_block
;
82 state
->out_p
= out_block
;
83 state
->stream
.avail_in
= 0;
84 state
->stream
.next_out
= state
->out_block
;
85 state
->stream
.avail_out
= state
->out_block_size
;
91 compress_xz::read (void *buffer
, size_t len
)
93 if ( compression_type
!= COMPRESSION_XZ
94 && compression_type
!= COMPRESSION_LZMA
)
99 /* there is no recovery from a busted stream */
109 /* peekbuf is layered on top of existing buffering code */
112 ssize_t tmplen
= std::min (this->peeklen
, len
);
113 this->peeklen
-= tmplen
;
114 memcpy (buffer
, this->peekbuf
, tmplen
);
115 memmove (this->peekbuf
, this->peekbuf
+ tmplen
, sizeof(this->peekbuf
) - tmplen
);
116 ssize_t tmpread
= read (&((char *) buffer
)[tmplen
], len
- tmplen
);
118 return tmpread
+ tmplen
;
123 if (state
->out_p
< state
->out_block
+ state
->out_pos
)
124 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
126 ssize_t tmplen
= std::min ((size_t)(state
->out_block
+ state
->out_pos
- state
->out_p
), len
);
127 memcpy (buffer
, state
->out_p
, tmplen
);
128 state
->out_p
+= tmplen
;
129 ssize_t tmpread
= read (&((char *) buffer
)[tmplen
], len
- tmplen
);
131 return tmpread
+ tmplen
;
136 size_t lenRemaining
= len
;
137 unsigned char * bufp
= (unsigned char *)buffer
;
139 size_t avail_out
= 0;
140 size_t decompressed
= 0;
142 /* if we made it here, any existing uncompressed data in out_block
143 * has been consumed, so reset out_p and out_pos
145 state
->out_p
= state
->out_block
;
149 if (state
->in_pos
== state
->in_size
)
151 /* no compressed data ready; read some more */
152 state
->in_size
= (size_t) this->original
->read(state
->in_block
, state
->in_block_size
);
156 avail_in
= state
->in_size
- state
->in_pos
; /* will be 0 if EOF */
157 avail_out
= state
->out_block_size
- state
->out_pos
;
159 state
->stream
.next_out
= state
->out_block
+ state
->out_pos
;
160 state
->stream
.avail_out
= avail_out
;
161 state
->stream
.next_in
= state
->in_block
+ state
->in_pos
;
162 state
->stream
.avail_in
= avail_in
;
164 lzma_ret res
= lzma_code (&(state
->stream
),
165 (state
->stream
.avail_in
== 0) ? LZMA_FINISH
: LZMA_RUN
);
167 consumed
= avail_in
- state
->stream
.avail_in
;
168 decompressed
= avail_out
- state
->stream
.avail_out
;
170 state
->in_pos
+= consumed
;
171 state
->out_pos
+= decompressed
;
173 ssize_t tmplen
= std::min (decompressed
, lenRemaining
);
174 memcpy (bufp
, state
->out_p
, tmplen
);
175 state
->out_p
+= tmplen
;
177 lenRemaining
-= tmplen
;
178 state
->total_out
+= decompressed
;
179 state
->total_in
+= consumed
;
183 case LZMA_STREAM_END
: /* Found end of stream. */
186 case LZMA_OK
: /* Decompressor made some progress. */
189 msg ("Lzma library error: Cannot allocate memory\n");
190 this->lasterr
= ENOMEM
;
192 case LZMA_MEMLIMIT_ERROR
:
193 msg ("Lzma library error: Out of memory\n");
194 this->lasterr
= ENOMEM
;
196 case LZMA_FORMAT_ERROR
:
197 msg ("Lzma library error: format not recognized\n");
198 this->lasterr
= EINVAL
;
200 case LZMA_OPTIONS_ERROR
:
201 msg ("Lzma library error: Invalid options\n");
202 this->lasterr
= EINVAL
;
204 case LZMA_DATA_ERROR
:
205 msg ("Lzma library error: Corrupted input data\n");
206 this->lasterr
= EINVAL
;
209 msg ("Lzma library error: No progress is possible\n");
210 this->lasterr
= EINVAL
;
212 case LZMA_PROG_ERROR
:
213 msg ("Lzma library error: Internal error\n");
214 this->lasterr
= EINVAL
;
217 msg ("Lzma decompression failed: Unknown error %d\n", res
);
218 this->lasterr
= EINVAL
;
222 while (lenRemaining
!= 0 && !state
->eof
);
224 return (len
- lenRemaining
);
228 compress_xz::write (const void *buffer
, size_t len
)
230 throw new logic_error("compress_xz::write is not implemented");
234 compress_xz::peek (void *buffer
, size_t len
)
236 /* can only peek 512 bytes */
240 if (len
> this->peeklen
)
242 size_t want
= len
- this->peeklen
;
243 ssize_t got
= read (&(this->peekbuf
[peeklen
]), want
);
245 this->peeklen
+= got
;
249 /* we may have read less than requested. */
250 memcpy (buffer
, this->peekbuf
, this->peeklen
);
251 return this->peeklen
;
255 memcpy (buffer
, this->peekbuf
, len
);
264 throw new logic_error("compress_xz::tell is not implemented");
268 compress_xz::seek (long where
, io_stream_seek_t whence
)
270 throw new logic_error("compress_xz::seek is not implemented");
274 compress_xz::error ()
280 compress_xz::set_mtime (time_t mtime
)
283 return original
->set_mtime (mtime
);
288 compress_xz::get_mtime ()
291 return original
->get_mtime ();
296 compress_xz::get_mode ()
299 return original
->get_mode ();
304 compress_xz::release_original ()
306 owns_original
= false;
310 compress_xz::destroy ()
314 if ( compression_type
== COMPRESSION_XZ
315 || compression_type
== COMPRESSION_LZMA
)
317 lzma_end(&(state
->stream
));
320 if (state
->out_block
)
322 free (state
->out_block
);
323 state
->out_block
= NULL
;
328 free (state
->in_block
);
329 state
->in_block
= NULL
;
335 compression_type
= COMPRESSION_UNKNOWN
;
338 if (original
&& owns_original
)
342 compress_xz::~compress_xz ()
347 /* ===========================================================================
348 * Check the header of a lzma_stream opened for reading, and initialize
349 * the appropriate decoder (xz or lzma).
351 * the stream has already been created successfully
352 * this method is called only once per stream
353 * OUT assertion - success:
354 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
355 * state->stream is initialized with the appropriate decoder
356 * lzma: the first 14 bytes of the stream are read (+ whatever
357 * the decoder itself consumes on initialization)
358 * xz: the first 6 bytes of the stream are read (+ whatever the
359 * decoder itself consumes on initialization)
361 * OUT assertion - error:
362 * lasterr is non-zero
365 compress_xz::init_decoder (void)
367 unsigned char buf
[14];
369 this->compression_type
= COMPRESSION_UNKNOWN
;
371 /* read properties */
372 if (this->original
->peek (buf
, 6) != 6)
374 this->lasterr
= (errno
? errno
: EIO
);
378 if (bid_xz ((void *)buf
, 6) > 0)
380 this->compression_type
= COMPRESSION_XZ
;
384 if (this->original
->peek (buf
+ 6, 8) != 8)
386 this->lasterr
= (errno
? errno
: EIO
);
389 if (bid_lzma ((void *)buf
, 14) > 0)
391 this->compression_type
= COMPRESSION_LZMA
;
395 switch (compression_type
)
398 ret
= lzma_stream_decoder (&(state
->stream
),
399 (1U << 30),/* memlimit */
402 case COMPRESSION_LZMA
:
403 ret
= lzma_alone_decoder (&(state
->stream
),
404 (1U << 30));/* memlimit */
407 this->lasterr
= EINVAL
;
416 this->lasterr
= ENOMEM
;
418 case LZMA_OPTIONS_ERROR
:
419 this->lasterr
= EINVAL
;
422 this->lasterr
= EINVAL
;
428 compress_xz::is_xz_or_lzma (void * buffer
, size_t len
)
431 int bits_checked_lzma
;
433 bits_checked_xz
= bid_xz (buffer
, len
);
437 bits_checked_lzma
= bid_lzma (buffer
, len
);
438 if (bits_checked_lzma
)
445 * Portions of bid_xz() and bid_lzma() have been adapted from the
446 * libarchive archive_read_support_compression_xz.c functions
447 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
448 * the 2-clause (simplified) BSD license, reproduced below.
450 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
451 * Copyright (c) 2009 Michihiro NAKAJIMA
452 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
453 * All rights reserved.
455 * Redistribution and use in source and binary forms, with or without
456 * modification, are permitted provided that the following conditions
458 * 1. Redistributions of source code must retain the above copyright
459 * notice, this list of conditions and the following disclaimer.
460 * 2. Redistributions in binary form must reproduce the above copyright
461 * notice, this list of conditions and the following disclaimer in the
462 * documentation and/or other materials provided with the distribution.
464 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
465 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
466 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
467 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
468 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
469 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
470 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
471 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
472 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
473 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
476 compress_xz::bid_xz (void * buffer
, size_t len
)
478 const unsigned char *buf
;
481 buf
= (const unsigned char *)buffer
;
484 /* not enough peek'ed data in buf */
489 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
511 msg ("compress_xz::bid_xz: success: %d\n", bits_checked
);
512 return (bits_checked
);
516 compress_xz::bid_lzma (void * buffer
, size_t len
)
518 const unsigned char *buf
;
520 uint64_t uncompressed_size
;
525 /* not enough peek'ed data in buffer */
528 buf
= (unsigned char *)buffer
;
530 /* First byte of raw LZMA stream is commonly 0x5d.
531 * The first byte is a special number, which consists of
532 * three parameters of LZMA compression, a number of literal
533 * context bits(which is from 0 to 8, default is 3), a number
534 * of literal pos bits(which is from 0 to 4, default is 0),
535 * a number of pos bits(which is from 0 to 4, default is 2).
536 * The first byte is made by
537 * (pos bits * 5 + literal pos bits) * 9 + literal context bits,
538 * and so the default value in this field is
539 * (2 * 5 + 0) * 9 + 3 = 0x5d.
540 * lzma of LZMA SDK has options to change those parameters.
541 * It means a range of this field is from 0 to 224. And lzma of
542 * XZ Utils with option -e records 0x5e in this field. */
543 /* NOTE: If this checking of the first byte increases false
544 * recognition, we should allow only 0x5d and 0x5e for the first
545 * byte of LZMA stream. */
547 if (buf
[0] > (4 * 5 + 4) * 9 + 8)
549 /* Most likely value in the first byte of LZMA stream. */
550 if (buf
[0] == 0x5d || buf
[0] == 0x5e)
553 /* Sixth through thirteenth bytes are uncompressed size,
554 * stored in little-endian order. `-1' means uncompressed
555 * size is unknown and lzma of XZ Utils always records `-1'
557 uncompressed_size
= le64dec(buf
+5);
558 if (uncompressed_size
== (uint64_t)(-1))
561 /* Second through fifth bytes are dictionary size, stored in
562 * little-endian order. The minimum dictionary size is
563 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
564 * -d12 and the maximum dictionary size is 1 << 27(128MiB)
565 * which the one uses with option -d27.
566 * NOTE: A comment of LZMA SDK source code says this dictionary
567 * range is from 1 << 12 to 1 << 30. */
568 dicsize
= le32dec(buf
+1);
571 case 0x00001000:/* lzma of LZMA SDK option -d12. */
572 case 0x00002000:/* lzma of LZMA SDK option -d13. */
573 case 0x00004000:/* lzma of LZMA SDK option -d14. */
574 case 0x00008000:/* lzma of LZMA SDK option -d15. */
575 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
576 * lzma of LZMA SDK option -d16. */
577 case 0x00020000:/* lzma of LZMA SDK option -d17. */
578 case 0x00040000:/* lzma of LZMA SDK option -d18. */
579 case 0x00080000:/* lzma of XZ Utils option -2.
580 * lzma of LZMA SDK option -d19. */
581 case 0x00100000:/* lzma of XZ Utils option -3.
582 * lzma of LZMA SDK option -d20. */
583 case 0x00200000:/* lzma of XZ Utils option -4.
584 * lzma of LZMA SDK option -d21. */
585 case 0x00400000:/* lzma of XZ Utils option -5.
586 * lzma of LZMA SDK option -d22. */
587 case 0x00800000:/* lzma of XZ Utils option -6.
588 * lzma of LZMA SDK option -d23. */
589 case 0x01000000:/* lzma of XZ Utils option -7.
590 * lzma of LZMA SDK option -d24. */
591 case 0x02000000:/* lzma of XZ Utils option -8.
592 * lzma of LZMA SDK option -d25. */
593 case 0x04000000:/* lzma of XZ Utils option -9.
594 * lzma of LZMA SDK option -d26. */
595 case 0x08000000:/* lzma of LZMA SDK option -d27. */
599 /* If a memory usage for encoding was not enough on
600 * the platform where LZMA stream was made, lzma of
601 * XZ Utils automatically decreased the dictionary
602 * size to enough memory for encoding by 1Mi bytes
604 if (dicsize
<= 0x03F00000 && dicsize
>= 0x00300000
605 && (dicsize
& ((1 << 20)-1)) == 0
606 && bits_checked
== 8 + 64)
611 /* Otherwise dictionary size is unlikely. But it is
612 * possible that someone makes lzma stream with
613 * liblzma/LZMA SDK in one's dictionary size. */
617 /* TODO: The above test is still very weak. It would be
618 * good to do better. */
619 msg ("compress_xz::bid_lzma: success: %d\n", bits_checked
);
620 return (bits_checked
);
This page took 0.062617 seconds and 5 git commands to generate.