]> cygwin.com Git - cygwin-apps/setup.git/blob - compress_xz.cc
1480c6ce9fa72a63190d91d74c23de87d805bc17
[cygwin-apps/setup.git] / compress_xz.cc
1 /*
2 * Copyright (c) 2008, Charles Wilson
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * A copy of the GNU General Public License can be found at
10 * http://www.gnu.org/
11 *
12 * Written by Charles Wilson <cygwin@cygwin.com>
13 *
14 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
15 * archive_read_support_compression_xz.c functions xz_bidder_bid()
16 * and lzma_bidder_bid(), which are under a BSD license (reproduced
17 * below).
18 */
19
20 #include "compress_xz.h"
21 #include "LogSingleton.h"
22
23 #include <stdexcept>
24 using namespace std;
25 #include <errno.h>
26 #include <memory.h>
27 #include <malloc.h>
28
/*
 * Decode a 32-bit little-endian unsigned integer from the four bytes at pp.
 * pp need not be aligned.
 */
static inline uint32_t
le32dec(const void *pp)
{
  unsigned char const *p = (unsigned char const *)pp;

  /* Widen every byte to uint32_t before shifting: a bare unsigned char
   * is promoted to (signed) int, and shifting a value >= 0x80 left by 24
   * would overflow that int. */
  return (((uint32_t)p[3] << 24)
          | ((uint32_t)p[2] << 16)
          | ((uint32_t)p[1] << 8)
          | (uint32_t)p[0]);
}
35
36 static inline uint64_t
37 le64dec(const void *pp)
38 {
39 unsigned char const *p = (unsigned char const *)pp;
40 return (((uint64_t)le32dec(p + 4) << 32) | le32dec(p));
41 }
42
43 /*
44 * Predicate: the stream is open for read.
45 */
46 compress_xz::compress_xz (io_stream * parent)
47 :
48 original(NULL),
49 owns_original(true),
50 peeklen(0),
51 lasterr(0),
52 compression_type (COMPRESSION_UNKNOWN)
53 {
54 unsigned char * out_block = NULL;
55 unsigned char * in_block = NULL;
56
57 /* read only */
58 if (!parent || parent->error())
59 {
60 lasterr = EBADF;
61 return;
62 }
63 original = parent;
64
65 state = (struct private_data *)calloc(sizeof(*state), 1);
66 out_block = (unsigned char *)malloc(out_block_size);
67 in_block = (unsigned char *)malloc(in_block_size);
68 if (state == NULL || out_block == NULL || in_block == NULL)
69 {
70 free(out_block);
71 free(in_block);
72 free(state);
73 lasterr = ENOMEM;
74 return;
75 }
76
77 memset(&(state->stream), 0x00, sizeof(state->stream));
78 state->out_block_size = out_block_size;
79 state->out_block = out_block;
80 state->in_block_size = in_block_size;
81 state->in_block = in_block;
82 state->out_p = out_block;
83 state->stream.avail_in = 0;
84 state->stream.next_out = state->out_block;
85 state->stream.avail_out = state->out_block_size;
86
87 init_decoder ();
88 }
89
90 ssize_t
91 compress_xz::read (void *buffer, size_t len)
92 {
93 if ( compression_type != COMPRESSION_XZ
94 && compression_type != COMPRESSION_LZMA)
95 {
96 return -1;
97 }
98
99 /* there is no recovery from a busted stream */
100 if (this->lasterr)
101 {
102 return -1;
103 }
104 if (len == 0)
105 {
106 return 0;
107 }
108
109 /* peekbuf is layered on top of existing buffering code */
110 if (this->peeklen)
111 {
112 ssize_t tmplen = std::min (this->peeklen, len);
113 this->peeklen -= tmplen;
114 memcpy (buffer, this->peekbuf, tmplen);
115 memmove (this->peekbuf, this->peekbuf + tmplen, sizeof(this->peekbuf) - tmplen);
116 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
117 if (tmpread >= 0)
118 return tmpread + tmplen;
119 else
120 return tmpread;
121 }
122
123 if (state->out_p < state->out_block + state->out_pos)
124 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
125 {
126 ssize_t tmplen = std::min ((size_t)(state->out_block + state->out_pos - state->out_p), len);
127 memcpy (buffer, state->out_p, tmplen);
128 state->out_p += tmplen;
129 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
130 if (tmpread >= 0)
131 return tmpread + tmplen;
132 else
133 return tmpread;
134 }
135
136 size_t lenRemaining = len;
137 unsigned char * bufp = (unsigned char *)buffer;
138 size_t avail_in = 0;
139 size_t avail_out = 0;
140 size_t decompressed = 0;
141 size_t consumed = 0;
142 /* if we made it here, any existing uncompressed data in out_block
143 * has been consumed, so reset out_p and out_pos
144 */
145 state->out_p = state->out_block;
146 state->out_pos = 0;
147 do
148 {
149 if (state->in_pos == state->in_size)
150 {
151 /* no compressed data ready; read some more */
152 state->in_size = (size_t) this->original->read(state->in_block, state->in_block_size);
153 state->in_pos = 0;
154 }
155
156 avail_in = state->in_size - state->in_pos; /* will be 0 if EOF */
157 avail_out = state->out_block_size - state->out_pos;
158
159 state->stream.next_out = state->out_block + state->out_pos;
160 state->stream.avail_out = avail_out;
161 state->stream.next_in = state->in_block + state->in_pos;
162 state->stream.avail_in = avail_in;
163
164 lzma_ret res = lzma_code (&(state->stream),
165 (state->stream.avail_in == 0) ? LZMA_FINISH : LZMA_RUN);
166
167 consumed = avail_in - state->stream.avail_in;
168 decompressed = avail_out - state->stream.avail_out;
169
170 state->in_pos += consumed;
171 state->out_pos += decompressed;
172
173 ssize_t tmplen = std::min (decompressed, lenRemaining);
174 memcpy (bufp, state->out_p, tmplen);
175 state->out_p += tmplen;
176 bufp += tmplen;
177 lenRemaining -= tmplen;
178 state->total_out += decompressed;
179 state->total_in += consumed;
180
181 switch (res)
182 {
183 case LZMA_STREAM_END: /* Found end of stream. */
184 state->eof = 1;
185 /* FALL THROUGH */
186 case LZMA_OK: /* Decompressor made some progress. */
187 break;
188 case LZMA_MEM_ERROR:
189 LogPlainPrintf ("Lzma library error: Cannot allocate memory\n");
190 this->lasterr = ENOMEM;
191 return -1;
192 case LZMA_MEMLIMIT_ERROR:
193 LogPlainPrintf ("Lzma library error: Out of memory\n");
194 this->lasterr = ENOMEM;
195 return -1;
196 case LZMA_FORMAT_ERROR:
197 LogPlainPrintf ("Lzma library error: format not recognized\n");
198 this->lasterr = EINVAL;
199 return -1;
200 case LZMA_OPTIONS_ERROR:
201 LogPlainPrintf ("Lzma library error: Invalid options\n");
202 this->lasterr = EINVAL;
203 return -1;
204 case LZMA_DATA_ERROR:
205 LogPlainPrintf ("Lzma library error: Corrupted input data\n");
206 this->lasterr = EINVAL;
207 return -1;
208 case LZMA_BUF_ERROR:
209 LogPlainPrintf ("Lzma library error: No progress is possible\n");
210 this->lasterr = EINVAL;
211 return -1;
212 case LZMA_PROG_ERROR:
213 LogPlainPrintf ("Lzma library error: Internal error\n");
214 this->lasterr = EINVAL;
215 return -1;
216 default:
217 LogPlainPrintf ("Lzma decompression failed: Unknown error %d\n", res);
218 this->lasterr = EINVAL;
219 return -1;
220 }
221 }
222 while (lenRemaining != 0 && !state->eof);
223
224 return (len - lenRemaining);
225 }
226
227 ssize_t
228 compress_xz::write (const void *buffer, size_t len)
229 {
230 throw new logic_error("compress_xz::write is not implemented");
231 }
232
233 ssize_t
234 compress_xz::peek (void *buffer, size_t len)
235 {
236 /* can only peek 512 bytes */
237 if (len > 512)
238 return ENOMEM;
239
240 if (len > this->peeklen)
241 {
242 size_t want = len - this->peeklen;
243 ssize_t got = read (&(this->peekbuf[peeklen]), want);
244 if (got >= 0)
245 this->peeklen += got;
246 else
247 /* error */
248 return got;
249 /* we may have read less than requested. */
250 memcpy (buffer, this->peekbuf, this->peeklen);
251 return this->peeklen;
252 }
253 else
254 {
255 memcpy (buffer, this->peekbuf, len);
256 return len;
257 }
258 return 0;
259 }
260
261 long
262 compress_xz::tell ()
263 {
264 throw new logic_error("compress_xz::tell is not implemented");
265 }
266
267 int
268 compress_xz::seek (long where, io_stream_seek_t whence)
269 {
270 throw new logic_error("compress_xz::seek is not implemented");
271 }
272
273 int
274 compress_xz::error ()
275 {
276 return lasterr;
277 }
278
279 int
280 compress_xz::set_mtime (time_t mtime)
281 {
282 if (original)
283 return original->set_mtime (mtime);
284 return 1;
285 }
286
287 time_t
288 compress_xz::get_mtime ()
289 {
290 if (original)
291 return original->get_mtime ();
292 return 0;
293 }
294
295 mode_t
296 compress_xz::get_mode ()
297 {
298 if (original)
299 return original->get_mode ();
300 return 0;
301 }
302
303 void
304 compress_xz::release_original ()
305 {
306 owns_original = false;
307 }
308
309 void
310 compress_xz::destroy ()
311 {
312 if (state)
313 {
314 if ( compression_type == COMPRESSION_XZ
315 || compression_type == COMPRESSION_LZMA)
316 {
317 lzma_end(&(state->stream));
318 }
319
320 if (state->out_block)
321 {
322 free (state->out_block);
323 state->out_block = NULL;
324 }
325
326 if (state->in_block)
327 {
328 free (state->in_block);
329 state->in_block = NULL;
330 }
331
332 free(state);
333 state = NULL;
334
335 compression_type = COMPRESSION_UNKNOWN;
336 }
337
338 if (original && owns_original)
339 delete original;
340 }
341
342 compress_xz::~compress_xz ()
343 {
344 destroy ();
345 }
346
347 /* ===========================================================================
348 * Check the header of a lzma_stream opened for reading, and initialize
349 * the appropriate decoder (xz or lzma).
350 * IN assertion:
351 * the stream has already been created sucessfully
352 * this method is called only once per stream
353 * OUT assertion - success:
354 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
355 * state->stream is initialized with the appropriate decoder
356 * lzma: the first 14 bytes of the stream are read (+ whatever
357 * the decoder itself consumes on initialization)
358 * xz: the first 6 bytes of the stram are read (+ whatever the
359 * decoder itself consumes on initialization)
360 * last_error is zero
361 * OUT assertion - error:
362 * last_error is non-zero
363 */
364 void
365 compress_xz::init_decoder (void)
366 {
367 unsigned char buf[14];
368 int ret;
369 this->compression_type = COMPRESSION_UNKNOWN;
370
371 /* read properties */
372 if (this->original->peek (buf, 6) != 6)
373 {
374 this->lasterr = (errno ? errno : EIO);
375 return;
376 }
377
378 if (bid_xz ((void *)buf, 6) > 0)
379 {
380 this->compression_type = COMPRESSION_XZ;
381 }
382 else
383 {
384 if (this->original->peek (buf + 6, 8) != 8)
385 {
386 this->lasterr = (errno ? errno : EIO);
387 return;
388 }
389 if (bid_lzma ((void *)buf, 14) > 0)
390 {
391 this->compression_type = COMPRESSION_LZMA;
392 }
393 }
394
395 switch (compression_type)
396 {
397 case COMPRESSION_XZ:
398 ret = lzma_stream_decoder (&(state->stream),
399 (1U << 30),/* memlimit */
400 LZMA_CONCATENATED);
401 break;
402 case COMPRESSION_LZMA:
403 ret = lzma_alone_decoder (&(state->stream),
404 (1U << 30));/* memlimit */
405 break;
406 default:
407 this->lasterr = EINVAL;
408 return;
409 }
410
411 switch (ret)
412 {
413 case LZMA_OK:
414 break;
415 case LZMA_MEM_ERROR:
416 this->lasterr = ENOMEM;
417 break;
418 case LZMA_OPTIONS_ERROR:
419 this->lasterr = EINVAL;
420 break;
421 default:
422 this->lasterr = EINVAL;
423 break;
424 }
425 }
426
427 bool
428 compress_xz::is_xz_or_lzma (void * buffer, size_t len)
429 {
430 int bits_checked_xz;
431 int bits_checked_lzma;
432
433 bits_checked_xz = bid_xz (buffer, len);
434 if (bits_checked_xz)
435 return true;
436
437 bits_checked_lzma = bid_lzma (buffer, len);
438 if (bits_checked_lzma)
439 return true;
440
441 return false;
442 }
443
444 /*-
445 * Portions of bid_xz() and bid_lzma() have been adapted from the
446 * libarchive archive_read_support_compression_xz.c functions
447 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
448 * the 2-clause (simplified) BSD license, reproduced below.
449 *
450 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
451 * Copyright (c) 2009 Michihiro NAKAJIMA
452 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
453 * All rights reserved.
454 *
455 * Redistribution and use in source and binary forms, with or without
456 * modification, are permitted provided that the following conditions
457 * are met:
458 * 1. Redistributions of source code must retain the above copyright
459 * notice, this list of conditions and the following disclaimer.
460 * 2. Redistributions in binary form must reproduce the above copyright
461 * notice, this list of conditions and the following disclaimer in the
462 * documentation and/or other materials provided with the distribution.
463 *
464 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
465 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
466 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
467 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
468 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
469 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
470 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
471 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
472 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
473 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
474 */
475 int
476 compress_xz::bid_xz (void * buffer, size_t len)
477 {
478 const unsigned char *buf;
479 int bits_checked;
480
481 buf = (const unsigned char *)buffer;
482 if (len < 6)
483 {
484 /* not enough peek'ed data in buf */
485 return 0;
486 }
487
488 /*
489 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
490 */
491 bits_checked = 0;
492 if (buf[0] != 0xFD)
493 return 0;
494 bits_checked += 8;
495 if (buf[1] != 0x37)
496 return 0;
497 bits_checked += 8;
498 if (buf[2] != 0x7A)
499 return 0;
500 bits_checked += 8;
501 if (buf[3] != 0x58)
502 return 0;
503 bits_checked += 8;
504 if (buf[4] != 0x5A)
505 return 0;
506 bits_checked += 8;
507 if (buf[5] != 0x00)
508 return 0;
509 bits_checked += 8;
510
511 LogBabblePrintf ("compress_xz::bid_xz: success: %d\n", bits_checked);
512 return (bits_checked);
513 }
514
515 int
516 compress_xz::bid_lzma (void * buffer, size_t len)
517 {
518 const unsigned char *buf;
519 uint32_t dicsize;
520 uint64_t uncompressed_size;
521 int bits_checked;
522
523 if (len < 14)
524 {
525 /* not enough peek'ed data in buffer */
526 return 0;
527 }
528 buf = (unsigned char *)buffer;
529
530 /* First byte of raw LZMA stream is commonly 0x5d.
531 * The first byte is a special number, which consists of
532 * three parameters of LZMA compression, a number of literal
533 * context bits(which is from 0 to 8, default is 3), a number
534 * of literal pos bits(which is from 0 to 4, default is 0),
535 * a number of pos bits(which is from 0 to 4, default is 2).
536 * The first byte is made by
537 * (pos bits * 5 + literal pos bit) * 9 + * literal contest bit,
538 * and so the default value in this field is
539 * (2 * 5 + 0) * 9 + 3 = 0x5d.
540 * lzma of LZMA SDK has options to change those parameters.
541 * It means a range of this field is from 0 to 224. And lzma of
542 * XZ Utils with option -e records 0x5e in this field. */
543 /* NOTE: If this checking of the first byte increases false
544 * recognition, we should allow only 0x5d and 0x5e for the first
545 * byte of LZMA stream. */
546 bits_checked = 0;
547 if (buf[0] > (4 * 5 + 4) * 9 + 8)
548 return 0;
549 /* Most likely value in the first byte of LZMA stream. */
550 if (buf[0] == 0x5d || buf[0] == 0x5e)
551 bits_checked += 8;
552
553 /* Sixth through fourteenth bytes are uncompressed size,
554 * stored in little-endian order. `-1' means uncompressed
555 * size is unknown and lzma of XZ Utils always records `-1'
556 * in this field. */
557 uncompressed_size = le64dec(buf+5);
558 if (uncompressed_size == (uint64_t)(-1))
559 bits_checked += 64;
560
561 /* Second through fifth bytes are dictionary size, stored in
562 * little-endian order. The minimum dictionary size is
563 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
564 * -d12 and the maxinam dictionary size is 1 << 27(128MiB)
565 * which the one uses with option -d27.
566 * NOTE: A comment of LZMA SDK source code says this dictionary
567 * range is from 1 << 12 to 1 << 30. */
568 dicsize = le32dec(buf+1);
569 switch (dicsize)
570 {
571 case 0x00001000:/* lzma of LZMA SDK option -d12. */
572 case 0x00002000:/* lzma of LZMA SDK option -d13. */
573 case 0x00004000:/* lzma of LZMA SDK option -d14. */
574 case 0x00008000:/* lzma of LZMA SDK option -d15. */
575 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
576 * lzma of LZMA SDK option -d16. */
577 case 0x00020000:/* lzma of LZMA SDK option -d17. */
578 case 0x00040000:/* lzma of LZMA SDK option -d18. */
579 case 0x00080000:/* lzma of XZ Utils option -2.
580 * lzma of LZMA SDK option -d19. */
581 case 0x00100000:/* lzma of XZ Utils option -3.
582 * lzma of LZMA SDK option -d20. */
583 case 0x00200000:/* lzma of XZ Utils option -4.
584 * lzma of LZMA SDK option -d21. */
585 case 0x00400000:/* lzma of XZ Utils option -5.
586 * lzma of LZMA SDK option -d22. */
587 case 0x00800000:/* lzma of XZ Utils option -6.
588 * lzma of LZMA SDK option -d23. */
589 case 0x01000000:/* lzma of XZ Utils option -7.
590 * lzma of LZMA SDK option -d24. */
591 case 0x02000000:/* lzma of XZ Utils option -8.
592 * lzma of LZMA SDK option -d25. */
593 case 0x04000000:/* lzma of XZ Utils option -9.
594 * lzma of LZMA SDK option -d26. */
595 case 0x08000000:/* lzma of LZMA SDK option -d27. */
596 bits_checked += 32;
597 break;
598 default:
599 /* If a memory usage for encoding was not enough on
600 * the platform where LZMA stream was made, lzma of
601 * XZ Utils automatically decreased the dictionary
602 * size to enough memory for encoding by 1Mi bytes
603 * (1 << 20).*/
604 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000
605 && (dicsize & ((1 << 20)-1)) == 0
606 && bits_checked == 8 + 64)
607 {
608 bits_checked += 32;
609 break;
610 }
611 /* Otherwise dictionary size is unlikely. But it is
612 * possible that someone makes lzma stream with
613 * liblzma/LZMA SDK in one's dictionary size. */
614 return 0;
615 }
616
617 /* TODO: The above test is still very weak. It would be
618 * good to do better. */
619 LogBabblePrintf ("compress_xz::bid_lzma: success: %d\n", bits_checked);
620 return (bits_checked);
621 }
622
623
This page took 0.061805 seconds and 5 git commands to generate.