]> cygwin.com Git - cygwin-apps/setup.git/blob - compress_xz.cc
Suppress bogus free-nonheap-object warning in iniparse.cc
[cygwin-apps/setup.git] / compress_xz.cc
1 /*
2 * Copyright (c) 2008, Charles Wilson
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * A copy of the GNU General Public License can be found at
10 * http://www.gnu.org/
11 *
12 * Written by Charles Wilson <cygwin@cygwin.com>
13 *
14 * Portions of bid_xz() and bid_lzma() adapted from the libarchive
15 * archive_read_support_compression_xz.c functions xz_bidder_bid()
16 * and lzma_bidder_bid(), which are under a BSD license (reproduced
17 * below).
18 */
19
20 #include "compress_xz.h"
21 #include "LogSingleton.h"
22
23 #include <stdexcept>
24
25 #include <errno.h>
26 #include <memory.h>
27 #include <malloc.h>
28
/*
 * Decode an unsigned 32-bit little-endian integer from the four bytes
 * starting at pp.  The pointer does not need to be aligned.
 */
static inline uint32_t
le32dec(const void *pp)
{
  unsigned char const *p = (unsigned char const *)pp;

  /* Widen every byte to uint32_t before shifting.  An unsigned char is
   * promoted to (signed) int, so shifting a byte >= 0x80 left by 24
   * would overflow the int. */
  return (((uint32_t)p[3] << 24) | ((uint32_t)p[2] << 16)
          | ((uint32_t)p[1] << 8) | (uint32_t)p[0]);
}
35
36 static inline uint64_t
37 le64dec(const void *pp)
38 {
39 unsigned char const *p = (unsigned char const *)pp;
40 return (((uint64_t)le32dec(p + 4) << 32) | le32dec(p));
41 }
42
43 /*
44 * Predicate: the stream is open for read.
45 */
46 compress_xz::compress_xz (io_stream * parent)
47 :
48 original(NULL),
49 owns_original(true),
50 peeklen(0),
51 lasterr(0),
52 compression_type (COMPRESSION_UNKNOWN)
53 {
54 /* read only */
55 if (!parent || parent->error())
56 {
57 lasterr = EBADF;
58 return;
59 }
60 original = parent;
61
62 create ();
63 init_decoder ();
64 }
65
66 void
67 compress_xz::create ()
68 {
69 unsigned char * out_block = NULL;
70 unsigned char * in_block = NULL;
71
72 state = (struct private_data *)calloc(sizeof(*state), 1);
73 out_block = (unsigned char *)malloc(out_block_size);
74 in_block = (unsigned char *)malloc(in_block_size);
75 if (state == NULL || out_block == NULL || in_block == NULL)
76 {
77 free(out_block);
78 free(in_block);
79 free(state);
80 lasterr = ENOMEM;
81 return;
82 }
83
84 memset(&(state->stream), 0x00, sizeof(state->stream));
85 state->out_block_size = out_block_size;
86 state->out_block = out_block;
87 state->in_block_size = in_block_size;
88 state->in_block = in_block;
89 state->out_p = state->out_block;
90 state->stream.avail_in = 0;
91 state->stream.next_out = state->out_block;
92 state->stream.avail_out = state->out_block_size;
93 }
94
95 ssize_t
96 compress_xz::read (void *buffer, size_t len)
97 {
98 if ( compression_type != COMPRESSION_XZ
99 && compression_type != COMPRESSION_LZMA)
100 {
101 return -1;
102 }
103
104 /* there is no recovery from a busted stream */
105 if (this->lasterr)
106 {
107 return -1;
108 }
109 if (len == 0)
110 {
111 return 0;
112 }
113
114 /* peekbuf is layered on top of existing buffering code */
115 if (this->peeklen)
116 {
117 ssize_t tmplen = std::min (this->peeklen, len);
118 this->peeklen -= tmplen;
119 memcpy (buffer, this->peekbuf, tmplen);
120 memmove (this->peekbuf, this->peekbuf + tmplen, sizeof(this->peekbuf) - tmplen);
121 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
122 if (tmpread >= 0)
123 return tmpread + tmplen;
124 else
125 return tmpread;
126 }
127
128 if (state->out_p < state->out_block + state->out_pos)
129 /* out_p - out_block < out_pos, but avoid sign/unsigned warning */
130 {
131 ssize_t tmplen = std::min ((size_t)(state->out_block + state->out_pos - state->out_p), len);
132 memcpy (buffer, state->out_p, tmplen);
133 state->out_p += tmplen;
134 ssize_t tmpread = read (&((char *) buffer)[tmplen], len - tmplen);
135 if (tmpread >= 0)
136 return tmpread + tmplen;
137 else
138 return tmpread;
139 }
140
141 size_t lenRemaining = len;
142 unsigned char * bufp = (unsigned char *)buffer;
143 size_t avail_in = 0;
144 size_t avail_out = 0;
145 size_t decompressed = 0;
146 size_t consumed = 0;
147 /* if we made it here, any existing uncompressed data in out_block
148 * has been consumed, so reset out_p and out_pos
149 */
150 state->out_p = state->out_block;
151 state->out_pos = 0;
152 do
153 {
154 if (state->in_pos == state->in_size)
155 {
156 /* no compressed data ready; read some more */
157 state->in_size = (size_t) this->original->read(state->in_block, state->in_block_size);
158 state->in_pos = 0;
159 }
160
161 avail_in = state->in_size - state->in_pos; /* will be 0 if EOF */
162 avail_out = state->out_block_size - state->out_pos;
163
164 state->stream.next_out = state->out_block + state->out_pos;
165 state->stream.avail_out = avail_out;
166 state->stream.next_in = state->in_block + state->in_pos;
167 state->stream.avail_in = avail_in;
168
169 lzma_ret res = lzma_code (&(state->stream),
170 (state->stream.avail_in == 0) ? LZMA_FINISH : LZMA_RUN);
171
172 consumed = avail_in - state->stream.avail_in;
173 decompressed = avail_out - state->stream.avail_out;
174
175 state->in_pos += consumed;
176 state->out_pos += decompressed;
177
178 ssize_t tmplen = std::min (decompressed, lenRemaining);
179 memcpy (bufp, state->out_p, tmplen);
180 state->out_p += tmplen;
181 bufp += tmplen;
182 lenRemaining -= tmplen;
183 state->total_out += decompressed;
184 state->total_in += consumed;
185
186 switch (res)
187 {
188 case LZMA_STREAM_END: /* Found end of stream. */
189 state->eof = 1;
190 /* FALL THROUGH */
191 case LZMA_OK: /* Decompressor made some progress. */
192 break;
193 case LZMA_MEM_ERROR:
194 LogPlainPrintf ("Lzma library error: Cannot allocate memory\n");
195 this->lasterr = ENOMEM;
196 return -1;
197 case LZMA_MEMLIMIT_ERROR:
198 LogPlainPrintf ("Lzma library error: Out of memory\n");
199 this->lasterr = ENOMEM;
200 return -1;
201 case LZMA_FORMAT_ERROR:
202 LogPlainPrintf ("Lzma library error: format not recognized\n");
203 this->lasterr = EINVAL;
204 return -1;
205 case LZMA_OPTIONS_ERROR:
206 LogPlainPrintf ("Lzma library error: Invalid options\n");
207 this->lasterr = EINVAL;
208 return -1;
209 case LZMA_DATA_ERROR:
210 LogPlainPrintf ("Lzma library error: Corrupted input data\n");
211 this->lasterr = EINVAL;
212 return -1;
213 case LZMA_BUF_ERROR:
214 LogPlainPrintf ("Lzma library error: No progress is possible\n");
215 this->lasterr = EINVAL;
216 return -1;
217 case LZMA_PROG_ERROR:
218 LogPlainPrintf ("Lzma library error: Internal error\n");
219 this->lasterr = EINVAL;
220 return -1;
221 default:
222 LogPlainPrintf ("Lzma decompression failed: Unknown error %d\n", res);
223 this->lasterr = EINVAL;
224 return -1;
225 }
226 }
227 while (lenRemaining != 0 && !state->eof);
228
229 return (len - lenRemaining);
230 }
231
232 ssize_t
233 compress_xz::write (const void *buffer, size_t len)
234 {
235 throw new std::logic_error("compress_xz::write is not implemented");
236 }
237
238 ssize_t
239 compress_xz::peek (void *buffer, size_t len)
240 {
241 /* can only peek 512 bytes */
242 if (len > 512)
243 return ENOMEM;
244
245 if (len > this->peeklen)
246 {
247 size_t want = len - this->peeklen;
248 ssize_t got = read (&(this->peekbuf[peeklen]), want);
249 if (got >= 0)
250 this->peeklen += got;
251 else
252 /* error */
253 return got;
254 /* we may have read less than requested. */
255 memcpy (buffer, this->peekbuf, this->peeklen);
256 return this->peeklen;
257 }
258 else
259 {
260 memcpy (buffer, this->peekbuf, len);
261 return len;
262 }
263 return 0;
264 }
265
266 long
267 compress_xz::tell ()
268 {
269 throw new std::logic_error("compress_xz::tell is not implemented");
270 }
271
272 int
273 compress_xz::seek (long where, io_stream_seek_t whence)
274 {
275 if ((whence == IO_SEEK_SET) && (where == 0))
276 {
277 int result = original->seek(where, whence);
278 destroy ();
279 peeklen = 0;
280 lasterr = 0;
281 create ();
282 init_decoder ();
283 return result;
284 }
285
286 throw new std::logic_error("compress_xz::seek is not implemented");
287 }
288
289 int
290 compress_xz::error ()
291 {
292 return lasterr;
293 }
294
295 int
296 compress_xz::set_mtime (time_t mtime)
297 {
298 if (original)
299 return original->set_mtime (mtime);
300 return 1;
301 }
302
303 time_t
304 compress_xz::get_mtime ()
305 {
306 if (original)
307 return original->get_mtime ();
308 return 0;
309 }
310
311 mode_t
312 compress_xz::get_mode ()
313 {
314 if (original)
315 return original->get_mode ();
316 return 0;
317 }
318
319 void
320 compress_xz::release_original ()
321 {
322 owns_original = false;
323 }
324
325 void
326 compress_xz::destroy ()
327 {
328 if (state)
329 {
330 if ( compression_type == COMPRESSION_XZ
331 || compression_type == COMPRESSION_LZMA)
332 {
333 lzma_end(&(state->stream));
334 }
335
336 if (state->out_block)
337 {
338 free (state->out_block);
339 state->out_block = NULL;
340 }
341
342 if (state->in_block)
343 {
344 free (state->in_block);
345 state->in_block = NULL;
346 }
347
348 free(state);
349 state = NULL;
350
351 compression_type = COMPRESSION_UNKNOWN;
352 }
353 }
354
355 compress_xz::~compress_xz ()
356 {
357 destroy ();
358
359 if (original && owns_original)
360 delete original;
361 }
362
363 /* ===========================================================================
364 * Check the header of a lzma_stream opened for reading, and initialize
365 * the appropriate decoder (xz or lzma).
366 * IN assertion:
367 * the stream has already been created sucessfully
368 * this method is called only once per stream
369 * OUT assertion - success:
370 * compression_type is set to COMPRESSION_XZ or COMPRESSION_LZMA
371 * state->stream is initialized with the appropriate decoder
372 * lzma: the first 14 bytes of the stream are read (+ whatever
373 * the decoder itself consumes on initialization)
374 * xz: the first 6 bytes of the stram are read (+ whatever the
375 * decoder itself consumes on initialization)
376 * last_error is zero
377 * OUT assertion - error:
378 * last_error is non-zero
379 */
380 void
381 compress_xz::init_decoder (void)
382 {
383 unsigned char buf[14];
384 int ret;
385 this->compression_type = COMPRESSION_UNKNOWN;
386
387 /* read properties */
388 if (this->original->peek (buf, 6) != 6)
389 {
390 this->lasterr = (errno ? errno : EIO);
391 return;
392 }
393
394 if (bid_xz ((void *)buf, 6) > 0)
395 {
396 this->compression_type = COMPRESSION_XZ;
397 }
398 else
399 {
400 if (this->original->peek (buf + 6, 8) != 8)
401 {
402 this->lasterr = (errno ? errno : EIO);
403 return;
404 }
405 if (bid_lzma ((void *)buf, 14) > 0)
406 {
407 this->compression_type = COMPRESSION_LZMA;
408 }
409 }
410
411 switch (compression_type)
412 {
413 case COMPRESSION_XZ:
414 ret = lzma_stream_decoder (&(state->stream),
415 (1U << 30),/* memlimit */
416 LZMA_CONCATENATED);
417 break;
418 case COMPRESSION_LZMA:
419 ret = lzma_alone_decoder (&(state->stream),
420 (1U << 30));/* memlimit */
421 break;
422 default:
423 this->lasterr = EINVAL;
424 return;
425 }
426
427 switch (ret)
428 {
429 case LZMA_OK:
430 break;
431 case LZMA_MEM_ERROR:
432 this->lasterr = ENOMEM;
433 break;
434 case LZMA_OPTIONS_ERROR:
435 this->lasterr = EINVAL;
436 break;
437 default:
438 this->lasterr = EINVAL;
439 break;
440 }
441 }
442
443 bool
444 compress_xz::is_xz_or_lzma (void * buffer, size_t len)
445 {
446 int bits_checked_xz;
447 int bits_checked_lzma;
448
449 bits_checked_xz = bid_xz (buffer, len);
450 if (bits_checked_xz)
451 return true;
452
453 bits_checked_lzma = bid_lzma (buffer, len);
454 if (bits_checked_lzma)
455 return true;
456
457 return false;
458 }
459
460 /*-
461 * Portions of bid_xz() and bid_lzma() have been adapted from the
462 * libarchive archive_read_support_compression_xz.c functions
463 * xz_bidder_bid() and lzma_bidder_bid(), which were released under
464 * the 2-clause (simplified) BSD license, reproduced below.
465 *
466 * (modifications for setup.exe) Copyright (c) 2010 Charles Wilson
467 * Copyright (c) 2009 Michihiro NAKAJIMA
468 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
469 * All rights reserved.
470 *
471 * Redistribution and use in source and binary forms, with or without
472 * modification, are permitted provided that the following conditions
473 * are met:
474 * 1. Redistributions of source code must retain the above copyright
475 * notice, this list of conditions and the following disclaimer.
476 * 2. Redistributions in binary form must reproduce the above copyright
477 * notice, this list of conditions and the following disclaimer in the
478 * documentation and/or other materials provided with the distribution.
479 *
480 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
481 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
482 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
483 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
484 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
485 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
486 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
487 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
488 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
489 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
490 */
491 int
492 compress_xz::bid_xz (void * buffer, size_t len)
493 {
494 const unsigned char *buf;
495 int bits_checked;
496
497 buf = (const unsigned char *)buffer;
498 if (len < 6)
499 {
500 /* not enough peek'ed data in buf */
501 return 0;
502 }
503
504 /*
505 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
506 */
507 bits_checked = 0;
508 if (buf[0] != 0xFD)
509 return 0;
510 bits_checked += 8;
511 if (buf[1] != 0x37)
512 return 0;
513 bits_checked += 8;
514 if (buf[2] != 0x7A)
515 return 0;
516 bits_checked += 8;
517 if (buf[3] != 0x58)
518 return 0;
519 bits_checked += 8;
520 if (buf[4] != 0x5A)
521 return 0;
522 bits_checked += 8;
523 if (buf[5] != 0x00)
524 return 0;
525 bits_checked += 8;
526
527 #ifdef DEBUG
528 LogBabblePrintf ("compress_xz::bid_xz: success: %d\n", bits_checked);
529 #endif
530 return (bits_checked);
531 }
532
533 int
534 compress_xz::bid_lzma (void * buffer, size_t len)
535 {
536 const unsigned char *buf;
537 uint32_t dicsize;
538 uint64_t uncompressed_size;
539 int bits_checked;
540
541 if (len < 14)
542 {
543 /* not enough peek'ed data in buffer */
544 return 0;
545 }
546 buf = (unsigned char *)buffer;
547
548 /* First byte of raw LZMA stream is commonly 0x5d.
549 * The first byte is a special number, which consists of
550 * three parameters of LZMA compression, a number of literal
551 * context bits(which is from 0 to 8, default is 3), a number
552 * of literal pos bits(which is from 0 to 4, default is 0),
553 * a number of pos bits(which is from 0 to 4, default is 2).
554 * The first byte is made by
555 * (pos bits * 5 + literal pos bit) * 9 + * literal contest bit,
556 * and so the default value in this field is
557 * (2 * 5 + 0) * 9 + 3 = 0x5d.
558 * lzma of LZMA SDK has options to change those parameters.
559 * It means a range of this field is from 0 to 224. And lzma of
560 * XZ Utils with option -e records 0x5e in this field. */
561 /* NOTE: If this checking of the first byte increases false
562 * recognition, we should allow only 0x5d and 0x5e for the first
563 * byte of LZMA stream. */
564 bits_checked = 0;
565 if (buf[0] > (4 * 5 + 4) * 9 + 8)
566 return 0;
567 /* Most likely value in the first byte of LZMA stream. */
568 if (buf[0] == 0x5d || buf[0] == 0x5e)
569 bits_checked += 8;
570
571 /* Sixth through fourteenth bytes are uncompressed size,
572 * stored in little-endian order. `-1' means uncompressed
573 * size is unknown and lzma of XZ Utils always records `-1'
574 * in this field. */
575 uncompressed_size = le64dec(buf+5);
576 if (uncompressed_size == (uint64_t)(-1))
577 bits_checked += 64;
578
579 /* Second through fifth bytes are dictionary size, stored in
580 * little-endian order. The minimum dictionary size is
581 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
582 * -d12 and the maxinam dictionary size is 1 << 27(128MiB)
583 * which the one uses with option -d27.
584 * NOTE: A comment of LZMA SDK source code says this dictionary
585 * range is from 1 << 12 to 1 << 30. */
586 dicsize = le32dec(buf+1);
587 switch (dicsize)
588 {
589 case 0x00001000:/* lzma of LZMA SDK option -d12. */
590 case 0x00002000:/* lzma of LZMA SDK option -d13. */
591 case 0x00004000:/* lzma of LZMA SDK option -d14. */
592 case 0x00008000:/* lzma of LZMA SDK option -d15. */
593 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
594 * lzma of LZMA SDK option -d16. */
595 case 0x00020000:/* lzma of LZMA SDK option -d17. */
596 case 0x00040000:/* lzma of LZMA SDK option -d18. */
597 case 0x00080000:/* lzma of XZ Utils option -2.
598 * lzma of LZMA SDK option -d19. */
599 case 0x00100000:/* lzma of XZ Utils option -3.
600 * lzma of LZMA SDK option -d20. */
601 case 0x00200000:/* lzma of XZ Utils option -4.
602 * lzma of LZMA SDK option -d21. */
603 case 0x00400000:/* lzma of XZ Utils option -5.
604 * lzma of LZMA SDK option -d22. */
605 case 0x00800000:/* lzma of XZ Utils option -6.
606 * lzma of LZMA SDK option -d23. */
607 case 0x01000000:/* lzma of XZ Utils option -7.
608 * lzma of LZMA SDK option -d24. */
609 case 0x02000000:/* lzma of XZ Utils option -8.
610 * lzma of LZMA SDK option -d25. */
611 case 0x04000000:/* lzma of XZ Utils option -9.
612 * lzma of LZMA SDK option -d26. */
613 case 0x08000000:/* lzma of LZMA SDK option -d27. */
614 bits_checked += 32;
615 break;
616 default:
617 /* If a memory usage for encoding was not enough on
618 * the platform where LZMA stream was made, lzma of
619 * XZ Utils automatically decreased the dictionary
620 * size to enough memory for encoding by 1Mi bytes
621 * (1 << 20).*/
622 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000
623 && (dicsize & ((1 << 20)-1)) == 0
624 && bits_checked == 8 + 64)
625 {
626 bits_checked += 32;
627 break;
628 }
629 /* Otherwise dictionary size is unlikely. But it is
630 * possible that someone makes lzma stream with
631 * liblzma/LZMA SDK in one's dictionary size. */
632 return 0;
633 }
634
635 /* TODO: The above test is still very weak. It would be
636 * good to do better. */
637 #ifdef DEBUG
638 LogBabblePrintf ("compress_xz::bid_lzma: success: %d\n", bits_checked);
639 #endif
640 return (bits_checked);
641 }
This page took 0.062894 seconds and 5 git commands to generate.