1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38#define UCLPACK_COMPAT 0
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <ctype.h>
43#include <errno.h>
44#include <stdint.h>
45#include <limits.h>
46#include <assert.h>
47#if UCLPACK_COMPAT
48#include <netinet/in.h>
49#endif
50
51
52#ifndef VERBOSE
53#define Fprintf(x)
54#define wterr 0
55#else
56#define Fprintf(x) fprintf x
57#endif
58
59#ifndef MAIN
60extern
61#endif
62FILE *infile, *outfile;
63
64#if defined(ENCODE) || defined(DECODE)
65
66#ifndef ENDIAN
67#define ENDIAN 0
68#endif
69#ifndef BITSIZE
70#define BITSIZE 32
71#endif
72
73#ifndef COMPACT
74static __inline__ void Error(char *message)
75{
76 Fprintf((stderr, "\n%s\n", message));
77 exit(EXIT_FAILURE);
78}
79#endif
80
81
82
83static unsigned long i86ul_to_host(unsigned long ul)
84{
85 unsigned long res = 0;
86 int i;
87 union
88 {
89 unsigned char c[4];
90 unsigned long ul;
91 } u;
92
93 u.ul = ul;
94 for (i = 3; i >= 0; i--)
95 res = (res << 8) + u.c[i];
96 return res;
97}
98
99static unsigned long host_to_i86ul(unsigned long ul)
100{
101 int i;
102 union
103 {
104 unsigned char c[4];
105 unsigned long ul;
106 } u;
107
108 for (i = 0; i < 4; i++)
109 {
110 u.c[i] = ul & 0xff;
111 ul >>= 8;
112 }
113 return u.ul;
114}
115#endif
116
117
118
119#if UCLPACK_COMPAT
120
121static const unsigned char magic[8] =
122{ 0x00, 0xe9, 0x55, 0x43, 0x4c, 0xff, 0x01, 0x1a };
123
124#endif
125
126#ifdef ENCODE
127
128
129#define N (1024*1024ul)
130#define THRESHOLD 1
131#define F 2048
132#define M2_MAX_OFFSET 0xd00
133
134
135
136struct ucl_compress_config
137{
138 int bb_endian;
139 int bb_size;
140 unsigned int max_offset;
141 unsigned int max_match;
142 int s_level;
143 int h_level;
144 int p_level;
145 int c_flags;
146 unsigned int m_size;
147};
148
149struct ucl_compress
150{
151 int init;
152
153 unsigned int look;
154
155 unsigned int m_len;
156 unsigned int m_off;
157
158 unsigned int last_m_len;
159 unsigned int last_m_off;
160
161 const unsigned char *bp;
162 const unsigned char *ip;
163 const unsigned char *in;
164 const unsigned char *in_end;
165 unsigned char *out;
166
167 uint32_t bb_b;
168 unsigned bb_k;
169 unsigned bb_c_endian;
170 unsigned bb_c_s;
171 unsigned bb_c_s8;
172 unsigned char *bb_p;
173 unsigned char *bb_op;
174
175 struct ucl_compress_config conf;
176 unsigned int *result;
177
178 unsigned int textsize;
179 unsigned int codesize;
180 unsigned int printcount;
181
182
183
184
185 unsigned long lit_bytes;
186 unsigned long match_bytes;
187 unsigned long rep_bytes;
188 unsigned long lazy;
189};
190
191
192
193#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1))
194
195#define UCL_E_OK 0
196#define UCL_E_INVALID_ARGUMENT 1
197#define UCL_E_OUT_OF_MEMORY 2
198#define UCL_E_ERROR 3
199
200
201
202
203
204#define SWD_HSIZE 16384
205#define SWD_MAX_CHAIN 2048
206
207#define HEAD3(b,p) \
208 (((0x9f5f*(((((uint32_t)b[p]<<5)^b[p+1])<<5)^b[p+2]))>>5) & (SWD_HSIZE-1))
209
210#define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8))
211#define NIL2 UINT_MAX
212
213struct ucl_swd
214{
215
216 unsigned int n;
217 unsigned int f;
218 unsigned int threshold;
219
220
221 unsigned int max_chain;
222 unsigned int nice_length;
223 int use_best_off;
224 unsigned int lazy_insert;
225
226
227 unsigned int m_len;
228 unsigned int m_off;
229 unsigned int look;
230 int b_char;
231#if defined(SWD_BEST_OFF)
232 unsigned int best_off[ SWD_BEST_OFF ];
233#endif
234
235
236 struct ucl_compress *c;
237 unsigned int m_pos;
238#if defined(SWD_BEST_OFF)
239 unsigned int best_pos[ SWD_BEST_OFF ];
240#endif
241
242
243 const uint8_t *dict;
244 const uint8_t *dict_end;
245 unsigned int dict_len;
246
247
248 unsigned int ip;
249 unsigned int bp;
250 unsigned int rp;
251 unsigned int b_size;
252
253 unsigned char *b_wrap;
254
255 unsigned int node_count;
256 unsigned int first_rp;
257
258 unsigned char b [ N + F + F ];
259 unsigned int head3 [ SWD_HSIZE ];
260 unsigned int succ3 [ N + F ];
261 unsigned int best3 [ N + F ];
262 unsigned int llen3 [ SWD_HSIZE ];
263 unsigned int head2 [ 65536U ];
264};
265
266#define s_head3(s,key) s->head3[key]
267
268
269#if !defined( NDEBUG)
270static void assert_match(const struct ucl_swd * swd, unsigned int m_len,
271 unsigned int m_off )
272
273{
274 const struct ucl_compress *c = swd->c;
275 unsigned int d_off;
276
277 assert(m_len >= 2);
278 if (m_off <= (unsigned int) (c->bp - c->in))
279 {
280 assert(c->bp - m_off + m_len < c->ip);
281 assert(memcmp(c->bp, c->bp - m_off, m_len) == 0);
282 }
283 else
284 {
285 assert(swd->dict != NULL);
286 d_off = m_off - (unsigned int) (c->bp - c->in);
287 assert(d_off <= swd->dict_len);
288 if (m_len > d_off)
289 {
290 assert(memcmp(c->bp, swd->dict_end - d_off, d_off) ==
291 0);
292
293 assert(c->in + m_len - d_off < c->ip);
294 assert(memcmp(c->bp + d_off, c->in, m_len - d_off) ==
295 0);
296
297 }
298 else
299 {
300 assert(memcmp(c->bp, swd->dict_end - d_off, m_len) ==
301 0);
302
303 }
304 }
305}
306#else
307# define assert_match(a,b,c) ((void)0)
308#endif
309
310
311
312
313
314
315static
316void swd_initdict(struct ucl_swd *s, const uint8_t *dict, unsigned int dict_len)
317
318{
319 s->dict = s->dict_end = NULL;
320 s->dict_len = 0;
321
322 if (!dict || dict_len <= 0)
323 return;
324 if (dict_len > s->n)
325 {
326 dict += dict_len - s->n;
327 dict_len = s->n;
328 }
329
330 s->dict = dict;
331 s->dict_len = dict_len;
332 s->dict_end = dict + dict_len;
333 memcpy(s->b,dict,dict_len);
334 s->ip = dict_len;
335}
336
337
338static
339void swd_insertdict(struct ucl_swd *s, unsigned int node, unsigned int len)
340{
341 unsigned int key;
342
343 s->node_count = s->n - len;
344 s->first_rp = node;
345
346 while (len-- > 0)
347 {
348 key = HEAD3(s->b,node);
349 s->succ3[node] = s_head3(s,key);
350 s->head3[key] = (unsigned int)(node);
351 s->best3[node] = (unsigned int)(s->f + 1);
352 s->llen3[key]++;
353 assert(s->llen3[key] <= s->n);
354
355 key = HEAD2(s->b,node);
356 s->head2[key] = (unsigned int)(node);
357
358 node++;
359 }
360}
361
362
363
364
365
366
367static
368int swd_init(struct ucl_swd *s, const uint8_t *dict, unsigned int dict_len)
369{
370 unsigned int i = 0;
371 int c = 0;
372
373 if (s->n == 0)
374 s->n = N;
375 if (s->f == 0)
376 s->f = F;
377 s->threshold = THRESHOLD;
378 if (s->n > N || s->f > F)
379 return UCL_E_INVALID_ARGUMENT;
380
381
382 s->max_chain = SWD_MAX_CHAIN;
383 s->nice_length = s->f;
384 s->use_best_off = 0;
385 s->lazy_insert = 0;
386
387 s->b_size = s->n + s->f;
388 if (s->b_size + s->f >= UINT_MAX)
389 return UCL_E_ERROR;
390 s->b_wrap = s->b + s->b_size;
391 s->node_count = s->n;
392
393 memset(s->llen3, 0, sizeof(s->llen3[0]) * SWD_HSIZE);
394 for (i = 0; i < 65536U; i++)
395 s->head2[i] = NIL2;
396
397 s->ip = 0;
398 swd_initdict(s,dict,dict_len);
399 s->bp = s->ip;
400 s->first_rp = s->ip;
401
402 assert(s->ip + s->f <= s->b_size);
403
404 s->look = (unsigned int) (s->c->in_end - s->c->ip);
405 if (s->look > 0)
406 {
407 if (s->look > s->f)
408 s->look = s->f;
409 memcpy(&s->b[s->ip],s->c->ip,s->look);
410 s->c->ip += s->look;
411 s->ip += s->look;
412 }
413 if (s->ip == s->b_size)
414 s->ip = 0;
415
416 if (s->look >= 2 && s->dict_len > 0)
417 swd_insertdict(s,0,s->dict_len);
418
419 s->rp = s->first_rp;
420 if (s->rp >= s->node_count)
421 s->rp -= s->node_count;
422 else
423 s->rp += s->b_size - s->node_count;
424
425
426
427 return UCL_E_OK;
428}
429
430
431static
432void swd_exit(struct ucl_swd *s)
433{
434
435
436}
437
438#define swd_pos2off(s,pos) \
439 (s->bp > (pos) ? s->bp - (pos) : s->b_size - ((pos) - s->bp))
440
441
442
443
444
445static __inline__
446void swd_getbyte(struct ucl_swd *s)
447{
448 int c;
449
450 if ((c = getbyte(*(s->c))) < 0)
451 {
452 if (s->look > 0)
453 --s->look;
454 }
455 else
456 {
457 s->b[s->ip] = (uint8_t)(c);
458 if (s->ip < s->f)
459 s->b_wrap[s->ip] = (uint8_t)(c);
460 }
461 if (++s->ip == s->b_size)
462 s->ip = 0;
463 if (++s->bp == s->b_size)
464 s->bp = 0;
465 if (++s->rp == s->b_size)
466 s->rp = 0;
467}
468
469
470
471
472static __inline__
473void swd_remove_node(struct ucl_swd *s, unsigned int node)
474{
475 if (s->node_count == 0)
476 {
477 unsigned int key;
478
479#ifdef UCL_DEBUG
480 if (s->first_rp != UINT_MAX)
481 {
482 if (node != s->first_rp)
483 printf("Remove %5d: %5d %5d %5d %5d %6d %6d\n",
484
485 node, s->rp, s->ip, s->bp, s->first_rp,
486 s->ip - node, s->ip - s->bp);
487 assert(node == s->first_rp);
488 s->first_rp = UINT_MAX;
489 }
490#endif
491
492 key = HEAD3(s->b,node);
493 assert(s->llen3[key] > 0);
494 --s->llen3[key];
495
496 key = HEAD2(s->b,node);
497 assert(s->head2[key] != NIL2);
498 if ((unsigned int) s->head2[key] == node)
499 s->head2[key] = NIL2;
500 }
501 else
502 --s->node_count;
503}
504
505
506
507
508
509
510
511static
512void swd_accept(struct ucl_swd *s, unsigned int n)
513{
514 assert(n <= s->look);
515
516 if (n > 0) do
517 {
518 unsigned int key;
519
520 swd_remove_node(s,s->rp);
521
522
523 key = HEAD3(s->b,s->bp);
524 s->succ3[s->bp] = s_head3(s,key);
525 s->head3[key] = (unsigned int)(s->bp);
526 s->best3[s->bp] = (unsigned int)(s->f + 1);
527 s->llen3[key]++;
528 assert(s->llen3[key] <= s->n);
529
530
531 key = HEAD2(s->b,s->bp);
532 s->head2[key] = (unsigned int)(s->bp);
533
534 swd_getbyte(s);
535 } while (--n > 0);
536}
537
538
539
540
541
542static
543void swd_search(struct ucl_swd *s, unsigned int node, unsigned int cnt)
544{
545 const unsigned char *p1;
546 const unsigned char *p2;
547 const unsigned char *px;
548
549 unsigned int m_len = s->m_len;
550 const unsigned char * b = s->b;
551 const unsigned char * bp = s->b + s->bp;
552 const unsigned char * bx = s->b + s->bp + s->look;
553 unsigned char scan_end1;
554
555 assert(s->m_len > 0);
556
557 scan_end1 = bp[m_len - 1];
558 for ( ; cnt-- > 0; node = s->succ3[node])
559 {
560 p1 = bp;
561 p2 = b + node;
562 px = bx;
563
564 assert(m_len < s->look);
565
566 if (
567 p2[m_len - 1] == scan_end1 &&
568 p2[m_len] == p1[m_len] &&
569 p2[0] == p1[0] &&
570 p2[1] == p1[1])
571 {
572 unsigned int i;
573 assert(memcmp(bp,&b[node],3) == 0);
574
575 p1 += 2; p2 += 2;
576 do {} while (++p1 < px && *p1 == *++p2);
577 i = p1 - bp;
578
579#ifdef UCL_DEBUG
580 if (memcmp(bp,&b[node],i) != 0)
581 printf("%5ld %5ld %02x%02x %02x%02x\n",
582 (long)s->bp, (long) node,
583 bp[0], bp[1], b[node], b[node+1]);
584#endif
585 assert(memcmp(bp,&b[node],i) == 0);
586
587#if defined(SWD_BEST_OFF)
588 if (i < SWD_BEST_OFF)
589 {
590 if (s->best_pos[i] == 0)
591 s->best_pos[i] = node + 1;
592 }
593#endif
594 if (i > m_len)
595 {
596 s->m_len = m_len = i;
597 s->m_pos = node;
598 if (m_len == s->look)
599 return;
600 if (m_len >= s->nice_length)
601 return;
602 if (m_len > (unsigned int) s->best3[node])
603 return;
604 scan_end1 = bp[m_len - 1];
605 }
606 }
607 }
608}
609
610static int swd_search2(struct ucl_swd *s)
611{
612 unsigned int key;
613
614 assert(s->look >= 2);
615 assert(s->m_len > 0);
616
617 key = s->head2[ HEAD2(s->b,s->bp) ];
618 if (key == NIL2)
619 return 0;
620#ifdef UCL_DEBUG
621 if (memcmp(&s->b[s->bp],&s->b[key],2) != 0)
622 printf("%5ld %5ld %02x%02x %02x%02x\n", (long)s->bp, (long)key,
623 s->b[s->bp], s->b[s->bp+1], s->b[key], s->b[key+1]);
624#endif
625 assert(memcmp(&s->b[s->bp],&s->b[key],2) == 0);
626#if defined(SWD_BEST_OFF)
627 if (s->best_pos[2] == 0)
628 s->best_pos[2] = key + 1;
629#endif
630
631 if (s->m_len < 2)
632 {
633 s->m_len = 2;
634 s->m_pos = key;
635 }
636 return 1;
637}
638
639
640
641
642
643static
644void swd_findbest(struct ucl_swd *s)
645{
646 unsigned int key;
647 unsigned int cnt, node;
648 unsigned int len;
649
650 assert(s->m_len > 0);
651
652
653 key = HEAD3(s->b,s->bp);
654 node = s->succ3[s->bp] = s_head3(s,key);
655 cnt = s->llen3[key]++;
656 assert(s->llen3[key] <= s->n + s->f);
657 if (cnt > s->max_chain && s->max_chain > 0)
658 cnt = s->max_chain;
659 s->head3[key] = (unsigned int)(s->bp);
660
661 s->b_char = s->b[s->bp];
662 len = s->m_len;
663 if (s->m_len >= s->look)
664 {
665 if (s->look == 0)
666 s->b_char = -1;
667 s->m_off = 0;
668 s->best3[s->bp] = (unsigned int)(s->f + 1);
669 }
670 else
671 {
672 if (swd_search2(s))
673 if (s->look >= 3)
674 swd_search(s,node,cnt);
675 if (s->m_len > len)
676 s->m_off = swd_pos2off(s,s->m_pos);
677 s->best3[s->bp] = (unsigned int)(s->m_len);
678
679#if defined(SWD_BEST_OFF)
680 if (s->use_best_off)
681 {
682 int i;
683 for (i = 2; i < SWD_BEST_OFF; i++)
684 if (s->best_pos[i] > 0)
685 s->best_off[i] =
686 swd_pos2off(s,s->best_pos[i]-1);
687
688 else
689 s->best_off[i] = 0;
690 }
691#endif
692 }
693
694 swd_remove_node(s,s->rp);
695
696
697 key = HEAD2(s->b,s->bp);
698 s->head2[key] = (unsigned int)(s->bp);
699}
700
701
702
703
704
705
706static int
707init_match ( struct ucl_compress *c, struct ucl_swd *s,
708 const uint8_t *dict, unsigned int dict_len,
709 uint32_t flags )
710{
711 int r;
712
713 assert(!c->init);
714 c->init = 1;
715
716 s->c = c;
717
718 c->last_m_len = c->last_m_off = 0;
719
720 c->textsize = c->codesize = c->printcount = 0;
721 c->lit_bytes = c->match_bytes = c->rep_bytes = 0;
722 c->lazy = 0;
723
724 r = swd_init(s,dict,dict_len);
725 if (r != UCL_E_OK)
726 {
727 swd_exit(s);
728 return r;
729 }
730
731 s->use_best_off = (flags & 1) ? 1 : 0;
732 return UCL_E_OK;
733}
734
735static int
736find_match ( struct ucl_compress *c, struct ucl_swd *s,
737 unsigned int this_len, unsigned int skip )
738{
739 assert(c->init);
740
741 if (skip > 0)
742 {
743 assert(this_len >= skip);
744 swd_accept(s, this_len - skip);
745 c->textsize += this_len - skip + 1;
746 }
747 else
748 {
749 assert(this_len <= 1);
750 c->textsize += this_len - skip;
751 }
752
753 s->m_len = THRESHOLD;
754#ifdef SWD_BEST_OFF
755 if (s->use_best_off)
756 memset(s->best_pos,0,sizeof(s->best_pos));
757#endif
758 swd_findbest(s);
759 c->m_len = s->m_len;
760 c->m_off = s->m_off;
761
762 swd_getbyte(s);
763
764 if (s->b_char < 0)
765 {
766 c->look = 0;
767 c->m_len = 0;
768 swd_exit(s);
769 }
770 else
771 {
772 c->look = s->look + 1;
773 }
774 c->bp = c->ip - c->look;
775
776#if 0
777
778 if (c->m_len > THRESHOLD && c->m_len + 1 <= c->look)
779 {
780 const uint8_t *ip = c->bp;
781 const uint8_t *m = c->bp - c->m_off;
782 const uint8_t *in = c->in;
783
784 if (ip - in > N)
785 in = ip - N;
786 for (;;)
787 {
788 while (*in != *ip)
789 in++;
790 if (in == ip)
791 break;
792 if (in != m)
793 if (memcmp(in,ip,c->m_len+1) == 0)
794 printf("%p %p %p %5d\n",
795 in, ip, m, c->m_len);
796
797 in++;
798 }
799 }
800#endif
801
802 return UCL_E_OK;
803}
804
805
806static int bbConfig(struct ucl_compress *c, int endian, int bitsize)
807{
808 if (endian != -1)
809 {
810 if (endian != 0)
811 return UCL_E_ERROR;
812 c->bb_c_endian = endian;
813 }
814 if (bitsize != -1)
815 {
816 if (bitsize != 8 && bitsize != 16 && bitsize != 32)
817 return UCL_E_ERROR;
818 c->bb_c_s = bitsize;
819 c->bb_c_s8 = bitsize / 8;
820 }
821 c->bb_b = 0; c->bb_k = 0;
822 c->bb_p = NULL;
823 c->bb_op = NULL;
824 return UCL_E_OK;
825}
826
827static void bbWriteBits(struct ucl_compress *c)
828{
829 uint8_t *p = c->bb_p;
830 uint32_t b = c->bb_b;
831
832 p[0] = (uint8_t)(b >> 0);
833 if (c->bb_c_s >= 16)
834 {
835 p[1] = (uint8_t)(b >> 8);
836 if (c->bb_c_s == 32)
837 {
838 p[2] = (uint8_t)(b >> 16);
839 p[3] = (uint8_t)(b >> 24);
840 }
841 }
842}
843
844
845static void bbPutBit(struct ucl_compress *c, unsigned bit)
846{
847 assert(bit == 0 || bit == 1);
848 assert(c->bb_k <= c->bb_c_s);
849
850 if (c->bb_k < c->bb_c_s)
851 {
852 if (c->bb_k == 0)
853 {
854 assert(c->bb_p == NULL);
855 c->bb_p = c->bb_op;
856 c->bb_op += c->bb_c_s8;
857 }
858 assert(c->bb_p != NULL);
859 assert(c->bb_p + c->bb_c_s8 <= c->bb_op);
860
861 c->bb_b = (c->bb_b << 1) + bit;
862 c->bb_k++;
863 }
864 else
865 {
866 assert(c->bb_p != NULL);
867 assert(c->bb_p + c->bb_c_s8 <= c->bb_op);
868
869 bbWriteBits(c);
870 c->bb_p = c->bb_op;
871 c->bb_op += c->bb_c_s8;
872 c->bb_b = bit;
873 c->bb_k = 1;
874 }
875}
876
877
878static void bbPutByte(struct ucl_compress *c, unsigned b)
879{
880
881 assert(c->bb_p == NULL || c->bb_p + c->bb_c_s8 <= c->bb_op);
882 *c->bb_op++ = (uint8_t)(b);
883}
884
885static void bbFlushBits(struct ucl_compress *c, unsigned filler_bit)
886{
887 if (c->bb_k > 0)
888 {
889 assert(c->bb_k <= c->bb_c_s);
890 while (c->bb_k != c->bb_c_s)
891 bbPutBit(c, filler_bit);
892 bbWriteBits(c);
893 c->bb_k = 0;
894 }
895 c->bb_p = NULL;
896}
897
898
899
900
901
902
903
904
905static void code_prefix_ss11(struct ucl_compress *c, uint32_t i)
906{
907 if (i >= 2)
908 {
909 uint32_t t = 4;
910 i += 2;
911 do {
912 t <<= 1;
913 } while (i >= t);
914 t >>= 1;
915 do {
916 t >>= 1;
917 bbPutBit(c, (i & t) ? 1 : 0);
918 bbPutBit(c, 0);
919 } while (t > 2);
920 }
921 bbPutBit(c, (unsigned)i & 1);
922 bbPutBit(c, 1);
923}
924
925static void
926code_match(struct ucl_compress *c, unsigned int m_len, const unsigned int m_off)
927
928{
929 while (m_len > c->conf.max_match)
930 {
931 code_match(c, c->conf.max_match - 3, m_off);
932 m_len -= c->conf.max_match - 3;
933 }
934
935 c->match_bytes += m_len;
936 if (m_len > c->result[3])
937 c->result[3] = m_len;
938 if (m_off > c->result[1])
939 c->result[1] = m_off;
940
941 bbPutBit(c, 0);
942
943 if (m_off == c->last_m_off)
944 {
945 bbPutBit(c, 0);
946 bbPutBit(c, 1);
947 }
948 else
949 {
950 code_prefix_ss11(c, 1 + ((m_off - 1) >> 8));
951 bbPutByte(c, (unsigned)m_off - 1);
952 }
953 m_len = m_len - 1 - (m_off > M2_MAX_OFFSET);
954 if (m_len >= 4)
955 {
956 bbPutBit(c,0);
957 bbPutBit(c,0);
958 code_prefix_ss11(c, m_len - 4);
959 }
960 else
961 {
962 bbPutBit(c, m_len > 1);
963 bbPutBit(c, (unsigned)m_len & 1);
964 }
965
966 c->last_m_off = m_off;
967}
968
969static void
970code_run(struct ucl_compress *c, const uint8_t *ii, unsigned int lit)
971{
972 if (lit == 0)
973 return;
974 c->lit_bytes += lit;
975 if (lit > c->result[5])
976 c->result[5] = lit;
977 do {
978 bbPutBit(c, 1);
979 bbPutByte(c, *ii++);
980 } while (--lit > 0);
981}
982
983
984
985
986
987static int
988len_of_coded_match(struct ucl_compress *c, unsigned int m_len, unsigned int
989 m_off)
990
991{
992 int b;
993 if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET))
994 || m_off > c->conf.max_offset)
995 return -1;
996 assert(m_off > 0);
997
998 m_len = m_len - 2 - (m_off > M2_MAX_OFFSET);
999
1000 if (m_off == c->last_m_off)
1001 b = 1 + 2;
1002 else
1003 {
1004 b = 1 + 10;
1005 m_off = (m_off - 1) >> 8;
1006 while (m_off > 0)
1007 {
1008 b += 2;
1009 m_off >>= 1;
1010 }
1011 }
1012
1013 b += 2;
1014 if (m_len < 3)
1015 return b;
1016 m_len -= 3;
1017
1018 do {
1019 b += 2;
1020 m_len >>= 1;
1021 } while (m_len > 0);
1022
1023 return b;
1024}
1025
1026int ucl_nrv2b_99_compress(
1027 const uint8_t *in, unsigned long in_len,
1028 uint8_t *out, unsigned long *out_len,
1029 unsigned int *result)
1030{
1031 const uint8_t *ii;
1032 unsigned int lit;
1033 unsigned int m_len, m_off;
1034 struct ucl_compress c_buffer;
1035 struct ucl_compress * const c = &c_buffer;
1036 struct ucl_swd *swd;
1037 unsigned int result_buffer[16];
1038 int r;
1039
1040
1041#define SC_TRY_LAZY 2
1042#define SC_GOOD_LENGTH F
1043#define SC_MAX_LAZY F
1044#define SC_NICE_LENGTH F
1045#define SC_MAX_CHAIN 4096
1046#define SC_FLAGS 1
1047#define SC_MAX_OFFSET N
1048
1049 memset(c, 0, sizeof(*c));
1050 c->ip = c->in = in;
1051 c->in_end = in + in_len;
1052 c->out = out;
1053 c->result = result ? result : result_buffer;
1054 memset(c->result, 0, 16*sizeof(*c->result));
1055 c->result[0] = c->result[2] = c->result[4] = UINT_MAX;
1056 result = NULL;
1057 memset(&c->conf, 0xff, sizeof(c->conf));
1058 r = bbConfig(c, ENDIAN, BITSIZE);
1059 if (r == 0)
1060 r = bbConfig(c, c->conf.bb_endian, c->conf.bb_size);
1061 if (r != 0)
1062 return UCL_E_INVALID_ARGUMENT;
1063 c->bb_op = out;
1064
1065 ii = c->ip;
1066 lit = 0;
1067
1068
1069 swd = (struct ucl_swd *) malloc(sizeof(*swd));
1070 if (!swd)
1071 return UCL_E_OUT_OF_MEMORY;
1072
1073 swd->f = F;
1074 swd->n = N;
1075 if (in_len >= 256 && in_len < swd->n)
1076 swd->n = in_len;
1077 if (swd->f < 8 || swd->n < 256)
1078 return UCL_E_INVALID_ARGUMENT;
1079
1080 r = init_match(c,swd,NULL,0, SC_FLAGS);
1081 if (r != UCL_E_OK)
1082 {
1083 free(swd);
1084 return r;
1085 }
1086 if (SC_MAX_CHAIN > 0)
1087 swd->max_chain = SC_MAX_CHAIN;
1088 if (SC_NICE_LENGTH > 0)
1089 swd->nice_length = SC_NICE_LENGTH;
1090 if (c->conf.max_match < swd->nice_length)
1091 swd->nice_length = c->conf.max_match;
1092
1093 c->last_m_off = 1;
1094 r = find_match(c,swd,0,0);
1095 if (r != UCL_E_OK)
1096 return r;
1097 while (c->look > 0)
1098 {
1099 unsigned int ahead;
1100 unsigned int max_ahead;
1101 int l1, l2;
1102
1103 c->codesize = c->bb_op - out;
1104
1105 m_len = c->m_len;
1106 m_off = c->m_off;
1107
1108 assert(c->bp == c->ip - c->look);
1109 assert(c->bp >= in);
1110 if (lit == 0)
1111 ii = c->bp;
1112 assert(ii + lit == c->bp);
1113 assert(swd->b_char == *(c->bp));
1114
1115 if (m_len < 2 || (m_len == 2 && (m_off > M2_MAX_OFFSET))
1116 || m_off > c->conf.max_offset)
1117 {
1118
1119 lit++;
1120 swd->max_chain = SC_MAX_CHAIN;
1121 r = find_match(c,swd,1,0);
1122 assert(r == 0);
1123 continue;
1124 }
1125
1126
1127 assert_match(swd,m_len,m_off);
1128
1129
1130 ahead = 0;
1131 if (SC_TRY_LAZY <= 0 || m_len >= SC_MAX_LAZY || m_off ==
1132 c->last_m_off)
1133
1134 {
1135
1136 l1 = 0;
1137 max_ahead = 0;
1138 }
1139 else
1140 {
1141
1142 l1 = len_of_coded_match(c,m_len,m_off);
1143 assert(l1 > 0);
1144 max_ahead = SC_TRY_LAZY;
1145 if ((m_len - 1) < max_ahead) {
1146 max_ahead = m_len -1;
1147 }
1148 }
1149
1150 while (ahead < max_ahead && c->look > m_len)
1151 {
1152 if (m_len >= SC_GOOD_LENGTH)
1153 swd->max_chain = SC_MAX_CHAIN >> 2;
1154 else
1155 swd->max_chain = SC_MAX_CHAIN;
1156 r = find_match(c,swd,1,0);
1157 ahead++;
1158
1159 assert(r == 0);
1160 assert(c->look > 0);
1161 assert(ii + lit + ahead == c->bp);
1162
1163 if (c->m_len < 2)
1164 continue;
1165 l2 = len_of_coded_match(c,c->m_len,c->m_off);
1166 if (l2 < 0)
1167 continue;
1168 if (l1 + (int)(ahead + c->m_len - m_len) * 5 > l2 +
1169 (int)(ahead) * 9)
1170 {
1171 c->lazy++;
1172 assert_match(swd,c->m_len,c->m_off);
1173 lit += ahead;
1174 assert(ii + lit == c->bp);
1175 goto lazy_match_done;
1176 }
1177 }
1178
1179 assert(ii + lit + ahead == c->bp);
1180
1181
1182 code_run(c,ii,lit);
1183 lit = 0;
1184
1185
1186 code_match(c,m_len,m_off);
1187 swd->max_chain = SC_MAX_CHAIN;
1188 r = find_match(c,swd,m_len,1+ahead);
1189 assert(r == 0);
1190
1191 lazy_match_done: ;
1192 }
1193
1194
1195 code_run(c,ii,lit);
1196
1197
1198 bbPutBit(c, 0);
1199 code_prefix_ss11(c, 0x1000000U);
1200 bbPutByte(c, 0xff);
1201
1202 bbFlushBits(c, 0);
1203
1204 assert(c->textsize == in_len);
1205 c->codesize = c->bb_op - out;
1206 *out_len = c->bb_op - out;
1207
1208#if 0
1209 printf("%7ld %7ld -> %7ld %7ld %7ld %ld (max: %d %d %d)\n",
1210 (long) c->textsize, (long) in_len, (long) c->codesize,
1211 c->match_bytes, c->lit_bytes, c->lazy,
1212 c->result[1], c->result[3], c->result[5]);
1213#endif
1214 assert(c->lit_bytes + c->match_bytes == in_len);
1215
1216 swd_exit(swd);
1217 free(swd);
1218
1219 return UCL_E_OK;
1220}
1221
1222
1223#ifndef COMPACT
1224void Encode(void)
1225{
1226 uint8_t *in, *out;
1227 unsigned long in_len, out_len;
1228 uint32_t tw;
1229 int r;
1230 fseek(infile, 0, SEEK_END);
1231 in_len = ftell(infile);
1232#ifdef VERBOSE
1233 if ((signed long)in_len < 0)
1234 Fprintf((stderr, "Errno: %d", errno));
1235#endif
1236#if UCLPACK_COMPAT
1237 {
1238 uint8_t byte;
1239 if (fwrite(magic, sizeof(magic), 1, outfile) != 1)
1240 Error("Can't write.");
1241 tw = htonl(0);
1242 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1)
1243 Error("Can't write.");
1244 byte = 0x2b;
1245 if (fwrite(&byte, sizeof(byte), 1, outfile) != 1)
1246 Error("Can't write.");
1247 byte = 10;
1248 if (fwrite(&byte, sizeof(byte), 1, outfile) != 1)
1249 Error("Can't write.");
1250 tw = htonl(256*1024);
1251 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1)
1252 Error("Can't write.");
1253 tw = htonl(in_len);
1254 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1)
1255 Error("Can't write.");
1256 }
1257#else
1258 tw = host_to_i86ul(in_len);
1259 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1)
1260 Error("Can't write.");
1261#endif
1262 if (in_len == 0)
1263 return;
1264 rewind(infile);
1265
1266 in = malloc(in_len);
1267 out_len = in_len + (in_len/8) + 256;
1268 out = malloc(out_len);
1269 if (!in || !out) {
1270 Error("Can't malloc");
1271 }
1272 if (fread(in, in_len, 1, infile) != 1) {
1273 Error("Can't read");
1274 }
1275 r = ucl_nrv2b_99_compress(in, in_len, out, &out_len, 0 );
1276#if UCLPACK_COMPAT
1277 tw = htonl(out_len);
1278 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1)
1279 Error("Can't write.");
1280
1281#endif
1282 if (fwrite(out, out_len, 1, outfile) != 1) {
1283 Error("Write error\n");
1284 }
1285#if UCLPACK_COMPAT
1286 tw = htonl(0);
1287 if (fwrite(&tw, sizeof(tw), 1, outfile) != 1)
1288 Error("Can't write.");
1289
1290#endif
1291
1292#ifdef LONG_REPORT
1293 Fprintf((stderr, "input size %ld bytes\n", in_len));
1294 Fprintf((stderr, "output size %ld bytes\n", out_len));
1295 Fprintf((stderr, "input/output %.3f\n", (double)in_len / out_len));
1296#else
1297 Fprintf((stderr, "input/output = %ld/%ld = %.3f\n", in_len, out_len,
1298 (double)in_len / out_len));
1299#endif
1300
1301}
1302#endif
1303
1304#endif
1305
1306#ifdef COMPACT
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317void do_nrv2b_compress(uint8_t *in, int in_len, uint8_t *out, int *out_len) {
1318 unsigned long new_out_len = in_len + (in_len/8) + 256;
1319 uint8_t *new_out = malloc(new_out_len);
1320 if (new_out == NULL) {
1321 printf("Not enough memory to allocate buffer.\n");
1322 exit(1);
1323 }
1324 ucl_nrv2b_99_compress(in, in_len, new_out, &new_out_len, 0 );
1325 *out_len = (int) new_out_len;
1326 if (*out_len < in_len)
1327 memcpy(out, new_out, *out_len);
1328 free(new_out);
1329}
1330#endif
1331
1332#ifdef DECODE
1333
1334#define GETBIT_8(bb, src, ilen) \
1335 (((bb = bb & 0x7f ? bb*2 : ((unsigned)src[ilen++]*2+1)) >> 8) & 1)
1336
1337#define GETBIT_LE16(bb, src, ilen) \
1338 (bb*=2,bb&0xffff ? (bb>>16)&1 : (ilen+=2,((bb=(src[ilen-2]+src[ilen-1]*256u)*2+1)>>16)&1))
1339
1340#define GETBIT_LE32(bb, src, ilen) \
1341 (bc > 0 ? ((bb>>--bc)&1) : (bc=31,\
1342 bb=*(const uint32_t *)((src)+ilen),ilen+=4,(bb>>31)&1))
1343
1344#if ENDIAN == 0 && BITSIZE == 8
1345#define GETBIT(bb, src, ilen) GETBIT_8(bb, src, ilen)
1346#endif
1347#if ENDIAN == 0 && BITSIZE == 16
1348#define GETBIT(bb, src, ilen) GETBIT_LE16(bb, src, ilen)
1349#endif
1350#if ENDIAN == 0 && BITSIZE == 32
1351#define GETBIT(bb, src, ilen) GETBIT_LE32(bb, src, ilen)
1352#endif
1353
1354#ifndef GETBIT
1355#error "Bad Combination of ENDIAN and BITSIZE values specified"
1356#endif
1357
1358#define SAFE
1359
1360#ifdef SAFE
1361#define FAIL(x,r) if (x) { fprintf(stderr,r); exit(1); }
1362#else
1363#define FAIL(x,r)
1364#endif
1365
1366#ifdef COMPACT
1367void do_nrv2b_uncompress(uint8_t *dst, int dst_len, uint8_t *src, int src_len) {
1368 unsigned long ilen = 0, olen = 0, last_m_off = 1;
1369 uint32_t bb = 0;
1370 unsigned bc = 0;
1371 for (;;) {
1372 unsigned int m_off, m_len;
1373 while (GETBIT(bb, src, ilen)) {
1374 FAIL(ilen >= src_len, "input overrun");
1375 FAIL(olen >= dst_len, "output overrun");
1376 dst[olen++] = src[ilen++];
1377 }
1378 m_off = 1;
1379 do {
1380 m_off = (m_off * 2) + GETBIT(bb, src, ilen);
1381 FAIL(ilen >= src_len, "input overrun");
1382 FAIL(m_off > 0xffffffU +3, "lookbehind overrun");
1383 } while (!GETBIT(bb, src, ilen));
1384 if (m_off == 2) {
1385 m_off = last_m_off;
1386 } else {
1387 FAIL(ilen >= src_len, "input overrun");
1388 m_off = ((m_off - 3) * 256) + src[ilen++];
1389 if (m_off == 0xffffffffU)
1390 break;
1391 last_m_off = ++m_off;
1392 }
1393 m_len = GETBIT(bb, src, ilen);
1394 m_len = (m_len * 2) + GETBIT(bb, src, ilen);
1395 if (m_len == 0) {
1396 m_len++;
1397 do {
1398 m_len = (m_len * 2) + GETBIT(bb, src, ilen);
1399 FAIL(ilen >= src_len, "input overrun");
1400 FAIL(m_len >= dst_len, "output overrun");
1401 } while(!GETBIT(bb, src, ilen));
1402 m_len += 2;
1403 }
1404 m_len += (m_off > 0xd00);
1405 FAIL(olen + m_len > dst_len, "output overrun");
1406 FAIL(m_off > olen, "lookbehind overrun");
1407 {
1408 const uint8_t *m_pos;
1409 m_pos = dst + olen - m_off;
1410 dst[olen++] = *m_pos++;
1411 do {
1412 dst[olen++] = *m_pos++;
1413 } while(--m_len > 0);
1414 }
1415 }
1416 FAIL(ilen < src_len, "input not consumed");
1417}
1418#else
1419void Decode(void)
1420{
1421 uint32_t tw;
1422 uint8_t *src, *dst;
1423 unsigned long max_src_len, src_len, dst_len;
1424 unsigned long ilen = 0, olen = 0, last_m_off = 1;
1425 uint32_t bb = 0;
1426 unsigned bc = 0;
1427#if UCLPACK_COMPAT
1428 if (fseek(infile, sizeof(magic) + sizeof(tw) + 1 + 1 + sizeof(tw),
1429 SEEK_SET) != 0)
1430
1431 Error("Seek Error");
1432 if (fread(&tw, sizeof(tw), 1, infile) < 1)
1433 Error("Can't read");
1434 dst_len = ntohl(tw);
1435 if (fread(&tw, sizeof(tw), 1, infile) < 1)
1436 Error("Can't read");
1437 max_src_len = ntohl(tw);
1438#else
1439 if (fread(&tw, sizeof(tw), 1, infile) < 1)
1440 Error("Can't read");
1441 dst_len = i86ul_to_host(tw);
1442 max_src_len = dst_len + (dst_len/8) + 256;
1443#endif
1444 if (dst_len == 0)
1445 return;
1446 dst = malloc(dst_len);
1447 if (!dst)
1448 Error("Can't malloc");
1449 src = malloc(max_src_len);
1450 if (!src)
1451 Error("Can't malloc");
1452 src_len = fread(src, 1, max_src_len, infile);
1453 if (src_len <= 0)
1454 Error("Can't read");
1455
1456 for(;;) {
1457 unsigned int m_off, m_len;
1458 while(GETBIT(bb, src, ilen)) {
1459 FAIL(ilen >= src_len, "input overrun");
1460 FAIL(olen >= dst_len, "output overrun");
1461 dst[olen++] = src[ilen++];
1462 }
1463 m_off = 1;
1464 do {
1465 m_off = m_off*2 + GETBIT(bb, src, ilen);
1466 FAIL(ilen >= src_len, "input overrun");
1467 FAIL(m_off > 0xffffffU +3, "lookbehind overrun");
1468 } while (!GETBIT(bb, src, ilen));
1469 if (m_off == 2)
1470 {
1471 m_off = last_m_off;
1472 }
1473 else
1474 {
1475 FAIL(ilen >= src_len, "input overrun");
1476 m_off = (m_off - 3)*256 + src[ilen++];
1477 if(m_off == 0xffffffffU)
1478 break;
1479 last_m_off = ++m_off;
1480 }
1481 m_len = GETBIT(bb, src, ilen);
1482 m_len = m_len*2 + GETBIT(bb, src, ilen);
1483 if (m_len == 0)
1484 {
1485 m_len++;
1486 do {
1487 m_len = m_len*2 + GETBIT(bb, src, ilen);
1488 FAIL(ilen >= src_len, "input overrun");
1489 FAIL(m_len >= dst_len, "output overrun");
1490 } while(!GETBIT(bb, src, ilen));
1491 m_len += 2;
1492 }
1493 m_len += (m_off > 0xd00);
1494 FAIL(olen + m_len > dst_len, "output overrun");
1495 FAIL(m_off > olen, "lookbeind overrun");
1496 {
1497 const uint8_t *m_pos;
1498 m_pos = dst + olen - m_off;
1499 dst[olen++] = *m_pos++;
1500 do {
1501 dst[olen++] = *m_pos++;
1502 } while(--m_len > 0);
1503 }
1504 }
1505 FAIL(ilen < src_len, "input not consumed");
1506 FAIL(ilen > src_len, "input overrun");
1507 assert(ilen == src_len);
1508 Fprintf((stderr, "%12ld\n", olen));
1509 if (dst_len != olen) {
1510 fprintf(stderr, "length != expected length\n");
1511 }
1512 if (fwrite(dst, olen, 1, outfile) != 1)
1513 Error("Write error\n");
1514 free(src);
1515 free(dst);
1516}
1517#endif
1518#endif
1519
1520#ifdef MAIN
1521int main(int argc, char *argv[])
1522{
1523 char *s;
1524 FILE *f;
1525 int c;
1526
1527 if (argc == 2) {
1528 outfile = stdout;
1529 if ((f = tmpfile()) == NULL) {
1530 perror("tmpfile");
1531 return EXIT_FAILURE;
1532 }
1533 while ((c = getchar()) != EOF)
1534 fputc(c, f);
1535 rewind(infile = f);
1536 }
1537 else if (argc != 4) {
1538 Fprintf((stderr, "'nrv2b e file1 file2' encodes file1 into file2.\n"
1539 "'nrv2b d file2 file1' decodes file2 into file1.\n"));
1540
1541 return EXIT_FAILURE;
1542 }
1543 if (argc == 4) {
1544 if ((s = argv[1], s[1] || strpbrk(s, "DEde") == NULL)
1545 || (s = argv[2], (infile = fopen(s, "rb")) == NULL)
1546 || (s = argv[3], (outfile = fopen(s, "wb")) == NULL)) {
1547 Fprintf((stderr, "??? %s\n", s));
1548 return EXIT_FAILURE;
1549 }
1550 }
1551 if (toupper(*argv[1]) == 'E')
1552 Encode();
1553 else
1554 Decode();
1555 fclose(infile);
1556 fclose(outfile);
1557 return EXIT_SUCCESS;
1558}
1559#endif
1560