1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
diff --git a/smaz2.c b/smaz2.c
index f9dd594..74afc65 100644
--- a/smaz2.c
+++ b/smaz2.c
@@ -6,6 +6,8 @@
#include <ctype.h>
#include <stdlib.h>
+extern int debug;
+
/* 128 common bigrams. */
const char *bigrams = "intherreheanonesorteattistenntartondalitseediseangoulecomeneriroderaioicliofasetvetasihamaecomceelllcaurlachhidihofonsotacnarssoprrtsassusnoiltsemctgeloeebetrnipeiepancpooldaadviunamutwimoshyoaiewowosfiepttmiopiaweagsuiddoooirspplscaywaigeirylytuulivimabty";
@@ -51,7 +53,6 @@ char *words[256] = {
unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned char *s, unsigned long len)
{
- int debug = 0; // Log debugging messages.
int verblen = 0; /* Length of the emitted verbatim sequence, 0 if
* no verbating sequence was emitted last time,
* otherwise 1...5, it never reaches 8 even if we have
@@ -80,17 +81,17 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
* byte value 8: space + word. */
if (i != 256) {
if (s[0] == ' ') {
- if (debug) printf("( %s)", words[i]);
+ if (debug) fprintf(stderr,"( %s)", words[i]);
if (y < dstlen) dst[y++] = 8; // Space + word.
if (y < dstlen) dst[y++] = i; // Word ID.
s++; len--; // Account for the space.
} else if (len > wordlen && s[wordlen] == ' ') {
- if (debug) printf("(%s )", words[i]);
+ if (debug) fprintf(stderr,"(%s )", words[i]);
if (y < dstlen) dst[y++] = 7; // Word + space.
if (y < dstlen) dst[y++] = i; // Word ID.
s++; len--; // Account for the space.
} else {
- if (debug) printf("(%s)", words[i]);
+ if (debug) fprintf(stderr,"(%s)", words[i]);
if (y < dstlen) dst[y++] = 6; // Simple word.
if (y < dstlen) dst[y++] = i; // Word ID.
}
@@ -120,7 +121,7 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
s += 2;
len -= 2;
verblen = 0;
- if (debug) printf("[%c%c]", bigrams[i*2], bigrams[i*2+1]);
+ if (debug) fprintf(stderr,"[%c%c]", bigrams[i*2], bigrams[i*2+1]);
continue;
}
}
@@ -132,7 +133,7 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
if (y < dstlen) dst[y++] = s[0];
/* Consume. */
- if (debug) printf("{%c}", s[0]);
+ if (debug) fprintf(stderr,"{%c}", s[0]);
s++;
len--;
verblen = 0;
@@ -144,12 +145,12 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
* with the escape sequence. */
verblen++;
if (verblen == 1) {
- if (debug) printf("_%c", s[0]);
+ if (debug) fprintf(stderr,"_%c", s[0]);
if (y+1 == dstlen) break; /* No room for 2 bytes. */
dst[y++] = verblen;
dst[y++] = s[0];
} else {
- if (debug) printf("%c", s[0]);
+ if (debug) fprintf(stderr,"%c", s[0]);
dst[y++] = s[0];
dst[y-(verblen+1)] = verblen; // Fix the verbatim bytes length.
if (verblen == 5) verblen = 0; // Start to emit a new sequence.
@@ -170,17 +171,20 @@ unsigned long smaz2_decompress(unsigned char *dst, unsigned long dstlen, unsigne
unsigned long orig_dstlen = dstlen, i = 0;
while (i < len) {
+ unsigned char *_dst = dst;
if ((c[i] & 128) != 0) {
/* Emit bigram. */
unsigned char idx = c[i]&127;
if (dstlen && dstlen-- && i < len) *dst++ = bigrams[idx*2];
if (dstlen && dstlen-- && i < len) *dst++ = bigrams[idx*2+1];
i++;
+ if (debug) fprintf(stderr,"[%c%c]", *(dst-2), *(dst-1));
} else if (c[i] > 0 && c[i] < 6) {
/* Emit verbatim sequence. */
unsigned char vlen = c[i++];
while(vlen-- && i < len)
if (dstlen && dstlen--) *dst++ = c[i++];
+ if (debug) fprintf(stderr,"_%.*s", (int)(dst-_dst), _dst);
} else if (c[i] > 5 && c[i] < 9) {
/* Emit word. */
unsigned char escape = c[i];
@@ -191,9 +195,11 @@ unsigned long smaz2_decompress(unsigned char *dst, unsigned long dstlen, unsigne
while(words[idx][j] != 0)
if (dstlen && dstlen--) *dst++ = words[idx][j++];
if (dstlen && escape == 7 && dstlen--) *dst++ = ' ';
+ if (debug) fprintf(stderr,"(%.*s)", (int)(dst-_dst), _dst);
} else {
/* Emit byte as it is. */
if (dstlen--) *dst++ = c[i++];
+ if (debug) fprintf(stderr,"{%c}", *(dst-1));
}
}
return orig_dstlen - dstlen;
|