summarylogtreecommitdiffstats
path: root/smaz2.c.patch
blob: 12162dc757c5c0bfb3bb18f324549990c592a0d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
diff --git a/smaz2.c b/smaz2.c
index f9dd594..74afc65 100644
--- a/smaz2.c
+++ b/smaz2.c
@@ -6,6 +6,8 @@
 #include <ctype.h>
 #include <stdlib.h>
 
+extern int debug;
+
 /* 128 common bigrams. */
 const char *bigrams = "intherreheanonesorteattistenntartondalitseediseangoulecomeneriroderaioicliofasetvetasihamaecomceelllcaurlachhidihofonsotacnarssoprrtsassusnoiltsemctgeloeebetrnipeiepancpooldaadviunamutwimoshyoaiewowosfiepttmiopiaweagsuiddoooirspplscaywaigeirylytuulivimabty";
 
@@ -51,7 +53,6 @@ char *words[256] = {
 unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned char *s, unsigned long len)
 {
 
-    int debug = 0;       // Log debugging messages.
     int verblen = 0;     /* Length of the emitted verbatim sequence, 0 if
                           * no verbating sequence was emitted last time,
                           * otherwise 1...5, it never reaches 8 even if we have
@@ -80,17 +81,17 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
              * byte value 8: space + word. */
             if (i != 256) {
                 if (s[0] == ' ') {
-                    if (debug) printf("( %s)", words[i]);
+                    if (debug) fprintf(stderr,"( %s)", words[i]);
                     if (y < dstlen) dst[y++] = 8; // Space + word.
                     if (y < dstlen) dst[y++] = i; // Word ID.
                     s++; len--; // Account for the space.
                 } else if (len > wordlen && s[wordlen] == ' ') {
-                    if (debug) printf("(%s )", words[i]);
+                    if (debug) fprintf(stderr,"(%s )", words[i]);
                     if (y < dstlen) dst[y++] = 7; // Word + space.
                     if (y < dstlen) dst[y++] = i; // Word ID.
                     s++; len--; // Account for the space.
                 } else {
-                    if (debug) printf("(%s)", words[i]);
+                    if (debug) fprintf(stderr,"(%s)", words[i]);
                     if (y < dstlen) dst[y++] = 6; // Simple word.
                     if (y < dstlen) dst[y++] = i; // Word ID.
                 }
@@ -120,7 +121,7 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
                 s += 2;
                 len -= 2;
                 verblen = 0;
-                if (debug) printf("[%c%c]", bigrams[i*2], bigrams[i*2+1]);
+                if (debug) fprintf(stderr,"[%c%c]", bigrams[i*2], bigrams[i*2+1]);
                 continue;
             }
         }
@@ -132,7 +133,7 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
             if (y < dstlen) dst[y++] = s[0];
 
             /* Consume. */
-            if (debug) printf("{%c}", s[0]);
+            if (debug) fprintf(stderr,"{%c}", s[0]);
             s++;
             len--;
             verblen = 0;
@@ -144,12 +145,12 @@ unsigned long smaz2_compress(unsigned char *dst, unsigned long dstlen, unsigned
          * with the escape sequence. */
         verblen++;
         if (verblen == 1) {
-            if (debug) printf("_%c", s[0]);
+            if (debug) fprintf(stderr,"_%c", s[0]);
             if (y+1 == dstlen) break; /* No room for 2 bytes. */
             dst[y++] = verblen;
             dst[y++] = s[0];
         } else {
-            if (debug) printf("%c", s[0]);
+            if (debug) fprintf(stderr,"%c", s[0]);
             dst[y++] = s[0];
             dst[y-(verblen+1)] = verblen; // Fix the verbatim bytes length.
             if (verblen == 5) verblen = 0; // Start to emit a new sequence.
@@ -170,17 +171,20 @@ unsigned long smaz2_decompress(unsigned char *dst, unsigned long dstlen, unsigne
     unsigned long orig_dstlen = dstlen, i = 0;
 
     while (i < len) {
+        unsigned char *_dst = dst;
         if ((c[i] & 128) != 0) {
             /* Emit bigram. */
             unsigned char idx = c[i]&127;
             if (dstlen && dstlen-- && i < len) *dst++ = bigrams[idx*2];
             if (dstlen && dstlen-- && i < len) *dst++ = bigrams[idx*2+1];
             i++;
+            if (debug) fprintf(stderr,"[%c%c]", *(dst-2), *(dst-1));
         } else if (c[i] > 0 && c[i] < 6) {
             /* Emit verbatim sequence. */
             unsigned char vlen = c[i++];
             while(vlen-- && i < len)
                 if (dstlen && dstlen--) *dst++ = c[i++];
+            if (debug) fprintf(stderr,"_%.*s", (int)(dst-_dst), _dst);
         } else if (c[i] > 5 && c[i] < 9) {
             /* Emit word. */
             unsigned char escape = c[i];
@@ -191,9 +195,11 @@ unsigned long smaz2_decompress(unsigned char *dst, unsigned long dstlen, unsigne
             while(words[idx][j] != 0)
                 if (dstlen && dstlen--) *dst++ = words[idx][j++];
             if (dstlen && escape == 7 && dstlen--) *dst++ = ' ';
+            if (debug) fprintf(stderr,"(%.*s)", (int)(dst-_dst), _dst);
         } else {
             /* Emit byte as it is. */
             if (dstlen--) *dst++ = c[i++];
+            if (debug) fprintf(stderr,"{%c}", *(dst-1));
         }
     }
     return orig_dstlen - dstlen;