Development: Embedded BWT

From WxWiki

Jump to: navigation, search

Below is a Burrows-Wheeler Transform algorithm implemented specifically for embedded machines. See Compression Methods for more info.


// fixed size BWT+RLE+EGC+MTF compression designed for embedded systems (e.g. Gameboy)
// BWT = Block Sorting
// RLE = Run Length Encoding
// ECG = Elias Gamming Coding
// MTF = Move To Front
//
// Slow but requires minimal ram 
//
// Version 0.04 -- Nov 9th 2002 
//              -- Sped up decompression more
//
// version 0.03 -- Nov 7th 2002
//              -- requires now 1KB + 4*SIZE bytes (0.5KB more than before)
//              -- Uses improved bin based qsort so that compression takes less time
//              -- Uses unsigned types to handle 32KB blocks
//              -- Uses fixed qsort
//
// Version 0.02 -- Nov 2nd 2002
//              -- Updated MTF code to be more efficient (less mem moves)
//
//
// Version 0.01 -- Nov 1st 2002.  
//              -- Updated decompression routines that are faster/smaller
//
// Tom St Denis, tomstdenis@yahoo.com
 
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
 
// size of block compressed (bytes) must be power of two
// on a 32-bit platform the following ram is required
//
// Requires 1KB + 4*SIZE bytes of ram to work and about 1kb of stack.
#define SIZE 8192
 
// must alloc ram for these at runtime, each are arrays of SIZE elements
unsigned char *bwt_blk, *bwt_sblk, *bwt_out;
 
// these are the work buffers required
// if you leave them as null compress()/decompress() will alloc/free ram as required
// this lets you fix the address for embedded devices
unsigned short *bwt_wrk_mem = NULL;  // must have SIZE elements 
unsigned short *bwt_tab     = NULL;  // must have 256 elements
unsigned short *bwt_tab2    = NULL;  // must have 256 elements
unsigned char  *bwt_ctab;            // (setup automatically)
 
// qsort function that is not-recursive
#define  T   7
 
#define swap(x, y) { unsigned short t; t = *((unsigned short *)(x));      \
                     *((unsigned short *)(x)) = *((unsigned short *)(y)); 
                     *((unsigned short *)(y)) = t; }
 
static int comp_helper(const void *A, const void *B)
{
    unsigned short a, b;
    a = *((short *)A);
    b = *((short *)B);
    return memcmp(bwt_blk+a+1, bwt_blk+b+1, SIZE-1);
}
 
/* main quicksort function */
static void bwt_qsort(unsigned char *base, unsigned elements, unsigned size)
{
    unsigned char *stack[40], **sp;                        /* stack and stack pointer      */
    unsigned char *i, *j, *limit;                          /* scan and limit pointers      */
    unsigned thresh;                                       /* size of T elements in bytes  */
 
    thresh = size*T;                                       /* init threshold               */
    sp = stack;                                            /* init stack pointer           */
    limit = base+elements*size;                            /* pointer past end of array    */
    for(;;) {                                              /* repeat until break...        */
        if((unsigned)limit - (unsigned)base > thresh) {    /* if more than T elements      */
            /*   swap base with middle  */
            swap(((((unsigned)limit-(unsigned)base)/size)/2)*size+base, base);
            i = base + size;                               /* i scans left to right        */
            j = limit - size;                              /* j scans right to left        */
            if(comp_helper(i, j) > 0)                      /* Sedgewick's                  */
                swap(i, j);                                /*   three-element sort         */
            if(comp_helper(base, j) > 0)                   /*   sets things up             */
                swap(base, j);                             /*   so that                    */
            if(comp_helper(i, base) > 0)                   /*      *i <= *base <= *j       */
                swap(i, base);                             /* *base is pivot element       */
            for(;;) {                                      /* loop until break             */
                do {                                       /* move i right                 */
                    i += size;                             /*        until *i >= pivot     */
                } while(comp_helper(i, base) < 0);
                do {                                       /* move j left                  */
                    j -= size;                             /*   until *j <= pivot          */
                } while(comp_helper(j, base) > 0);
                if(i > j)                                  /* if pointers crossed          */
                    break;                                 /*   break loop                 */
                swap(i, j); 
            }    /* else swap elements,keep scanning*/
            swap(base, j);                                 /* move pivot into correct place */
            if((unsigned)j - (unsigned)base > limit - i) { /* if left subfile larger       */
                sp[0] = base;                              /* stack left subfile base      */
                sp[1] = j;                                 /*   and limit                  */
                base = i; 
            } else {                                       /* sort the right subfile       */
                                                           /* else right subfile larger    */
                sp[0] = i;                                 /* stack right subfile base     */
                sp[1] = limit;                             /*    and limit                 */
                limit = j;
            }                                              /* sort the left subfile        */
            sp += 2;                                       /* increment stack pointer      */
        } else {                                           /* else subfile is small, use insertion sort */
            for(j = base, i = j+size; i < limit; j = i, i += size)
                while(comp_helper(j, j+size) > 0) {
                    swap(j, j+size);
                    if(j == base)
                    break;
                    j -= size; 
                }
            if(sp == stack)                                /* No entries - done!           */
                break;
            sp -= 2;                                       /* pop the base and limit       */
            base = sp[0];
            limit = sp[1]; 
        }
    }
}
 
// BIT routines
static void putbit(long *k, long *bitcnt, long *bitbuf, long b)
{
    *bitbuf |= b;
    if (--(*bitcnt) == 0) {
        bwt_out[(*k)++] = *bitbuf;
        *bitcnt = 8;
        *bitbuf = 0;
    } else {
        *bitbuf <<= 1;
    }
}
 
static void putbyte(long *k, long *bitcnt, long *bitbuf, long b)
{
    long i;
    for (i = 0; i < 8; i++) {
        putbit(k, bitcnt, bitbuf, (b<<i)&0x80?1:0);
    }
}
 
static long getbit(long *k, long *bitcnt, long *bitbuf)
{
    long x;
    if ((*bitcnt)-- == 0) {
        *bitcnt = 7;
        *bitbuf = bwt_blk[(*k)++];
    }
    x = (*bitbuf >> *bitcnt) & 1;
    return x;
}
 
static long getbyte(long *k, long *bitcnt, long *bitbuf)
{
    long x, y;
    for (x = y = 0; x < 8; x++) {
        y |= getbit(k, bitcnt, bitbuf)<<(7-x);
    }
    return y;
}
 
static void bwt_emit_len(long *k, long *bitcnt, long *bitbuf, long len)
{
    long x, diff, bits;
    /*
    Elias Gamma Code
    1                  1     1
    01x              2-3     3
    001xx            4-7     5
    0001xxx         8-15     7
    00001xxxx      16-31     9
    000001xxxxx    32-63    11
    0000001xxxxxx 64-127    13
    ...
    */
    // bits equals the 2^n such that 2^n+1 is > len and 2^n <= len
    for (bits = 0; (1<<bits) <= len; ++bits); --bits;
 
    // output the 0's
    for (x = 0; x < bits; x++) putbit(k, bitcnt, bitbuf, 0);
    putbit(k, bitcnt, bitbuf, 1);
 
    diff = len - (1<<bits);
    for (x = 0; x < bits; x++) {
        putbit(k, bitcnt, bitbuf, (diff>>x)&1);
    }
}
 
static long bwt_read_len(long *k, long *bitcnt, long *bitbuf)
{
    long bits, x, y;
 
    bits = 0;
    while (getbit(k, bitcnt, bitbuf) == 0) {
        ++bits;
    }
 
    y = 1<<bits;
    for (x = 0; x < bits; x++) {
        y += getbit(k,bitcnt,bitbuf)<<x;
    }
 
    return y;
}   
 
static void mtf_encode(long *k, long *bitcnt, long *bitbuf, unsigned char v, int use_mtf)
{
    long x, y;
 
    if (use_mtf) {
        // find 'v' in bwt_tab
        for (x = 0; x < 256; x++) {
            if (bwt_ctab[x] == v) {
                break;
            }
        }
 
        // emit code for position x
        bwt_emit_len(k, bitcnt, bitbuf, x+1);
 
        // now move 'v' to the front
        for (y = x; y > 0; y--) {
            bwt_ctab[y] = bwt_ctab[y-1];
        }
        bwt_ctab[0] = v;
    } else {
        putbyte(k, bitcnt, bitbuf, v);
    }
}
 
static unsigned char mtf_decode(long *k, long *bitcnt, long *bitbuf, int use_mtf)
{
    long x, y, v;
    if (use_mtf) {
        x = bwt_read_len(k, bitcnt, bitbuf)-1;
        v = bwt_ctab[x];
 
        // move 'v' to front 
        for (y = x; y > 0; y--) {
            bwt_ctab[y] = bwt_ctab[y-1];
        }
        bwt_ctab[0] = v;
        return v;
    } else {
        return getbyte(k, bitcnt, bitbuf);
    }
}
 
// determines if MTF is useful or not
static int mtf_profile(void)
{
    long x, y, z, v, last, b_mtf, b_nmtf;
 
    // setup MTF
    for (x = 0; x < 256; x++) bwt_ctab[x] = x;
    b_mtf = b_nmtf = last = 0;
 
    for (x = 0; x < SIZE; x++) {
        if (bwt_blk[(bwt_wrk_mem[x]-1)&(SIZE-1)] != last) {
            b_nmtf += 8;
 
            // find char 
            v = bwt_blk[(bwt_wrk_mem[x]-1)&(SIZE-1)];
 
            for (y = 0; y < 256; y++) {
                if (bwt_ctab[y] == v) {
                    break;
                }
            }
            ++y;
 
            // calc weight of position
            if (y == 1) {
                ++b_mtf;
            } else if (y <= 3) {
                b_mtf += 3;
            } else if (y <= 7) {
                b_mtf += 5;
            } else if (y <= 15) {
                b_mtf += 7;
            } else if (y <= 31) {
                b_mtf += 9;
            } else if (y <= 63) {
                b_mtf += 11;
            } else if (y <= 127) {
                b_mtf += 13;
            } else if (y <= 255) {
                b_mtf += 15;
            } else if (y <= 511) {
                b_mtf += 17;
            }
            --y;
 
            // sort MTF tab
            // now move 'v' to the front
            for (z = y; z > 0; z--) {
                bwt_ctab[z] = bwt_ctab[z-1];
            }
            bwt_ctab[0] = v;
 
            last = v;
        }
    }
 
    if (b_mtf > b_nmtf) {
        return 0;
    } else { 
        return 1;
    }
}  
 
static void bwt_decompress(void)
{
    long i, j, k, off, last, run, bitcnt, bitbuf, use_mtf;
 
    // get offset
    off = bwt_blk[0] | ((unsigned)bwt_blk[1] << 8);
 
    // now RLE decompress 
    k = 2;
    bitcnt = 0;
 
    // get mtf bit
    use_mtf = getbit(&k, &bitcnt, &bitbuf);
 
    // setup MTF table 
    if (use_mtf) {
        for (i = 0; i < 256; i++) 
            bwt_ctab[i] = i;
    }
 
    // sort bwt_sblk using the bucket sort
    memset(bwt_tab2, 0, 256*sizeof(short));
    for (j = 0; j < SIZE; ) {
        run  = bwt_read_len(&k, &bitcnt, &bitbuf);
        last = mtf_decode(&k, &bitcnt, &bitbuf, use_mtf);
        memset(bwt_sblk+j, last, run);
        bwt_tab2[last] += run;
        j += run;
    }
 
    // copy into bwt_blk 
    memcpy(bwt_blk, bwt_sblk, SIZE);
 
    // now fill (bucket sort) 
    memset(bwt_tab, 0, sizeof(short) * 256);
    for (i = j = k = 0; i < 256; i++) {
        bwt_tab[i] = k;
        j = bwt_tab2[i];
        if (j > 0) {
            memset(bwt_sblk+k, i, j);
            k += j;
        }
    }
 
    // setup vectors to -1 each
    memset(bwt_wrk_mem, 0xFF, sizeof(short) * SIZE);
 
    // now do search 
    for (i = 0; i < SIZE; i++) {
        // use in table once its found once
        j = bwt_tab[bwt_blk[i]];
 
        // look for bwt_blk[i] in bwt_sblk[]
        for (; j < SIZE; j++) {
            if (bwt_blk[i] == bwt_sblk[j] && bwt_wrk_mem[j] == (unsigned short)-1) {
                bwt_wrk_mem[j] = i;
                bwt_tab[bwt_blk[i]] = j+1;
                break;
            }
        }
    }
 
    // copy back
    for (i = 0; i < SIZE; i++) {
        bwt_out[i] = bwt_blk[off];
        off = bwt_wrk_mem[off];
    }
}
 
static long bwt_compress(void)
{
    long i, k, last, run, off, bitbuf, bitcnt, use_mtf;
 
    // count each byte
    memset(bwt_tab, 0, sizeof(short) * 256);
    for (i = 0; i < SIZE; i++)     { ++bwt_tab[bwt_blk[i]]; }
 
    // make offsets into wrk mem for each
    for (k = i = 0; i < 256; i++)  { bwt_tab2[i] = k; k += bwt_tab[i]; }
 
    // now copy each index by byte
    for (i = 0; i < SIZE; i++)     { bwt_wrk_mem[bwt_tab2[bwt_blk[i]]++] = i; }
 
    // now sort each bin
    for (k = i = 0; i < 256; i++)  { 
        if (bwt_tab[i]) {
            bwt_qsort((unsigned char *)(bwt_wrk_mem + k), bwt_tab[i], sizeof(short));
            k += bwt_tab[i];
        }
    }
 
    // now find first index 
    for (off = i = 0; i < SIZE; i++) {
        if (bwt_wrk_mem[i] == 1) {
            off = i;
            break;
        }
    }
 
    // RLE compress 
    bwt_out[0] = off&255;
    bwt_out[1] = off>>8;
    last = 0;
    run  = 0;
    k = 2;
    bitcnt = 8;
    bitbuf = 0;
 
    use_mtf = mtf_profile();
 
    putbit(&k, &bitcnt, &bitbuf, use_mtf);
 
    if (use_mtf) {
        // setup MTF table 
        for (i = 0; i < 256; i++) 
            bwt_ctab[i] = i;
    }
 
    for (i = 0; i < SIZE; i++) {
        if (last != bwt_blk[(bwt_wrk_mem[i]-1)&(SIZE-1)]) {
            if (run) {
                if (k > SIZE-16) return -1;
                bwt_emit_len(&k, &bitcnt, &bitbuf, run);
                mtf_encode(&k, &bitcnt, &bitbuf, last, use_mtf);
            }
            run = 1;
            last = bwt_blk[(bwt_wrk_mem[i]-1)&(SIZE-1)];
        } else { ++run; }
    }
 
    if (run) {
        if (k > SIZE-16) return -1;
        bwt_emit_len(&k, &bitcnt, &bitbuf, run);
        mtf_encode(&k, &bitcnt, &bitbuf, last, use_mtf);
    }
 
    if (bitcnt != 8) {
        for (i = 0; i < 8; i++) putbit(&k, &bitcnt, &bitbuf, 0);
    }
 
    return k;
}
 
// helper routines
 
// sizes... 
// in  : 2*SIZE bytes
// stores result in upper half of in[]
long compress(unsigned char *in)
{
    long len, n;
 
    // if any of the global points is null...
    if (bwt_wrk_mem == NULL || bwt_tab == NULL) {
        n = 1;
        bwt_wrk_mem = malloc(sizeof(short) * SIZE);
        bwt_tab     = malloc(sizeof(short) * 256);
        bwt_tab2    = malloc(sizeof(short) * 256);
    } else {
        n = 0;
    }
 
    // setup bwt_blk and copy
    bwt_blk = in;                                  // input
    memcpy(bwt_blk+SIZE, in, SIZE);                // 2nd copy
    bwt_out     = in+SIZE;                         // setup output
    bwt_ctab    = (unsigned char *)bwt_tab;        // MTF tab
 
    len = bwt_compress();
 
    if (n == 1) {
        free(bwt_wrk_mem);
        free(bwt_tab);
        free(bwt_tab2);
        bwt_wrk_mem = NULL;
        bwt_tab     = NULL;
        bwt_tab2    = NULL;
    }
    return len;
}
 
// in  must be SIZE bytes
// out must be SIZE bytes
void decompress(unsigned char *in, unsigned char *out)
{
    long n;
 
    // if any of the global points is null...
    if (bwt_wrk_mem == NULL || bwt_tab == NULL || bwt_tab2 == NULL) {
        n = 1;
        bwt_wrk_mem = malloc(sizeof(short) * SIZE);
        bwt_tab     = malloc(sizeof(short) * 256);
        bwt_tab2     = malloc(sizeof(short) * 256);
    } else {
        n = 0;
    }
 
    bwt_blk  = in;
    bwt_out  = bwt_sblk = out;
    bwt_ctab = (unsigned char *)bwt_tab; // MTF tab
    bwt_decompress();
 
    if (n == 1) {
        free(bwt_wrk_mem);
        free(bwt_tab);
        free(bwt_tab2);
        bwt_wrk_mem = NULL;
        bwt_tab     = NULL;
        bwt_tab2    = NULL;
    }
}
 
//
// DEMO for platforms with a console
//
 
#ifdef DEMO
 
#include <time.h>
int main(void)
{
    unsigned char src[SIZE+SIZE], res[SIZE];
    int x;
    clock_t t1;
 
    // make up a buffer 
    memset(src, 0, sizeof(src));
    fread(src, 1, SIZE-1, in);
 
    // COMPRESSION
    t1 = clock();
    x  = compress(src);
    printf("Took %lu ticks\n", clock() - t1);
 
    printf("Compressed %d downto %d bytes\n", SIZE, x);
    if (x == -1) { printf("Data not compressed.\n"); return 0; }
 
    // DECOMPRESSION
    t1 = clock();
    memset(res, 0, sizeof(res));
    decompress(src+SIZE, res); // decompresses bwt_blk to bwt_out
    printf("Took %lu ticks\n", clock() - t1);
 
    // result is in bwt_out[0..SIZE-1]
    printf("Decompress Res: {%s}\nCompare: %d\n", res, memcmp(res, src, SIZE));
 
    return 0;
}
 
#endif 
Personal tools