// rangemapper.cpp : Defines the entry point for the console application. // (C) 2010, Andrew Polar under GPL ver. 3. // Released Oct, 2010. // // LICENSE // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details at // Visit . // // This is experimental arithmetic or range coder. It is designed as close to // theoretical range narrowing concept as possible. It is inspired by // Matt Mahoney binary arithmetic coder used in FPAQ and ZPAQ. His concept // of binary coding is simply generalized on any size alphabet. // #include #include #include #include #include #include ///////Data generation functions/////////////////////////////// template double calculate_entropy(T* data, int data_size) { int min, max, counter, buffer_size; int* buffer; double entropy; double log2 = log(2.0); double prob; min = data[0]; max = data[0]; for (counter=0; counter max) max = data[counter]; } buffer_size = max - min + 1; buffer = (int*)malloc(buffer_size * sizeof(int)); memset(buffer, 0x00, buffer_size * sizeof(int)); for (counter=0; counter 0) { prob = (double)(buffer[counter]); prob /= (double)(data_size); entropy += log(prob)/log2*prob; } } entropy *= -1.0; if (buffer) free(buffer); return entropy; } static __inline double round(double x) { if ((x - floor(x)) >= 0.5) return ceil(x); else return floor(x); } double makeData(int* data, int data_size, int min, int max, int redundancy_factor) { int counter, cnt, high, low; double buf; if (redundancy_factor <= 1) redundancy_factor = 1; if (max <= min) max = min + 1; srand((unsigned)(time(0))); for (counter=0; counter high) high = data[counter]; if (data[counter] < low) low = data[counter]; } for (counter=0; counter>1; if (point >= cm[mid] && point < cm[mid+1]) { index = mid; break; } if (point >= cm[mid+1]) left = mid + 1; else right = mid; if (cnt++ >= size) break; } return index; } //This is the main class RangeMapper. It can be used in adaptive data //processing. It processes ranges that passed to encoder and decoder. //In adaptive coder the ranges can be computed dynamically and depend //on the context. //Class is written for generic case: alphabets from 2 up to 10000 can be //processed without changes in code. In case of special data coder can //be optimized and run at least twice faster. class RangeMapper { public: RangeMapper(int range_size) { LOW = 0; MID = 0; RANGE_SIZE = range_size; RANGE_LIMIT = ((unsigned long long)(1) << RANGE_SIZE); BYTES_IN_BUFFER = (64 - RANGE_SIZE) / 8; SHIFT = (BYTES_IN_BUFFER - 1) * 8; MASK = ((long long)(1)<<(BYTES_IN_BUFFER * 8)) - 1; HIGH = MASK; } ~RangeMapper() {} void encodeRange(int cmin, int cmax); void decodeRange(int cmin, int cmax); int getMidPoint(); void flush(); void init(); private: void updateModel(int cmin, int cmax); unsigned long long LOW, HIGH, MID, RANGE_LIMIT, MASK; unsigned char RANGE_SIZE, SHIFT, BYTES_IN_BUFFER; }; void RangeMapper::updateModel(int cmin, int cmax) { unsigned long long range = HIGH - LOW; HIGH = LOW + ((range * cmax) >> RANGE_SIZE); LOW += ((range * cmin) >> RANGE_SIZE) + 1; } int RangeMapper::getMidPoint() { return (int)(((MID - LOW) << RANGE_SIZE) / (HIGH - LOW)); } void RangeMapper::encodeRange(int cmin, int cmax) { updateModel(cmin, cmax); if ((HIGH - LOW) < RANGE_LIMIT) HIGH = LOW; //preventing narrowing range while (((LOW ^ HIGH) >> SHIFT) == 0) { writeByte((unsigned char)(LOW >> SHIFT)); LOW <<= 8; HIGH = (HIGH << 8) | 0xff; } HIGH &= MASK; LOW &= MASK; } void RangeMapper::decodeRange(int cmin, int cmax) { updateModel(cmin, cmax); if ((HIGH - LOW) < RANGE_LIMIT) HIGH = LOW; while (((LOW ^ HIGH) >> SHIFT) == 0) { LOW <<= 8; HIGH = (HIGH << 8) | 0xff; MID = (MID << 8) | readByte(); } HIGH &= MASK; LOW &= MASK; MID &= MASK; } void RangeMapper::flush() { LOW += 1; for (int i=0; i> SHIFT)); LOW <<= 8; } } void RangeMapper::init() { for (int i=0; i=0; --i) { if (cm[i] >= cm[i+1]) cm[i] = cm[i+1] - 1; } //end of correction if (freq) free(freq); } void makeLookupTable(int* cm, int size, short* lookup) { for (int i=0; iencodeRange(cm[data[i]], cm[data[i]+1]); } rm_encode->encodeRange(cm[alphabet_size], cm[alphabet_size+1]); //end of data marker rm_encode->flush(); delete rm_encode; clock_t end_encoding = clock(); printf("Time for encoding %2.3f sec.\n", (double)(end_encoding - start_encoding)/CLOCKS_PER_SEC); int actual_size = current_byte; //end encoding //decode clock_t start_decoding = clock(); short* lookup = (short*)malloc(((1<init(); int i=0; while (true) { int midpoint = rm_decode->getMidPoint(); //next is binary search algorithm that does not need having lookup array //int index = findInterval(cm, alphabet_size + 2, midpoint); //this is lookup table that expedites execution, either of these functions works int index = lookup[midpoint]; //midpoint presumed being within correct boundaries if (index == alphabet_size) break; //end of data marker if (index != data[i++]) { printf("Data mismatch %d element %d\n", i, index); isOK = false; break; } rm_decode->decodeRange(cm[index], cm[index+1]); } if (i != data_size) isOK = false; delete rm_decode; if (lookup) free(lookup); clock_t end_decoding = clock(); printf("Time for decoding %2.3f sec.\n\n", (double)(end_decoding - start_decoding)/CLOCKS_PER_SEC); //end decoding printf("Expected size %d\n", expected_size); printf("Actual size %d, \n\n", actual_size); if (isOK) printf("Round trip is OK\n"); else printf("Data mismatch\n"); if (g_buffer) free(g_buffer); if (data) free(data); return 0; }