// rangemapper.cpp : Defines the entry point for the console application. // (C) 2010, Andrew Polar under GPL ver. 3. // Released Oct, 2010. // // LICENSE // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details at // Visit . // // This is experimental arithmetic or range coder. It is designed as close to // theoretical range narrowing concept as possible. In the way it is written // it is effective for small alphabets only, but the concept is valid for // any alphabet. To support large alphabets code should be changed. // #include #include #include #include #include #include ///////Data generation functions/////////////////////////////// template double calculate_entropy(T* data, int data_size) { int min, max, counter, buffer_size; int* buffer; double entropy; double log2 = log(2.0); double prob; min = data[0]; max = data[0]; for (counter=0; counter max) max = data[counter]; } buffer_size = max - min + 1; buffer = (int*)malloc(buffer_size * sizeof(int)); memset(buffer, 0x00, buffer_size * sizeof(int)); for (counter=0; counter 0) { prob = (double)(buffer[counter]); prob /= (double)(data_size); entropy += log(prob)/log2*prob; } } entropy *= -1.0; if (buffer) free(buffer); return entropy; } static __inline double round(double x) { if ((x - floor(x)) >= 0.5) return ceil(x); else return floor(x); } double makeData(int* data, int data_size, int min, int max, int redundancy_factor) { int counter, cnt, high, low; double buf; if (redundancy_factor <= 1) redundancy_factor = 1; if (max <= min) max = min + 1; srand((unsigned)(time(0))); for (counter=0; counter high) high = data[counter]; if (data[counter] < low) low = data[counter]; } for (counter=0; counter>1; if (point >= cm[mid] && point < cm[mid+1]) { index = mid; break; } if (point >= cm[mid+1]) left = mid + 1; else right = mid; if (cnt++ >= size) break; } return index; } unsigned char* g_buffer = 0; int current_byte = 0; void writeByte(unsigned char byte) { g_buffer[current_byte++] = byte; } unsigned char readByte() { return g_buffer[current_byte++]; } class RangeMapper { public: RangeMapper(int Precision): PRECISION(Precision) {LOW = 0; MID = 0; HIGH = 0xffffffff;} ~RangeMapper() {} void encodeRange(int cmin, int cmax); int decodeRange(int* cm, int set_size); void flush(); void init(); private: unsigned int LOW, HIGH, MID; unsigned char PRECISION; }; int static bytecounter = 0; void RangeMapper::encodeRange(int cmin, int cmax) { long long range = HIGH - LOW; HIGH = LOW + (unsigned int)((range * cmax) >> PRECISION); LOW += (unsigned int)((range * cmin) >> PRECISION) + 1; if ((HIGH - LOW) < (unsigned int)(1<> 24)); LOW <<= 8; HIGH = (HIGH << 8) | 0xff; } } int RangeMapper::decodeRange(int* cm, int size) { long long range = MID - LOW; range <<= PRECISION; int mid_point = (unsigned int)(range / (long long)(HIGH - LOW)); //either this function //int index = findInterval(cm, size + 1, mid_point); //or following fragment int index = -1; int i = 0; do { if (mid_point >= cm[i] && mid_point < cm[i+1]) { index = i; break; } ++i; }while (i < size); if (index < 0) return index; range = HIGH - LOW; HIGH = LOW + (unsigned int)((range * cm[index+1]) >> PRECISION); LOW += (unsigned int)((range * cm[index]) >> PRECISION) + 1; if ((HIGH - LOW) < (unsigned int)(1<> 24)); LOW <<= 8; } } void RangeMapper::init() { for (int i=0; i<4; ++i) { MID = (MID << 8) + readByte(); } } void makeRanges(int* data, int data_size, int* cm, int alphabet_size, int PRECISION) { //we make ranges for data int* freq = (int*)malloc(alphabet_size * sizeof(int)); memset(freq, 0x00, alphabet_size * sizeof(int)); for (int i=0; i=0; --i) { if (cm[i] >= cm[i+1]) cm[i] = cm[i+1] - 1; } //end of correction if (freq) free(freq); } int main() { //parameters that can be changed when understood const int alphabet_size = 45; //efficiency is changed with the size of alphabet. int data_size = 4000000; int PRECISION = 10; //precision is related to alphabet size, the larger the //alphabet the larger precision is necessary. //////////////////////////////// //we make original data printf("Wait, the data is being made ...\n"); int* data = (int*)malloc(data_size * sizeof(int)); double entropy = makeData(data, data_size, 0, alphabet_size - 1, 10); //data is made //make ranges const int cm_size = alphabet_size + 2; int cm[cm_size]; makeRanges(data, data_size, cm, alphabet_size, PRECISION); printf("Ranges made for the data\n\n"); for (int i=0; iencodeRange(cm[data[i]], cm[data[i]+1]); } rm_encode->encodeRange(cm[alphabet_size], cm[alphabet_size+1]); //end of data marker rm_encode->flush(); delete rm_encode; clock_t end_encoding = clock(); printf("Time for encoding %2.3f sec.\n", (double)(end_encoding - start_encoding)/CLOCKS_PER_SEC); int actual_size = current_byte; //end encoding //decode clock_t start_decoding = clock(); bool isOK = true; RangeMapper* rm_decode = new RangeMapper(PRECISION); current_byte = 0; rm_decode->init(); int i=0; while (true) { int index = rm_decode->decodeRange(cm, alphabet_size+1); if (index == alphabet_size) break; //end of data marker if (index != data[i++]) { printf("Data mismatch %d element %d\n", i, index); isOK = false; break; } } if (i != data_size) isOK = false; delete rm_decode; clock_t end_decoding = clock(); printf("Time for decoding %2.3f sec.\n\n", (double)(end_decoding - start_decoding)/CLOCKS_PER_SEC); //end decoding printf("Expected size %d\n", (int)(entropy * (double)(data_size) / 8.0)); printf("Actual size %d, among them %d extra bytes to correct the range\n\n", actual_size, bytecounter); if (isOK) printf("Round trip is OK\n"); else printf("Data mismatch\n"); if (g_buffer) free(g_buffer); if (data) free(data); return 0; }