// RunCoder0.cpp Adaptive order 0 range coder // (C) 2009, Andrew Polar under GPL ver. 3. // Released Jan. 28, 2009. // Updated Feb. 23, 2010. // // LICENSE // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details at // Visit . // #include #include #include #include #include ///////Data generation functions/////////////////////////////// double entropy24(int* data, int size) { int max_size = 1 << 24; int min, counter; int* buffer; double entropy; double log2 = log(2.0); double prob; min = data[0]; for (counter=0; counter 0) { prob = (double)buffer[counter]; prob /= (double)size; entropy += log(prob)/log2*prob; } } entropy *= -1.0; for (counter=0; counter= 0.5) return ceil(x); else return floor(x); } double MakeData(int* data, int data_size, int min, int max, int redundancy_factor) { int counter, cnt, high, low; double buf; if (redundancy_factor <= 1) redundancy_factor = 1; if (max <= min) max = min + 1; srand((unsigned)time(0)); for (counter=0; counter high) high = data[counter]; if (data[counter] < low) low = data[counter]; } for (counter=0; counter>= 1) > 0) ++len; return len + 1; } static __inline void SafeProduct(int F, unsigned char L) { MANTISSA *= F; if ((MANTISSA & (1 << (MANTISSA_BIT_SIZE + L - 1))) > 0) { EXPONENT = L; } else { EXPONENT = L - 1; } MANTISSA >>= EXPONENT; } static __inline void readBits(int bits) { if (bits == 0) { return; } if ((bit_counter - bits) < 0) { bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= result[current_byte+0]; bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= result[current_byte+1]; bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= result[current_byte+2]; bit_holding_buffer64 <<= 8; bit_holding_buffer64 |= result[current_byte+3]; current_byte += 4; bit_counter += 32; } operation_buffer64 <<= bits; operation_buffer64 |= (bit_holding_buffer64 >> (bit_counter - bits)) & (0xffffffff >> (32 - bits)); bit_counter -= bits; } static __inline void writeBits(int bits) { if (bits == 0) { return; } bit_holding_buffer64 <<= bits; bit_holding_buffer64 |= (operation_buffer64 >> (64-bits)); bit_counter += bits; if (bit_counter > 31) { unsigned char offset = bit_counter - 32; result[current_byte+0] = unsigned char(bit_holding_buffer64 >> (offset + 24)); result[current_byte+1] = unsigned char(bit_holding_buffer64 >> (offset + 16)); result[current_byte+2] = unsigned char(bit_holding_buffer64 >> (offset + 8)); result[current_byte+3] = unsigned char(bit_holding_buffer64 >> offset); bit_counter -= 32; current_byte += 4; } } //Adaptive order 0 range coder class RunCoder { public: RunCoder(); ~RunCoder(); bool Initialize(int size); void Renormalize(bool isDecoding); void ProcessSymbol(int value); void FlushRemainedBuffer(); int DecodeSymbol(); void UpdateRawFrequency(int value, bool isDecoding); private: int* rawfrequency; int* normalfrequency; int* cumulative; int* symbol; unsigned char* bit_freq_len; void FreeResources(); int alphabetsize; int memorylimit; int MAX_INDEX; int nCounter; }; RunCoder::RunCoder() { rawfrequency = 0; normalfrequency = 0; cumulative = 0; symbol = 0; bit_freq_len = 0; } RunCoder::~RunCoder() { FreeResources(); } bool RunCoder::Initialize(int size) { if (rawfrequency == 0) { rawfrequency = (int*)malloc(size * sizeof(int)); if (rawfrequency == 0) return false; memset(rawfrequency, 0x00, size * sizeof(int)); } alphabetsize = size + 2; if (normalfrequency == 0) { normalfrequency = (int*)malloc(alphabetsize * sizeof(int)); if (normalfrequency == 0) return false; } if (bit_freq_len == 0) { bit_freq_len = (unsigned char*)malloc(alphabetsize * sizeof(unsigned char)); if (bit_freq_len == 0) return false; } if (cumulative == 0) { cumulative = (int*)malloc((alphabetsize+1)*sizeof(int)); //must be size+1 if (cumulative == 0) return false; } if (symbol == 0) { symbol = (int*)malloc((1 << MAX_CUMULATIVE_FREQUENCY_BIT_SIZE) * sizeof(int)); if (symbol == 0) return false; } operation_buffer64 = 0; bit_holding_buffer64 = 0; EXPONENT = 0; current_byte = 0; bit_counter = 0; nCounter = 0; MANTISSA = (1< MAX_INDEX) { //we need correction, Total should not be > max, that rarely or never happen. int i = 2; while (iTotal > MAX_INDEX) { if (normalfrequency[i] > 1) { --normalfrequency[i]; --iTotal; } ++i; if (i == alphabetsize) i = 2; } } //End of normalization //calculate bit length of every frequency for (int i=0; i= memorylimit) { for (int i=0; i>= 1; } } if (nCounter == LENGTH_OF_ADAPTATION_BUFFER) { Renormalize(isDecoding); nCounter = 0; } } void RunCoder::ProcessSymbol(int value) { writeBits(MAX_CUMULATIVE_FREQUENCY_BIT_SIZE - EXPONENT); operation_buffer64 <<= (MAX_CUMULATIVE_FREQUENCY_BIT_SIZE - EXPONENT); operation_buffer64 += cumulative[value+2] * MANTISSA; SafeProduct(normalfrequency[value+2], bit_freq_len[value+2]); } void RunCoder::FlushRemainedBuffer() { ProcessSymbol(-1); // //flushing 64 bits of operation_buffer64 writeBits(32); operation_buffer64 <<= 32; writeBits(32); operation_buffer64 <<= 32; //we write some remained bits delayed by writing if (bit_counter > 0) { writeBits(32-bit_counter); } } int RunCoder::DecodeSymbol() { readBits(MAX_CUMULATIVE_FREQUENCY_BIT_SIZE - EXPONENT); int ID = int(operation_buffer64/MANTISSA); if (ID > MAX_INDEX || ID < 0) { printf("Error in decoding, process terminated.\n"); return 1; //1 is termination flag the decoding will stop at this point. } operation_buffer64 -= MANTISSA * cumulative[symbol[ID]]; SafeProduct(normalfrequency[symbol[ID]], bit_freq_len[symbol[ID]]); return symbol[ID]; } //End RunCoder ///////////////////////////////////////////////////// template int Encode(int estimatedCompressedSize, int alphabet, T* data, int data_size) { clock_t start_encoding = clock(); int result_size = estimatedCompressedSize * 2 + 1000; result = (unsigned char*)malloc(result_size * sizeof(unsigned char)); RunCoder* encoder = new RunCoder(); if (encoder->Initialize(alphabet) == false) { printf("Initialization failed\n"); exit(1); } encoder->Renormalize(false); for (int i=0; iProcessSymbol(-2); } encoder->ProcessSymbol(data[i]); encoder->UpdateRawFrequency(data[i], false); } encoder->FlushRemainedBuffer(); delete encoder; clock_t end_encoding = clock(); printf("End encoding, time %2.3f s., size %d \n", (double)(end_encoding - start_encoding)/CLOCKS_PER_SEC, current_byte); return current_byte; } template int Decoding(int alphabet, T* data) { clock_t start_decoding = clock(); RunCoder* decoder = new RunCoder(); if (decoder->Initialize(alphabet) == false) { printf("Initialization failed\n"); exit(1); } decoder->Renormalize(true); int cnt = 0; int value; readBits(32); readBits(32); bool isOK = false; while (true) { value = decoder->DecodeSymbol(); if (value == 1) { isOK = true; break; } if (value == 0) {//0 is output to avoid buffer overflow printf("Zero flag recognized at %d\n", cnt); } else { decoder->UpdateRawFrequency(value-2, true); data[cnt++] = value-2; } } delete decoder; clock_t end_decoding = clock(); printf("End decoding, time %2.3f s.\n", (double)(end_decoding - start_decoding)/CLOCKS_PER_SEC); if (isOK == true) printf("Termination flag is recognized\n"); return cnt; } int getFileSize(char* fileName) { FILE* f; if ((f = fopen(fileName, "rb")) == NULL) return -1; fseek(f, 0, SEEK_END); int bytes = ftell(f); fclose(f); return bytes; } void main() { //printf("Making data for round trip...\n"); //int data_size = 4000000; //int max_data_value = 317; //int redundancy_factor = 10; //int* data = (int*)malloc(data_size * sizeof(int)); //double entropy = MakeData(data, data_size, 0, max_data_value, redundancy_factor); //int alphabet = max_data_value + 1; //entropy = entropy24(data, data_size); //int estimatedCompressedSize = int(entropy*double(data_size)/8.0)+1; //printf("Original size %d\n", data_size); //printf("Estimated size of compressed buffer %d\n", estimatedCompressedSize); //This is how to make round trip for a file instead of data generated above char fileName[] = "C:\\Development\\SAMPLES\\acrord32.exe"; int data_size = getFileSize(fileName); if (data_size < 0) { printf("File not found\n"); return; } unsigned char* data = (unsigned char*)malloc(data_size * sizeof(unsigned char)); int alphabet = 256; int estimatedCompressedSize = data_size; printf("estimatedCompressedSize %d\n", estimatedCompressedSize); FILE* f = fopen(fileName, "rb"); fread(data, 1, data_size, f); fclose(f); //////////////////////////////////////////////////////////////////////////////// //Encoding int result_size = Encode(estimatedCompressedSize, alphabet, data, data_size); //////////////////////////////////////////////////////////////////////////////// printf("Compression ratio %f\n", double(result_size)/double(data_size)); //////////////////////////////////////////////////////////////////////////////// //Decoding int test_size = data_size; int* test = (int*)malloc(test_size*sizeof(int)); test_size = Decoding(alphabet, test); //////////////////////////////////////////////////////////////////////////////// //Data test if (test_size != data_size) { printf("Size mismatch\n"); } else { bool isOK = true; for (int i=0; i