// zip78.cpp : Defines the entry point for the console application. // (C) 2010, Andrew Polar under GPL ver. 3. // Released May, 2010 // // LICENSE // // This program is free software; you can redistribute it and/or // modify it under the terms of the GNU General Public License as // published by the Free Software Foundation; either version 3 of // the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // General Public License for more details at // Visit . // // This program is called zip78 because it could appear in 1978. It is written // on technologies basically known in 1978. Some implementation details // of LZ77, however, can be covered by US Patent 5,126,739, filed on Nov. 27, // 1990 and be presumably expired by Nov. 27, 2010. #include #include #include #include #include #define USE_PREDICTOR //can not be changed #define END_MARKER 257 //can be changed if understood #define CONTEXT_BIN_LEN 15 #define HASH_LOOKUP_BIT_LENGTH 19 #define MAX_OFFSET_BIT_LEN 17 //this is trade between time and compression ratio #define SUFFICIENT_MATCH 64 #define MINIMUM_MATCH 6 //this parameter needs to be flexible and decided on for every couple #define MAX_STEPS 128 //Do not change following constants, unless you are an expert int HASH_MASK = (1<> (bit_counter - max_len)) & ((1 << max_len)-1); } static __inline void writeBits(int codeLen, int code) { bit_holding_buffer64 <<= codeLen; bit_holding_buffer64 |= code; bit_counter += codeLen; if (bit_counter > 31) { unsigned char offset = bit_counter - 32; global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> (offset + 24)); global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> (offset + 16)); global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> (offset + 8)); global_pointer_to_data_buffer[current_byte++] = unsigned char(bit_holding_buffer64 >> offset); bit_counter -= 32; } } //end input output //returns the bitlength of passed number template static __inline int bitLen(AllIntegers n) { if (n < 0) n = -n; int cnt = 1; while ((n >>= 1) > 0) ++cnt; return cnt; } //this hash is taken from quicklz, but it can be changed into something else //it is not very sensitive to particular implementation static __inline int hash_function(int value) { return ((value >> 9) ^ (value >> 13) ^ value) & HASH_MASK; } bool compress_lz77(const unsigned char* data, const int data_size, int* rtn_data, int& rtn_data_size) { int* last_position_lookup = (int*)malloc((1< prev_position) { if (bOccurredOnce) break; bOccurredOnce = true; } if (bOccurredOnce) { if (position <= index-2) break; } prev_position = position; //end condition block int word_index = (data[index-2] <<16) | (data[index-1] <<8) | data[index]; int word_position = (data[position]<<16) | (data[position+1]<<8) | data[position+2]; count = 2; offset = index-2 - position; if (offset >= MAXIMUM_OFFSET) break; if (word_index == word_position) { while (true) { if (data[index-2 + count] == data[position + count]) ++count; else break; if (count >= 0xff) break; if ((index-2 + count) == data_size) break; } } if (count > max_count) { max_count = count; max_offset = offset; } if (max_count > SUFFICIENT_MATCH) break; if (step++ >= MAX_STEPS) break; } if (max_count >= MINIMUM_MATCH ) { //output rtn_data_size -= 2; rtn_data[rtn_data_size++] = 256; rtn_data[rtn_data_size++] = ((max_count - MINIMUM_MATCH) << MAX_OFFSET_BIT_LEN) | max_offset; index += max_count - 3; last_couple_index = index; value = (data[index-2]<<16) | (data[index-1]<<8) | data[index]; } else { //output literal rtn_data[rtn_data_size++] = data[index]; } ++index; } while (index < data_size); if (self_addressed_dictionary) free(self_addressed_dictionary); if (last_position_lookup) free(last_position_lookup); return true; } //This is generalization for Move-To-Front algorithm. class Predictor { public: Predictor(); ~Predictor(); void SetZeros(); bool Initialize(unsigned char memory = 8); void ReleaseMemory(); unsigned char deflate(unsigned char symbol); unsigned char inflate(unsigned char position); unsigned char getSymbol(unsigned char symbol); bool m_bInitialized; private: void ResortSymbols(unsigned char position); void RescaleFrequencies(); unsigned char m_memory; unsigned char *m_symbol, *m_frequency; }; Predictor::Predictor() { } Predictor::~Predictor() { ReleaseMemory(); } void Predictor::ReleaseMemory() { if (m_symbol) free(m_symbol); if (m_frequency) free(m_frequency); } void Predictor::SetZeros() { m_symbol = m_frequency = 0; m_bInitialized = false; } bool Predictor::Initialize(unsigned char memory) { m_symbol = (unsigned char*)malloc(256); m_frequency = (unsigned char*)malloc(256); if (m_symbol == 0 || m_frequency == 0) { printf("Failed to allocate memory.\n"); return false; } for (int i=0; i<256; ++i) { m_frequency[i] = 0; m_symbol[i] = i; } m_memory = memory; m_bInitialized = true; return m_bInitialized; } unsigned char Predictor::deflate(unsigned char symbol) { unsigned char cnt = 0; while (m_symbol[cnt] != symbol) {++cnt;} ResortSymbols(cnt); RescaleFrequencies(); return cnt; } unsigned char Predictor::getSymbol(unsigned char symbol) { unsigned char cnt = 0; while (m_symbol[cnt] != symbol) {++cnt;} return cnt; } unsigned char Predictor::inflate(unsigned char position) { unsigned char symbol = m_symbol[position]; ResortSymbols(position); RescaleFrequencies(); return symbol; } void Predictor::ResortSymbols(unsigned char position) { ++m_frequency[position]; if (position == 0) { return; } int position_to_switch = -1; for (int i=position-1; i>=0; --i) { if (m_frequency[position] > m_frequency[i]) { position_to_switch = i; } else { break; } } if (position_to_switch < 0) { return; } unsigned char buf = m_symbol[position_to_switch]; m_symbol[position_to_switch] = m_symbol[position]; m_symbol[position] = buf; // buf = m_frequency[position_to_switch]; m_frequency[position_to_switch] = m_frequency[position]; m_frequency[position] = buf; } void Predictor::RescaleFrequencies() { if (m_frequency[0] < m_memory) return; for (int i=0; i<256; ++i) { if (m_frequency[i] > 0) m_frequency[i] >>= 1; else break; } } //end predictor class //this is entropy coder class Prefixer { public: Prefixer(); ~Prefixer(); bool Initialize (short AlphabetSize, bool isDecoding); void ReleaseMemory(); void EncodeSymbol (short value); short DecodeSymbol (); void FlushBits (); short GetSymbol (short value); private: short InvertSymbol (short value); void UpdateTables (short value); void BuildTree (bool isDecoding); bool allocateDecodingTables(); void freeDecodingTables(); int *m_fragment_len, *m_shift, *m_lowest_code, *m_offset, *m_code; short *m_frequency, *m_symbol, *m_inverse; unsigned char *m_codeLen; int m_alphabet_size, m_max_code_len, m_tableSize, m_counter, m_size_of_adaptation; int m_max_frequency_limit, m_min_size_of_adaptation, m_max_size_of_adaptation; }; bool Prefixer::Initialize(short AlphabetSize, bool isDecoding) { m_alphabet_size = AlphabetSize; m_frequency = m_symbol = m_inverse = 0; m_frequency = (short*) malloc(m_alphabet_size * sizeof(*m_frequency)); m_symbol = (short*) malloc(m_alphabet_size * sizeof(*m_symbol)); m_inverse = (short*) malloc(m_alphabet_size * sizeof(*m_inverse)); m_codeLen = (unsigned char*) malloc(m_alphabet_size * sizeof(*m_codeLen)); m_code = (int*) malloc(m_alphabet_size * sizeof(*m_code)); if (m_frequency == 0) return false; if (m_symbol == 0) return false; if (m_inverse == 0) return false; if (m_codeLen == 0) return false; if (m_code == 0) return false; for (int i=0; i= m_max_frequency_limit) { for (int j=0; j>= 1; } } if (m_symbol[value] == 0) { return; } short npos = -1; short pos = m_symbol[value]; short freq = m_frequency[value]; for (short i=pos-1; i>=0; --i) { if (freq > m_frequency[m_inverse[i]]) { npos = i; } else { break; } } if (npos < 0) { return; } short buf = m_symbol[m_inverse[pos]]; m_symbol[m_inverse[pos]] = m_symbol[m_inverse[npos]]; m_symbol[m_inverse[npos]] = buf; buf = m_inverse[pos]; m_inverse[pos] = m_inverse[npos]; m_inverse[npos] = buf; } short Prefixer::GetSymbol(short value) { return m_symbol[value]; } short Prefixer::InvertSymbol(short value) { return m_inverse[value]; } void Prefixer::BuildTree(bool isDecoding) { short* tmpFreq = (short*)malloc(m_alphabet_size * sizeof(*tmpFreq)); for (int i=0; i>= 1) > tmpFreq[i]) { ++m_codeLen[i]; } if (m_codeLen[i] > m_max_code_len) { m_max_code_len = m_codeLen[i]; } } //find codes m_code[0] = 0; for (int i=1; i= m_size_of_adaptation) { BuildTree(false); m_counter = 0; if (m_size_of_adaptation < m_max_size_of_adaptation) m_size_of_adaptation <<= 1; } } void Prefixer::FlushBits() { if (bit_counter > 0) { writeBits(32-bit_counter, 0); } } short Prefixer::DecodeSymbol() { int address = readBits(m_max_code_len); short symbol = 0; bool isFound = false; for (int i=0; i> m_shift[i]) - m_lowest_code[i]; if (pos < m_fragment_len[i]) { symbol = InvertSymbol(pos + m_offset[i]); bit_counter -= m_max_code_len - m_shift[i]; isFound = true; break; } } if (!isFound) { printf("Error decoding\n"); } UpdateTables(symbol); ++m_counter; if (m_counter >= m_size_of_adaptation) { BuildTree(true); m_counter = 0; if (m_size_of_adaptation < m_max_size_of_adaptation) m_size_of_adaptation <<= 1; } return symbol; } int Encode(unsigned char* data, int data_size) { //make LZ77 encoding int lzResult_size = data_size; int* lzResult = (int*)malloc(lzResult_size * sizeof(*lzResult)); if (!compress_lz77(data, data_size, lzResult, lzResult_size)) { if (lzResult) free(lzResult); return -1; } //end LZ77 encoding #ifdef USE_PREDICTOR //create predictor instances const int context_size = 1<> MAX_OFFSET_BIT_LEN) + MINIMUM_MATCH; int offset = lzResult[i+1] & ((1<> 8); offsetlower.EncodeSymbol(offset & 0xff); i += 1; position_in_data += length; context = data[position_in_data-4-offset]; context <<= 8; context |= data[position_in_data-3-offset]; context <<= 8; context |= data[position_in_data-2-offset]; context <<= 8; context |= data[position_in_data-1-offset]; context &= STAT_BIT_MASK; } } literals.EncodeSymbol(END_MARKER); literals.FlushBits(); #ifdef USE_PREDICTOR if (predictor) { for (int i=0; i