using System; using System.Collections.Generic; using System.Linq; using System.Text; namespace rangemapper_csh { public class DataStorage { public byte[] data = null; private int nDeclared, nCurrent; public DataStorage(int size) { nDeclared = size; data = new byte[nDeclared]; Array.Clear(data, 0, data.Length); SetPointer(); } public int nSizeTaken { get { return nCurrent; } } public void SetPointer() { nCurrent = 0; } public void WriteByte(byte b) { data[nCurrent] = b; nCurrent++; if (nCurrent >= nDeclared) { Console.WriteLine("Error, erray is too short"); Environment.Exit(1); } } public byte ReadByte() { byte b = data[nCurrent]; nCurrent++; return b; } } public static class Auxiliary { private static double calculate_entropy(int[] data, int data_size) { int min, max, counter, buffer_size; double entropy; double log2 = System.Math.Log(2.0); double prob; min = data[0]; max = data[0]; for (counter = 0; counter < data_size; ++counter) { if (data[counter] < min) min = data[counter]; if (data[counter] > max) max = data[counter]; } buffer_size = max - min + 1; int[] buffer = new int[buffer_size]; Array.Clear(buffer, 0, buffer.Length); for (counter = 0; counter < data_size; ++counter) { ++buffer[data[counter] - min]; } entropy = 0.0; for (counter = 0; counter < buffer_size; ++counter) { if (buffer[counter] > 0) { prob = (double)(buffer[counter]); prob /= (double)(data_size); entropy += System.Math.Log(prob) / log2 * prob; } } entropy *= -1.0; return entropy; } public static double makeData(ref int[] data, int data_size, int min, int max, int redundancy_factor) { int counter, cnt, high, low; double buf; if (redundancy_factor <= 1) redundancy_factor = 1; if (max <= min) max = min + 1; Random random = new Random(); int range = max - min + 1; for (counter = 0; counter < data_size; counter++) { buf = 0; for (cnt = 0; cnt < redundancy_factor; cnt++) { buf += (double)(random.Next(range)); } data[counter] = ((int)(buf)) / redundancy_factor; } low = data[0]; high = data[0]; for (counter = 0; counter < data_size; counter++) { if (data[counter] > high) high = data[counter]; if (data[counter] < low) low = data[counter]; } for (counter = 0; counter < data_size; counter++) { buf = (double)(data[counter] - low); buf /= (double)(high - low); buf *= (double)(max - min); buf = System.Math.Round(buf); data[counter] = (int)(buf) + min; } return calculate_entropy(data, data_size); } public static void makeRanges(ref int[] data, int data_size, ref int[] cm, int alphabet_size, int RANGE_SIZE_IN_BITS) { int[] freq = new int[alphabet_size]; Array.Clear(freq, 0, freq.Length); for (int i=0; i=0; --i) { if (cm[i] >= cm[i+1]) cm[i] = cm[i+1] - 1; } //end of correction } public static void makeLookupTable(ref int[] cm, int size, ref short[] lookup) { for (int i = 0; i < size - 1; ++i) { for (int j = cm[i]; j < cm[i + 1]; ++j) { lookup[j] = (short)(i); } } } public static int findInterval(int[] cm, int size, int point) { int index = -1; int left = 0; int right = size-2; int cnt = 0; while (true) { int mid = (right + left)>>1; if (point >= cm[mid] && point < cm[mid+1]) { index = mid; break; } if (point >= cm[mid+1]) left = mid + 1; else right = mid; if (cnt++ >= size) break; } return index; } } public class RangeMapper { private ulong LOW, HIGH, MID, RANGE_LIMIT, MASK; private byte RANGE_SIZE, SHIFT, BYTES_IN_BUFFER; private DataStorage ds = null; public RangeMapper(int range_size, ref DataStorage DS) { LOW = 0; MID = 0; RANGE_SIZE = (byte)range_size; RANGE_LIMIT = ((ulong)(1) << RANGE_SIZE); BYTES_IN_BUFFER = (byte)((64 - RANGE_SIZE) / 8); SHIFT = (byte)((BYTES_IN_BUFFER - 1) * 8); MASK = ((ulong)(1) << (BYTES_IN_BUFFER * 8)) - 1; HIGH = MASK; ds = DS; } private void updateModel(int cmin, int cmax) { ulong range = HIGH - LOW; HIGH = LOW + ((range * (ulong)(cmax)) >> RANGE_SIZE); LOW += ((range * (ulong)(cmin)) >> RANGE_SIZE) + 1; } public int getMidPoint() { return (int)(((MID - LOW) << RANGE_SIZE) / (HIGH - LOW)); } public void encodeRange(int cmin, int cmax) { updateModel(cmin, cmax); if ((HIGH - LOW) < RANGE_LIMIT) HIGH = LOW; //preventing narrowing range while (((LOW ^ HIGH) >> SHIFT) == 0) { ds.WriteByte((byte)(LOW >> SHIFT)); LOW <<= 8; HIGH = (HIGH << 8) + 255; } HIGH &= MASK; LOW &= MASK; } public void decodeRange(int cmin, int cmax) { updateModel(cmin, cmax); if ((HIGH - LOW) < RANGE_LIMIT) HIGH = LOW; while (((LOW ^ HIGH) >> SHIFT) == 0) { LOW <<= 8; HIGH = (HIGH << 8) + 255; MID = (MID << 8) + ds.ReadByte(); } HIGH &= MASK; LOW &= MASK; MID &= MASK; } public void flush() { LOW += 1; for (int i = 0; i < BYTES_IN_BUFFER; i++) { ds.WriteByte((byte)(LOW >> SHIFT)); LOW <<= 8; } } public void init() { for (int i = 0; i < BYTES_IN_BUFFER; ++i) { MID = (MID << 8) + ds.ReadByte(); } } } class Program { static void Main(string[] args) { const int alphabet_size = 317; int data_size = 4000000; int RANGE_SIZE_IN_BITS = 16; int[] data = new int[data_size]; Console.WriteLine("Wait, generating data ..."); double entropy = Auxiliary.makeData(ref data, data_size, 0, alphabet_size - 1, 10); int expected_size = (int)(entropy * (double)(data_size) / 8.0); Console.WriteLine("Expected size of compressed data: " + expected_size.ToString()); //make ranges for data const int cm_size = alphabet_size + 2; int[] cm = new int[cm_size]; Auxiliary.makeRanges(ref data, data_size, ref cm, alphabet_size, RANGE_SIZE_IN_BITS); //ranges are completed //encode DataStorage ds = new DataStorage(expected_size * 2); DateTime TimeEncodeStart = DateTime.Now; RangeMapper rm_encode = new RangeMapper(RANGE_SIZE_IN_BITS, ref ds); for (int i=0; i