# Text compressor / decompressor built on a character trie and a decimal
# arithmetic coder.
#
# Mode selection: the script reads 'enwik7.txt'. If that text is exactly 14
# characters long the run is a decode (digits are read from 'comp.txt' and the
# regenerated text is written to 'decomp.txt'); otherwise the text is encoded
# and the resulting digit string is written to 'comp.txt'.
#
# NOTE(review): the block structure below was reconstructed from a
# whitespace-collapsed source. Three nesting choices were ambiguous and should
# be confirmed against the original file: (1) the `else` in the trie walk is
# attached to the `!= 14` test, (2) `sum2 = sum(j[1])` runs once after the
# clamping loop, (3) the final output write happens after the main loop.
#
# NOTE(review): 'ansi' is documented as an alias of the Windows-only 'mbcs'
# codec, so the script is expected to fail with LookupError elsewhere. It is
# left unchanged because swapping the codec could alter the decoded text.
import math, copy, random

# `input` shadows the builtin; the name is kept because it is module state
# that the decode branch appends to.
with open('enwik7.txt', 'r', encoding='ansi') as source_file:
    input = source_file.read()
dodecode = 1 if len(input) == 14 else 0
if dodecode == 1:
    # The compressed file holds one big decimal integer; keep it as a digit string.
    with open('comp.txt', 'rb') as comp_file:
        decode = str(int(comp_file.read()))
else:
    decode = ''

# The trie is stored flat, three slots per node:
#   tree[node]     -> string of child characters
#   tree[node + 1] -> list of counts, parallel to that string
#   tree[node + 2] -> list of child node indices, parallel to that string
tree = ['', [], []]
low = 1          # running bound of the coder interval; walked downwards per symbol
de = 0           # number of digits of `decode` already consumed
middle = 1       # current interval width
compressed = ''  # emitted decimal digits

for count2 in range(10000000):
    # 15-character window: 14 characters of context plus the character to code.
    window = input[count2: count2 + 15]
    # Recent history (up to 280 characters back) used for the recency boost below.
    energy = input[count2 - 280 if count2 > 279 else 0 : count2 + 14]
    # predictions[0] is a fixed alphabet with a tiny uniform weight; context
    # predictions are inserted at index 1, so the longest context ends up first.
    predictions = [[""" abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789?!<>=":/.-\n_#[]{}*'|&;,()骢 ³â€”~“¡%لا·ˆŠÐ½°Ñ…¸¼¯Œ²¦Ûר›™–ã‚ƒ­ºì•ë˜í¤à¥±—´¹ÅŸæåœä‰Ä«+žá£è‡é箁Âµ¾¬‘¶‹ŽÉÊ\†$šÏÎ Ú’Ç@ÒÖÌêË^`ÆÔÕÓ""", [0.00001] * 200]]
    if count2 != 0:
        # For each context length z, walk window[14 - z:14] down the trie and
        # collect the [children, counts] pair of the node that is reached.
        for z in range(15):
            node = 0
            for zz in range(15):
                if (14 - z) + zz != 14:
                    char_index = tree[node].find(window[(14 - z) + zz]) + 1
                    node = (tree[(node + 2)][char_index - 1])
                    # Stop on an unseen character or on a node without children.
                    if char_index == 0 and (14 - z) + zz != 14 or len(tree[node]) == 0: break
                else:
                    predictions.insert(1, tree[node: node + 2])
                    break
            # An unseen character ends the search: longer contexts cannot match either.
            if char_index == 0 and (14 - z) + zz != 14: break

    # Mix all predictions into one distribution: predict[0] holds the symbols,
    # predict[1] the parallel probabilities.
    predict = ['', []]
    remaining = 1  # probability mass not yet handed to a prediction set
    for q in range(len(predictions)):
        j = [copy.copy(predictions[q][0]), copy.copy(predictions[q][1])]
        zb = (len(predictions) - 1) - q  # context length of prediction set q
        if q != 0 and zb < 5 and zb > -1:
            # Recency boost: scan the history for earlier occurrences of the
            # current zb-character context and credit the character that
            # followed each one, weighted by position n and by count2.
            eng = ['', []]
            for n in range(len(energy) - zb):
                if energy[n:n+zb] == window[14-zb:14] or zb == 0:
                    y = count2 / [1800, 37000, 50000, 92000, 130000][zb] * (50 if n > 291 else 100 if n > 260 else 86 if n > 250 else 71 if n > 240 else 60 if n > 227 else 42 if n > 200 else 28)
                    eng_index = eng[0].find(energy[n + zb]) + 1
                    if eng_index == 0:
                        eng[0] += (energy[n + zb])
                        eng[-1].append(y)
                    else:
                        eng[-1][eng_index - 1] += y
            # Only characters already present in this prediction set are boosted.
            for c in range(len(eng[0])):
                j_index = j[0].find(eng[0][c]) + 1
                if j_index != 0:
                    j[1][j_index - 1] += eng[1][c]
        lj, sum2 = len(j[1]), sum(j[1])
        if q != 0:
            # Clamp counts that are far above the set's mean count.
            for o in range(lj):
                freq = j[1][o] / (sum2 / lj)
                freq = 4.4 if freq > 8 else 3.2 if freq > 4 else 2.2 if freq > 2 else freq
                j[1][o] = (freq * (sum2 / lj))
            sum2 = sum(j[1])
        # Map the total count (scaled by the number of symbols) to a confidence
        # w through a piecewise-linear table.
        w = sum2 / (6.6 * lj * [0.7, 0.99, 0.99][lj - 1] if lj < 4 else lj * 10.8)
        w = 0.96 if w > 20 else (((w-10)/10)*0.06)+0.9 if w > 10 else (((w-3)/7)*0.05)+0.85 if w > 3 else (((w-1)/2)*0.16)+0.69 if w > 1 else (((w-0.8)/0.2)*0.07)+0.62 if w > 0.8 else (((w-0.6)/0.2)*0.04)+0.58 if w > 0.6 else (((w-0.4)/0.2)*0.07)+0.47 if w > 0.4 else w
        # Share of the remaining mass given to this set, scaled per context length.
        _25ofRoof = (w * [0.9, 0.9, 0.87, 0.83, 0.79, 0.73, 0.57, 0.46, 0.36, 0.31, 0.3, 0.28, 0.33, 0.61, 0.59, 0.53][len(predictions) - 1 - q]) * remaining
        remaining -= _25ofRoof
        for g in range(lj):
            k = j[0][g]
            freq = j[1][g]
            char_index = predict[0].find(k) + 1
            if char_index == 0:
                predict[0] = predict[0] + k
                predict[1].append((freq / sum2) * _25ofRoof)
            else:
                predict[1][char_index - 1] += ((freq / sum2) * _25ofRoof)

    # Spread whatever mass is left evenly so the probabilities sum to 1.
    summ = 1 - sum(predict[1])
    for n in range(len(predict[1])):
        predict[1][n] += summ / len(predict[1])

    # Next 16 unread digits of the compressed stream, read as a fraction
    # (an empty `decode` yields float('0.') == 0.0 when encoding).
    decodepart = float('0.' + str(decode[0 + de:16 + de]))
    for m in range(len(predict[0])):
        # 'generat' == 'generate' compares two different literals and is always
        # False as written, so the random-sampling branch is switched off.
        x = random.choices(predict[0], weights=(predict[1]), k=1) if (dodecode == 1 and 'generat' == 'generate') else predict[0][m]
        low -= predict[1][m] * middle
        if dodecode == 0 and x == window[-1]: break
        elif dodecode == 1 and (decodepart > low or 'generat' == 'generate'):
            # Decoding: the symbol whose sub-interval contains the fraction is
            # appended to both the window and the regenerated text.
            window += x[0]
            input += x[0]
            break

    # Emit the leading digits shared by both interval bounds, then rescale the
    # interval by the same power of ten.
    char_location = 0
    high = low + predict[1][m] * middle
    # NOTE(review): the bound check uses len(str(low)), not the length of the
    # 18-decimal formatting, and it is evaluated after the indexing. Left as is
    # because it influences how many digits are emitted.
    while str(f'{low:.18f}')[char_location] == str(f'{high:.18f}')[char_location] and char_location != len(str(low)):
        char_location += 1
    compressed = str(compressed) + str(f'{low:.18f}')[2: char_location - 1]
    cl = 10 ** (char_location - 3)
    de += char_location - 3
    high = high * cl - math.floor(high * cl)
    low = low * cl - math.floor(low * cl)
    middle = high - low
    low = high

    # Insert the whole window into the trie, creating nodes for unseen
    # characters and incrementing counts along existing paths.
    node = 0
    for i in window:
        char_index = tree[node].find(i) + 1
        if char_index == 0:
            tree[node] = tree[node] + i
            tree[node + 1].append(1)
            tree[node + 2].append(len(tree))
            node = len(tree)
            tree.extend(('', [], []))
        else:
            tree[node + 1][char_index - 1] += 1
            node = tree[node + 2][char_index - 1]

# Write the result. Context managers make sure the output is flushed and
# closed regardless of how the interpreter finalises temporaries.
if dodecode == 0:
    print(len(compressed) / 2.40819)
    with open('comp.txt', 'wb') as comp_file:
        comp_file.write(bytes(str(int(compressed)), 'ansi'))
else:
    with open('decomp.txt', 'wb') as decomp_file:
        decomp_file.write(bytes(str(input), 'ansi'))