问答文章1 问答文章501 问答文章1001 问答文章1501 问答文章2001 问答文章2501 问答文章3001 问答文章3501 问答文章4001 问答文章4501 问答文章5001 问答文章5501 问答文章6001 问答文章6501 问答文章7001 问答文章7501 问答文章8001 问答文章8501 问答文章9001 问答文章9501

用python编写一个字符串压缩程序(要求为自适应模型替代法)

发布网友 发布时间:2022-05-07 07:39

我来回答

1个回答

热心网友 时间:2022-04-18 09:42

你好,下面是LZ777自适应压缩算法的一个简单实现,你可以看看
import math
from bitarray import bitarray
class LZ77Compressor:
"""
A simplified implementation of the LZ77 Compression Algorithm
"""
MAX_WINDOW_SIZE = 400
def __init__(self, window_size=20):
self.window_size = min(window_size, self.MAX_WINDOW_SIZE)
self.lookahead_buffer_size = 15 # length of match is at most 4 bits
def compress(self, input_file_path, output_file_path=None, verbose=False):
"""
Given the path of an input file, its content is compressed by applying a simple
LZ77 compression algorithm.
The compressed format is:
0 bit followed by 8 bits (1 byte character) when there are no previous matches
within window
1 bit followed by 12 bits pointer (distance to the start of the match from the
current position) and 4 bits (length of the match)

If a path to the output file is provided, the compressed data is written into
a binary file. Otherwise, it is returned as a bitarray
if verbose is enabled, the compression description is printed to standard output
"""
data = None
i = 0
output_buffer = bitarray(endian='big')
# read the input file
try:
with open(input_file_path, 'rb') as input_file:
data = input_file.read()
except IOError:
print 'Could not open input file ...'
raise
while i < len(data):
#print i
match = self.findLongestMatch(data, i)
if match:
# Add 1 bit flag, followed by 12 bit for distance, and 4 bit for the length
# of the match
(bestMatchDistance, bestMatchLength) = match
output_buffer.append(True)
output_buffer.frombytes(chr(bestMatchDistance >> 4))
output_buffer.frombytes(chr(((bestMatchDistance & 0xf) << 4) | bestMatchLength))
if verbose:
print "<1, %i, %i>" % (bestMatchDistance, bestMatchLength),
i += bestMatchLength
else:
# No useful match was found. Add 0 bit flag, followed by 8 bit for the character
output_buffer.append(False)
output_buffer.frombytes(data[i])

if verbose:
print "<0, %s>" % data[i],
i += 1
# fill the buffer with zeros if the number of bits is not a multiple of 8
output_buffer.fill()
# write the compressed data into a binary file if a path is provided
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(output_buffer.tobytes())
print "File was compressed successfully and saved to output path ..."
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
# an output file path was not provided, return the compressed data
return output_buffer

def decompress(self, input_file_path, output_file_path=None):
"""
Given a string of the compressed file path, the data is decompressed back to its
original form, and written into the output file path if provided. If no output
file path is provided, the decompressed data is returned as a string
"""
data = bitarray(endian='big')
output_buffer = []
# read the input file
try:
with open(input_file_path, 'rb') as input_file:
data.fromfile(input_file)
except IOError:
print 'Could not open input file ...'
raise
while len(data) >= 9:
flag = data.pop(0)
if not flag:
byte = data[0:8].tobytes()
output_buffer.append(byte)
del data[0:8]
else:
byte1 = ord(data[0:8].tobytes())
byte2 = ord(data[8:16].tobytes())
del data[0:16]
distance = (byte1 << 4) | (byte2 >> 4)
length = (byte2 & 0xf)
for i in range(length):
output_buffer.append(output_buffer[-distance])
out_data = ''.join(output_buffer)
if output_file_path:
try:
with open(output_file_path, 'wb') as output_file:
output_file.write(out_data)
print 'File was decompressed successfully and saved to output path ...'
return None
except IOError:
print 'Could not write to output file path. Please check if the path is correct ...'
raise
return out_data

def findLongestMatch(self, data, current_position):
"""
Finds the longest match to a substring starting at the current_position
in the lookahead buffer from the history window
"""
end_of_buffer = min(current_position + self.lookahead_buffer_size, len(data) + 1)
best_match_distance = -1
best_match_length = -1
# Optimization: Only consider substrings of length 2 and greater, and just
# output any substring of length 1 (8 bits uncompressed is better than 13 bits
# for the flag, distance, and length)
for j in range(current_position + 2, end_of_buffer):
start_index = max(0, current_position - self.window_size)
substring = data[current_position:j]
for i in range(start_index, current_position):
repetitions = len(substring) / (current_position - i)
last = len(substring) % (current_position - i)
matched_string = data[i:current_position] * repetitions + data[i:i+last]
if matched_string == substring and len(substring) > best_match_length:
best_match_distance = current_position - i
best_match_length = len(substring)
if best_match_distance > 0 and best_match_length > 0:
return (best_match_distance, best_match_length)
return None
声明声明:本网页内容为用户发布,旨在传播知识,不代表本网认同其观点,若有侵权等问题请及时与本网联系,我们将在第一时间删除处理。E-MAIL:11247931@qq.com
四开大门要多少宽度 四开大门尺寸多少 大门四开门尺寸是多少 秦昊新剧《亲爱的小孩》网上褒贬不一,你觉得这部剧是否符合现实呢? 《亲爱的小孩》妻子产后失禁,丈夫嫌弃反复洗手,你怎么看? 求推荐一个安卓手机文档管理工具吧,手机实在太乱了,也没有介绍的? 手机文件管理器哪个好用 隐私文件夹app哪个好用 泰山茶年产600吨品牌近40个销售额达5亿元 山东茶叶品牌 山东茶叶厂家 山东有哪些茶叶品牌【品牌库】 从广延路到真如中学如何走? ...使用LZ77算法进行无损压缩,写出这个字符串的编码结果? 兵马俑门票价是多少? 如何把16位数据压缩为8位? 益生菌功能有什么? 益生菌的作用及功能有哪些? 益生菌有什么用啊?益生菌的作用是什么? ps128菌对沉迷玩手机少年有改变作用吗 怎样钓土鲮 夜间钓土鲮鱼用什么饵料最好钓 秋季晚上钓土鲮技巧 请问钓土鲮的技巧 钓土鲮鱼最佳饵料配方是什么?专钓 土鲮鱼用什么饵最好钓 如何搭配土鲮鱼饵料 水库如何钓大土鲮? 土鲮鱼水面翻滚会开口吗 2013世界RPG网游排行 请问经济管理专业的专升本要考哪几门课程?谢谢 经济管理专业的报考专升本要考那些科目 经济管理专升本考几门课程? 兵马俑门票多少元?详细一点。 我们同学之间传文件常常将文件压缩,我不太明白这中的道例,谁能告诉我?这个真能提高传输速度吗? 7Z的压缩算法 i did manage to do sth中 did和manage都是动词 为什么能连用 表示什么意思呢 关于竹子生长励志的句子 高中作文《毛竹的生长过程》800字 为什么星际穿越里会出现23年相差 星际穿越 从两年变成二十三年 星际穿越在什么地方浪费了23年 星际穿越 23年后,墨菲生日那天给爸爸视频留言,那时布兰德博士在后面,为什么说从来没见过墨菲? 《星际穿越》不靠谱? 在那个星球上1小时等于地球上7年,当男主人公回到飞船上时,时间已经过了23年。日语怎么说。 星际穿越里一小时七年怎么解释黑人比他们老的快,是因为人体细胞长得慢了吗 18马力柴油拖拉机多少斤 12马力小四轮拖拉机有多重 拖拉机多重? 单缸拖拉机整车自重有多少斤 石家庄12拖拉机机头有多重 15马力的拖拉机自重是多少吨,加上车斗总重量是多少吨? 中原18拖拉机有多重