数据压缩原理与应用 | 实验一 | RLE压缩bmp文件

张开发
2026/4/10 14:05:03 15 分钟阅读

分享文章

数据压缩原理与应用 | 实验一 | RLE压缩bmp文件
一、RLE算法设计：本实验采用游程编码，将输入像素序列划分为重复块和原始块两种三元组。对于连续重复的像素值，三个单元分别表示块种类、重复次数、像素值；对于不连续重复的值，三个单元分别为块种类、列表长度、像素列表。二、文件格式设计：本实验采用一种自定义的.rle二进制文件格式，文件内容由文件头和数据块组两部分组成。文件头固定包含图像宽度、高度和位深，用于解码时恢复图像结构。数据部分由若干变长编码块顺序排列而成，编码块结构与RLE编码后的块结构相同。三、实验代码 读取bmp文件：def read_bmp(filename): with open(filename, rb) as f: header f.read(54) width struct.unpack(I, header[18:22])[0] height struct.unpack(I, header[22:26])[0] bitcount struct.unpack(H, header[28:30])[0] # 8bit BMP if bitcount 8: palette f.read(1024) header palette row_size (width 3) // 4 * 4 pixels [] rows [] for y in range(height): row f.read(row_size) rows.append(row[:width]) for row in reversed(rows): for x in range(width): pixels.append(row[x]) return pixels, width, height, bitcount, header # 24bit BMP elif bitcount 24: row_size (width * 3 3) // 4 * 4 pixels [] rows [] for y in range(height): row f.read(row_size) rows.append(row) for row in reversed(rows): for x in range(width): b row[x * 3] g row[x * 3 1] r row[x * 3 2] pixels.append((r, g, b)) return pixels, width, height, bitcount, header else: raise ValueError(Only 8bit or 24bit BMP supported)RLE编码过程def rle_encode(data): encoded [] i 0 n len(data) while i n: count 1 while i count n and data[i] data[i count] and count 255: count 1 #重复块 if count 1: encoded.append((0, count, data[i])) i count # 原始块 else: start i while i n - 1 and data[i] ! 
data[i 1] and (i - start 1) 255: i 1 #i的作用 i 1 length i - start encoded.append((1, length, data[start:start length])) return encodedRLE解码过程def rle_decode(encoded): pixels [] for item in encoded: type_, length, value item if type_ 0: pixels.extend([value] * length) else: pixels.extend(value) return pixels完整代码import struct import os def read_bmp(filename): with open(filename, rb) as f: header f.read(54) width struct.unpack(I, header[18:22])[0] height struct.unpack(I, header[22:26])[0] bitcount struct.unpack(H, header[28:30])[0] # 8bit BMP if bitcount 8: palette f.read(1024) header palette row_size (width 3) // 4 * 4 pixels [] rows [] for y in range(height): row f.read(row_size) rows.append(row[:width]) for row in reversed(rows): for x in range(width): pixels.append(row[x]) return pixels, width, height, bitcount, header # 24bit BMP elif bitcount 24: row_size (width * 3 3) // 4 * 4 pixels [] rows [] for y in range(height): row f.read(row_size) rows.append(row) for row in reversed(rows): for x in range(width): b row[x * 3] g row[x * 3 1] r row[x * 3 2] pixels.append((r, g, b)) return pixels, width, height, bitcount, header else: raise ValueError(Only 8bit or 24bit BMP supported) # RLE编码 def rle_encode(data): encoded [] i 0 n len(data) while i n: count 1 while i count n and data[i] data[i count] and count 255: count 1 #重复块 if count 1: encoded.append((0, count, data[i])) i count # 原始块 else: start i while i n - 1 and data[i] ! 
data[i 1] and (i - start 1) 255: i 1 #i的作用 i 1 length i - start encoded.append((1, length, data[start:start length])) return encoded # 保存RLE def save_rle(save_folder, filename, encoded, width, height, bitcount): if not os.path.exists(save_folder): os.makedirs(save_folder) rle_file_path os.path.join(save_folder, filename) with open(rle_file_path, wb) as f: f.write(struct.pack(I, width)) f.write(struct.pack(I, height)) f.write(struct.pack(H, bitcount)) for item in encoded: type_, length, value item f.write(bytes([type_])) f.write(bytes([length])) # 8bit if bitcount 8: if type_ 0: f.write(bytes([value])) else: f.write(bytes(value)) # 24bit else: if type_ 0: r,g,b value f.write(bytes([r,g,b])) else: for r,g,b in value: f.write(bytes([r,g,b])) file_size os.path.getsize(rle_file_path) return file_size # 读取RLE def load_rle(filename): encoded [] with open(filename, rb) as f: width struct.unpack(I, f.read(4))[0] height struct.unpack(I, f.read(4))[0] bitcount struct.unpack(H, f.read(2))[0] while True: t f.read(1) if not t: break type_ t[0] length f.read(1)[0] # 8bit if bitcount 8: if type_ 0: value f.read(1)[0] encoded.append((type_, length, value)) else: value list(f.read(length)) encoded.append((type_, length, value)) # 24bit else: if type_ 0: r f.read(1)[0] g f.read(1)[0] b f.read(1)[0] encoded.append((type_, length, (r,g,b))) else: block [] for _ in range(length): r f.read(1)[0] g f.read(1)[0] b f.read(1)[0] block.append((r,g,b)) encoded.append((type_, length, block)) return encoded, width, height, bitcount # RLE解码 def rle_decode(encoded): pixels [] for item in encoded: type_, length, value item if type_ 0: pixels.extend([value] * length) else: pixels.extend(value) return pixels # 更新文件信息头 def update_bmp_header(header, width, height, bitcount): if bitcount 8: row_size (width 3) // 4 * 4 else: row_size (width*3 3) // 4 * 4 img_size row_size * height file_size len(header) img_size header bytearray(header) struct.pack_into(I, header, 2, file_size) struct.pack_into(I, header, 
34, img_size) return bytes(header) # 保存BMP def save_bmp(filename, pixels, width, height, bitcount, header): with open(filename, wb) as f: header update_bmp_header(header, width, height, bitcount) f.write(header) # 8bit if bitcount 8: row_size (width 3) // 4 * 4 for y in reversed(range(height)): row [] for x in range(width): gray pixels[y * width x] row.append(gray) padding row_size - width row.extend([0] * padding) f.write(bytes(row)) # 24bit else: row_size (width*3 3) // 4 * 4 for y in reversed(range(height)): row [] for x in range(width): r,g,b pixels[y*width x] row.extend([b,g,r]) padding row_size - width*3 row.extend([0]*padding) f.write(bytes(row)) if __name__ __main__: bmp_folder bmp_pictures rle_save_folder rle_compressed output_bmp_folder output_bmp for file_name in os.listdir(bmp_folder): bmp_file os.path.join(bmp_folder, file_name) pixels, width, height, bitcount, header read_bmp(bmp_file) print(图片名称:, file_name) print(bit位数:, bitcount) original_file_size os.path.getsize(bmp_file) encoded rle_encode(pixels) rle_file_name os.path.splitext(file_name)[0] .rle new_file_size save_rle(rle_save_folder, rle_file_name, encoded, width, height, bitcount) print(图片字节压缩比, original_file_size / new_file_size) rle_file_path os.path.join(rle_save_folder, rle_file_name) encoded2, w, h, b load_rle(rle_file_path) pixels2 rle_decode(encoded2) output_bmp_name os.path.splitext(file_name)[0] .bmp output_bmp_path os.path.join(output_bmp_folder, output_bmp_name) save_bmp(output_bmp_path, pixels2, w, h, b, header) print(done) 四、结果分析 部分数据集：对于大多数自然图像的bmp文件，图像压缩比小于1，基本处于0.97-0.99之间，即采用RLE编码后的.rle文件比原bmp文件更大；对于用Windows自带的画图功能画出来并保存为24位bmp格式的文件，压缩比基本处于100左右。从实验结果可以看出，RLE 
压缩效果与图像的像素分布特性密切相关。从示例图像来看，第1幅图像包含一定量的高频信息，连续重复像素较少，导致编码过程中大量数据被划分为原始块，额外的块结构开销超过游程部分节省的大小，从而导致压缩后文件更大。第2幅为8bit灰度自然图像，其像素分布接近真实概率分布，原始的1字节信息编码后变为3倍大小，使压缩后的文件大小出现明显膨胀，说明RLE不适合处理接近随机或自然分布的数据。相比之下，第3幅纯色图像具有极高的空间冗余度，有大量连续重复的像素，使得RLE能够高效压缩。五、经验总结：实验中使用的bmp文件均为常见的8bit或24bit格式。由于8bit是灰度图像、24bit是彩色图像，文件信息头和文件数据格式都不一样，在读取文件时需要注意；而且最好是直接把信息头中不会改变的部分保存下来，存到.rle文件中，以避免解压后读取失败。bmp文件采用小端序的存储方式，即低位在前、高位在后，以逻辑信息方式读取时需要倒着读。一开始我随便找了个转换网站把jpg转成bmp，后来发现解压缩之后打不开，用二进制格式打开时发现这个bmp文件既不是8bit也不是24bit，而是一种更先进的bmp格式，遂弃用。六、碎碎念：在CSDN上看到了类似的实验，但是是用C实现的。由于本人已经弃用C，转而投入Python的怀抱，加上AI给的Python代码更优秀，于是决定用Python来实现数据压缩。CSDN上的第一篇文章也是献给了作业 ^_^ 希望越来越万能的Python也能支持实现之后的实验。

更多文章