GO实现文件压缩算法

Mathea ·
更新时间:2024-09-21
· 829 次阅读

实现原理

   读取文件,统计字符出现次数为权值,构建哈夫曼树,获取每个字符的哈夫曼编码,写入文件。

压缩文件头定义
type compressHead struct {
	srclen, dstlen, keymapLen uint32	//源文件字符个数  压缩文件字符个数   哈夫曼编码字符映射个数
	patchBit                  uint8     //压缩后不足8bit补0个数
	keysMap                   map[interface{}]uint32	//字符统计构建哈夫曼树
}
压缩实现过程如下
//按照小端模式写入文件
func getCompressedBytes(pHead *compressHead, data []byte) []byte {
	buf := new(bytes.Buffer)
	if err := binary.Write(buf, binary.LittleEndian, pHead.srclen); err == nil {
		if err = binary.Write(buf, binary.LittleEndian, pHead.dstlen); err != nil {
			fmt.Println(err.Error())
		}
		if err = binary.Write(buf, binary.LittleEndian, pHead.keymapLen); err != nil {
			fmt.Println(err.Error())
		}
		if err = binary.Write(buf, binary.LittleEndian, pHead.patchBit); err != nil {
			fmt.Println(err.Error())
		}
		for key, value := range pHead.keysMap {
			if err = binary.Write(buf, binary.LittleEndian, key); err != nil {
				fmt.Println(err.Error())
			}
			if err = binary.Write(buf, binary.LittleEndian, value); err != nil {
				fmt.Println(err.Error())
			}
		}
		if err = binary.Write(buf, binary.LittleEndian, data); err != nil {
			fmt.Println(err.Error())
		}
	} else {
		fmt.Println(err.Error())
	}
	return buf.Bytes()
}
func Compress(strInFileName, strOutFileName string) bool {
	if data, err := ioutil.ReadFile(strInFileName); err != nil {
		fmt.Println(err.Error())
		return false
	} else {
		keys := make(map[interface{}]uint32)
		for i := 0; i < len(data); i++ {
			if _, ok := keys[data[i]]; ok {
				keys[data[i]]++
			} else {
				keys[data[i]] = 1
			}
		}
		if pTree := CreatHuffman(keys); pTree != nil {
			pHead := &compressHead{srclen: (uint32(len(data))), keysMap: keys}
			compressdata := make([]byte, 0)
			factor := make(map[byte][]byte)
			for i := 0; i < len(data); i++ {
				if value, ok := factor[data[i]]; ok {
					compressdata = append(compressdata, value...)
				} else {
					if code, codeok := GetHuffmanCode(data[i], pTree); codeok {
						factor[data[i]] = code
						compressdata = append(compressdata, code...)
					}
				}
			}
			pHead.keymapLen = (uint32(len(pHead.keysMap)))
			pHead.patchBit = (uint8)(8 - len(compressdata)%8)
			for i := uint8(0); i = 8; compressdata = compressdata[8:] {
				var b byte = 0
				for i := 0; i < 8; i++ {
					b |= compressdata[i] << (7 - i)
				}
				afterdata = append(afterdata, b)
			}
			fmt.Printf("after data:%v\n", afterdata)
			pHead.dstlen = (uint32(len(afterdata)))
			buf := getCompressedBytes(pHead, afterdata)
			ioutil.WriteFile(strOutFileName, buf, 0666)
		}
	}
	return true
}

构造哈夫曼树可以参考哈夫曼树构建

解压缩过程

解压缩读取压缩文件头部信息,构建哈夫曼树,bit遍历每个字符进行解压缩

func getCompressedHead(data []byte) (*compressHead, []byte) {
	pHead := new(compressHead)
	buf := bytes.NewBuffer(data)
	binary.Read(buf, binary.LittleEndian, &pHead.srclen)
	binary.Read(buf, binary.LittleEndian, &pHead.dstlen)
	binary.Read(buf, binary.LittleEndian, &pHead.keymapLen)
	binary.Read(buf, binary.LittleEndian, &pHead.patchBit)
	pHead.keysMap = make(map[interface{}]uint32)
	for i := uint32(0); i = 1 && !bEnd {
				b := dst[0]
				for i := 7; i >= 0; i-- {
					if (b>>i)&1 != 0 {
						pTmpTree = pTmpTree.GetRight()
					} else {
						pTmpTree = pTmpTree.GetLeft()
					}
					if nil != pTmpTree && pTmpTree.Value != nil {
						v, _ := GetHuffmanValue(pTmpTree).(byte)
						src[srcindex] = v
						srcindex++
						if len(dst) == 1 && (uint8(i)) == pHead.patchBit {
							bEnd = true
							break
						}
						pTmpTree = pTree
					}
				}
				dst = dst[1:]
			}
		}
		ioutil.WriteFile(strOutFileName, src, 0666)
	}
	return true
}


作者:lightjia



GO 文件压缩 压缩 算法

需要 登录 后方可回复, 如果你还没有账号请 注册新账号