HuffmanCode
赫夫曼编码
对一个字符串进行二进制编码,采用赫夫曼编码原则,可获得总长度最短的编码方案。
知识点:最优二叉树(赫夫曼树)
#python3
from collections import Counter
class Node(object):
def __init__(self, prob, character='#', left=None, right=None):
self.character = character
self.prob = prob
self.left = left
self.right = right
# 每个字母创建一个节点,包含字母character, 及其频率prob
def initialCreate(data):
nodelist = []
for i in data:
nodelist.append(Node(character=i, prob=data[i]))
return nodelist
# 按照哈夫曼编码原则创建二叉树
def create(nodelist):
if len(nodelist)>1: # 只有一种字符时无需循环建树
while len(nodelist)>1:
nodelist = sorted(nodelist, key=lambda x: x.prob)
nodelist.append(Node(prob=nodelist[0].prob+nodelist[1].prob, left=nodelist[0], right=nodelist[1]))
nodelist.pop(0)
nodelist.pop(0)
return nodelist
# 遍历二叉树创建编码hugo-tranquilpeak-theme
def setCode(root, cdict, code=''):
if root is None:
return
code1 = code
if root.left is None and root.right is None:
if code:
cdict[root.character] = code
return
else: # 只有一种字符的特殊情况
cdict[root.character] = '0'
return cdict
if root.left:
code += '0'
setCode(root.left, cdict, code)
if root.right:
code1 += '1'
setCode(root.right, cdict, code1)
return cdict
if __name__ == '__main__':
string = input() # 读入字符串
data = Counter(string) # 存放字符串中各字符出现次数
nodelist = initialCreate(data) # 初始化
nodelist = create(nodelist) # 建立二叉树
cdict = {}
cdict = setCode(nodelist[0], cdict) # 获取各字符及其对应编码
print(cdict)