赫夫曼编码

对一个字符串进行二进制编码,采用赫夫曼编码原则,可获得总长度最短的编码方案。

知识点:最优二叉树(赫夫曼树)

#python3

from collections import Counter

class Node(object):
    def __init__(self, prob, character='#', left=None, right=None):
        self.character = character
        self.prob = prob
        self.left = left
        self.right = right

# 每个字母创建一个节点,包含字母character, 及其频率prob
def initialCreate(data):
    nodelist = []
    for i in data:
        nodelist.append(Node(character=i, prob=data[i]))
    return nodelist

# 按照哈夫曼编码原则创建二叉树
def create(nodelist):
    if len(nodelist)>1:  # 只有一种字符时无需循环建树
        while len(nodelist)>1:
            nodelist = sorted(nodelist, key=lambda x: x.prob)
            nodelist.append(Node(prob=nodelist[0].prob+nodelist[1].prob, left=nodelist[0], right=nodelist[1]))
            nodelist.pop(0)
            nodelist.pop(0)
    return nodelist

# 遍历二叉树创建编码hugo-tranquilpeak-theme
def setCode(root, cdict, code=''):
    if root is None:
        return
    code1 = code
    if root.left is None and root.right is None:
        if code:
            cdict[root.character] = code
            return
        else:  # 只有一种字符的特殊情况
            cdict[root.character] = '0'
            return cdict
    if root.left:
        code += '0'
        setCode(root.left, cdict, code)
    if root.right:
        code1 += '1'
        setCode(root.right, cdict, code1)
    return cdict

if __name__ == '__main__':
    string = input()  # 读入字符串
    data = Counter(string)  # 存放字符串中各字符出现次数
    nodelist = initialCreate(data)  # 初始化
    nodelist = create(nodelist)  # 建立二叉树
    cdict = {}
    cdict = setCode(nodelist[0], cdict)  # 获取各字符及其对应编码
    print(cdict)