Python 字典树的应用：语言模型统计词频

class TrieNode():
    """A single node of the trie.

    :param value: the item (e.g. a character or token) this node stands for;
        ``None`` for the root.
    :param count: frequency recorded for the sequence that ends at this node.
    :param parent: the parent node; ``None`` for the root.
    """

    def __init__(self, value=None, count=0, parent=None):
        self.value = value      # item represented by this node
        self.count = count      # frequency of the sequence ending here
        self.parent = parent    # parent node
        self.children = {}      # maps the next item -> child TrieNode


class Trie():
    """Prefix tree that maps sequences of hashable items to a frequency."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, sequence, value):
        """Store ``value`` as the frequency of ``sequence``.

        Missing nodes along the path are created with a count of 0; if the
        sequence is already present its frequency is overwritten.

        :param sequence: iterable of hashable items (a string, a tuple of
            tokens, ...) to insert.
        :param value: the frequency to record for the sequence.
        :return: None
        """
        node = self.root
        for item in sequence:
            child = node.children.get(item)
            if child is None:
                child = TrieNode(value=item, count=0, parent=node)
                node.children[item] = child
            node = child
        # Only the terminal node carries the frequency of the full sequence.
        node.count = value

    def search(self, sequence):
        """Return the frequency recorded for the complete ``sequence``.

        :param sequence: iterable of hashable items to look up.
        :return: the stored frequency, or 0 if the sequence was never
            inserted (including when it is only a prefix of inserted ones).
        """
        node = self.root
        for item in sequence:
            node = node.children.get(item)
            if node is None:
                return 0
        # Nodes that exist only as prefixes keep count == 0, so the count
        # alone distinguishes stored sequences from mere prefixes. Checking
        # for children here would wrongly hide a stored sequence that is
        # also a prefix of a longer one.
        return node.count

【Python 字典树的应用：语言模型统计词频】

    推荐阅读