关于python损失函数代码的信息( 七 ) _算法

import math

import numpy as np
def calShannonEnt(dataSet):
    """Compute the Shannon entropy (base 2) of the class labels in a data set.

    The class label of every sample is read from its last element.

    Args:
        dataSet: Sized iterable of samples (for example a list of rows or a
            2-D array). Each sample must be indexable and its last element
            must be hashable.

    Returns:
        The entropy in bits as a float; 0.0 for an empty data set.
    """
    total = len(dataSet)

    # Count how often each class label occurs.
    labelCountDict = {}
    for d in dataSet:
        label = d[-1]
        labelCountDict[label] = labelCountDict.get(label, 0) + 1

    # H = -sum(p * log2(p)) over the observed labels.
    entropy = 0.0
    for c in labelCountDict.values():
        p = float(c) / total
        entropy -= p * math.log(p, 2)
    return entropy
def filterSubDataSet(dataSet, colIndex, value):
    """Select the rows whose ``colIndex`` column equals ``value`` and drop that column.

    Args:
        dataSet: Iterable of rows (a 2-D NumPy array or a list of sequences).
        colIndex: Index of the feature column to filter on and then remove.
        value: Value the selected rows must have in column ``colIndex``.

    Returns:
        A NumPy array holding the matching rows without column ``colIndex``.
        When no row matches and ``dataSet`` is two-dimensional, the result is
        an empty array of shape ``(0, columns - 1)``.
    """
    subDataSetList = [
        np.delete(r, colIndex) for r in dataSet if r[colIndex] == value
    ]
    if subDataSetList:
        return np.array(subDataSetList)

    # No matching rows: keep the result two-dimensional so that callers can
    # still read ``.shape[1]`` and slice columns on it.
    data = np.asarray(dataSet)
    width = data.shape[1] - 1 if data.ndim == 2 else 0
    return np.empty((0, max(width, 0)), dtype=data.dtype)
def chooseFeature(dataSet):
    """Choose the feature column with the largest information gain.

    Every column except the last is treated as a feature; the last column
    holds the class labels.

    Args:
        dataSet: 2-D NumPy array (it is accessed through ``.shape`` and
            ``dataSet[:, i]``).

    Returns:
        The index of the feature column with the highest information gain.
        Ties are resolved in favour of the later column. Returns -1 when
        there is no feature column or when no computed gain reaches zero.
    """
    featureNum = dataSet.shape[1] - 1
    sampleNum = len(dataSet)
    entropy = calShannonEnt(dataSet)
    bestInfoGain = 0.0
    bestFeatureIndex = -1
    for i in range(featureNum):
        # Conditional entropy H(labels | feature i): entropy of each
        # partition weighted by the partition's share of the samples.
        condition_entropy = 0.0
        for v in np.unique(dataSet[:, i]):
            subDataSet = filterSubDataSet(dataSet, i, v)
            p = float(len(subDataSet)) / sampleNum
            condition_entropy += p * calShannonEnt(subDataSet)
        infoGain = entropy - condition_entropy
        # ">=" rather than ">" so that a feature is still selected when the
        # best achievable gain is exactly zero.
        if infoGain >= bestInfoGain:
            bestInfoGain = infoGain
            bestFeatureIndex = i
    return bestFeatureIndex
def _majorityLabel(classList):
    """Return the most frequent label in ``classList``."""
    counts = {}
    for label in classList:
        counts[label] = counts.get(label, 0) + 1
    # On interpreters with insertion-ordered dicts, ties go to the label
    # that appears first in classList, which keeps the result reproducible.
    return max(counts, key=counts.get)


def creatDecisionTree(dataSet, featNames):
    """Recursively build a decision tree from a training set.

    Args:
        dataSet: 2-D NumPy array whose last column holds the class labels
            and whose other columns are features.
        featNames: Names of the feature columns, in column order. The
            sequence is not modified.

    Returns:
        Either a class label (leaf) or a nested dict of the form
        ``{featureName: {featureValue: subtree_or_label, ...}}``.
    """
    classList = list(dataSet[:, -1])
    if len(set(classList)) == 1:
        # Every sample has the same class: this node is a leaf.
        return classList[0]
    if dataSet.shape[1] == 1:
        # All features are used up but the classes still differ:
        # fall back to the most common class.
        return _majorityLabel(classList)

    bestFeatureIndex = chooseFeature(dataSet)
    if bestFeatureIndex < 0:
        # No usable feature was reported. A negative index would address
        # the label column, so stop splitting and use the most common class.
        return _majorityLabel(classList)

    bestFeatureName = featNames[bestFeatureIndex]
    # Work on a copy so the caller's list of names stays intact.
    featureName = list(featNames)
    del featureName[bestFeatureIndex]

    decisionTree = {bestFeatureName: {}}
    # One branch per distinct value of the chosen feature, built recursively
    # from the rows having that value (with the chosen column removed).
    for v in sorted(set(dataSet[:, bestFeatureIndex])):
        subDataSet = filterSubDataSet(dataSet, bestFeatureIndex, v)
        decisionTree[bestFeatureName][v] = creatDecisionTree(subDataSet, featureName)
    return decisionTree
def classify(decisionTree, featnames, featList):
    """Classify one sample with a decision tree.

    Args:
        decisionTree: Nested dict of the form
            ``{featureName: {featureValue: subtree_or_label, ...}}``, or a
            bare class label when the tree consists of a single leaf.
        featnames: Sequence of feature names; the position of a name gives
            the position of that feature's value in ``featList``.
        featList: Feature values of the sample to classify.

    Returns:
        The predicted class label, or ``None`` when the sample's value for a
        tested feature has no branch in the tree (or the tree is empty).

    Raises:
        ValueError: If a feature name used by the tree is not in ``featnames``.
    """
    if not isinstance(decisionTree, dict):
        # A tree that is just a leaf is already the prediction.
        return decisionTree
    if not decisionTree:
        return None

    # Each tree node is a dict with a single key: the feature it tests.
    # next(iter(...)) works on every Python version, unlike keys()[0].
    root = next(iter(decisionTree))
    firstGenDict = decisionTree[root]
    sampleValue = featList[featnames.index(root)]
    for k, child in firstGenDict.items():
        if sampleValue == k:
            if isinstance(child, dict):
                # The branch is a subtree: keep descending.
                return classify(child, featnames, featList)
            return child
    return None
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76

关于python损失函数代码的信息( 七 )

推荐阅读

阴阳师百闻牌怎么清理缓存阴阳师怎么清除缓存ios

运动过度易致秃头防脱发试试这些药膳

金税盘怎么抄税航信抄税从哪个软件里，如何抄税报税

披针形叶披针形叶子图片

python经验分享，初学python想请教下大牛们该入门学习最重要是实践这方面

李鸿章是个好官还是坏官，李鸿章的功与过？

史密斯热水器故障解决史密斯热水器故障代码显示Eb

4月份怎么钓鱼鲫鱼

藏红花泡酒的用量

黄桃干的营养价值

如何评价微软的surface pro4和surface book？

WinRE,winre drv分区是干什么的

因子分析累计贡献率,因子的累计方差贡献率

手术后能吃花蛤吗

去风寒感冒最有效方法风寒感冒怎么好得快

刑法对打击报复证人刑事责任刑法对打击报复证人刑事责任的规定

防水|盘点目前最值得购买的几款机型，尤其最后一款深受大众喜爱

如何判断热水器是否注满水？史密斯热水器怎么显示水量

[∧]音标怎么读 θ怎么发音

元气骑士法师怎么玩法师通关攻略详解