python3.5做分类时,混淆矩阵加在哪一步
preface:最近做任务时,常常要先处理数据、提取一些简单的特征,再用机器学习算法跑程序得出结果,看看哪些特征的组合较好。这一系列流程必然要用到很多函数,故将自己常用的函数记录下来。这些函数基本上都会用到:数据预处理、特征选择、降维、训练预测、通过混淆矩阵查看分类效果、生成分类报告。
1.输入
从数据集开始,提取特征转化为有标签的数据集,转为向量 。拆分成训练集和测试集,这里不多讲,在上一篇博客中谈到用StratifiedKFold()函数即可 。在训练集中有data和target开始 。
2.处理
[python] view plain copy
def my_preprocessing(train_data):
    """Scale every feature column of ``train_data`` to unit L2 norm.

    Args:
        train_data: Feature matrix without the target column; passed
            straight to ``sklearn.preprocessing.normalize``.

    Returns:
        The normalized data returned by ``normalize``.
    """
    from sklearn import preprocessing

    # axis=0 normalizes each feature (column) independently; sklearn's
    # default, axis=1, would normalize each sample (row) instead.
    X_normalized = preprocessing.normalize(train_data, norm="l2", axis=0)
    return X_normalized
def my_feature_selection(data, target, k=50):
    """Keep the ``k`` features with the highest chi-squared score.

    Args:
        data: Feature matrix. The chi-squared scorer requires non-negative
            feature values (e.g. bag-of-words counts).
        target: Class labels, one per row of ``data``.
        k: Number of features to keep. Defaults to 50, the value that was
            previously hard-coded.

    Returns:
        The reduced feature matrix returned by ``SelectKBest.fit_transform``.
    """
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import chi2

    data_new = SelectKBest(chi2, k=k).fit_transform(data, target)
    return data_new
def my_PCA(data, n=15):
    """Fit a PCA and plot the variance explained by the leading components.

    Prints 90% of the total explained variance as a reference value and
    shows a line plot of the first ``n`` per-component variances, so the
    number of dominant components can be read off by eye.

    Args:
        data: Training features only (no target column).
        n: How many leading components to plot. Defaults to 15, the value
            that was previously hard-coded.
    """
    from sklearn import decomposition
    import matplotlib.pyplot as plt

    pca_sklearn = decomposition.PCA()
    pca_sklearn.fit(data)
    main_var = pca_sklearn.explained_variance_
    print(sum(main_var) * 0.9)

    plt.plot(main_var[:n])
    plt.show()
def clf_train(data, target):
    """Train an RBF-kernel SVM classifier and return the fitted model.

    Args:
        data: Training feature matrix.
        target: Training labels, one per row of ``data``.

    Returns:
        The fitted ``sklearn.svm.SVC`` instance.
    """
    from sklearn import svm

    # Hyper-parameters are fixed here; tune C and gamma for your own data.
    # Another estimator (e.g. sklearn.linear_model.LogisticRegression) can
    # be swapped in the same way: construct, fit, return.
    clf = svm.SVC(C=100, kernel="rbf", gamma=0.001)
    clf.fit(data, target)
    return clf
def my_confusion_matrix(y_true, y_pred):
    """Print a tab-separated confusion matrix with labelled rows and columns.

    Rows are the true classes and columns are the predicted classes.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
    """
    from sklearn.metrics import confusion_matrix

    # Sort the labels so the layout is deterministic (a bare set has no
    # defined order), and include labels that occur only in y_pred so that
    # such predictions are not silently dropped from the matrix.
    labels = sorted(set(y_true) | set(y_pred))
    conf_mat = confusion_matrix(y_true, y_pred, labels=labels)

    print("confusion_matrix(left labels: y_true, up labels: y_pred):")
    print("\t".join(["labels"] + [str(label) for label in labels]))
    # Head each row with its class label rather than the row index.
    for label, row in zip(labels, conf_mat):
        print("\t".join([str(label)] + [str(count) for count in row]))
    print()
def my_classification_report(y_true, y_pred):
    """Print sklearn's per-class classification report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
    """
    from sklearn.metrics import classification_report

    print("classification_report(left: labels):")
    print(classification_report(y_true, y_pred))
my_preprocessing()函数:
主要使用sklearn的preprocessing模块中的normalize()函数,范数使用l2。代码中指定了axis=0,即对每一个特征列(不含标签)做归一化,处理后每列的平方和为1;若使用默认的axis=1,则是对每个样例(每行)做归一化,每行的平方和为1。
my_feature_selection()函数:
使用sklearn的feature_selection函数中SelectKBest()函数和chi2()函数,若是用词袋提取了很多维的稀疏特征,有必要使用卡方选取前k个有效的特征 。
my_PCA()函数:
主要用来观察前多少个特征是主要特征,并且画图 。看看前多少个特征占据主要部分 。
clf_train()函数:
可用多种机器学习算法,如SVM, LR, RF, GBDT等等很多,其中像SVM需要调参数的,有专门调试参数的函数如StratifiedKFold()(见前几篇博客) 。以达到最优 。
my_confusion_matrix()函数:
主要是针对预测出来的结果,和原来的结果对比,算出混淆矩阵,不必自己计算 。其对每个类别的混淆矩阵都计算出来了,并且labels参数默认是排序了的 。
my_classification_report()函数:
主要通过sklearn.metrics函数中的classification_report()函数,针对每个类别给出详细的准确率、召回率和F-值这三个参数和宏平均值,用来评价算法好坏 。另外ROC曲线的话 , 需要是对二分类才可以 。多类别似乎不行 。
主要参考sklearn官网
5-2、利用RBF作为核函数
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
iris = datasets.load_iris()
# For simplicity, use only the first two features as classifier inputs so
# that the decision regions can be drawn in a 2-D plane.
X = iris.data[:, :2]
y = iris.target
# SVC classifier with an RBF kernel; gamma is left to sklearn's "auto" rule.
svc = svm.SVC(kernel="rbf", C=1, gamma="auto").fit(X, y)
# Plot range: the data range padded by 1 on every side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
# Grid step: 1/100 of the x extent. The extent (difference) is used rather
# than the ratio x_max / x_min, which breaks as soon as x_min <= 0.
h = (x_max - x_min) / 100
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))
plt.subplot(1, 1, 1)
# Predict the class of every grid point with the fitted classifier.
Z = svc.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
# Draw filled contours for the decision regions, then the samples on top.
plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Paired)
plt.xlabel('花萼长度')
plt.ylabel('花萼宽度')
# Clip the x axis to the grid range for a tidy figure.
plt.xlim(xx.min(), xx.max())
plt.title('利用RBF作为核函数')
plt.show()
python rbf表示什么分布
径向基(RBF)神经网络python实现
from ctypes import c_double
from multiprocessing import Array, Process
from random import sample
from sys import stdout
from time import time

from h5py import File
from numpy import (
    append, array, dot, exp, frombuffer, log, loadtxt, mean, reshape,
    savetxt, sqrt, transpose, true_divide, vstack, zeros,
)
from numpy.linalg import lstsq, norm


def metrics(a, b):
    """Return the distance between ``a`` and ``b``.

    Computed as ``numpy.linalg.norm(a - b)``, i.e. the Euclidean distance
    for 1-D vectors.
    """
    return norm(a - b)
def gaussian(x, mu, sigma):
    """Gaussian radial basis function: ``exp(-r**2 / (2 * sigma**2))``.

    ``r`` is the distance between sample ``x`` and center ``mu`` as given
    by ``metrics``; ``sigma`` is the width of the basis function.
    """
    return exp(-metrics(mu, x)**2 / (2 * sigma**2))
def multiQuadric(x, mu, sigma):
    """Multiquadric radial basis function: ``sqrt(r**2 + sigma**2)``.

    ``r`` is the distance between sample ``x`` and center ``mu`` as given
    by ``metrics``; ``sigma`` is the width of the basis function.
    """
    return pow(metrics(mu, x)**2 + sigma**2, 0.5)
def invMultiQuadric(x, mu, sigma):
    """Inverse multiquadric basis function: ``1 / sqrt(r**2 + sigma**2)``.

    ``r`` is the distance between sample ``x`` and center ``mu`` as given
    by ``metrics``; ``sigma`` is the width of the basis function.
    """
    return pow(metrics(mu, x)**2 + sigma**2, -0.5)
def plateSpine(x, mu):
    """Thin-plate-spline radial basis function: ``r**2 * log(r)``.

    ``r`` is the distance between sample ``x`` and center ``mu`` as given
    by ``metrics``.
    """
    r = metrics(mu, x)
    # The limit of r**2 * log(r) as r -> 0 is 0; evaluating the formula
    # directly at r == 0 would give 0 * -inf == nan.
    if r == 0:
        return 0.0
    return (r**2) * log(r)
class Rbf:
    """Radial-basis-function network fitted by linear least squares.

    Training picks the centers (``mu``), derives one width per center
    (``sigmas``), builds the design matrix ``phi`` in parallel worker
    processes, and solves for the output weights ``w``. Every intermediate
    result is also written to an HDF5 file named
    ``<prefix>-<what>-<n>-<extra_neurons>.hdf5``.
    """

    def __init__(self, prefix='rbf', workers=4, extra_neurons=0, from_files=None):
        """Create an untrained model, or load a trained one from HDF5 files.

        Args:
            prefix: File-name prefix for the HDF5 caches written by ``train``.
            workers: Number of processes used to compute ``phi``.
            extra_neurons: Number of training samples drawn at random as
                additional centers on top of the pre-computed clusters.
            from_files: Optional mapping with keys ``'w'``, ``'mu'`` and
                ``'sigma'`` giving the paths of a previously saved model.
        """
        self.prefix = prefix
        self.workers = workers
        self.extra_neurons = extra_neurons

        # Import partial model
        if from_files is not None:
            w_handle = self.w_handle = File(from_files['w'], 'r')
            mu_handle = self.mu_handle = File(from_files['mu'], 'r')
            sigma_handle = self.sigma_handle = File(from_files['sigma'], 'r')

            self.w = w_handle['w']
            self.mu = mu_handle['mu']
            self.sigmas = sigma_handle['sigmas']

            self.neurons = self.sigmas.shape[0]

    def _calculate_error(self, y):
        """Store the mean absolute and relative error of ``self.os`` vs ``y``."""
        self.error = mean(abs(self.os - y))
        self.relative_error = true_divide(self.error, mean(y))

    def _generate_mu(self, x):
        """Return the centers: clusters from disk plus random rows of ``x``."""
        n = self.n
        extra_neurons = self.extra_neurons

        # TODO: Make reusable
        mu_clusters = loadtxt('clusters100.txt', delimiter='\t')

        mu_indices = sample(range(n), extra_neurons)
        mu_new = x[mu_indices, :]
        mu = vstack((mu_clusters, mu_new))

        return mu

    def _calculate_sigmas(self):
        """Return one width per center: twice the mean distance to the centers.

        The mean is taken over a list of length ``neurons`` in which the
        self-distance slot stays 0, so it divides by ``neurons``.
        """
        neurons = self.neurons
        mu = self.mu

        sigmas = zeros((neurons, ))
        for i in range(neurons):
            dists = [0 for _ in range(neurons)]
            for j in range(neurons):
                if i != j:
                    dists[j] = metrics(mu[i], mu[j])
            sigmas[i] = mean(dists) * 2
        return sigmas

    def _calculate_phi(self, x):
        """Compute the design matrix for ``x`` using ``self.workers`` processes.

        Args:
            x: Samples, one per row.

        Returns:
            Array of shape ``(x.shape[0], self.neurons)`` backed by shared
            memory; entry ``[i, j]`` is the multiquadric basis value of
            sample ``i`` with respect to center ``j``.
        """
        C = self.workers
        neurons = self.neurons
        mu = self.mu
        sigmas = self.sigmas
        self.phi = None
        # Size the matrix from the data actually passed in, so prediction
        # on a test set does not reuse (or require) the training-set size.
        n = x.shape[0]

        def heavy_lifting(c, phi):
            start, stop = jobs[c]
            s = stop - start
            for k, i in enumerate(range(start, stop)):
                for j in range(neurons):
                    phi[i, j] = multiQuadric(x[i, :], mu[j], sigmas[j])
                if k % 1000 == 0:
                    percent = true_divide(k, s) * 100
                    print(c, ': {:2.2f}%'.format(percent))
            print(c, ': Done')

        # Workers write into one shared buffer viewed as an (n, neurons) array.
        # NOTE(review): a nested function as Process target and a closure
        # over `jobs` only work with the "fork" start method - confirm on
        # platforms that default to spawn/forkserver.
        shared_array = Array(c_double, n * neurons)
        phi = frombuffer(shared_array.get_obj())
        phi = phi.reshape((n, neurons))

        jobs = []
        workers = []

        # Integer division: row bounds must be ints on Python 3 as well.
        p = n // C
        m = n % C
        for c in range(C):
            # The last worker also takes the remainder rows.
            jobs.append((c * p, (c + 1) * p + (m if c == C - 1 else 0)))
            worker = Process(target=heavy_lifting, args=(c, phi))
            workers.append(worker)
            worker.start()

        for worker in workers:
            worker.join()

        return phi

    def _do_algebra(self, y):
        """Solve ``phi @ w = y`` by least squares.

        Returns:
            Tuple ``(w, os)``: the weights and the fitted outputs
            ``dot(w, phi.T)``.
        """
        phi = self.phi

        w = lstsq(phi, y)[0]
        os = dot(w, transpose(phi))
        return w, os

    def train(self, x, y):
        """Fit the network on samples ``x`` with targets ``y``.

        Sets ``mu``, ``sigmas``, ``phi``, ``w``, ``os``, ``error`` and
        ``relative_error`` on the instance and writes ``mu``, ``sigmas``,
        ``phi``, ``w`` and ``os`` to their HDF5 files.
        """
        self.n = x.shape[0]

        ## Initialize HDF5 caches
        prefix = self.prefix
        postfix = str(self.n) + '-' + str(self.extra_neurons) + '.hdf5'
        name_template = prefix + '-{}-' + postfix
        phi_handle = self.phi_handle = File(name_template.format('phi'), 'w')
        os_handle = self.os_handle = File(name_template.format('os'), 'w')
        w_handle = self.w_handle = File(name_template.format('w'), 'w')
        mu_handle = self.mu_handle = File(name_template.format('mu'), 'w')
        sigma_handle = self.sigma_handle = File(name_template.format('sigma'), 'w')

        ## Mu generation
        mu = self.mu = self._generate_mu(x)
        self.neurons = mu.shape[0]
        print('({} neurons)'.format(self.neurons))
        # Save to HDF5
        mu_handle.create_dataset('mu', data=mu)

        ## Sigma calculation
        print('Calculating Sigma...')
        sigmas = self.sigmas = self._calculate_sigmas()
        # Save to HDF5
        sigma_handle.create_dataset('sigmas', data=sigmas)
        print('Done')

        ## Phi calculation
        print('Calculating Phi...')
        phi = self.phi = self._calculate_phi(x)
        print('Done')
        # Saving to HDF5
        print('Serializing...')
        phi_h5 = phi_handle.create_dataset('phi', data=phi)
        # Keep only the HDF5-backed dataset; drop the local in-memory name.
        del phi
        self.phi = phi_h5
        print('Done')

        ## Algebra
        print('Doing final algebra...')
        # self.os must be stored: _calculate_error reads it.
        w, os = self.w, self.os = self._do_algebra(y)
        # Saving to HDF5
        w_handle.create_dataset('w', data=w)
        os_handle.create_dataset('os', data=os)

        ## Calculate error
        self._calculate_error(y)
        print('Done')

    def predict(self, test_data):
        """Return network outputs for ``test_data``; also saved to 'iok3834.txt'."""
        # `[()]` reads an HDF5 dataset fully into memory and is a no-op view
        # on an ndarray, so this works after `train`, after loading from
        # files, and on repeated calls (`.value` was removed in h5py 3).
        mu = self.mu = self.mu[()]
        sigmas = self.sigmas = self.sigmas[()]
        w = self.w = self.w[()]

        print('Calculating phi for test data...')
        phi = self._calculate_phi(test_data)
        os = dot(w, transpose(phi))
        savetxt('iok3834.txt', os, delimiter='\n')
        return os

    @property
    def summary(self):
        """Multi-line text summary of training size, layer size and errors."""
        return '\n'.join(
            ['-----------------',
             'Training set size: {}'.format(self.n),
             'Hidden layer size: {}'.format(self.neurons),
             '-----------------',
             'Absolute error: {:02.2f}'.format(self.error),
             'Relative error: {:02.2f}%'.format(self.relative_error * 100)])
def predict(test_data):
    """Predict with the model stored in the fixed 'rbf-*-212243-2400' files.

    Args:
        test_data: Samples, one per row.

    Returns:
        The network outputs ``dot(w, phi.T)``; they are also written to
        'iok3834.txt', one value per line.
    """
    # Read each dataset fully into memory and close the file right away.
    # `[()]` replaces `.value`, which was removed in h5py 3.
    with File('rbf-mu-212243-2400.hdf5', 'r') as mu_file:
        mu = mu_file['mu'][()]
    with File('rbf-sigma-212243-2400.hdf5', 'r') as sigma_file:
        sigmas = sigma_file['sigmas'][()]
    with File('rbf-w-212243-2400.hdf5', 'r') as w_file:
        w = w_file['w'][()]

    n = test_data.shape[0]
    neur = mu.shape[0]

    # Centers are the rows of mu (same layout Rbf uses), so mu[j] is used
    # directly. The earlier transpose + discarded reshape made mu[j] index
    # a feature instead of a center and raised whenever the sizes differed.
    phi = zeros((n, neur))
    for i in range(n):
        for j in range(neur):
            phi[i, j] = multiQuadric(test_data[i, :], mu[j], sigmas[j])

    os = dot(w, transpose(phi))
    savetxt('iok3834.txt', os, delimiter='\n')
    return os
5-11、高斯核函数RBF
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
# Font configuration used for the Chinese plot titles below.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
# Generate 1000 two-moons samples.
x, y = datasets.make_moons(n_samples=1000, noise=0.25, random_state=2020)
plt.figure()
# One scatter call per class: class 0 in red, class 1 in green.
for class_id, colour in ((0, "r"), (1, "g")):
    members = x[y == class_id]
    plt.scatter(members[:, 0], members[:, 1], color=colour)
plt.title('散点图')
plt.show()
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2020)
# Draw the decision regions of a fitted classifier.
def plot_decision_boundary(model, axis):
    """Fill the plane with the classes ``model`` predicts on a dense grid.

    Args:
        model: Fitted estimator exposing ``predict`` for 2-feature inputs.
        axis: Sequence ``[x_min, x_max, y_min, y_max]`` giving the plot
            range; the grid has 100 points per unit along each axis.
    """
    # meshgrid takes 1-D coordinate vectors, so no column reshape is needed.
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1] - axis[0]) * 100)),
        np.linspace(axis[2], axis[3], int((axis[3] - axis[2]) * 100)),
    )
    x_new = np.c_[x0.ravel(), x1.ravel()]
    y_pre = model.predict(x_new)
    zz = y_pre.reshape(x0.shape)
    # Colours for the filled regions.
    cus = ListedColormap(["#BA55D3", "#FF69B4", "#FFE4C4"])
    plt.contourf(x0, x1, zz, cmap=cus)
def RBFkernelSVC(gamma):
    """Build a pipeline that standardizes features, then fits an RBF SVC.

    Args:
        gamma: Kernel coefficient passed to ``SVC``.

    Returns:
        An unfitted ``Pipeline`` with steps ``"std"`` and ``"svc"``.
    """
    return Pipeline([
        ("std", StandardScaler()),
        ("svc", SVC(kernel="rbf", gamma=gamma)),
    ])
# Two-moons data: fit with gamma=1 and draw the decision regions.
sv = RBFkernelSVC(gamma=1)
sv.fit(x_train, y_train)
plot_decision_boundary(sv, axis=([-1.8, 2.5, -1.4, 1.8]))
for class_id, colour in ((0, "r"), (1, "g")):
    members = x[y == class_id]
    plt.scatter(members[:, 0], members[:, 1], color=colour)
plt.title('高斯核函数RBF')
plt.show()
# Print the test-set score.
moons_score = sv.score(x_test, y_test)
print(moons_score)

# Iris data: same pipeline with gamma=10.
d = datasets.load_iris()
x, y = d.data, d.target
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=2020)
sv = RBFkernelSVC(gamma=10)
sv.fit(x_train, y_train)
# Print the test-set score.
iris_score = sv.score(x_test, y_test)
print(iris_score)
【rbf函数python rbf函数空间插值】rbf函数python的介绍就聊到这里吧,感谢你花时间阅读本站内容,更多关于rbf函数空间插值、rbf函数python的信息别忘了在本站进行查找喔 。
推荐阅读
- 怎么制作防震减灾ppt,防震减灾ppt 百度网盘
- 输入输出java代码,java编程输入
- 厦门沧众跨境电商如何做,厦门沧众跨境电商如何做销售
- 观看虎牙直播的观后感,我想看虎牙直播的
- linux命令自动开机 linux配置开机自动启动进程
- giscad高程分析,arcgis高程分析
- python爬虫抓取地图数据,如何爬取地图数据
- oracle数据库连接字段,oracle连接语句
- php代码+怎么抓数据 php怎么获取数据库中的数据