基于邻域粗糙集算法python实现
还在跑程序,我这渣笔记本CPU估计要跑很久,不如来贴个代码吧。
这个是之前一篇粗糙集相对属性约简python实现的一个小小改进
当中距离函数使用的是欧几里得距离公式,入坑没多久,代码写的不是很好,请见谅。
# _*_coding:utf-8 _*_
#@Time:2018/12/22 上午10:45
#@Author:we2swing
#@FileName: test.pyimport pandas as pd
import numpy as np
import math
import matplotlib.pyplot as pltdef basic_set(df):
basic = {}
for i in df.drop_duplicates().values.tolist():
basic[str(i)] = []
for j, k in enumerate(df.values.tolist()):
if k == i:
basic[str(i)].append(j)return basicdef Euclidean(x_data):
n = x_data.shape[1]#代表列数
m = x_data.shape[0]#代表行数
num = [0] * n
num2 = np.zeros((m,m))
#转为list
arr = np.array(x_data)counti,countj,countk,countz = 0,0,0,0
#遍历每一行的元素
for j in arr:
countk = 0
for k in arr:
if countj != countk:
countz = 0
# 遍历每一行的每一个元素
for z in k:
temp = np.square(z - j[countz])
if countz == 0:
num[countz] = temp
else:
#把每一行中每个元素的差值平方相加
num[countz] = num[countz-1] + temp
countz = countz + 1
else:
num[countz-1] = 0
num2[countj][countk] = round(math.sqrt(num[countz-1]),1)
countk = countk + 1
countj = countj + 1
return num2def key_basic(num,k):
# k = 4# 设δ=k
# k = [4.0,4.1,4.2,4.3,4.4,4.5,4.6,4.7,4.8,4.9,5.0]
num1 = {}
counti, countj = 0, 0
for i in num:
num1[counti] = []
countj = 0
for j in i:
if j <= k:
num1[counti].append(countj)
countj = countj + 1
counti = counti + 1
return num1def compare(data):
# k = [4.0, 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.9, 5.0]
global t1
t1 = 4.0
data = https://www.it610.com/article/data.dropna(axis=0, how='any')
x_data = https://www.it610.com/article/data.drop(['judge'], axis=1)
y_data = https://www.it610.com/article/data.loc[:,'judge']
n = x_data.shape[1]# 代表列数
m = x_data.shape[0]# 代表行数# 决策属性基本集
y_basic_set = sorted([v for k, v in basic_set(y_data).items()])
num = Euclidean(x_data)
# print(num)
x_basic_set = [v for k, v in key_basic(num,t1).items()]# γC(D)
pos = []
for i in x_basic_set:
for j in y_basic_set:
if set(i).issubset(j):
pos.append(i)
# pos.sort()#排序
r_x_y = len(pos) / len(data)# 也许可以写一个card函数
print('依赖度r_x_(y):', r_x_y)# 计算每一个γ(C-a)(D)
# 获取条件属性个数为总下标存入集合
# 收集属性重要性
imp_attr = []
columns_num = list(range(len(x_data.columns)))
for i in columns_num:
c = columns_num.copy()
c.remove(i)
u = data.iloc[:, c]
num = Euclidean(u)
u = sorted([v for k, v in key_basic(num,t1).items()])# γC(D)
pos_va = []
for k in u:
for j in y_basic_set:
if set(k).issubset(j):
pos_va.append(k)
r_x_y_2 = len(pos_va) / len(data)
r_diff = round((r_x_y - r_x_y_2), 4)
imp_attr.append(r_diff)dict_imp = {}
for o, p in enumerate(imp_attr):
dict_imp[data.columns[o]] = presult = dict_imp
# print(imp_attr)
return resultdef sepdata(data):
# 获取数据长度
len = data.iloc[:, 0].size
# 将数据划分
if len % 100 != 0:
if len > 100:
num = len // 100 + 1
else:
num = 1
else:
if len > 100:
num = int(len / 100)
else:
num = 1
arr = [[]] * numcount = 0
for i in arr:
# 如果数少于100或者最后一部分数少于100,则放入一个由数长决定的数组
if num == 1:
arr[count] = data.iloc[0:len]# 取100开始,取
elif count == num - 1:
arr[count] = data.iloc[100 * count:len]
else:
arr[count] = data.iloc[100 * count:(count + 1) * 100]
count = count + 1
sorted_dict_imp = [[]] * num
total = [0] * 27
title = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16',
'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27']
# total = [0] * 16
# title = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16']
print(sorted_dict_imp)
count = 0
for i in arr:
print('-------------------------------------第%d个数据集数据-----------------------------------------' % (count + 1))
sorted_dict_imp[count] = compare(i)
count = count + 1
count1 = 0
# 将dict的key为C1-Cn的value存入total中保存,并且相加
for i in sorted_dict_imp:
count = 0
if count1 == 0:
for j in title:
total[count] = i.get(j)
count = count + 1
else:
for z in title:
total[count] = i.get(z) + total[count]
count = count + 1
count1 = count1 + 1
# 输出最终C1-Cn的结果
count = 0
for i in title:
print(i, ':', round(total[count], 4))
count = count + 1
figure(total)def figure(opt):
count = 0
plt.rcParams['font.sans-serif'] = ['SimHei']# 用来正常显示中文标签
title = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10', 'C11', 'C12', 'C13', 'C14', 'C15', 'C16',
'C17', 'C18', 'C19', 'C20', 'C21', 'C22', 'C23', 'C24', 'C25', 'C26', 'C27']
plt.scatter(title, opt, c='b', s=7)
plt.plot(title, opt, linewidth=3, alpha=0.7)
plt.show()def main():
data = https://www.it610.com/article/pd.read_csv(filepath_or_buffer='/Users/we2swing/Desktop/111.csv')
sepdata(data)if __name__ == '__main__':
main()
【基于邻域粗糙集算法python实现】里面很多代码延续的是之前上面传送门里文章的代码。
觉得有用的话点个赞
测试数据:
链接:https://pan.baidu.com/s/1dtle6A6od8bOCwDPW1lZ2w
提取码:o2qf
推荐阅读
- 基于微信小程序带后端ssm接口小区物业管理平台设计
- 基于|基于 antd 风格的 element-table + pagination 的二次封装
- 基于爱,才会有“愿望”当“要求”。2017.8.12
- javaweb|基于Servlet+jsp+mysql开发javaWeb学生成绩管理系统
- JavaScript|vue 基于axios封装request接口请求——request.js文件
- 韵达基于云原生的业务中台建设 | 实战派
- EasyOA|EasyOA 基于SSM的实现 未完成总结与自我批判
- 基于stm32智能风扇|基于stm32智能风扇_一款基于STM32的智能灭火机器人设计
- stm32|基于STM32和freeRTOS智能门锁设计方案
- Python|Python 基于datetime库的日期时间数据处理