【dataframe的合并(append, merge, concat)】少年击剑更吹箫,剑气箫心一例消。这篇文章主要讲述dataframe的合并(append, merge, concat)相关的知识,希望能为你提供帮助。
1,pd.concat:拼接
1.1,axis
df1 = pd.DataFrame(np.ones((3,4))*0, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
df2 = pd.DataFrame(np.ones((3,4))*1, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
df3 = pd.DataFrame(np.ones((3,4))*2, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
a
b
c
d
0
0.0
0.0
0.0
0.0
1
0.0
0.0
0.0
0.0
2
0.0
0.0
0.0
0.0
a
b
c
d
0
1.0
1.0
1.0
1.0
1
1.0
1.0
1.0
1.0
2
1.0
1.0
1.0
1.0
a
b
c
d
0
2.0
2.0
2.0
2.0
1
2.0
2.0
2.0
2.0
2
2.0
2.0
2.0
2.0
result = pd.concat([df1, df2, df3], axis = 0)
# 0表示竖向的合并
a
b
c
d
0
0.0
0.0
0.0
0.0
1
0.0
0.0
0.0
0.0
2
0.0
0.0
0.0
0.0
0
1.0
1.0
1.0
1.0
1
1.0
1.0
1.0
1.0
2
1.0
1.0
1.0
1.0
0
2.0
2.0
2.0
2.0
1
2.0
2.0
2.0
2.0
2
2.0
2.0
2.0
2.0
result=pd.concat([df1,df2,df3], axis=1)
# 1表示横向的合并
a
b
c
d
a
b
c
d
a
b
c
d
0
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
1
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
2
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
1.2,ignore_index
df1 = pd.DataFrame(np.ones((3,4))*0, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
df2 = pd.DataFrame(np.ones((3,4))*1, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
df3 = pd.DataFrame(np.ones((3,4))*2, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
result = pd.concat([df1, df2, df3], axis = 0, ignore_index=True)
# ignore_index=True表示忽略原来index(axis=0)/column(axis=1)
a
b
c
d
0
0.0
0.0
0.0
0.0
1
0.0
0.0
0.0
0.0
2
0.0
0.0
0.0
0.0
3
1.0
1.0
1.0
1.0
4
1.0
1.0
1.0
1.0
5
1.0
1.0
1.0
1.0
6
2.0
2.0
2.0
2.0
7
2.0
2.0
2.0
2.0
8
2.0
2.0
2.0
2.0
result = pd.concat([df1, df2, df3], axis = 0, ignore_index=True)
# ignore_index=True表示忽略原来index
0
1
2
3
4
5
6
7
8
9
10
11
0
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
1
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
2
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2.0
2.0
2.0
2.0
1.3,join
df1 = pd.DataFrame(np.ones((3,4))*0, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘])
df2 = pd.DataFrame(np.ones((3,4))*1, columns = [‘b‘, ‘c‘, ‘d‘, ‘e‘])
result=pd.concat([df1,df2], axis=0, join=‘inner‘, ignore_index=True)
# join=‘inner‘表示只保留column名相同的部分
b
c
d
0
0.0
0.0
0.0
1
0.0
0.0
0.0
2
0.0
0.0
0.0
3
1.0
1.0
1.0
4
1.0
1.0
1.0
5
1.0
1.0
1.0
1.4,
join_axes
df1 = pd.DataFrame(np.ones((3,4))*0, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3,4))*1, columns = [‘b‘, ‘c‘, ‘d‘, ‘e‘], index=[2, 3, 4])
result=pd.concat([df1,df2], axis=1)
a
b
c
d
b
c
d
e
1
0.0
0.0
0.0
0.0
NaN
NaN
NaN
NaN
2
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
3
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
4
NaN
NaN
NaN
NaN
1.0
1.0
1.0
1.0
result=pd.concat([df1,df2], axis=1, join_axes=[df1.index])
#
join_axes=[df1.index]表示按照df1的index进行合并
a
b
c
d
b
c
d
e
1
0.0
0.0
0.0
0.0
NaN
NaN
NaN
NaN
2
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
3
0.0
0.0
0.0
0.0
1.0
1.0
1.0
1.0
2,pd.append()
2.1,append
pd.append只有竖向往下加数据,没有axis属性
df1 = pd.DataFrame(np.ones((3,4))*0, columns = [‘a‘, ‘b‘, ‘c‘, ‘d‘], index=[1, 2, 3])
df2 = pd.DataFrame(np.ones((3,4))*1, columns = [‘b‘, ‘c‘, ‘d‘, ‘e‘], index=[2, 3, 4])
df3 = pd.DataFrame(np.ones((3,4))*2, columns = [‘b‘, ‘c‘, ‘d‘, ‘e‘], index=[2, 3, 4])
df1 = df1.append(df2)
a
b
c
d
e
1
0.0
0.0
0.0
0.0
NaN
2
0.0
0.0
0.0
0.0
NaN
3
0.0
0.0
0.0
0.0
NaN
2
NaN
1.0
1.0
1.0
1.0
3
NaN
1.0
1.0
1.0
1.0
4
NaN
1.0
1.0
1.0
1.0
df1 = df1.append([df2, df3], ignore_index=True)
# 可以一次加多个df,也可以忽略index
a
b
c
d
e
0
0.0
0.0
0.0
0.0
NaN
1
0.0
0.0
0.0
0.0
NaN
2
0.0
0.0
0.0
0.0
NaN
3
NaN
1.0
1.0
1.0
1.0
4
NaN
1.0
1.0
1.0
1.0
5
NaN
1.0
1.0
1.0
1.0
6
NaN
2.0
2.0
2.0
2.0
7
NaN
2.0
2.0
2.0
2.0
8
NaN
2.0
2.0
2.0
2.0
s1=pd.Series([1,2,3,4],index=[‘a‘,‘b‘,‘c‘,‘d‘])
df1 = df1.append(s1, ignore_index=True)
# 可以加series,ignore_index=True必须加上,s1个数必须和df1的列数相同
a
b
c
d
0
0.0
0.0
0.0
0.0
# 注意ignore_index = True以后,df1的index变成从0开始了!!!
1
0.0
0.0
0.0
0.0
2
0.0
0.0
0.0
0.0
3
1.0
2.0
3.0
4.0
l1=[1,2,3,4]
df1.loc[4] = l1
a
b
c
d
1
0.0
0.0
0.0
0.0
# df1的index还是按照原来从1开始
2
0.0
0.0
0.0
0.0
3
0.0
0.0
0.0
0.0
4
1.0
2.0
3.0
4.0
2.2,append举例
df1
=
pd.DataFrame({‘B‘:
[‘B0‘,
‘B1‘,
‘B2‘,
‘B3‘,‘B4‘],
‘D‘:
[‘D0‘,
‘D1‘,
‘D2‘,
‘D3‘,‘D4‘],
‘A‘:
[‘A0‘,
‘A1‘,
‘A2‘,
‘A3‘,‘A4‘],
‘E‘:
[‘E0‘,
‘E1‘,
‘E2‘,
‘E3‘,‘E4‘]},
index=[0,
1,
2,
3,4],columns=[‘B‘,‘D‘,‘A‘,‘E‘])
df2
=
pd.DataFrame({‘F‘:
[‘F4‘,
‘F5‘,
‘F6‘,
‘F7‘,‘F8‘],
‘A‘:
[‘A4‘,
‘A5‘,
‘A6‘,
‘A7‘,‘A8‘],
‘B‘:
[‘B4‘,
‘B5‘,
‘B6‘,
‘B7‘,‘B8‘],
‘C‘:
[‘C4‘,
‘C5‘,
‘C6‘,
‘C7‘,‘C8‘]},
index=[5,
9,
6,
7,10])
输入df3=df1.append(df2)后,df3变为:
A
B
C
D
E
F
0
A0
B0
NaN
D0
E0
NaN
1
A1
B1
NaN
D1
E1
NaN
2
A2
B2
NaN
D2
E2
NaN
3
A3
B3
NaN
D3
E3
NaN
4
A4
B4
NaN
D4
E4
NaN
5
A4
B4
C4
NaN
NaN
F4
9
A5
B5
C5
NaN
NaN
F5
6
A6
B6
C6
NaN
NaN
F6
7
A7
B7
C7
NaN
NaN
F7
10
A8
B8
C8
NaN
NaN
F8
列项重新排列了,如果想要列项按照df1的走,即df1的次序,并且如果df1没有的列,就直接扔掉:
df4=pd.concat([df1,df2],axis=0)
df4.reindex(columns=df1.columns)
3,pd.merge
# 合并
3.1,1个key
left=pd.DataFrame({‘key‘:[‘K0‘,‘K1‘,‘K2‘,‘K3‘],
‘A‘:[‘A0‘,‘A1‘,‘A2‘,‘A3‘],
‘B‘:[‘B0‘,‘B1‘,‘B2‘,‘B3‘]})
right=pd.DataFrame({‘key‘:[‘K0‘,‘K1‘,‘K2‘,‘K3‘],
‘C‘:[‘C0‘,‘C1‘,‘C2‘,‘C3‘],
‘D‘:[‘D0‘,‘D1‘,‘D2‘,‘D3‘]})
res=pd.merge(left,right)
res=pd.merge(left,right,on=[‘key‘])
# 默认会自动选择共同的column进行合并,如果没有共同的column会报错
A
B key
C
D
0
A0
B0
K0
C0
D0
1
A1
B1
K1
C1
D1
2
A2
B2
K2
C2
D2
3
A3
B3
K3
C3
D3
3.2,多个key,how可取[‘left‘, ‘right‘, ‘inner‘, ‘outer‘]
left=pd.DataFrame({‘key1‘:[‘K0‘,‘K0‘,‘K1‘,‘K2‘],
‘key2‘:[‘K0‘,‘K1‘,‘K0‘,‘K1‘],
‘A‘:[‘A0‘,‘A1‘,‘A2‘,‘A3‘],
‘B‘:[‘B0‘,‘B1‘,‘B2‘,‘B3‘]})
right=pd.DataFrame({‘key1‘:[‘K0‘,‘K1‘,‘K1‘,‘K2‘],
‘key2‘:[‘K0‘,‘K0‘,‘K0‘,‘K0‘],
‘C‘:[‘C0‘,‘C1‘,‘C2‘,‘C3‘],
‘D‘:[‘D0‘,‘D1‘,‘D2‘,‘D3‘]})
A
B key1 key2
0
A0
B0
K0
K0
1
A1
B1
K0
K1
2
A2
B2
K1
K0
3
A3
B3
K2
K1
C
D key1 key2
0
C0
D0
K0
K0
1
C1
D1
K1
K0
2
C2
D2
K1
K0
3
C3
D3
K2
K0
res=pd.merge(left,right,on=[‘key1‘,‘key2‘])
# 需要key1,key2完全相同才合并,left有一行K1/K0,right有两行K1/K0,则打印2遍
res=pd.merge(left,right,on=[‘key1‘,‘key2‘], how = ‘inner‘)
# 默认是how=‘inner‘,即key的value要相同
A
B
key1 key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A2
B2
K1
K0
C1
D1
2
A2
B2
K1
K0
C2
D2
res=pd.merge(left,right,on=[‘key1‘,‘key2‘], how = ‘outer‘)
# 根据key1,key2取值全部合并,如果left或者right没有的取NaN
A
B key1 key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A1
B1
K0
K1
NaN
NaN
2
A2
B2
K1
K0
C1
D1
3
A2
B2
K1
K0
C2
D2
4
A3
B3
K2
K1
NaN
NaN
5
NaN
NaN
K2
K0
C3
D3
res=pd.merge(left,right,on=[‘key1‘,‘key2‘], how = ‘left‘)
# 根据left的key合并,重复出现了2行K1K0(因为right有2行)
A
B key1 key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A1
B1
K0
K1
NaN
NaN
2
A2
B2
K1
K0
C1
D1
3
A2
B2
K1
K0
C2
D2
4
A3
B3
K2
K1
NaN
NaN
res=pd.merge(left,right,on=[‘key1‘,‘key2‘],how=‘right‘)
# 根据right的key合并
A
B key1 key2
C
D
0
A0
B0
K0
K0
C0
D0
1
A2
B2
K1
K0
C1
D1
2
A2
B2
K1
K0
C2
D2
3
NaN
NaN
K2
K0
C3
D3
3.3, indicator
默认false,true表示显示合并方式,是left还是right,还是both
推荐阅读
- Android 7.0 新特性
- Android 修改TabLayout底部导航条Indicator的长短
- app测试与web测试的区别
- app的deviceName,apppackage,appactivity获取
- appium手机自动化安装
- 导入AppiumLibrary报错( ImportError: cannot import name 'InvalidArgumentException)
- 静态分析Android程序
- Spring的beanFactory与ApplicationContext区别
- Android逆向学习资料