Python Data Analysis Notes
Work calls for data analysis with the third-party libraries NumPy and pandas, so I am jotting down notes as I go.
1. NumPy notes
In [1]: import numpy as np
In [2]: data = np.array([1, 3, 4, 8])
In [3]: data
Out[3]: array([1, 3, 4, 8])
In [4]: data.shape
Out[4]: (4,)
In [5]: data.dtype
Out[5]: dtype('int32')
In [6]: data[1]
Out[6]: 3
In [7]: data[1] = 9
In [8]: data
Out[8]: array([1, 9, 4, 8])
In [9]: data = np.array([[1, 2, 3], [4, 5, 6]])
In [10]: data
Out[10]:
array([[1, 2, 3],
       [4, 5, 6]])
In [11]: data.shape
Out[11]: (2, 3)
In [12]: data[0,1]
Out[12]: 2
In [13]: np.arange(10)
Out[13]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [14]: data[1,2] = 7
In [15]: data
Out[15]:
array([[1, 2, 3],
       [4, 5, 7]])
In [18]: np.arange(5,15)
Out[18]: array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
In [19]: data = np.arange(10)
In [20]: data.reshape(2,5)
Out[20]:
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
In [22]: data2 = data.reshape(2,5)
In [23]: data2
Out[23]:
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
In [24]: data[4] = 10
In [25]: data
Out[25]: array([ 0,  1,  2,  3, 10,  5,  6,  7,  8,  9])
In [26]: data2
Out[26]:
array([[ 0,  1,  2,  3, 10],
       [ 5,  6,  7,  8,  9]])
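Out[26] above is the point of this little experiment: reshape gives a view on the same memory, so writing data[4] = 10 shows up in data2 as well. A quick sketch of my own (the copy() call is the addition, not part of the session) to keep the two cases apart:

    import numpy as np

    data = np.arange(10)
    view = data.reshape(2, 5)             # shares memory with data
    snapshot = data.reshape(2, 5).copy()  # independent copy

    data[4] = 10
    print(view[0, 4])      # 10 -- the view sees the change
    print(snapshot[0, 4])  # 4  -- the copy does not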
In [28]: np.zeros((2,2))
Out[28]:
array([[ 0.,  0.],
       [ 0.,  0.]])
In [29]: np.ones((2,3,3))
Out[29]:
array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])
In [30]: np.eye(4)
Out[30]:
array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])
In [31]: np.arange(16).reshape(4,4)
Out[31]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [37]: data = np.arange(100, step=10)
In [38]: data
Out[38]: array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
In [39]: data[2]
Out[39]: 20
In [40]: data[2:5]
Out[40]: array([20, 30, 40])
In [41]: data[:3]
Out[41]: array([ 0, 10, 20])
In [42]: data[5:] = -1
In [43]: data
Out[43]: array([ 0, 10, 20, 30, 40, -1, -1, -1, -1, -1])
In [44]: data = np.arange(16).reshape(4,4)
In [45]: data
Out[45]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [46]: data[1]
Out[46]: array([4, 5, 6, 7])
In [47]: data[1:3]
Out[47]:
array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
In [48]: data[:, 2:4]
Out[48]:
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])
In [49]: data[[1,3],[2,3]]
Out[49]: array([ 6, 15])
In [53]: print(data[1,2],data[3,3])
6 15
In [54]: data > 10
Out[54]:
array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False,  True],
       [ True,  True,  True,  True]], dtype=bool)
In [55]: data[data > 10]
Out[55]: array([11, 12, 13, 14, 15])
In [56]: data[data % 2 == 0]
Out[56]: array([ 0,  2,  4,  6,  8, 10, 12, 14])
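To summarize the indexing experiments above (my own note, not from the session): boolean masks and integer-array ("fancy") indexing return copies, masks can be combined with & and |, and a mask also works on the left-hand side of an assignment.

    import numpy as np

    data = np.arange(16).reshape(4, 4)
    mask = (data > 5) & (data % 2 == 0)   # combine conditions with & / | and parentheses
    print(data[mask])                     # [ 6  8 10 12 14]
    data[data > 10] = 0                   # assign through a mask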
In [57]: x = np.arange(1,5).reshape(2,2)
In [58]: x
Out[58]:
array([[1, 2],
       [3, 4]])
In [59]: y = np.arange(5,9).reshape(2,2)
In [60]: y
Out[60]:
array([[5, 6],
       [7, 8]])
In [61]: x + y
Out[61]:
array([[ 6,  8],
       [10, 12]])
In [62]: x - y
Out[62]:
array([[-4, -4],
       [-4, -4]])
In [63]: x * y
Out[63]:
array([[ 5, 12],
       [21, 32]])
In [65]: x.dot(y)
Out[65]:
array([[19, 22],
       [43, 50]])
In [66]: x / y
Out[66]:
array([[ 0.2       ,  0.33333333],
       [ 0.42857143,  0.5       ]])
In [67]: np.sqrt(x)
Out[67]:
array([[ 1.        ,  1.41421356],
       [ 1.73205081,  2.        ]])
In [68]: x.T
Out[68]:
array([[1, 3],
       [2, 4]])
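Worth remembering from the block above: x * y is elementwise, while x.dot(y) is the matrix product. On Python 3.5+ with a recent NumPy the @ operator does the same thing as dot (small sketch of my own):

    import numpy as np

    x = np.arange(1, 5).reshape(2, 2)
    y = np.arange(5, 9).reshape(2, 2)
    print(x * y)   # elementwise:      [[ 5 12] [21 32]]
    print(x @ y)   # same as x.dot(y): [[19 22] [43 50]]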
In [69]: np.linspace(1,10)
Out[69]:
array([1.,1.18367347,1.36734694,1.55102041,
1.73469388,1.91836735,2.10204082,2.28571429,
2.46938776,2.65306122,2.83673469,3.02040816,
3.20408163,3.3877551 ,3.57142857,3.75510204,
3.93877551,4.12244898,4.30612245,4.48979592,
4.67346939,4.85714286,5.04081633,5.2244898 ,
5.40816327,5.59183673,5.7755102 ,5.95918367,
6.14285714,6.32653061,6.51020408,6.69387755,
6.87755102,7.06122449,7.24489796,7.42857143,
7.6122449 ,7.79591837,7.97959184,8.16326531,
8.34693878,8.53061224,8.71428571,8.89795918,
9.08163265,9.26530612,9.44897959,9.63265306,
        9.81632653,  10.        ])
In [70]: np.linspace(1,10, num=200)
Out[70]:
array([1.,1.04522613,1.09045226,1.13567839,
1.18090452,1.22613065,1.27135678,1.31658291,
1.36180905,1.40703518,1.45226131,1.49748744,
1.54271357,1.5879397 ,1.63316583,1.67839196,
1.72361809,1.76884422,1.81407035,1.85929648,
1.90452261,1.94974874,1.99497487,2.04020101,
2.08542714,2.13065327,2.1758794 ,2.22110553,
2.26633166,2.31155779,2.35678392,2.40201005,
2.44723618,2.49246231,2.53768844,2.58291457,
2.6281407 ,2.67336683,2.71859296,2.7638191 ,
2.80904523,2.85427136,2.89949749,2.94472362,
2.98994975,3.03517588,3.08040201,3.12562814,
3.17085427,3.2160804 ,3.26130653,3.30653266,
3.35175879,3.39698492,3.44221106,3.48743719,
3.53266332,3.57788945,3.62311558,3.66834171,
3.71356784,3.75879397,3.8040201 ,3.84924623,
3.89447236,3.93969849,3.98492462,4.03015075,
4.07537688,4.12060302,4.16582915,4.21105528,
4.25628141,4.30150754,4.34673367,4.3919598 ,
4.43718593,4.48241206,4.52763819,4.57286432,
4.61809045,4.66331658,4.70854271,4.75376884,
4.79899497,4.84422111,4.88944724,4.93467337,
4.9798995 ,5.02512563,5.07035176,5.11557789,
5.16080402,5.20603015,5.25125628,5.29648241,
5.34170854,5.38693467,5.4321608 ,5.47738693,
5.52261307,5.5678392 ,5.61306533,5.65829146,
5.70351759,5.74874372,5.79396985,5.83919598,
5.88442211,5.92964824,5.97487437,6.0201005 ,
6.06532663,6.11055276,6.15577889,6.20100503,
6.24623116,6.29145729,6.33668342,6.38190955,
6.42713568,6.47236181,6.51758794,6.56281407,
6.6080402 ,6.65326633,6.69849246,6.74371859,
6.78894472,6.83417085,6.87939698,6.92462312,
6.96984925,7.01507538,7.06030151,7.10552764,
7.15075377,7.1959799 ,7.24120603,7.28643216,
7.33165829,7.37688442,7.42211055,7.46733668,
7.51256281,7.55778894,7.60301508,7.64824121,
7.69346734,7.73869347,7.7839196 ,7.82914573,
7.87437186,7.91959799,7.96482412,8.01005025,
8.05527638,8.10050251,8.14572864,8.19095477,
8.2361809 ,8.28140704,8.32663317,8.3718593 ,
8.41708543,8.46231156,8.50753769,8.55276382,
8.59798995,8.64321608,8.68844221,8.73366834,
8.77889447,8.8241206 ,8.86934673,8.91457286,
8.95979899,9.00502513,9.05025126,9.09547739,
9.14070352,9.18592965,9.23115578,9.27638191,
9.32160804,9.36683417,9.4120603 ,9.45728643,
9.50251256,9.54773869,9.59296482,9.63819095,
9.68341709,9.72864322,9.77386935,9.81909548,
        9.86432161,   9.90954774,   9.95477387,  10.        ])
In [71]: x = np.linspace(0, 2*np.pi, num=50)
In [72]: x
Out[72]:
array([ 0.,0.12822827,0.25645654,0.38468481,0.51291309,
0.64114136,0.76936963,0.8975979 ,1.02582617,1.15405444,
1.28228272,1.41051099,1.53873926,1.66696753,1.7951958 ,
1.92342407,2.05165235,2.17988062,2.30810889,2.43633716,
2.56456543,2.6927937 ,2.82102197,2.94925025,3.07747852,
3.20570679,3.33393506,3.46216333,3.5903916 ,3.71861988,
3.84684815,3.97507642,4.10330469,4.23153296,4.35976123,
4.48798951,4.61621778,4.74444605,4.87267432,5.00090259,
5.12913086,5.25735913,5.38558741,5.51381568,5.64204395,
        5.77027222,  5.89850049,  6.02672876,  6.15495704,  6.28318531])

(In [73] was not captured above; judging by the values that follow, it computed y = np.sin(x).)

In [74]: y
Out[74]:
array([0.00000000e+00,1.27877162e-01,2.53654584e-01,
3.75267005e-01,4.90717552e-01,5.98110530e-01,
6.95682551e-01,7.81831482e-01,8.55142763e-01,
9.14412623e-01,9.58667853e-01,9.87181783e-01,
9.99486216e-01,9.95379113e-01,9.74927912e-01,
9.38468422e-01,8.86599306e-01,8.20172255e-01,
7.40277997e-01,6.48228395e-01,5.45534901e-01,
4.33883739e-01,3.15108218e-01,1.91158629e-01,
6.40702200e-02,-6.40702200e-02,-1.91158629e-01,
-3.15108218e-01,-4.33883739e-01,-5.45534901e-01,
-6.48228395e-01,-7.40277997e-01,-8.20172255e-01,
-8.86599306e-01,-9.38468422e-01,-9.74927912e-01,
-9.95379113e-01,-9.99486216e-01,-9.87181783e-01,
-9.58667853e-01,-9.14412623e-01,-8.55142763e-01,
-7.81831482e-01,-6.95682551e-01,-5.98110530e-01,
-4.90717552e-01,-3.75267005e-01,-2.53654584e-01,
-1.27877162e-01,-2.44929360e-16])
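The x and y arrays above are set up for plotting; matplotlib only gets imported later in these notes, but a minimal sketch would be:

    import numpy as np
    import matplotlib.pyplot as plt

    x = np.linspace(0, 2 * np.pi, num=50)
    y = np.sin(x)
    plt.plot(x, y)       # one period of the sine curve
    plt.xlabel('x')
    plt.ylabel('sin(x)')
    plt.show()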
2. pandas notes
In [1]: import pandas as pd
In [2]: import numpy as np
In [3]: s = pd.Series([1, 3, 5, np.NaN, 8, 4])
In [4]: s
Out[4]:
0    1.0
1    3.0
2    5.0
3    NaN
4    8.0
5    4.0
dtype: float64
In [5]: dates = pd.date_range('20160301', periods=6)
In [6]: dates
Out[6]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
               '2016-03-05', '2016-03-06'],
              dtype='datetime64[ns]', freq='D')
In [7]: data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
In [8]: data
Out[8]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
In [9]: data.shape
Out[9]: (6, 4)In [10]: data.values
Out[10]:
array([[ 2.02783946, -0.29258761,0.05187483, -0.2978054 ],
[ 1.09800225, -0.20649433,0.03898198,0.04355011],
[ 1.15290424,0.10370374,0.39340982, -0.42141752],
[-0.37963074,0.1653215 , -2.12121315,1.70680791],
[ 0.02057416, -1.69729895,1.23049898,1.82649881],
[-0.72699977, -0.0723013 , -1.5117672 ,0.13140707]])In [13]: d = {'A': 1, 'B': pd.Timestamp('20130301'), 'C': [i for i in range(4)], 'D': np.arange(4)}In [14]: d
Out[14]:
{'A': 1,
'B': Timestamp('2013-03-01 00:00:00'),
'C': [0, 1, 2, 3],
'D': array([0, 1, 2, 3])}In [16]: df = pd.DataFrame(d)In [17]: df
Out[17]:
ABCD
01 2013-03-0100
11 2013-03-0111
21 2013-03-0122
31 2013-03-0133In [18]: df.dtypes
Out[18]:
Aint64
Bdatetime64[ns]
Cint64
Dint32
dtype: objectIn [19]: df.A
Out[19]:
01
11
21
31
Name: A, dtype: int64In [20]: df.C
Out[20]:
00
11
22
33
Name: C, dtype: int64In [21]: df.B
Out[21]:
02013-03-01
12013-03-01
22013-03-01
32013-03-01
Name: B, dtype: datetime64[ns]In [22]: df.D
Out[22]:
00
11
22
33
Name: D, dtype: int32In [23]: type(df.B)
Out[23]: pandas.core.series.SeriesIn [24]: data.head()
Out[24]:
ABCD
2016-03-012.027839 -0.2925880.051875 -0.297805
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499In [25]: data.tail()
Out[25]:
ABCD
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499
2016-03-06 -0.727000 -0.072301 -1.5117670.131407In [26]: data.head(2)
Out[26]:
ABCD
2016-03-012.027839 -0.2925880.051875 -0.297805
2016-03-021.098002 -0.2064940.0389820.043550In [27]: data.index
Out[27]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
'2016-03-05', '2016-03-06'],
dtype='datetime64[ns]', freq='D')In [28]: data.describe()
Out[28]:
ABCD
count6.0000006.0000006.0000006.000000
mean0.532115 -0.333276 -0.3197020.498173
std1.0602950.6907891.2525841.004557
min-0.727000 -1.697299 -2.121213 -0.421418
25%-0.279580 -0.271064 -1.124080 -0.212467
50%0.559288 -0.1393980.0454280.087479
75%1.1391790.0597020.3080261.312958
max2.0278390.1653211.2304991.826499In [29]: data.T
Out[29]:
2016-03-012016-03-022016-03-032016-03-042016-03-052016-03-06
A2.0278391.0980021.152904-0.3796310.020574-0.727000
B-0.292588-0.2064940.1037040.165321-1.697299-0.072301
C0.0518750.0389820.393410-2.1212131.230499-1.511767
D-0.2978050.043550-0.4214181.7068081.8264990.131407In [30]: data.T.shape
Out[30]: (4, 6)In [31]: data.shape
Out[31]: (6, 4)In [32]: data.sort_index(axis=1)
Out[32]:
ABCD
2016-03-012.027839 -0.2925880.051875 -0.297805
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499
2016-03-06 -0.727000 -0.072301 -1.5117670.131407In [33]: data.sort_index(axis=1, ascending=False)
Out[33]:
DCBA
2016-03-01 -0.2978050.051875 -0.2925882.027839
2016-03-020.0435500.038982 -0.2064941.098002
2016-03-03 -0.4214180.3934100.1037041.152904
2016-03-041.706808 -2.1212130.165321 -0.379631
2016-03-051.8264991.230499 -1.6972990.020574
2016-03-060.131407 -1.511767 -0.072301 -0.727000In [34]: data.sort_index(axis=0, ascending=False)
Out[34]:
ABCD
2016-03-06 -0.727000 -0.072301 -1.5117670.131407
2016-03-050.020574 -1.6972991.2304991.826499
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-012.027839 -0.2925880.051875 -0.297805In [35]: data.sort_values(by='A')
Out[35]:
ABCD
2016-03-06 -0.727000 -0.072301 -1.5117670.131407
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-012.027839 -0.2925880.051875 -0.297805In [36]: data['A']
Out[36]:
2016-03-012.027839
2016-03-021.098002
2016-03-031.152904
2016-03-04-0.379631
2016-03-050.020574
2016-03-06-0.727000
Freq: D, Name: A, dtype: float64In [37]: data.A
Out[37]:
2016-03-012.027839
2016-03-021.098002
2016-03-031.152904
2016-03-04-0.379631
2016-03-050.020574
2016-03-06-0.727000
Freq: D, Name: A, dtype: float64In [39]: data[2:4]
Out[39]:
ABCD
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808In [41]: data['20160302':'20160305']
Out[41]:
ABCD
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499In [42]: data.loc['20160302':'20160305']
Out[42]:
ABCD
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499In [43]: data.iloc[2:4]
Out[43]:
ABCD
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808In [44]: data.iloc[:,1:3]
Out[44]:
BC
2016-03-01 -0.2925880.051875
2016-03-02 -0.2064940.038982
2016-03-030.1037040.393410
2016-03-040.165321 -2.121213
2016-03-05 -1.6972991.230499
2016-03-06 -0.072301 -1.511767In [45]: data.loc[:,'B':'C']
Out[45]:
BC
2016-03-01 -0.2925880.051875
2016-03-02 -0.2064940.038982
2016-03-030.1037040.393410
2016-03-040.165321 -2.121213
2016-03-05 -1.6972991.230499
2016-03-06 -0.072301 -1.511767In [46]: data.loc['20160302':'20160305', ['B','C']]
Out[46]:
BC
2016-03-02 -0.2064940.038982
2016-03-030.1037040.393410
2016-03-040.165321 -2.121213
2016-03-05 -1.6972991.230499In [48]: data.iloc[1:3, 1:3]
Out[48]:
BC
2016-03-02 -0.2064940.038982
2016-03-030.1037040.393410In [49]: data.loc['20160302', 'B']
Out[49]: -0.20649432992272151In [50]: data.at[pd.Timestamp('20160302'), 'B']
Out[50]: -0.20649432992272151In [51]: data.iloc[1]
Out[51]:
A1.098002
B-0.206494
C0.038982
D0.043550
Name: 2016-03-02 00:00:00, dtype: float64In [52]: data.iloc[1:3]
Out[52]:
ABCD
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418In [53]: data.iloc[:,1:3]
Out[53]:
BC
2016-03-01 -0.2925880.051875
2016-03-02 -0.2064940.038982
2016-03-030.1037040.393410
2016-03-040.165321 -2.121213
2016-03-05 -1.6972991.230499
2016-03-06 -0.072301 -1.511767In [54]: data.iloc[1,1]
Out[54]: -0.20649432992272151In [55]: data.iat[1,1]
Out[55]: -0.20649432992272151In [56]: %timeit data.iloc[1,1]
8.08 μs ± 17.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
In [57]: %timeit data.iat[1,1]
5.38 μs ± 10.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
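A rule of thumb for the selection methods above (my own summary): loc/at select by label, iloc/iat by integer position, and at/iat are the faster scalar-only versions, which is what the two %timeit runs show.

    import numpy as np
    import pandas as pd

    dates = pd.date_range('20160301', periods=6)
    data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

    data.loc['20160302':'20160305', ['B', 'C']]  # by label; both slice ends inclusive
    data.iloc[1:3, 1:3]                          # by position; end exclusive
    data.at[pd.Timestamp('20160302'), 'B']       # single value by label
    data.iat[1, 1]                               # single value by position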
In [58]: data[data.A > 0]
Out[58]:
ABCD
2016-03-012.027839 -0.2925880.051875 -0.297805
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-050.020574 -1.6972991.2304991.826499In [59]: data[data > 0]
Out[59]:
ABCD
2016-03-012.027839NaN0.051875NaN
2016-03-021.098002NaN0.0389820.043550
2016-03-031.1529040.1037040.393410NaN
2016-03-04NaN0.165321NaN1.706808
2016-03-050.020574NaN1.2304991.826499
2016-03-06NaNNaNNaN0.131407In [60]: data2 = data.copy()In [61]: data2
Out[61]:
ABCD
2016-03-012.027839 -0.2925880.051875 -0.297805
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04 -0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499
2016-03-06 -0.727000 -0.072301 -1.5117670.131407In [62]: tag = ['a'] * 2 + ['b'] * 2 + ['c'] * 2In [63]: data2['TAG'] = tagIn [64]: data2
Out[64]:
ABCD TAG
2016-03-012.027839 -0.2925880.051875 -0.297805a
2016-03-021.098002 -0.2064940.0389820.043550a
2016-03-031.1529040.1037040.393410 -0.421418b
2016-03-04 -0.3796310.165321 -2.1212131.706808b
2016-03-050.020574 -1.6972991.2304991.826499c
2016-03-06 -0.727000 -0.072301 -1.5117670.131407cIn [66]: data2[data2.TAG.isin(['a','c'])]
Out[66]:
ABCD TAG
2016-03-012.027839 -0.2925880.051875 -0.297805a
2016-03-021.098002 -0.2064940.0389820.043550a
2016-03-050.020574 -1.6972991.2304991.826499c
2016-03-06 -0.727000 -0.072301 -1.5117670.131407cIn [68]: data.iat[0,0] = 100In [69]: data
Out[69]:
ABCD
2016-03-01100.000000 -0.2925880.051875 -0.297805
2016-03-021.098002 -0.2064940.0389820.043550
2016-03-031.1529040.1037040.393410 -0.421418
2016-03-04-0.3796310.165321 -2.1212131.706808
2016-03-050.020574 -1.6972991.2304991.826499
2016-03-06-0.727000 -0.072301 -1.5117670.131407In [70]: data.A = range(6)In [71]: data
Out[71]:
ABCD
2016-03-010 -0.2925880.051875 -0.297805
2016-03-021 -0.2064940.0389820.043550
2016-03-0320.1037040.393410 -0.421418
2016-03-0430.165321 -2.1212131.706808
2016-03-054 -1.6972991.2304991.826499
2016-03-065 -0.072301 -1.5117670.131407In [72]: data.B = 100In [73]: data
Out[73]:
ABCD
2016-03-0101000.051875 -0.297805
2016-03-0211000.0389820.043550
2016-03-0321000.393410 -0.421418
2016-03-043100 -2.1212131.706808
2016-03-0541001.2304991.826499
2016-03-065100 -1.5117670.131407In [74]: data.iloc[:,2:4] = 1000In [75]: data
Out[75]:
ABCD
2016-03-01010010001000
2016-03-02110010001000
2016-03-03210010001000
2016-03-04310010001000
2016-03-05410010001000
2016-03-06510010001000
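One habit worth keeping when assigning like above (my own note, not from the session): do the row selection and the column selection in a single .loc/.iloc call, because chained indexing such as data[data.A > 0]['B'] = 0 may write to a temporary copy and raise SettingWithCopyWarning.

    import numpy as np
    import pandas as pd

    dates = pd.date_range('20160301', periods=6)
    data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

    data.loc[data.A > 0, 'B'] = 0   # one .loc call: select rows and column together
    data.iloc[:, 2:4] = 1000        # positional assignment, as in the session above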
3. pandas in IPython
In [2]: import pandas as pd
   ...: import numpy as np
   ...: import matplotlib.pyplot as plt

In [3]: dates = pd.date_range('20190501', periods=6)
   ...: df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))
   ...: df
Out[3]:
ABCD
2019-05-01 -1.203323 -1.0230170.9948081.845361
2019-05-020.4138180.3505630.718125 -0.100747
2019-05-03 -0.9769960.298922 -0.5636731.431109
2019-05-04 -1.395960 -0.415227 -1.5838350.022370
2019-05-05 -2.466317 -0.819741 -0.417125 -2.290065
2019-05-061.290431 -1.629373 -1.530487 -1.452107In [4]: df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])^M
...: df1
...:
...:
Out[4]:
ABCDE
2019-05-01 -1.203323 -1.0230170.9948081.845361 NaN
2019-05-020.4138180.3505630.718125 -0.100747 NaN
2019-05-03 -0.9769960.298922 -0.5636731.431109 NaN
2019-05-04 -1.395960 -0.415227 -1.5838350.022370 NaNIn [5]: df1.iloc[1:3,4] = 2^M
...: df1
...:
...:
Out[5]:
ABCDE
2019-05-01 -1.203323 -1.0230170.9948081.845361NaN
2019-05-020.4138180.3505630.718125 -0.1007472.0
2019-05-03 -0.9769960.298922 -0.5636731.4311092.0
2019-05-04 -1.395960 -0.415227 -1.5838350.022370NaNIn [6]: df1.dropna()
Out[6]:
ABCDE
2019-05-020.4138180.3505630.718125 -0.1007472.0
2019-05-03 -0.976996  0.298922 -0.563673  1.431109  2.0
In [7]: df1.fillna(value=5)
Out[7]:
ABCDE
2019-05-01 -1.203323 -1.0230170.9948081.8453615.0
2019-05-020.4138180.3505630.718125 -0.1007472.0
2019-05-03 -0.9769960.298922 -0.5636731.4311092.0
2019-05-04 -1.395960 -0.415227 -1.5838350.0223705.0In [8]: pd.isnull(df1)
Out[8]:
ABCDE
2019-05-01FalseFalseFalseFalseTrue
2019-05-02FalseFalseFalseFalseFalse
2019-05-03FalseFalseFalseFalseFalse
2019-05-04FalseFalseFalseFalseTrueIn [9]: pd.isnull(df1).any().any()
Out[9]: True
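fillna is not limited to a single constant; a small sketch of the variants I use most (toy data of my own, not the df1 above):

    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({'A': [1.0, 2.0, np.nan, 4.0],
                        'E': [np.nan, 2.0, 2.0, np.nan]})

    df1.fillna(value={'A': 0, 'E': df1.E.mean()})  # different fill value per column
    df1.dropna(subset=['E'])                       # drop rows only when E is missing
    df1.isnull().sum()                             # missing-value count per column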
In [10]: df1.mean()
Out[10]:
A-0.790615
B-0.197190
C-0.108644
D0.799523
E2.000000
dtype: float64In [11]: df1.mean(axis=1)
Out[11]:
2019-05-010.153457
2019-05-020.676352
2019-05-030.437872
2019-05-04-0.843163
Freq: D, dtype: float64In [12]: df1.cumsum()
Out[12]:
ABCDE
2019-05-01 -1.203323 -1.0230170.9948081.845361NaN
2019-05-02 -0.789505 -0.6724541.7129331.7446142.0
2019-05-03 -1.766501 -0.3735321.1492593.1757244.0
2019-05-04 -3.162461 -0.788759 -0.4345753.198094NaNIn [13]: s = pd.Series([1,3,5,np.nan,6,8],index=dates).shift(2)^M
...: s
...:
...:
Out[13]:
2019-05-01NaN
2019-05-02NaN
2019-05-031.0
2019-05-043.0
2019-05-055.0
2019-05-06NaN
Freq: D, dtype: float64In [14]: df
Out[14]:
ABCD
2019-05-01 -1.203323 -1.0230170.9948081.845361
2019-05-020.4138180.3505630.718125 -0.100747
2019-05-03 -0.9769960.298922 -0.5636731.431109
2019-05-04 -1.395960 -0.415227 -1.5838350.022370
2019-05-05 -2.466317 -0.819741 -0.417125 -2.290065
2019-05-061.290431 -1.629373 -1.530487 -1.452107In [15]: df.sub(s,axis='index')
Out[15]:
ABCD
2019-05-01NaNNaNNaNNaN
2019-05-02NaNNaNNaNNaN
2019-05-03 -1.976996 -0.701078 -1.5636730.431109
2019-05-04 -4.395960 -3.415227 -4.583835 -2.977630
2019-05-05 -7.466317 -5.819741 -5.417125 -7.290065
2019-05-06NaNNaNNaNNaNIn [16]: df.apply(np.cumsum)
Out[16]:
ABCD
2019-05-01 -1.203323 -1.0230170.9948081.845361
2019-05-02 -0.789505 -0.6724541.7129331.744614
2019-05-03 -1.766501 -0.3735321.1492593.175724
2019-05-04 -3.162461 -0.788759 -0.4345753.198094
2019-05-05 -5.628777 -1.608500 -0.8517000.908028
2019-05-06 -4.338346 -3.237874 -2.382187 -0.544078In [17]: df.apply(lambda x : x.max() - x.min())
Out[17]:
A3.756748
B1.979937
C2.578643
D4.135427
dtype: float64
In [18]: def _sum(x):
   ...:     print(type(x))
   ...:     return x.sum()
   ...: df.apply(_sum)
Out[18]:
A-4.338346
B-3.237874
C-2.382187
D-0.544078
dtype: float64In [19]: s = pd.Series(np.random.randint(10,20,size=20))^M
...: s
...:
...:
Out[19]:
019
116
211
317
413
514
613
711
817
912
1019
1110
1219
1318
1412
1510
1619
1712
1817
1910
dtype: int32In [20]: s.value_counts()
Out[20]:
194
173
123
103
132
112
181
161
141
dtype: int64In [21]: s.mode()
Out[21]:
019
dtype: int32In [22]: df = pd.DataFrame(np.random.randn(10,4), columns=list('ABCD'))^M
...: df
...:
...:
Out[22]:
ABCD
01.8529040.224001 -0.873486 -0.098751
1 -0.005724 -0.4330290.059684 -0.424876
20.3822971.121435 -0.5720000.624490
3 -1.304039 -0.5231071.7594170.367895
40.0304971.7683040.242685 -0.921089
5 -0.086144 -0.5163010.7048650.195875
6 -0.015493 -1.0044010.775551 -0.349997
70.542791 -2.1449510.2080700.930271
81.7097920.170925 -0.1024210.544754
9 -1.1359631.863820 -0.789279 -1.587587In [23]: df.iloc[:3]
Out[23]:
ABCD
01.8529040.224001 -0.873486 -0.098751
1 -0.005724 -0.4330290.059684 -0.424876
20.3822971.121435 -0.5720000.624490In [24]: df.iloc[3:7]
Out[24]:
ABCD
3 -1.304039 -0.5231071.7594170.367895
40.0304971.7683040.242685 -0.921089
5 -0.086144 -0.5163010.7048650.195875
6 -0.015493 -1.0044010.775551 -0.349997In [25]: df.iloc[7:]
Out[25]:
ABCD
70.542791 -2.1449510.2080700.930271
81.7097920.170925 -0.1024210.544754
9 -1.1359631.863820 -0.789279 -1.587587In [26]: df1 = pd.concat([df.iloc[:3], df.iloc[3:7], df.iloc[7:]])^M
...: df1
...:
...:
Out[26]:
ABCD
01.8529040.224001 -0.873486 -0.098751
1 -0.005724 -0.4330290.059684 -0.424876
20.3822971.121435 -0.5720000.624490
3 -1.304039 -0.5231071.7594170.367895
40.0304971.7683040.242685 -0.921089
5 -0.086144 -0.5163010.7048650.195875
6 -0.015493 -1.0044010.775551 -0.349997
70.542791 -2.1449510.2080700.930271
81.7097920.170925 -0.1024210.544754
9 -1.1359631.863820 -0.789279 -1.587587In [27]: df == df1
Out[27]:
ABCD
0TrueTrueTrueTrue
1TrueTrueTrueTrue
2TrueTrueTrueTrue
3TrueTrueTrueTrue
4TrueTrueTrueTrue
5TrueTrueTrueTrue
6TrueTrueTrueTrue
7TrueTrueTrueTrue
8TrueTrueTrueTrue
9TrueTrueTrueTrueIn [28]: (df == df1).all().all()
Out[28]: TrueIn [29]: left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1,2]})^M
...: right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4,5]})
...:
...:In [30]: left
Out[30]:
keylval
0foo1
1foo2In [31]: right
Out[31]:
keyrval
0foo4
1foo5In [32]: pd.merge(left,right,on='key')
Out[32]:
   key  lval  rval
0  foo     1     4
1  foo     1     5
2  foo     2     4
3  foo     2     5
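pd.merge defaults to an inner join on the key column; the how parameter picks the join type (sketch of my own, with different toy frames than above):

    import pandas as pd

    left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
    right = pd.DataFrame({'key': ['foo', 'baz'], 'rval': [4, 5]})

    pd.merge(left, right, on='key', how='inner')  # keys present in both: foo
    pd.merge(left, right, on='key', how='left')   # all left keys; rval NaN for bar
    pd.merge(left, right, on='key', how='outer')  # union of keys: foo, bar, baz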
In [33]: s = pd.Series(np.random.randint(1,5,size=4), index=list('ABCD'))
   ...: s
Out[33]:
A4
B4
C3
D3
dtype: int32In [34]: df.append(s, ignore_index=True)
Out[34]:
ABCD
01.8529040.224001 -0.873486 -0.098751
1-0.005724 -0.4330290.059684 -0.424876
20.3822971.121435 -0.5720000.624490
3-1.304039 -0.5231071.7594170.367895
40.0304971.7683040.242685 -0.921089
5-0.086144 -0.5163010.7048650.195875
6-0.015493 -1.0044010.775551 -0.349997
70.542791 -2.1449510.2080700.930271
81.7097920.170925 -0.1024210.544754
9-1.1359631.863820 -0.789279 -1.587587
104.0000004.0000003.0000003.000000In [35]: s = pd.Series(np.random.randint(1,5,size=5), index=list('ABCDE'))^M
...: s
...:
...:
Out[35]:
A1
B2
C3
D3
E1
dtype: int32In [36]: df.append(s, ignore_index=True)
Out[36]:
ABCDE
01.8529040.224001 -0.873486 -0.098751NaN
1-0.005724 -0.4330290.059684 -0.424876NaN
20.3822971.121435 -0.5720000.624490NaN
3-1.304039 -0.5231071.7594170.367895NaN
40.0304971.7683040.242685 -0.921089NaN
5-0.086144 -0.5163010.7048650.195875NaN
6-0.015493 -1.0044010.775551 -0.349997NaN
70.542791 -2.1449510.2080700.930271NaN
81.7097920.170925 -0.1024210.544754NaN
9-1.1359631.863820 -0.789279 -1.587587NaN
10  1.000000  2.000000  3.000000  3.000000  1.0
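Note for newer pandas: DataFrame.append was deprecated and later removed (pandas 2.0), so the two appends above would now be written with pd.concat, roughly:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
    s = pd.Series(np.random.randint(1, 5, size=4), index=list('ABCD'))

    # equivalent of df.append(s, ignore_index=True)
    out = pd.concat([df, s.to_frame().T], ignore_index=True)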
In [37]: df = pd.DataFrame({
   ...:     'A': ['foo', 'bar', 'foo', 'bar', 'bar', 'foo', 'bar', 'foo'],
   ...:     'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
   ...:     'C': np.random.randn(8),
   ...:     'D': np.random.randn(8)
   ...: })
   ...: df
Out[37]:
ABCD
0fooone0.8338450.260860
1barone -1.066645 -0.240832
2footwo0.470603 -0.276248
3barthree2.2880180.661833
4bartwo -1.213533 -1.602429
5footwo -1.4396220.518569
6barone0.0123140.789365
7foothree0.2802550.611326In [38]: df.groupby('A').sum()
Out[38]:
CD
A
bar0.020155 -0.392063
foo0.1450821.114506In [39]: df.groupby(['A', 'B']).sum()
Out[39]:
CD
AB
bar one-1.0543300.548534
three2.2880180.661833
two-1.213533 -1.602429
foo one0.8338450.260860
three0.2802550.611326
two-0.9690190.242320In [40]: df.groupby(['B', 'A']).sum()
Out[40]:
CD
BA
onebar -1.0543300.548534
foo0.8338450.260860
three bar2.2880180.661833
foo0.2802550.611326
twobar -1.213533 -1.602429
foo -0.9690190.242320
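groupby is not limited to sum(); agg can apply different statistics per column, and size() counts rows per group (my own sketch, same column layout as above):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar'],
                       'C': np.random.randn(4),
                       'D': np.random.randn(4)})

    df.groupby('A').agg({'C': 'sum', 'D': ['mean', 'std']})  # per-column aggregations
    df.groupby('A').size()                                   # rows per group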
4. pandas in IPython, part 2
In [1]: import pandas as pd
   ...: import numpy as np
   ...: import matplotlib.pyplot as plt

In [2]: tuples = list(zip(*[['bar','bar','baz','baz',
   ...:                      'foo','foo','qux','qux'],
   ...:                     ['one','two','one','two',
   ...:                      'one','two','one','two']]))
   ...: tuples
Out[2]:
[('bar', 'one'),
('bar', 'two'),
('baz', 'one'),
('baz', 'two'),
('foo', 'one'),
('foo', 'two'),
('qux', 'one'),
('qux', 'two')]In [3]: index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])^M
...: index
...:
...:
Out[3]:
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
names=['first', 'second'])In [4]: df = pd.DataFrame(np.random.randn(8,2), index=index, columns=['A', 'B'])^M
...: df
...:
...:
Out[4]:
AB
first second
barone-0.5738210.448645
two-0.3172580.253702
bazone-1.1811181.788925
two-0.7625510.226626
fooone0.1844540.118198
two1.0621191.506467
quxone-0.894020 -1.233768
two-0.005463 -1.275630In [5]: stacked = df.stack()^M
...: stacked
...:
...:
Out[5]:
firstsecond
baroneA-0.573821
B0.448645
twoA-0.317258
B0.253702
bazoneA-1.181118
B1.788925
twoA-0.762551
B0.226626
foooneA0.184454
B0.118198
twoA1.062119
B1.506467
quxoneA-0.894020
B-1.233768
twoA-0.005463
B-1.275630
dtype: float64In [6]: stacked.index
Out[6]:
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two'], ['A', 'B']],
labels=[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3], [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1], [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]],
names=['first', 'second', None])In [7]: stacked.unstack().unstack()
Out[7]:
AB
secondonetwoonetwo
first
bar-0.573821 -0.3172580.4486450.253702
baz-1.181118 -0.7625511.7889250.226626
foo0.1844541.0621190.1181981.506467
qux-0.894020 -0.005463 -1.233768 -1.275630In [8]: stacked.unstack()
Out[8]:
AB
first second
barone-0.5738210.448645
two-0.3172580.253702
bazone-1.1811181.788925
two-0.7625510.226626
fooone0.1844540.118198
two1.0621191.506467
quxone-0.894020 -1.233768
two-0.005463 -1.275630In [9]: df = pd.DataFrame({'A':['one','one','two','three'] * 3,^M
...:'B':['A','B','C'] * 4,^M
...:'C':['foo','foo','foo','bar','bar','bar'] * 2,^M
...:'D':np.random.randn(12),^M
...:'E':np.random.randn(12)})^M
...: df
...:
Out[9]:
ABCDE
0oneAfoo1.0951681.385659
1oneBfoo0.0266421.454903
2twoCfoo0.8356840.080938
3threeAbar -2.3384760.920093
4oneBbar -0.383956 -1.063160
5oneCbar1.708665 -0.021806
6twoAfoo0.725428 -0.031022
7threeBfoo -0.3862481.205046
8oneCfoo -0.203927 -0.259308
9oneAbar1.1847490.368413
10twoBbar1.602919 -1.816103
11threeCbar -0.4198790.303990In [10]: df.pivot_table(values=['D'], index=['A', 'B'], columns=['C'])
Out[10]:
D
Cbarfoo
AB
oneA1.1847491.095168
B -0.3839560.026642
C1.708665 -0.203927
three A -2.338476NaN
BNaN -0.386248
C -0.419879NaN
twoANaN0.725428
B1.602919NaN
CNaN0.835684In [11]: df.pivot_table(values=['E'],index=['A'], columns=['C'])
Out[11]:
              E
C           bar       foo
A
one   -0.238851  0.860418
three  0.612041  1.205046
two   -1.816103  0.024958
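pivot_table aggregates with the mean by default; aggfunc and fill_value change that (a sketch of my own, not from the session above):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'A': ['one', 'one', 'two', 'two'],
                       'C': ['foo', 'bar', 'foo', 'foo'],
                       'D': np.random.randn(4)})

    df.pivot_table(values='D', index='A', columns='C',
                   aggfunc='sum',   # default is 'mean'
                   fill_value=0)    # fill empty cells instead of NaN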
In [12]: df[df.A == 'one']
Out[12]:
ABCDE
0oneAfoo1.0951681.385659
1oneBfoo0.0266421.454903
4oneBbar -0.383956 -1.063160
5oneCbar1.708665 -0.021806
8oneCfoo -0.203927 -0.259308
9oneAbar1.1847490.368413In [13]: df[df.A == 'one'].groupby('C').mean()
Out[13]:
DE
C
bar0.836486 -0.238851
foo0.3059610.860418In [14]: rng = pd.date_range('20160301', periods=600, freq='s')^M
...: rng
...:
...:
Out[14]:
DatetimeIndex(['2016-03-01 00:00:00', '2016-03-01 00:00:01',
'2016-03-01 00:00:02', '2016-03-01 00:00:03',
'2016-03-01 00:00:04', '2016-03-01 00:00:05',
'2016-03-01 00:00:06', '2016-03-01 00:00:07',
'2016-03-01 00:00:08', '2016-03-01 00:00:09',
...
'2016-03-01 00:09:50', '2016-03-01 00:09:51',
'2016-03-01 00:09:52', '2016-03-01 00:09:53',
'2016-03-01 00:09:54', '2016-03-01 00:09:55',
'2016-03-01 00:09:56', '2016-03-01 00:09:57',
'2016-03-01 00:09:58', '2016-03-01 00:09:59'],
dtype='datetime64[ns]', length=600, freq='S')In [15]: s = pd.Series(np.random.randint(0 ,500, len(rng)), index=rng)^M
...: s
...:
...:
Out[15]:
2016-03-01 00:00:0086
2016-03-01 00:00:01393
2016-03-01 00:00:02285
2016-03-01 00:00:03330
2016-03-01 00:00:0430
2016-03-01 00:00:05325
2016-03-01 00:00:06325
2016-03-01 00:00:07442
2016-03-01 00:00:08426
2016-03-01 00:00:0982
2016-03-01 00:00:10320
2016-03-01 00:00:11334
2016-03-01 00:00:12434
2016-03-01 00:00:13102
2016-03-01 00:00:14440
2016-03-01 00:00:15263
2016-03-01 00:00:16258
2016-03-01 00:00:17338
2016-03-01 00:00:187
2016-03-01 00:00:19126
2016-03-01 00:00:2033
2016-03-01 00:00:21405
2016-03-01 00:00:22188
2016-03-01 00:00:23484
2016-03-01 00:00:24412
2016-03-01 00:00:25127
2016-03-01 00:00:26449
2016-03-01 00:00:27260
2016-03-01 00:00:28155
2016-03-01 00:00:29155
...
2016-03-01 00:09:30329
2016-03-01 00:09:3130
2016-03-01 00:09:32295
2016-03-01 00:09:33181
2016-03-01 00:09:34178
2016-03-01 00:09:3522
2016-03-01 00:09:36148
2016-03-01 00:09:37166
2016-03-01 00:09:38137
2016-03-01 00:09:39238
2016-03-01 00:09:40106
2016-03-01 00:09:41442
2016-03-01 00:09:42143
2016-03-01 00:09:43180
2016-03-01 00:09:4464
2016-03-01 00:09:4598
2016-03-01 00:09:4660
2016-03-01 00:09:47211
2016-03-01 00:09:48200
2016-03-01 00:09:49458
2016-03-01 00:09:50348
2016-03-01 00:09:51353
2016-03-01 00:09:52314
2016-03-01 00:09:53191
2016-03-01 00:09:5455
2016-03-01 00:09:55320
2016-03-01 00:09:56461
2016-03-01 00:09:57223
2016-03-01 00:09:58176
2016-03-01 00:09:59325
Freq: S, Length: 600, dtype: int32
In [16]: s.resample('2Min', how='sum')
D:\python\Scripts\ipython:1: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...).sum()
Out[16]:
2016-03-01 00:00:00    30038
2016-03-01 00:02:00    31791
2016-03-01 00:04:00    29403
2016-03-01 00:06:00    29762
2016-03-01 00:08:00    30800
Freq: 2T, dtype: int32
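As the FutureWarning above says, how= is the old resample API; the current spelling chains the aggregation onto the resampler:

    import numpy as np
    import pandas as pd

    rng = pd.date_range('20160301', periods=600, freq='s')
    s = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)

    s.resample('2Min').sum()    # replaces s.resample('2Min', how='sum')
    s.resample('2Min').mean()   # any aggregation chains the same way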
In [17]: rng = pd.period_range('2000Q1', '2016Q1', freq='Q')
   ...: rng
Out[17]:
PeriodIndex(['2000Q1', '2000Q2', '2000Q3', '2000Q4', '2001Q1', '2001Q2',
'2001Q3', '2001Q4', '2002Q1', '2002Q2', '2002Q3', '2002Q4',
'2003Q1', '2003Q2', '2003Q3', '2003Q4', '2004Q1', '2004Q2',
'2004Q3', '2004Q4', '2005Q1', '2005Q2', '2005Q3', '2005Q4',
'2006Q1', '2006Q2', '2006Q3', '2006Q4', '2007Q1', '2007Q2',
'2007Q3', '2007Q4', '2008Q1', '2008Q2', '2008Q3', '2008Q4',
'2009Q1', '2009Q2', '2009Q3', '2009Q4', '2010Q1', '2010Q2',
'2010Q3', '2010Q4', '2011Q1', '2011Q2', '2011Q3', '2011Q4',
'2012Q1', '2012Q2', '2012Q3', '2012Q4', '2013Q1', '2013Q2',
'2013Q3', '2013Q4', '2014Q1', '2014Q2', '2014Q3', '2014Q4',
'2015Q1', '2015Q2', '2015Q3', '2015Q4', '2016Q1'],
dtype='period[Q-DEC]', freq='Q-DEC')In [18]: rng.to_timestamp()
Out[18]:
DatetimeIndex(['2000-01-01', '2000-04-01', '2000-07-01', '2000-10-01',
'2001-01-01', '2001-04-01', '2001-07-01', '2001-10-01',
'2002-01-01', '2002-04-01', '2002-07-01', '2002-10-01',
'2003-01-01', '2003-04-01', '2003-07-01', '2003-10-01',
'2004-01-01', '2004-04-01', '2004-07-01', '2004-10-01',
'2005-01-01', '2005-04-01', '2005-07-01', '2005-10-01',
'2006-01-01', '2006-04-01', '2006-07-01', '2006-10-01',
'2007-01-01', '2007-04-01', '2007-07-01', '2007-10-01',
'2008-01-01', '2008-04-01', '2008-07-01', '2008-10-01',
'2009-01-01', '2009-04-01', '2009-07-01', '2009-10-01',
'2010-01-01', '2010-04-01', '2010-07-01', '2010-10-01',
'2011-01-01', '2011-04-01', '2011-07-01', '2011-10-01',
'2012-01-01', '2012-04-01', '2012-07-01', '2012-10-01',
'2013-01-01', '2013-04-01', '2013-07-01', '2013-10-01',
'2014-01-01', '2014-04-01', '2014-07-01', '2014-10-01',
'2015-01-01', '2015-04-01', '2015-07-01', '2015-10-01',
'2016-01-01'],
dtype='datetime64[ns]', freq='QS-OCT')In [19]: pd.Timestamp('20160301') - pd.Timestamp('20160201')
Out[19]: Timedelta('29 days 00:00:00')In [20]: pd.Timestamp('20160301') + pd.Timedelta(days=5)
Out[20]: Timestamp('2016-03-06 00:00:00')In [21]: df = pd.DataFrame({'id': [1,2,3,4,5,6], 'raw_grade':['a', 'b', 'b', 'a', 'a', 'd']})^M
...: df
...:
...:
Out[21]:
id raw_grade
01a
12b
23b
34a
45a
56dIn [22]: df['grade'] = df.raw_grade.astype('category')^M
...: df
...:
...:
Out[22]:
id raw_grade grade
01aa
12bb
23bb
34aa
45aa
56ddIn [23]: df.grade.cat.categories
Out[23]: Index(['a', 'b', 'd'], dtype='object')In [24]: df.grade.cat.categories = ['very good', 'good', 'bad']^M
...: df
...:
...:
Out[24]:
id raw_gradegrade
01avery good
12bgood
23bgood
34avery good
45avery good
5   6         d        bad
In [25]: df.sort_values(by='grade', ascending=True)
Out[25]:
   id raw_grade      grade
0   1         a  very good
3   4         a  very good
4   5         a  very good
1   2         b       good
2   3         b       good
5   6         d        bad
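Assigning to df.grade.cat.categories, as done above, has since been deprecated in newer pandas; rename_categories is the explicit spelling (sketch):

    import pandas as pd

    df = pd.DataFrame({'raw_grade': ['a', 'b', 'b', 'a', 'a', 'd']})
    df['grade'] = df.raw_grade.astype('category')

    # newer equivalent of: df.grade.cat.categories = ['very good', 'good', 'bad']
    df['grade'] = df.grade.cat.rename_categories(['very good', 'good', 'bad'])
    df.sort_values(by='grade')   # sorts in category order, not alphabetically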
In [26]: s = pd.Series(np.random.randn(1000), index=pd.date_range('20000101', periods=1000))
   ...: s
Out[26]:
2000-01-01-0.141344
2000-01-02-0.797249
2000-01-03-2.464608
2000-01-04-0.870485
2000-01-05-1.210260
2000-01-061.192860
2000-01-070.642895
2000-01-08-1.152103
2000-01-09-1.313273
2000-01-100.027484
2000-01-11-0.678573
2000-01-121.167240
2000-01-13-1.650681
2000-01-140.578776
2000-01-15-1.070705
2000-01-161.345511
2000-01-17-0.161714
2000-01-18-0.315464
2000-01-19-1.189132
2000-01-20-0.162142
2000-01-211.443795
2000-01-22-0.547895
2000-01-23-0.556073
2000-01-241.990200
2000-01-25-0.215637
2000-01-261.048317
2000-01-27-1.030935
2000-01-280.256619
2000-01-29-0.130376
2000-01-301.286080
...
2002-08-28-0.588474
2002-08-291.310814
2002-08-30-0.386883
2002-08-31-0.181065
2002-09-01-1.756253
2002-09-020.305742
2002-09-03-2.771434
2002-09-040.288447
2002-09-05-0.056637
2002-09-06-0.448806
2002-09-070.811163
2002-09-08-0.205134
2002-09-090.786792
2002-09-101.951288
2002-09-110.736074
2002-09-12-0.138304
2002-09-131.119185
2002-09-14-0.037335
2002-09-150.218690
2002-09-16-0.134962
2002-09-17-2.203361
2002-09-180.177029
2002-09-191.161275
2002-09-20-1.238382
2002-09-210.250562
2002-09-220.048922
2002-09-230.504966
2002-09-240.311811
2002-09-251.020513
2002-09-26-0.975082
Freq: D, Length: 1000, dtype: float64In [27]: s = s.cumsum()In [28]: s
Out[28]:
2000-01-01-0.141344
2000-01-02-0.938593
2000-01-03-3.403201
2000-01-04-4.273685
2000-01-05-5.483945
2000-01-06-4.291085
2000-01-07-3.648190
2000-01-08-4.800293
2000-01-09-6.113566
2000-01-10-6.086082
2000-01-11-6.764654
2000-01-12-5.597414
2000-01-13-7.248095
2000-01-14-6.669319
2000-01-15-7.740024
2000-01-16-6.394512
2000-01-17-6.556226
2000-01-18-6.871690
2000-01-19-8.060822
2000-01-20-8.222964
2000-01-21-6.779169
2000-01-22-7.327065
2000-01-23-7.883137
2000-01-24-5.892937
2000-01-25-6.108574
2000-01-26-5.060258
2000-01-27-6.091193
2000-01-28-5.834574
2000-01-29-5.964950
2000-01-30-4.678870
...
2002-08-28-26.069711
2002-08-29-24.758897
2002-08-30-25.145779
2002-08-31-25.326844
2002-09-01-27.083097
2002-09-02-26.777355
2002-09-03-29.548789
2002-09-04-29.260342
2002-09-05-29.316979
2002-09-06-29.765785
2002-09-07-28.954622
2002-09-08-29.159755
2002-09-09-28.372963
2002-09-10-26.421675
2002-09-11-25.685601
2002-09-12-25.823905
2002-09-13-24.704720
2002-09-14-24.742055
2002-09-15-24.523365
2002-09-16-24.658327
2002-09-17-26.861687
2002-09-18-26.684658
2002-09-19-25.523383
2002-09-20-26.761766
2002-09-21-26.511203
2002-09-22-26.462281
2002-09-23-25.957315
2002-09-24-25.645504
2002-09-25-24.624991
2002-09-26-25.600073
Freq: D, Length: 1000, dtype: float64In [29]: df = pd.DataFrame(np.random.randn(100, 4), columns=list('ABCD'))^M
...: df
...:
...:
Out[29]:
ABCD
00.2673270.1075060.080250 -0.621563
11.8168291.1757150.9501300.836614
2-1.4424900.6511160.4748660.179345
30.4105520.012790 -0.4868930.034593
40.5103222.0305820.466503 -0.085239
50.191119 -1.055877 -0.5207142.633334
60.0947011.506650 -1.624039 -0.366824
7-0.8999252.6692312.607940 -0.622080
81.953028 -0.6100780.885680 -0.365108
90.3064640.694631 -0.164848 -0.140056
10 -0.4440480.1356051.471948 -1.673677
110.635337 -0.922888 -1.2424640.448070
12 -0.8458900.881532 -1.1827190.773362
131.051579 -0.4775701.219806 -0.338791
14 -0.0817940.319024 -0.5668691.154736
15 -0.8787201.746420 -0.2171400.253803
160.178135 -0.0429040.4214450.325830
170.808898 -0.2474531.611405 -0.451700
18 -0.0980360.6012090.726469 -0.520484
19 -0.6422091.428606 -1.124756 -0.007182
20 -0.917643 -2.275002 -0.446514 -0.521532
210.709361 -0.735774 -0.474513 -0.133009
22 -0.470721 -0.280986 -0.2216640.424061
230.068563 -0.998091 -1.4178200.773527
24 -0.742220 -0.5619521.072177 -1.363455
250.928466 -0.960329 -0.760199 -0.000401
261.528048 -0.0261760.1971390.646781
270.1559770.238656 -0.7994000.849429
281.3888090.714155 -0.0743330.663270
290.0282290.8871511.639230 -1.722949
..............
700.3209430.2041280.5630681.005620
71 -0.0165221.032978 -0.217907 -1.338971
720.7723090.268404 -0.1119500.567020
73 -0.522461 -0.410367 -0.3293460.686058
74 -0.0665611.331591 -0.8690710.921186
75 -0.3537960.1020650.8066070.750724
761.3805410.6265050.619084 -0.618170
77 -0.8469970.227475 -1.8552330.224078
78 -0.613626 -0.075322 -0.1670480.600913
79 -1.047309 -0.3221900.048969 -1.052706
800.772724 -0.464186 -0.930369 -0.320229
811.0384980.9273401.060027 -0.103949
82 -0.716448 -1.5904100.538984 -0.189105
83 -0.710689 -0.3216690.132338 -0.162068
841.3643180.8280881.2803810.880573
85 -0.1714881.1976170.843253 -0.328299
860.326151 -0.820610 -1.629678 -0.088559
870.092089 -0.0423801.8244482.386188
880.209274 -0.9039500.2459312.023770
890.782739 -0.493215 -0.115856 -0.285229
90 -1.0836580.3363330.8683880.444167
911.859865 -0.2063990.287647 -0.298413
92 -0.677470 -0.059909 -0.347117 -0.672348
93 -0.7085170.245301 -2.1565140.023272
940.662887 -0.6548670.575094 -1.501833
950.915414 -0.731354 -0.173915 -0.834434
960.358771 -0.983519 -0.5375830.911525
97 -1.1437640.202356 -0.4506951.252933
980.4176780.2102890.472555 -0.363459
99  1.803065  0.588571 -0.459731  1.801414

[100 rows x 4 columns]

In [30]: df.to_csv('pandas.csv')
In [31]: %ls
 Volume in drive C has no label.
 Volume Serial Number is CA90-0532

 Directory of C:\Users\Jay

2019/05/28  20:50    .
2019/05/28  20:50    ..
2019/05/0400:03.3T
2018/04/1921:41.android
2017/11/0423:27.AndroidStudio3.0
2018/09/0921:188,250 .bash_history
2018/09/1614:52.config
2019/05/0323:4914 .dbshell
2017/11/0509:38.dnx
2019/03/1121:55.docker
2019/05/1012:21.dubbo
2018/01/2323:1516 .emulator_console_auth_token
2018/10/2413:2652 .gitconfig
2017/11/0520:25.gradle
2018/04/0822:47.idlerc
2019/03/1214:07.IntelliJIdea2018.2
2018/04/2421:49.ipython
2018/04/2421:52.jupyter
2019/04/0217:01.kafkatool2
2017/11/0520:36.keras
2018/11/1500:43.kube
2019/03/3000:20.m2
2018/04/0221:42.matplotlib
2018/09/1614:05.minikube
2019/05/0323:430 .mongorc.js
2018/04/2915:47.nuget
2019/05/1215:29.oracle_jre_usage
2017/12/1122:33.PyCharm2017.3
2017/12/1122:53.PyCharmCE2017.2
2019/04/1517:160 .scala_history
2019/05/2522:34.VirtualBox
2019/04/2320:29.WebStorm2019.1
2019/05/2513:113D Objects
2018/04/2422:03Anaconda3
2018/06/2302:01ansel
2018/09/0619:13AppData
2019/03/1411:46Contacts
2019/05/2722:44Desktop
2019/05/1916:56Documents
2019/03/1411:46Downloads
2019/04/2711:19Favorites
2018/05/0320:3646,251,864 heapDump-pycharm-1525350999967.hprof.zip
2018/05/0320:3846,925,852 heapDump-pycharm-1525351099190.hprof.zip
2019/04/2711:1539,983 java_error_in_idea_6940.log
2019/05/1421:1640,103 java_error_in_idea_8180.log
2018/04/2719:21144,319,266 java_error_in_pycharm.hprof
2018/05/0513:3134,521 java_error_in_pycharm_3564.log
2019/04/2711:1538,176 java_error_in_pycharm_7488.log
2018/05/0320:5334,156 java_error_in_pycharm_8968.log
2019/03/1411:46Links
2019/03/1411:46Music
2019/05/2718:30OneDrive
2019/05/2820:508,249 pandas.csv
2019/03/1411:46Pictures
2019/03/1411:46Saved Games
2019/03/1411:46Searches
2019/04/2609:12UIDowner
2019/03/1411:46Videos
              15 File(s)     237,700,502 bytes
              43 Dir(s)  28,620,050,432 bytes free

In [32]: %more pandas.csv
UsageError: Line magic function `%more` not found.
In [33]: pd.read_csv('pandas.csv', index_col=0)
Out[33]:
ABCD
00.2673270.1075060.080250 -0.621563
11.8168291.1757150.9501300.836614
2-1.4424900.6511160.4748660.179345
30.4105520.012790 -0.4868930.034593
40.5103222.0305820.466503 -0.085239
50.191119 -1.055877 -0.5207142.633334
60.0947011.506650 -1.624039 -0.366824
7-0.8999252.6692312.607940 -0.622080
81.953028 -0.6100780.885680 -0.365108
90.3064640.694631 -0.164848 -0.140056
10 -0.4440480.1356051.471948 -1.673677
110.635337 -0.922888 -1.2424640.448070
12 -0.8458900.881532 -1.1827190.773362
131.051579 -0.4775701.219806 -0.338791
14 -0.0817940.319024 -0.5668691.154736
15 -0.8787201.746420 -0.2171400.253803
160.178135 -0.0429040.4214450.325830
170.808898 -0.2474531.611405 -0.451700
18 -0.0980360.6012090.726469 -0.520484
19 -0.6422091.428606 -1.124756 -0.007182
20 -0.917643 -2.275002 -0.446514 -0.521532
210.709361 -0.735774 -0.474513 -0.133009
22 -0.470721 -0.280986 -0.2216640.424061
230.068563 -0.998091 -1.4178200.773527
24 -0.742220 -0.5619521.072177 -1.363455
250.928466 -0.960329 -0.760199 -0.000401
261.528048 -0.0261760.1971390.646781
270.1559770.238656 -0.7994000.849429
281.3888090.714155 -0.0743330.663270
290.0282290.8871511.639230 -1.722949
..............
700.3209430.2041280.5630681.005620
71 -0.0165221.032978 -0.217907 -1.338971
720.7723090.268404 -0.1119500.567020
73 -0.522461 -0.410367 -0.3293460.686058
74 -0.0665611.331591 -0.8690710.921186
75 -0.3537960.1020650.8066070.750724
761.3805410.6265050.619084 -0.618170
77 -0.8469970.227475 -1.8552330.224078
78 -0.613626 -0.075322 -0.1670480.600913
79 -1.047309 -0.3221900.048969 -1.052706
800.772724 -0.464186 -0.930369 -0.320229
811.0384980.9273401.060027 -0.103949
82 -0.716448 -1.5904100.538984 -0.189105
83 -0.710689 -0.3216690.132338 -0.162068
841.3643180.8280881.2803810.880573
85 -0.1714881.1976170.843253 -0.328299
860.326151 -0.820610 -1.629678 -0.088559
870.092089 -0.0423801.8244482.386188
880.209274 -0.9039500.2459312.023770
890.782739 -0.493215 -0.115856 -0.285229
90 -1.0836580.3363330.8683880.444167
911.859865 -0.2063990.287647 -0.298413
92 -0.677470 -0.059909 -0.347117 -0.672348
93 -0.7085170.245301 -2.1565140.023272
940.662887 -0.6548670.575094 -1.501833
950.915414 -0.731354 -0.173915 -0.834434
960.358771 -0.983519 -0.5375830.911525
97 -1.1437640.202356 -0.4506951.252933
980.4176780.2102890.472555 -0.363459
99  1.803065  0.588571 -0.459731  1.801414

[100 rows x 4 columns]
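to_csv writes the index as the first column by default, which is why read_csv above needs index_col=0. A small round-trip sketch (the second file name is just an example of mine):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(np.random.randn(5, 4), columns=list('ABCD'))

    df.to_csv('pandas.csv')                       # index goes into the first column
    df2 = pd.read_csv('pandas.csv', index_col=0)  # read it back as the index

    df.to_csv('pandas_noindex.csv', index=False)  # or skip the index entirely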