Python Data Analysis Notes

My job requires some data analysis with the third-party libraries NumPy and pandas, so I am keeping notes here.
1. NumPy notes

In [1]: import numpy as np
In [2]: data = np.array([1, 3, 4, 8])
In [3]: data
Out[3]: array([1, 3, 4, 8])
In [4]: data.shape
Out[4]: (4,)
In [5]: data.dtype
Out[5]: dtype('int32')
In [6]: data[1]
Out[6]: 3
In [7]: data[1] = 9
In [8]: data
Out[8]: array([1, 9, 4, 8])
In [9]: data = np.array([[1, 2, 3], [4, 5, 6]])
In [10]: data
Out[10]:
array([[1, 2, 3],
       [4, 5, 6]])
In [11]: data.shape
Out[11]: (2, 3)
In [12]: data[0, 1]
Out[12]: 2
In [13]: np.arange(10)
Out[13]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [14]: data[1, 2] = 7
In [15]: data
Out[15]:
array([[1, 2, 3],
       [4, 5, 7]])
In [18]: np.arange(5, 15)
Out[18]: array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
In [19]: data = np.arange(10)
In [20]: data.reshape(2, 5)
Out[20]:
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
In [22]: data2 = data.reshape(2, 5)
In [23]: data2
Out[23]:
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
In [24]: data[4] = 10
In [25]: data
Out[25]: array([ 0,  1,  2,  3, 10,  5,  6,  7,  8,  9])
In [26]: data2
Out[26]:
array([[ 0,  1,  2,  3, 10],
       [ 5,  6,  7,  8,  9]])
In [28]: np.zeros((2, 2))
Out[28]:
array([[ 0.,  0.],
       [ 0.,  0.]])
In [29]: np.ones((2, 3, 3))
Out[29]:
array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],
       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])
In [30]: np.eye(4)
Out[30]:
array([[ 1.,  0.,  0.,  0.],
       [ 0.,  1.,  0.,  0.],
       [ 0.,  0.,  1.,  0.],
       [ 0.,  0.,  0.,  1.]])
In [31]: np.arange(16).reshape(4, 4)
Out[31]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [37]: data = np.arange(100, step=10)
In [38]: data
Out[38]: array([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])
In [39]: data[2]
Out[39]: 20
In [40]: data[2:5]
Out[40]: array([20, 30, 40])
In [41]: data[:3]
Out[41]: array([ 0, 10, 20])
In [42]: data[5:] = -1
In [43]: data
Out[43]: array([ 0, 10, 20, 30, 40, -1, -1, -1, -1, -1])
In [44]: data = np.arange(16).reshape(4, 4)
In [45]: data
Out[45]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
In [46]: data[1]
Out[46]: array([4, 5, 6, 7])
In [47]: data[1:3]
Out[47]:
array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
In [48]: data[:, 2:4]
Out[48]:
array([[ 2,  3],
       [ 6,  7],
       [10, 11],
       [14, 15]])
In [49]: data[[1, 3], [2, 3]]
Out[49]: array([ 6, 15])
In [53]: print(data[1, 2], data[3, 3])
6 15
In [54]: data > 10
Out[54]:
array([[False, False, False, False],
       [False, False, False, False],
       [False, False, False,  True],
       [ True,  True,  True,  True]], dtype=bool)
In [55]: data[data > 10]
Out[55]: array([11, 12, 13, 14, 15])
In [56]: data[data % 2 == 0]
Out[56]: array([ 0,  2,  4,  6,  8, 10, 12, 14])
In [57]: x = np.arange(1, 5).reshape(2, 2)
In [58]: x
Out[58]:
array([[1, 2],
       [3, 4]])
In [59]: y = np.arange(5, 9).reshape(2, 2)
In [60]: y
Out[60]:
array([[5, 6],
       [7, 8]])
In [61]: x + y
Out[61]:
array([[ 6,  8],
       [10, 12]])
In [62]: x - y
Out[62]:
array([[-4, -4],
       [-4, -4]])
In [63]: x * y
Out[63]:
array([[ 5, 12],
       [21, 32]])
In [65]: x.dot(y)
Out[65]:
array([[19, 22],
       [43, 50]])
In [66]: x / y
Out[66]:
array([[ 0.2       ,  0.33333333],
       [ 0.42857143,  0.5       ]])
In [67]: np.sqrt(x)
Out[67]:
array([[ 1.        ,  1.41421356],
       [ 1.73205081,  2.        ]])
In [68]: x.T
Out[68]:
array([[1, 3],
       [2, 4]])
In [69]: np.linspace(1, 10)
Out[69]:
array([  1.        ,   1.18367347,   1.36734694,   1.55102041,
         ...
         9.63265306,   9.81632653,  10.        ])
In [70]: np.linspace(1, 10, num=200)
Out[70]:
array([  1.        ,   1.04522613,   1.09045226,   1.13567839,
         ...
         9.90954774,   9.95477387,  10.        ])
In [71]: x = np.linspace(0, 2 * np.pi, num=50)
In [72]: x
Out[72]:
array([ 0.        ,  0.12822827,  0.25645654,  0.38468481,
        ...
        6.02672876,  6.15495704,  6.28318531])
In [74]: y
Out[74]:
array([  0.00000000e+00,   1.27877162e-01,   2.53654584e-01,
         ...
        -2.53654584e-01,  -1.27877162e-01,  -2.44929360e-16])
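The transcript above covers the NumPy pieces used in the pandas notes that follow. As a compact standalone recap (a minimal sketch of the same operations, not part of the original session): reshape returns a view on the same buffer, slices and boolean masks select without explicit loops, and * is elementwise while dot is the matrix product.

import numpy as np

a = np.arange(10)
b = a.reshape(2, 5)        # a view onto the same data, not a copy
a[4] = 10                  # the change shows up through b as well
assert b[0, 4] == 10

m = np.arange(16).reshape(4, 4)
row_block = m[1:3]         # rows 1-2
col_block = m[:, 2:4]      # columns 2-3
evens = m[m % 2 == 0]      # boolean mask flattens to a 1-D array

x = np.arange(1, 5).reshape(2, 2)
y = np.arange(5, 9).reshape(2, 2)
elementwise = x * y        # [[ 5, 12], [21, 32]]
matrix = x.dot(y)          # [[19, 22], [43, 50]], same as x @ y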

2. pandas notes
In [1]: import pandas as pd
In [2]: import numpy as np
In [3]: s = pd.Series([1, 3, 5, np.NaN, 8, 4])
In [4]: s
Out[4]:
0    1.0
1    3.0
2    5.0
3    NaN
4    8.0
5    4.0
dtype: float64
In [5]: dates = pd.date_range('20160301', periods=6)
In [6]: dates
Out[6]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
               '2016-03-05', '2016-03-06'],
              dtype='datetime64[ns]', freq='D')
In [7]: data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
In [8]: data
Out[8]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
In [9]: data.shape
Out[9]: (6, 4)
In [10]: data.values
Out[10]:
array([[ 2.02783946, -0.29258761,  0.05187483, -0.2978054 ],
       [ 1.09800225, -0.20649433,  0.03898198,  0.04355011],
       [ 1.15290424,  0.10370374,  0.39340982, -0.42141752],
       [-0.37963074,  0.1653215 , -2.12121315,  1.70680791],
       [ 0.02057416, -1.69729895,  1.23049898,  1.82649881],
       [-0.72699977, -0.0723013 , -1.5117672 ,  0.13140707]])
In [13]: d = {'A': 1, 'B': pd.Timestamp('20130301'), 'C': [i for i in range(4)], 'D': np.arange(4)}
In [14]: d
Out[14]: {'A': 1, 'B': Timestamp('2013-03-01 00:00:00'), 'C': [0, 1, 2, 3], 'D': array([0, 1, 2, 3])}
In [16]: df = pd.DataFrame(d)
In [17]: df
Out[17]:
   A          B  C  D
0  1 2013-03-01  0  0
1  1 2013-03-01  1  1
2  1 2013-03-01  2  2
3  1 2013-03-01  3  3
In [18]: df.dtypes
Out[18]:
A             int64
B    datetime64[ns]
C             int64
D             int32
dtype: object
In [19]: df.A
Out[19]:
0    1
1    1
2    1
3    1
Name: A, dtype: int64
In [20]: df.C
Out[20]:
0    0
1    1
2    2
3    3
Name: C, dtype: int64
In [21]: df.B
Out[21]:
0   2013-03-01
1   2013-03-01
2   2013-03-01
3   2013-03-01
Name: B, dtype: datetime64[ns]
In [22]: df.D
Out[22]:
0    0
1    1
2    2
3    3
Name: D, dtype: int32
In [23]: type(df.B)
Out[23]: pandas.core.series.Series
In [24]: data.head()
Out[24]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
In [25]: data.tail()
Out[25]:
                   A         B         C         D
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
In [26]: data.head(2)
Out[26]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
In [27]: data.index
Out[27]:
DatetimeIndex(['2016-03-01', '2016-03-02', '2016-03-03', '2016-03-04',
               '2016-03-05', '2016-03-06'],
              dtype='datetime64[ns]', freq='D')
In [28]: data.describe()
Out[28]:
              A         B         C         D
count  6.000000  6.000000  6.000000  6.000000
mean   0.532115 -0.333276 -0.319702  0.498173
std    1.060295  0.690789  1.252584  1.004557
min   -0.727000 -1.697299 -2.121213 -0.421418
25%   -0.279580 -0.271064 -1.124080 -0.212467
50%    0.559288 -0.139398  0.045428  0.087479
75%    1.139179  0.059702  0.308026  1.312958
max    2.027839  0.165321  1.230499  1.826499
In [29]: data.T
Out[29]:
   2016-03-01  2016-03-02  2016-03-03  2016-03-04  2016-03-05  2016-03-06
A    2.027839    1.098002    1.152904   -0.379631    0.020574   -0.727000
B   -0.292588   -0.206494    0.103704    0.165321   -1.697299   -0.072301
C    0.051875    0.038982    0.393410   -2.121213    1.230499   -1.511767
D   -0.297805    0.043550   -0.421418    1.706808    1.826499    0.131407
In [30]: data.T.shape
Out[30]: (4, 6)
In [31]: data.shape
Out[31]: (6, 4)
In [32]: data.sort_index(axis=1)
Out[32]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
In [33]: data.sort_index(axis=1, ascending=False)
Out[33]:
                   D         C         B         A
2016-03-01 -0.297805  0.051875 -0.292588  2.027839
2016-03-02  0.043550  0.038982 -0.206494  1.098002
2016-03-03 -0.421418  0.393410  0.103704  1.152904
2016-03-04  1.706808 -2.121213  0.165321 -0.379631
2016-03-05  1.826499  1.230499 -1.697299  0.020574
2016-03-06  0.131407 -1.511767 -0.072301 -0.727000
In [34]: data.sort_index(axis=0, ascending=False)
Out[34]:
                   A         B         C         D
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
In [35]: data.sort_values(by='A')
Out[35]:
                   A         B         C         D
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
In [36]: data['A']
Out[36]:
2016-03-01    2.027839
2016-03-02    1.098002
2016-03-03    1.152904
2016-03-04   -0.379631
2016-03-05    0.020574
2016-03-06   -0.727000
Freq: D, Name: A, dtype: float64
In [37]: data.A
Out[37]:
2016-03-01    2.027839
2016-03-02    1.098002
2016-03-03    1.152904
2016-03-04   -0.379631
2016-03-05    0.020574
2016-03-06   -0.727000
Freq: D, Name: A, dtype: float64
In [39]: data[2:4]
Out[39]:
                   A         B         C         D
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
In [41]: data['20160302':'20160305']
Out[41]:
                   A         B         C         D
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
In [42]: data.loc['20160302':'20160305']
Out[42]:
                   A         B         C         D
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
In [43]: data.iloc[2:4]
Out[43]:
                   A         B         C         D
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
In [44]: data.iloc[:, 1:3]
Out[44]:
                   B         C
2016-03-01 -0.292588  0.051875
2016-03-02 -0.206494  0.038982
2016-03-03  0.103704  0.393410
2016-03-04  0.165321 -2.121213
2016-03-05 -1.697299  1.230499
2016-03-06 -0.072301 -1.511767
In [45]: data.loc[:, 'B':'C']
Out[45]:
                   B         C
2016-03-01 -0.292588  0.051875
2016-03-02 -0.206494  0.038982
2016-03-03  0.103704  0.393410
2016-03-04  0.165321 -2.121213
2016-03-05 -1.697299  1.230499
2016-03-06 -0.072301 -1.511767
In [46]: data.loc['20160302':'20160305', ['B', 'C']]
Out[46]:
                   B         C
2016-03-02 -0.206494  0.038982
2016-03-03  0.103704  0.393410
2016-03-04  0.165321 -2.121213
2016-03-05 -1.697299  1.230499
In [48]: data.iloc[1:3, 1:3]
Out[48]:
                   B         C
2016-03-02 -0.206494  0.038982
2016-03-03  0.103704  0.393410
In [49]: data.loc['20160302', 'B']
Out[49]: -0.20649432992272151
In [50]: data.at[pd.Timestamp('20160302'), 'B']
Out[50]: -0.20649432992272151
In [51]: data.iloc[1]
Out[51]:
A    1.098002
B   -0.206494
C    0.038982
D    0.043550
Name: 2016-03-02 00:00:00, dtype: float64
In [52]: data.iloc[1:3]
Out[52]:
                   A         B         C         D
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
In [53]: data.iloc[:, 1:3]
Out[53]:
                   B         C
2016-03-01 -0.292588  0.051875
2016-03-02 -0.206494  0.038982
2016-03-03  0.103704  0.393410
2016-03-04  0.165321 -2.121213
2016-03-05 -1.697299  1.230499
2016-03-06 -0.072301 -1.511767
In [54]: data.iloc[1, 1]
Out[54]: -0.20649432992272151
In [55]: data.iat[1, 1]
Out[55]: -0.20649432992272151
In [56]: %timeit data.iloc[1,1]
8.08 μs ± 17.4 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
In [57]: %timeit data.iat[1,1]
5.38 μs ± 10.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
In [58]: data[data.A > 0]
Out[58]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-05  0.020574 -1.697299  1.230499  1.826499
In [59]: data[data > 0]
Out[59]:
                   A         B         C         D
2016-03-01  2.027839       NaN  0.051875       NaN
2016-03-02  1.098002       NaN  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410       NaN
2016-03-04       NaN  0.165321       NaN  1.706808
2016-03-05  0.020574       NaN  1.230499  1.826499
2016-03-06       NaN       NaN       NaN  0.131407
In [60]: data2 = data.copy()
In [61]: data2
Out[61]:
                   A         B         C         D
2016-03-01  2.027839 -0.292588  0.051875 -0.297805
2016-03-02  1.098002 -0.206494  0.038982  0.043550
2016-03-03  1.152904  0.103704  0.393410 -0.421418
2016-03-04 -0.379631  0.165321 -2.121213  1.706808
2016-03-05  0.020574 -1.697299  1.230499  1.826499
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407
In [62]: tag = ['a'] * 2 + ['b'] * 2 + ['c'] * 2
In [63]: data2['TAG'] = tag
In [64]: data2
Out[64]:
                   A         B         C         D TAG
2016-03-01  2.027839 -0.292588  0.051875 -0.297805   a
2016-03-02  1.098002 -0.206494  0.038982  0.043550   a
2016-03-03  1.152904  0.103704  0.393410 -0.421418   b
2016-03-04 -0.379631  0.165321 -2.121213  1.706808   b
2016-03-05  0.020574 -1.697299  1.230499  1.826499   c
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407   c
In [66]: data2[data2.TAG.isin(['a', 'c'])]
Out[66]:
                   A         B         C         D TAG
2016-03-01  2.027839 -0.292588  0.051875 -0.297805   a
2016-03-02  1.098002 -0.206494  0.038982  0.043550   a
2016-03-05  0.020574 -1.697299  1.230499  1.826499   c
2016-03-06 -0.727000 -0.072301 -1.511767  0.131407   c
In [68]: data.iat[0, 0] = 100
In [69]: data
Out[69]:
                     A         B         C         D
2016-03-01  100.000000 -0.292588  0.051875 -0.297805
2016-03-02    1.098002 -0.206494  0.038982  0.043550
2016-03-03    1.152904  0.103704  0.393410 -0.421418
2016-03-04   -0.379631  0.165321 -2.121213  1.706808
2016-03-05    0.020574 -1.697299  1.230499  1.826499
2016-03-06   -0.727000 -0.072301 -1.511767  0.131407
In [70]: data.A = range(6)
In [71]: data
Out[71]:
            A         B         C         D
2016-03-01  0 -0.292588  0.051875 -0.297805
2016-03-02  1 -0.206494  0.038982  0.043550
2016-03-03  2  0.103704  0.393410 -0.421418
2016-03-04  3  0.165321 -2.121213  1.706808
2016-03-05  4 -1.697299  1.230499  1.826499
2016-03-06  5 -0.072301 -1.511767  0.131407
In [72]: data.B = 100
In [73]: data
Out[73]:
            A    B         C         D
2016-03-01  0  100  0.051875 -0.297805
2016-03-02  1  100  0.038982  0.043550
2016-03-03  2  100  0.393410 -0.421418
2016-03-04  3  100 -2.121213  1.706808
2016-03-05  4  100  1.230499  1.826499
2016-03-06  5  100 -1.511767  0.131407
In [74]: data.iloc[:, 2:4] = 1000
In [75]: data
Out[75]:
            A    B     C     D
2016-03-01  0  100  1000  1000
2016-03-02  1  100  1000  1000
2016-03-03  2  100  1000  1000
2016-03-04  3  100  1000  1000
2016-03-05  4  100  1000  1000
2016-03-06  5  100  1000  1000
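As a quick reference for the selection idioms exercised above (label-based loc/at, position-based iloc/iat, boolean filtering), here is a minimal self-contained sketch; the frame is rebuilt with fresh random values, so the numbers will differ from the session.

import numpy as np
import pandas as pd

dates = pd.date_range('20160301', periods=6)
data = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))

col_a = data['A']                                    # single column, same as data.A
rows = data['20160302':'20160305']                   # label slice on the index, end inclusive
block = data.loc['20160302':'20160305', ['B', 'C']]  # labels for rows and columns
pos_block = data.iloc[1:3, 1:3]                      # positions, end exclusive
scalar_lbl = data.at[pd.Timestamp('20160302'), 'B']  # fast scalar access by label
scalar_pos = data.iat[1, 1]                          # fast scalar access by position
positive_a = data[data.A > 0]                        # boolean filter on one column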

3. pandas in IPython
In [2]: import pandas as pd
   ...: import numpy as np
   ...: import matplotlib.pyplot as plt
In [3]: dates = pd.date_range('20190501', periods=6)
   ...: df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
   ...: df
Out[3]:
                   A         B         C         D
2019-05-01 -1.203323 -1.023017  0.994808  1.845361
2019-05-02  0.413818  0.350563  0.718125 -0.100747
2019-05-03 -0.976996  0.298922 -0.563673  1.431109
2019-05-04 -1.395960 -0.415227 -1.583835  0.022370
2019-05-05 -2.466317 -0.819741 -0.417125 -2.290065
2019-05-06  1.290431 -1.629373 -1.530487 -1.452107
In [4]: df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])
   ...: df1
Out[4]:
                   A         B         C         D   E
2019-05-01 -1.203323 -1.023017  0.994808  1.845361 NaN
2019-05-02  0.413818  0.350563  0.718125 -0.100747 NaN
2019-05-03 -0.976996  0.298922 -0.563673  1.431109 NaN
2019-05-04 -1.395960 -0.415227 -1.583835  0.022370 NaN
In [5]: df1.iloc[1:3, 4] = 2
   ...: df1
Out[5]:
                   A         B         C         D    E
2019-05-01 -1.203323 -1.023017  0.994808  1.845361  NaN
2019-05-02  0.413818  0.350563  0.718125 -0.100747  2.0
2019-05-03 -0.976996  0.298922 -0.563673  1.431109  2.0
2019-05-04 -1.395960 -0.415227 -1.583835  0.022370  NaN
In [6]: df1.dropna()
Out[6]:
                   A         B         C         D    E
2019-05-02  0.413818  0.350563  0.718125 -0.100747  2.0
2019-05-03 -0.976996  0.298922 -0.563673  1.431109  2.0
In [7]: df1.fillna(value=5)
Out[7]:
                   A         B         C         D    E
2019-05-01 -1.203323 -1.023017  0.994808  1.845361  5.0
2019-05-02  0.413818  0.350563  0.718125 -0.100747  2.0
2019-05-03 -0.976996  0.298922 -0.563673  1.431109  2.0
2019-05-04 -1.395960 -0.415227 -1.583835  0.022370  5.0
In [8]: pd.isnull(df1)
Out[8]:
                A      B      C      D      E
2019-05-01  False  False  False  False   True
2019-05-02  False  False  False  False  False
2019-05-03  False  False  False  False  False
2019-05-04  False  False  False  False   True
In [9]: pd.isnull(df1).any().any()
Out[9]: True
In [10]: df1.mean()
Out[10]:
A   -0.790615
B   -0.197190
C   -0.108644
D    0.799523
E    2.000000
dtype: float64
In [11]: df1.mean(axis=1)
Out[11]:
2019-05-01    0.153457
2019-05-02    0.676352
2019-05-03    0.437872
2019-05-04   -0.843163
Freq: D, dtype: float64
In [12]: df1.cumsum()
Out[12]:
                   A         B         C         D    E
2019-05-01 -1.203323 -1.023017  0.994808  1.845361  NaN
2019-05-02 -0.789505 -0.672454  1.712933  1.744614  2.0
2019-05-03 -1.766501 -0.373532  1.149259  3.175724  4.0
2019-05-04 -3.162461 -0.788759 -0.434575  3.198094  NaN
In [13]: s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)
    ...: s
Out[13]:
2019-05-01    NaN
2019-05-02    NaN
2019-05-03    1.0
2019-05-04    3.0
2019-05-05    5.0
2019-05-06    NaN
Freq: D, dtype: float64
In [14]: df
Out[14]:
                   A         B         C         D
2019-05-01 -1.203323 -1.023017  0.994808  1.845361
2019-05-02  0.413818  0.350563  0.718125 -0.100747
2019-05-03 -0.976996  0.298922 -0.563673  1.431109
2019-05-04 -1.395960 -0.415227 -1.583835  0.022370
2019-05-05 -2.466317 -0.819741 -0.417125 -2.290065
2019-05-06  1.290431 -1.629373 -1.530487 -1.452107
In [15]: df.sub(s, axis='index')
Out[15]:
                   A         B         C         D
2019-05-01       NaN       NaN       NaN       NaN
2019-05-02       NaN       NaN       NaN       NaN
2019-05-03 -1.976996 -0.701078 -1.563673  0.431109
2019-05-04 -4.395960 -3.415227 -4.583835 -2.977630
2019-05-05 -7.466317 -5.819741 -5.417125 -7.290065
2019-05-06       NaN       NaN       NaN       NaN
In [16]: df.apply(np.cumsum)
Out[16]:
                   A         B         C         D
2019-05-01 -1.203323 -1.023017  0.994808  1.845361
2019-05-02 -0.789505 -0.672454  1.712933  1.744614
2019-05-03 -1.766501 -0.373532  1.149259  3.175724
2019-05-04 -3.162461 -0.788759 -0.434575  3.198094
2019-05-05 -5.628777 -1.608500 -0.851700  0.908028
2019-05-06 -4.338346 -3.237874 -2.382187 -0.544078
In [17]: df.apply(lambda x: x.max() - x.min())
Out[17]:
A    3.756748
B    1.979937
C    2.578643
D    4.135427
dtype: float64
In [18]: def _sum(x):
    ...:     print(type(x))
    ...:     return x.sum()
    ...: df.apply(_sum)
Out[18]:
A   -4.338346
B   -3.237874
C   -2.382187
D   -0.544078
dtype: float64
In [19]: s = pd.Series(np.random.randint(10, 20, size=20))
    ...: s
Out[19]:
0     19
1     16
2     11
3     17
4     13
5     14
6     13
7     11
8     17
9     12
10    19
11    10
12    19
13    18
14    12
15    10
16    19
17    12
18    17
19    10
dtype: int32
In [20]: s.value_counts()
Out[20]:
19    4
17    3
12    3
10    3
13    2
11    2
18    1
16    1
14    1
dtype: int64
In [21]: s.mode()
Out[21]:
0    19
dtype: int32
In [22]: df = pd.DataFrame(np.random.randn(10, 4), columns=list('ABCD'))
    ...: df
Out[22]:
          A         B         C         D
0  1.852904  0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029  0.059684 -0.424876
2  0.382297  1.121435 -0.572000  0.624490
3 -1.304039 -0.523107  1.759417  0.367895
4  0.030497  1.768304  0.242685 -0.921089
5 -0.086144 -0.516301  0.704865  0.195875
6 -0.015493 -1.004401  0.775551 -0.349997
7  0.542791 -2.144951  0.208070  0.930271
8  1.709792  0.170925 -0.102421  0.544754
9 -1.135963  1.863820 -0.789279 -1.587587
In [23]: df.iloc[:3]
Out[23]:
          A         B         C         D
0  1.852904  0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029  0.059684 -0.424876
2  0.382297  1.121435 -0.572000  0.624490
In [24]: df.iloc[3:7]
Out[24]:
          A         B         C         D
3 -1.304039 -0.523107  1.759417  0.367895
4  0.030497  1.768304  0.242685 -0.921089
5 -0.086144 -0.516301  0.704865  0.195875
6 -0.015493 -1.004401  0.775551 -0.349997
In [25]: df.iloc[7:]
Out[25]:
          A         B         C         D
7  0.542791 -2.144951  0.208070  0.930271
8  1.709792  0.170925 -0.102421  0.544754
9 -1.135963  1.863820 -0.789279 -1.587587
In [26]: df1 = pd.concat([df.iloc[:3], df.iloc[3:7], df.iloc[7:]])
    ...: df1
Out[26]:
          A         B         C         D
0  1.852904  0.224001 -0.873486 -0.098751
1 -0.005724 -0.433029  0.059684 -0.424876
2  0.382297  1.121435 -0.572000  0.624490
3 -1.304039 -0.523107  1.759417  0.367895
4  0.030497  1.768304  0.242685 -0.921089
5 -0.086144 -0.516301  0.704865  0.195875
6 -0.015493 -1.004401  0.775551 -0.349997
7  0.542791 -2.144951  0.208070  0.930271
8  1.709792  0.170925 -0.102421  0.544754
9 -1.135963  1.863820 -0.789279 -1.587587
In [27]: df == df1
Out[27]:
      A     B     C     D
0  True  True  True  True
1  True  True  True  True
2  True  True  True  True
3  True  True  True  True
4  True  True  True  True
5  True  True  True  True
6  True  True  True  True
7  True  True  True  True
8  True  True  True  True
9  True  True  True  True
In [28]: (df == df1).all().all()
Out[28]: True
In [29]: left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
    ...: right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})
In [30]: left
Out[30]:
   key  lval
0  foo     1
1  foo     2
In [31]: right
Out[31]:
   key  rval
0  foo     4
1  foo     5
In [32]: pd.merge(left, right, on='key')
Out[32]:
   key  lval  rval
0  foo     1     4
1  foo     1     5
2  foo     2     4
3  foo     2     5
In [33]: s = pd.Series(np.random.randint(1, 5, size=4), index=list('ABCD'))
    ...: s
Out[33]:
A    4
B    4
C    3
D    3
dtype: int32
In [34]: df.append(s, ignore_index=True)
Out[34]:
           A         B         C         D
0   1.852904  0.224001 -0.873486 -0.098751
1  -0.005724 -0.433029  0.059684 -0.424876
2   0.382297  1.121435 -0.572000  0.624490
3  -1.304039 -0.523107  1.759417  0.367895
4   0.030497  1.768304  0.242685 -0.921089
5  -0.086144 -0.516301  0.704865  0.195875
6  -0.015493 -1.004401  0.775551 -0.349997
7   0.542791 -2.144951  0.208070  0.930271
8   1.709792  0.170925 -0.102421  0.544754
9  -1.135963  1.863820 -0.789279 -1.587587
10  4.000000  4.000000  3.000000  3.000000
In [35]: s = pd.Series(np.random.randint(1, 5, size=5), index=list('ABCDE'))
    ...: s
Out[35]:
A    1
B    2
C    3
D    3
E    1
dtype: int32
In [36]: df.append(s, ignore_index=True)
Out[36]:
           A         B         C         D    E
0   1.852904  0.224001 -0.873486 -0.098751  NaN
1  -0.005724 -0.433029  0.059684 -0.424876  NaN
2   0.382297  1.121435 -0.572000  0.624490  NaN
3  -1.304039 -0.523107  1.759417  0.367895  NaN
4   0.030497  1.768304  0.242685 -0.921089  NaN
5  -0.086144 -0.516301  0.704865  0.195875  NaN
6  -0.015493 -1.004401  0.775551 -0.349997  NaN
7   0.542791 -2.144951  0.208070  0.930271  NaN
8   1.709792  0.170925 -0.102421  0.544754  NaN
9  -1.135963  1.863820 -0.789279 -1.587587  NaN
10  1.000000  2.000000  3.000000  3.000000  1.0
In [37]: df = pd.DataFrame({
    ...:     'A': ['foo', 'bar', 'foo', 'bar', 'bar', 'foo', 'bar', 'foo'],
    ...:     'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    ...:     'C': np.random.randn(8),
    ...:     'D': np.random.randn(8)
    ...: })
    ...: df
Out[37]:
     A      B         C         D
0  foo    one  0.833845  0.260860
1  bar    one -1.066645 -0.240832
2  foo    two  0.470603 -0.276248
3  bar  three  2.288018  0.661833
4  bar    two -1.213533 -1.602429
5  foo    two -1.439622  0.518569
6  bar    one  0.012314  0.789365
7  foo  three  0.280255  0.611326
In [38]: df.groupby('A').sum()
Out[38]:
            C         D
A
bar  0.020155 -0.392063
foo  0.145082  1.114506
In [39]: df.groupby(['A', 'B']).sum()
Out[39]:
                  C         D
A   B
bar one   -1.054330  0.548534
    three  2.288018  0.661833
    two   -1.213533 -1.602429
foo one    0.833845  0.260860
    three  0.280255  0.611326
    two   -0.969019  0.242320
In [40]: df.groupby(['B', 'A']).sum()
Out[40]:
                  C         D
B     A
one   bar -1.054330  0.548534
      foo  0.833845  0.260860
three bar  2.288018  0.661833
      foo  0.280255  0.611326
two   bar -1.213533 -1.602429
      foo -0.969019  0.242320

4. pandas in IPython (2)
In [1]: import pandas as pd
   ...: import numpy as np
   ...: import matplotlib.pyplot as plt
In [2]: tuples = list(zip(*[['bar', 'bar', 'baz', 'baz',
   ...:                      'foo', 'foo', 'qux', 'qux'],
   ...:                     ['one', 'two', 'one', 'two',
   ...:                      'one', 'two', 'one', 'two']]))
   ...: tuples
Out[2]:
[('bar', 'one'),
 ('bar', 'two'),
 ('baz', 'one'),
 ('baz', 'two'),
 ('foo', 'one'),
 ('foo', 'two'),
 ('qux', 'one'),
 ('qux', 'two')]
In [3]: index = pd.MultiIndex.from_tuples(tuples, names=['first', 'second'])
   ...: index
Out[3]:
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two']],
           labels=[[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second'])
In [4]: df = pd.DataFrame(np.random.randn(8, 2), index=index, columns=['A', 'B'])
   ...: df
Out[4]:
                     A         B
first second
bar   one    -0.573821  0.448645
      two    -0.317258  0.253702
baz   one    -1.181118  1.788925
      two    -0.762551  0.226626
foo   one     0.184454  0.118198
      two     1.062119  1.506467
qux   one    -0.894020 -1.233768
      two    -0.005463 -1.275630
In [5]: stacked = df.stack()
   ...: stacked
Out[5]:
first  second
bar    one     A   -0.573821
               B    0.448645
       two     A   -0.317258
               B    0.253702
baz    one     A   -1.181118
               B    1.788925
       two     A   -0.762551
               B    0.226626
foo    one     A    0.184454
               B    0.118198
       two     A    1.062119
               B    1.506467
qux    one     A   -0.894020
               B   -1.233768
       two     A   -0.005463
               B   -1.275630
dtype: float64
In [6]: stacked.index
Out[6]:
MultiIndex(levels=[['bar', 'baz', 'foo', 'qux'], ['one', 'two'], ['A', 'B']],
           labels=[[0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3],
                   [0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1],
                   [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]],
           names=['first', 'second', None])
In [7]: stacked.unstack().unstack()
Out[7]:
                A                   B
second        one       two       one       two
first
bar     -0.573821 -0.317258  0.448645  0.253702
baz     -1.181118 -0.762551  1.788925  0.226626
foo      0.184454  1.062119  0.118198  1.506467
qux     -0.894020 -0.005463 -1.233768 -1.275630
In [8]: stacked.unstack()
Out[8]:
                     A         B
first second
bar   one    -0.573821  0.448645
      two    -0.317258  0.253702
baz   one    -1.181118  1.788925
      two    -0.762551  0.226626
foo   one     0.184454  0.118198
      two     1.062119  1.506467
qux   one    -0.894020 -1.233768
      two    -0.005463 -1.275630
In [9]: df = pd.DataFrame({'A': ['one', 'one', 'two', 'three'] * 3,
   ...:                    'B': ['A', 'B', 'C'] * 4,
   ...:                    'C': ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] * 2,
   ...:                    'D': np.random.randn(12),
   ...:                    'E': np.random.randn(12)})
   ...: df
Out[9]:
        A  B    C         D         E
0     one  A  foo  1.095168  1.385659
1     one  B  foo  0.026642  1.454903
2     two  C  foo  0.835684  0.080938
3   three  A  bar -2.338476  0.920093
4     one  B  bar -0.383956 -1.063160
5     one  C  bar  1.708665 -0.021806
6     two  A  foo  0.725428 -0.031022
7   three  B  foo -0.386248  1.205046
8     one  C  foo -0.203927 -0.259308
9     one  A  bar  1.184749  0.368413
10    two  B  bar  1.602919 -1.816103
11  three  C  bar -0.419879  0.303990
In [10]: df.pivot_table(values=['D'], index=['A', 'B'], columns=['C'])
Out[10]:
                 D
C              bar       foo
A     B
one   A   1.184749  1.095168
      B  -0.383956  0.026642
      C   1.708665 -0.203927
three A  -2.338476       NaN
      B        NaN -0.386248
      C  -0.419879       NaN
two   A        NaN  0.725428
      B   1.602919       NaN
      C        NaN  0.835684
In [11]: df.pivot_table(values=['E'], index=['A'], columns=['C'])
Out[11]:
               E
C            bar       foo
A
one    -0.238851  0.860418
three   0.612041  1.205046
two    -1.816103  0.024958
In [12]: df[df.A == 'one']
Out[12]:
     A  B    C         D         E
0  one  A  foo  1.095168  1.385659
1  one  B  foo  0.026642  1.454903
4  one  B  bar -0.383956 -1.063160
5  one  C  bar  1.708665 -0.021806
8  one  C  foo -0.203927 -0.259308
9  one  A  bar  1.184749  0.368413
In [13]: df[df.A == 'one'].groupby('C').mean()
Out[13]:
            D         E
C
bar  0.836486 -0.238851
foo  0.305961  0.860418
In [14]: rng = pd.date_range('20160301', periods=600, freq='s')
    ...: rng
Out[14]:
DatetimeIndex(['2016-03-01 00:00:00', '2016-03-01 00:00:01',
               '2016-03-01 00:00:02', '2016-03-01 00:00:03',
               ...
               '2016-03-01 00:09:58', '2016-03-01 00:09:59'],
              dtype='datetime64[ns]', length=600, freq='S')
In [15]: s = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
    ...: s
Out[15]:
2016-03-01 00:00:00     86
2016-03-01 00:00:01    393
2016-03-01 00:00:02    285
2016-03-01 00:00:03    330
2016-03-01 00:00:04     30
                      ...
2016-03-01 00:09:58    176
2016-03-01 00:09:59    325
Freq: S, Length: 600, dtype: int32
In [16]: s.resample('2Min', how='sum')
D:\python\Scripts\ipython:1: FutureWarning: how in .resample() is deprecated
the new syntax is .resample(...).sum()
Out[16]:
2016-03-01 00:00:00    30038
2016-03-01 00:02:00    31791
2016-03-01 00:04:00    29403
2016-03-01 00:06:00    29762
2016-03-01 00:08:00    30800
Freq: 2T, dtype: int32
In [17]: rng = pd.period_range('2000Q1', '2016Q1', freq='Q')
    ...: rng
Out[17]:
PeriodIndex(['2000Q1', '2000Q2', '2000Q3', '2000Q4', '2001Q1', '2001Q2',
             ...
             '2015Q1', '2015Q2', '2015Q3', '2015Q4', '2016Q1'],
            dtype='period[Q-DEC]', freq='Q-DEC')
In [18]: rng.to_timestamp()
Out[18]:
DatetimeIndex(['2000-01-01', '2000-04-01', '2000-07-01', '2000-10-01',
               ...
               '2015-07-01', '2015-10-01', '2016-01-01'],
              dtype='datetime64[ns]', freq='QS-OCT')
In [19]: pd.Timestamp('20160301') - pd.Timestamp('20160201')
Out[19]: Timedelta('29 days 00:00:00')
In [20]: pd.Timestamp('20160301') + pd.Timedelta(days=5)
Out[20]: Timestamp('2016-03-06 00:00:00')
In [21]: df = pd.DataFrame({'id': [1, 2, 3, 4, 5, 6], 'raw_grade': ['a', 'b', 'b', 'a', 'a', 'd']})
    ...: df
Out[21]:
   id raw_grade
0   1         a
1   2         b
2   3         b
3   4         a
4   5         a
5   6         d
In [22]: df['grade'] = df.raw_grade.astype('category')
    ...: df
Out[22]:
   id raw_grade grade
0   1         a     a
1   2         b     b
2   3         b     b
3   4         a     a
4   5         a     a
5   6         d     d
In [23]: df.grade.cat.categories
Out[23]: Index(['a', 'b', 'd'], dtype='object')
In [24]: df.grade.cat.categories = ['very good', 'good', 'bad']
    ...: df
Out[24]:
   id raw_grade      grade
0   1         a  very good
1   2         b       good
2   3         b       good
3   4         a  very good
4   5         a  very good
5   6         d        bad
In [25]: df.sort_values(by='grade', ascending=True)
Out[25]:
   id raw_grade      grade
0   1         a  very good
3   4         a  very good
4   5         a  very good
1   2         b       good
2   3         b       good
5   6         d        bad
In [26]: s = pd.Series(np.random.randn(1000), index=pd.date_range('20000101', periods=1000))
    ...: s
Out[26]:
2000-01-01   -0.141344
2000-01-02   -0.797249
2000-01-03   -2.464608
2000-01-04   -0.870485
2000-01-05   -1.210260
                ...
2002-09-25    1.020513
2002-09-26   -0.975082
Freq: D, Length: 1000, dtype: float64
In [27]: s = s.cumsum()
In [28]: s
Out[28]:
2000-01-01    -0.141344
2000-01-02    -0.938593
2000-01-03    -3.403201
2000-01-04    -4.273685
2000-01-05    -5.483945
                 ...
2002-09-25   -24.624991
2002-09-26   -25.600073
Freq: D, Length: 1000, dtype: float64
In [29]: df = pd.DataFrame(np.random.randn(100, 4), columns=list('ABCD'))
    ...: df
Out[29]:
           A         B         C         D
0   0.267327  0.107506  0.080250 -0.621563
1   1.816829  1.175715  0.950130  0.836614
2  -1.442490  0.651116  0.474866  0.179345
..       ...       ...       ...       ...
98  0.417678  0.210289  0.472555 -0.363459
99  1.803065  0.588571 -0.459731  1.801414
[100 rows x 4 columns]
In [30]: df.to_csv('pandas.csv')
In [31]: %ls
(directory listing of C:\Users\Jay omitted; it includes the newly written file)
2019/05/28  20:50             8,249 pandas.csv
In [32]: %more pandas.csv
UsageError: Line magic function `%more` not found.
In [33]: pd.read_csv('pandas.csv', index_col=0)
Out[33]:
           A         B         C         D
0   0.267327  0.107506  0.080250 -0.621563
1   1.816829  1.175715  0.950130  0.836614
2  -1.442490  0.651116  0.474866  0.179345
..       ...       ...       ...       ...
98  0.417678  0.210289  0.472555 -0.363459
99  1.803065  0.588571 -0.459731  1.801414
[100 rows x 4 columns]
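A few of the calls in this last session have changed in current pandas and NumPy releases, so as a hedged sketch (my suggested replacements, not part of the original notes): chain the aggregation instead of passing how= to resample, use rename_categories instead of assigning to cat.categories, and use np.nan now that the np.NaN alias is gone in NumPy 2.0.

import numpy as np
import pandas as pd

# resample: the how='sum' keyword from In [16] is gone; chain the aggregation
rng = pd.date_range('20160301', periods=600, freq='s')
s = pd.Series(np.random.randint(0, 500, len(rng)), index=rng)
per_two_minutes = s.resample('2min').sum()

# categoricals: assigning to .cat.categories (In [24]) is no longer allowed;
# rename_categories returns a new Series with the renamed categories
df = pd.DataFrame({'id': [1, 2, 3, 4, 5, 6], 'raw_grade': ['a', 'b', 'b', 'a', 'a', 'd']})
df['grade'] = df.raw_grade.astype('category')
df['grade'] = df.grade.cat.rename_categories(['very good', 'good', 'bad'])

# missing values: np.NaN was removed in NumPy 2.0, np.nan still works
s2 = pd.Series([1, 3, 5, np.nan, 8, 4])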
