# Columns can be reindexed with the `columns` keyword.
# NOTE(review): `frame` is expected to be defined earlier in the file (not
# visible in this chunk). Per the recorded output, a requested column that
# `frame` lacks ("Utah") comes back filled with NaN.
states = ["Texas", "Utah", "California"]
print(frame.reindex(columns=states))
# 结果
# -----frame-----
#    Ohio  Texas  California
# a     0      1           2
# b     3      4           5
# d     6      7           8
# -----frame2-----
#    Ohio  Texas  California
# a   0.0    1.0         2.0
# b   3.0    4.0         5.0
# c   NaN    NaN         NaN
# d   6.0    7.0         8.0
#    Texas  Utah  California
# a      1   NaN           2
# b      4   NaN           5
# d      7   NaN           8
# 结果 -----obj----- a 0.0 b 1.0 c 2.0 d 3.0 e 4.0 dtype: float64 ----now_obj----- a 0.0 b 1.0 d 3.0 e 4.0 dtype: float64 c 2.0 d 3.0 e 4.0 dtype: float64
# 结果
# -----data-----
#           one  two  three  four
# Ohio        0    1      2     3
# Colorado    4    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15
#           one  two  three  four
# Utah        8    9     10    11
# New York   12   13     14    15
#           one  three  four
# Ohio        0      2     3
# Colorado    4      6     7
# Utah        8     10    11
# New York   12     14    15
#           one  three
# Ohio        0      2
# Colorado    4      6
# Utah        8     10
# New York   12     14
# 结果 -----obj----- a 0.0 b 1.0 c 2.0 d 3.0 dtype: float64 ----obj['b']------ 1.0 ----obj[1]------ 1.0 ----obj[2:4]------ c 2.0 d 3.0 dtype: float64 ----obj[['b', 'a', 'd']]------ b 1.0 a 0.0 d 3.0 dtype: float64 ----obj[[1,3]]------ b 1.0 d 3.0 dtype: float64 -----obj[obj < 2]----- a 0.0 b 1.0 dtype: float64
# 结果
# -----data-----
#           one  two  three  four
# Ohio        0    1      2     3
# Colorado    4    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15
# -----data['two']-----
# Ohio         1
# Colorado     5
# Utah         9
# New York    13
# Name: two, dtype: int64
# -----data[['three', 'one']]-----
#           three  one
# Ohio          2    0
# Colorado      6    4
# Utah         10    8
# New York     14   12
# 结果
# 切片选择
#           one  two  three  four
# Ohio        0    1      2     3
# Colorado    4    5      6     7
# 布尔筛选
#           one  two  three  four
# Colorado    4    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15
选取行的语法 `data[:2]` 十分方便;向 `[]` 传入单个元素或列表,则可以选取列。
另一种用法是通过布尔型 DataFrame(比如下面这个由标量比较运算得出的)进行索引:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
# Comparing a DataFrame against a scalar yields a boolean DataFrame.
# NOTE(review): `data` is expected to be defined earlier in the file (not
# visible in this chunk).
bools = data < 5
print("-----bools----")
print(bools)

# Index with the boolean DataFrame to assign 0 to every cell where the
# comparison holds; this modifies `data` in place.
data[data < 5] = 0
print(data)
# 结果
# -----bools----
#             one    two  three   four
# Ohio       True   True   True   True
# Colorado   True  False  False  False
# Utah      False  False  False  False
# New York  False  False  False  False
#           one  two  three  four
# Ohio        0    0      0     0
# Colorado    0    5      6     7
# Utah        8    9     10    11
# New York   12   13     14    15
# 结果,发现没有相同的索引就引入缺失值 -----s1----- a 7.3 c -2.5 d 3.4 e 1.5 dtype: float64 -----s2----- a -2.1 c 3.6 e -1.5 f 4.0 g 3.1 dtype: float64 -----s1 + s2----- a 5.2 c 1.1 d NaN e 0.0 f NaN g NaN dtype: float64
# 结果
# -----df1-----
#             b    c    d
# Ohio      0.0  1.0  2.0
# Texas     3.0  4.0  5.0
# Colorado  6.0  7.0  8.0
# -----df2-----
#           b     d     e
# Utah    0.0   1.0   2.0
# Ohio    3.0   4.0   5.0
# Texas   6.0   7.0   8.0
# Oregon  9.0  10.0  11.0
# -----df1 + df2-----
#             b   c     d   e
# Colorado  NaN NaN   NaN NaN
# Ohio      3.0 NaN   6.0 NaN
# Oregon    NaN NaN   NaN NaN
# Texas     9.0 NaN  12.0 NaN
# Utah      NaN NaN   NaN NaN
# 结果,很有意思的add,fill_value()
# -----df1-----
#      a    b     c     d
# 0  0.0  1.0   2.0   3.0
# 1  4.0  5.0   6.0   7.0
# 2  8.0  9.0  10.0  11.0
# -----df2-----
#       a     b     c     d     e
# 0   0.0   1.0   2.0   3.0   4.0
# 1   5.0   NaN   7.0   8.0   9.0
# 2  10.0  11.0  12.0  13.0  14.0
# 3  15.0  16.0  17.0  18.0  19.0
# -----df1 + df2-----
#       a     b     c     d   e
# 0   0.0   2.0   4.0   6.0 NaN
# 1   9.0   NaN  13.0  15.0 NaN
# 2  18.0  20.0  22.0  24.0 NaN
# 3   NaN   NaN   NaN   NaN NaN
# -----使用df1的add方法,传入df2以及一个fill_value参数-----
#       a     b     c     d     e
# 0   0.0   2.0   4.0   6.0   4.0
# 1   9.0   5.0  13.0  15.0   9.0
# 2  18.0  20.0  22.0  24.0  14.0
# 3  15.0  16.0  17.0  18.0  19.0
# 结果
#        a         b         c         d
# 0    inf  1.000000  0.500000  0.333333
# 1  0.250  0.200000  0.166667  0.142857
# 2  0.125  0.111111  0.100000  0.090909
#        a         b         c         d
# 0    inf  1.000000  0.500000  0.333333
# 1  0.250  0.200000  0.166667  0.142857
# 2  0.125  0.111111  0.100000  0.090909
# 结果
# -----frame-----
#           b     d     e
# Utah    0.0   1.0   2.0
# Ohio    3.0   4.0   5.0
# Texas   6.0   7.0   8.0
# Oregon  9.0  10.0  11.0
# -----series-----
# b    0.0
# d    1.0
# e    2.0
# Name: Utah, dtype: float64
# ----frame-series-----
#           b    d    e
# Utah    0.0  0.0  0.0
# Ohio    3.0  3.0  3.0
# Texas   6.0  6.0  6.0
# Oregon  9.0  9.0  9.0
import numpy as np
import pandas as pd

# Build a 4x3 DataFrame of samples drawn by np.random.randn, labelled with
# columns b/d/e and one row per state. The values differ on every run
# because no random seed is set.
frame = pd.DataFrame(
    np.random.randn(4, 3),
    columns=list("bde"),
    index=["Utah", "Ohio", "Texas", "Oregon"],
)
print("-----frame-----")
print(frame)

# NumPy ufuncs such as np.abs can be applied directly to a DataFrame; the
# result keeps the same row and column labels.
print("-----np.abs(frame)-----")
print(np.abs(frame))
# 结果
# -----frame-----
#                b         d         e
# Utah   -1.921425  1.078646  0.515443
# Ohio   -0.110403  1.693725 -0.064599
# Texas   0.687369  0.624990  0.028280
# Oregon -0.059681  1.543326 -0.290820
# -----np.abs(frame)-----
#                b         d         e
# Utah    1.921425  1.078646  0.515443
# Ohio    0.110403  1.693725  0.064599
# Texas   0.687369  0.624990  0.028280
# Oregon  0.059681  1.543326  0.290820
# 结果
# -----frame-----
#                b         d         e
# Utah    0.315588 -1.207591  1.462784
# Ohio    1.115552  0.025713 -0.422715
# Texas   0.526468 -0.428600  0.231712
# Oregon -2.567452  0.674578 -0.269324
# -----np.abs(frame)-----
#                b         d         e
# Utah    0.315588  1.207591  1.462784
# Ohio    1.115552  0.025713  0.422715
# Texas   0.526468  0.428600  0.231712
# Oregon  2.567452  0.674578  0.269324
#             b         d         e
# min -2.567452 -1.207591 -0.422715
# max  1.115552  0.674578  1.462784
# 结果
# -----frame-----
#    b  a
# 0  4  0
# 1  7  1
# 2 -3  0
# 3  2  1
# ----frame.sort_values(by='b')-----
#    b  a
# 2 -3  0
# 3  2  1
# 0  4  0
# 1  7  1
# ----frame.sort_values(by=['a', 'b'])-----
#    b  a
# 2 -3  0
# 0  4  0
# 3  2  1
# 1  7  1