# 结果
#   key1 key2  value1  value2
# 0  one    a       1       2
# 1  one    b       3       4
# 2  one    c       5       6
# 3  one    d       7       8
# 4  two    a       9      10
# 5  two    b      11      12
# 6  two    c      13      14
# 7  two    d      15      16
# 层次化索引
#            value1  value2
# key1 key2
# one  a          1       2
#      b          3       4
#      c          5       6
#      d          7       8
# two  a          9      10
#      b         11      12
#      c         13      14
#      d         15      16
# 结果
# 原
#                                                                  # hey!
# a                                                    b   c   d  message
# # just wanted to make things more difficult for... NaN NaN NaN      NaN
# # who reads CSV files with computers anyway?       NaN NaN          NaN
# 1                                                    2   3   4    hello
# 5                                                    6   7   8    world
# 9                                                   10  11  12      foo
# 规整
#    a   b   c   d message
# 0  1   2   3   4   hello
# 1  5   6   7   8   world
# 2  9  10  11  12     foo
# 结果
# 原
#   something  a   b     c   d message
# 0       one  1   2   3.0   4     NaN
# 1       two  5   6   NaN   8   world
# 2     three  9  10  11.0  12     foo
# 规整
#    something      a      b      c      d  message
# 0      False  False  False  False  False     True
# 1      False  False  False   True  False    False
# 2      False  False  False  False  False    False
# na_values
#   something  a   b     c   d message
# 0       one  1   2   3.0   4     NaN
# 1       two  5   6   NaN   8   world
# 2     three  9  10  11.0  12     foo
# 字典
#   something  a   b     c   d message
# 0       one  1   2   3.0   4     NaN
# 1       NaN  5   6   NaN   8   world
# 2     three  9  10  11.0  12     NaN
import pandas as pd

# Cap printed DataFrames at 10 rows so a large frame is shown truncated.
pd.options.display.max_rows = 10

# Read the whole CSV file. The directory is "examples/", matching the path
# used for the same file in the nrows example further down.
result = pd.read_csv("examples/ex6.csv")
print("----result-----")
print(result)
# 结果
# -----result------
#            one       two     three      four key
# 0     0.467976 -0.038649 -0.295344 -1.824726   L
# 1    -0.358893  1.404453  0.704965 -0.200638   B
# 2    -0.501840  0.659254 -0.421691 -0.057688   G
# 3     0.204886  1.074134  1.388361 -0.982404   R
# 4     0.354628 -0.133116  0.283763 -0.837063   Q
# ...        ...       ...       ...       ...  ..
# 9995  2.311896 -0.417070 -1.409599 -0.515821   L
# 9996 -0.479893 -0.650419  0.745152 -0.646038   E
# 9997  0.523331  0.787112  0.486066  1.093156   K
# 9998 -0.362559  0.598894 -1.843201  0.887292   G
# 9999 -0.096376 -1.012999 -0.657431 -0.573315   0
[10000 rows x 5 columns]
如果只想读取几行(避免读取整个文件),通过nrows进行指定即可:
1 2 3 4 5 6 7 8 9 10 11 12
# ··· (earlier code elided in the original; `pd` is the pandas module
# imported there)
# Read only the first 5 data rows instead of the whole file.
print("-----读取指定行数-----")
print(pd.read_csv("examples/ex6.csv", nrows=5))

# Output (as recorded in the original; its header wording differs from the
# string printed above):
# -----指定读取行数-----
#         one       two     three      four key
# 0  0.467976 -0.038649 -0.295344 -1.824726   L
# 1 -0.358893  1.404453  0.704965 -0.200638   B
# 2 -0.501840  0.659254 -0.421691 -0.057688   G
# 3  0.204886  1.074134  1.388361 -0.982404   R
# 4  0.354628 -0.133116  0.283763 -0.837063   Q
# ··· (earlier code elided in the original)
# NOTE(review): `chunker` is not defined in the visible code; presumably it is
# a chunked reader such as pd.read_csv(..., chunksize=...) — confirm.
print("-----通过地迭代将值计数聚合到key列中-----")

# Start from an explicitly typed empty Series. Without a dtype, older pandas
# releases warn about the implicit default and newer ones fall back to object
# dtype, which would make the running total an object Series.
tot = pd.Series([], dtype="int64")
for piece in chunker:
    # fill_value=0 treats a key missing from either side as a zero count.
    tot = tot.add(piece["key"].value_counts(), fill_value=0)

# Largest counts first, then show the first ten entries.
tot = tot.sort_values(ascending=False)
print(tot[:10])
# 结果 -----通过地迭代将值计数聚合到key列中----- E 368.0 X 364.0 L 346.0 O 343.0 Q 340.0 M 338.0 J 337.0 F 335.0 K 334.0 H 330.0 dtype: float64
# 结果 1 Bank Name City ST CERT \ 0 Allied Bank Mulberry AR 91 1 The Woodbury Banking Company Woodbury GA 11297 2 First CornerStone Bank King of Prussia PA 35312 3 Trust Company Bank Memphis TN 9956 4 North Milwaukee State Bank Milwaukee WI 20364 Acquiring Institution Closing Date Updated Date 0 Today's Bank September 23, 2016 November 17, 2016 1 United Bank August 19, 2016 November 17, 2016 2 First-Citizens Bank & Trust Company May 6, 2016 September 6, 2016 3 The Bank of Fayette County April 29, 2016 September 6, 2016 4 First-Citizens Bank & Trust Company March 11, 2016 June 16, 2016 [5 rows x 7 columns]
<INDICATOR> <INDICATOR_SEQ>373889</INDICATOR_SEQ> <PARENT_SEQ></PARENT_SEQ> <AGENCY_NAME>Metro-North Railroad</AGENCY_NAME> <INDICATOR_NAME>Escalator Availability</INDICATOR_NAME> <DESCRIPTION>Percent of the time that escalators are operational systemwide. The availability rate is based on physical observations performed the morning of regular business days only. This is a new indicator the agency began reporting in 2009.</DESCRIPTION> <PERIOD_YEAR>2011</PERIOD_YEAR> <PERIOD_MONTH>12</PERIOD_MONTH> <CATEGORY>Service Indicators</CATEGORY> <FREQUENCY>M</FREQUENCY> <DESIRED_CHANGE>U</DESIRED_CHANGE> <INDICATOR_UNIT>%</INDICATOR_UNIT> <DECIMAL_PLACES>1</DECIMAL_PLACES> <YTD_TARGET>97.00</YTD_TARGET> <YTD_ACTUAL></YTD_ACTUAL> <MONTHLY_TARGET>97.00</MONTHLY_TARGET> <MONTHLY_ACTUAL></MONTHLY_ACTUAL> </INDICATOR>
# Build one dict per INDICATOR element and append it to `data`.
# NOTE(review): `root`, `skip_fields` and `data` come from code outside this
# excerpt; `.pyval` and `.getchildren()` suggest lxml.objectify — confirm.
for elt in root.INDICATOR:
    # Map each child's tag to its Python value, leaving out skipped tags.
    el_data = {
        child.tag: child.pyval
        for child in elt.getchildren()
        if child.tag not in skip_fields
    }
    data.append(el_data)