1.使用基础 Python 语法(csv 模块)选取特定标题列的值。
vi 7csv_reader_column_by_name.py
#!/usr/bin/env python3
# encoding=utf-8
"""Copy the columns with given header names from one CSV file to another.

Usage:
    python 7csv_reader_column_by_name.py <input_csv> <output_csv>
"""
import csv
import sys

# Header names of the columns to keep. Per the original note, in the sample
# supplier_data.csv these are the 2nd and 5th columns (indexes 1 and 4).
my_columns = ['Invoice Number', 'Purchase Date']


def select_columns_by_name(input_file, output_file, columns=None):
    """Write only the columns whose header is in *columns* to *output_file*.

    Args:
        input_file: Path of the CSV file to read; its first row is the header.
        output_file: Path of the CSV file to create or overwrite.
        columns: Header names to keep; defaults to the module-level
            ``my_columns``.

    Columns are emitted in the order they appear in the input file. An input
    file with no rows produces an empty output file.
    """
    wanted = set(my_columns if columns is None else columns)
    # The csv module needs text-mode files opened with newline='' on
    # Python 3; the binary modes 'rb'/'wb' only work on Python 2.
    with open(input_file, 'r', newline='') as csv_in_file, \
            open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)
        header = next(filereader, None)
        if header is None:
            # Empty input: there is no header to match against.
            return
        my_columns_index = [
            index for index, name in enumerate(header) if name in wanted
        ]
        # Write the header row from the matched positions so that it always
        # lines up with the data columns written below.
        filewriter.writerow([header[index] for index in my_columns_index])
        for row_list in filereader:
            filewriter.writerow([row_list[index] for index in my_columns_index])


def main(argv=None):
    """Run the column selection using command-line style arguments."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit('Usage: 7csv_reader_column_by_name.py <input_csv> <output_csv>')
    select_columns_by_name(args[0], args[1])


if __name__ == '__main__':
    main()
#结果
[root@mysql51 python_scripts]# python 7csv_reader_column_by_name.py supplier_data.csv 9output_csv.csv
[root@mysql51 python_scripts]#
[root@mysql51 python_scripts]#
[root@mysql51 python_scripts]# more 9output_csv.csv
Invoice Number,Purchase Date
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
50-9501,1/30/2014
50-9501,1/30/2014
50-9505,2/3/2014
50-9505,2/3/2014
920-4803,2/3/2014
920-4804,2/10/2014
920-4805,2/17/2014
920-4806,2/24/2014
2.使用 pandas 选取特定标题列的值。
vi 7csv_reader_column_by_name.py
#!/usr/bin/env python3
# encoding=utf-8
"""Copy the columns with given header names from one CSV file to another.

Usage:
    python 7csv_reader_column_by_name.py <input_csv> <output_csv>
"""
import sys

import pandas as pd

# Header names of the columns to keep.
COLUMNS = ['Invoice Number', 'Purchase Date']


def select_columns_by_name(input_file, output_file, columns=None):
    """Write the columns named in *columns* from *input_file* to *output_file*.

    Args:
        input_file: Path of the CSV file to read; its first row is the header.
        output_file: Path of the CSV file to create or overwrite.
        columns: Header names to keep, in output order; defaults to
            ``COLUMNS``.

    Raises:
        KeyError: If a requested column is not present in the input file.
    """
    wanted = list(COLUMNS if columns is None else columns)
    # Read every cell as text and disable NA detection so that values are
    # copied unchanged (e.g. leading zeros and literal "NA" are preserved).
    data_frame = pd.read_csv(input_file, dtype=str, keep_default_na=False)
    data_frame_column_by_name = data_frame.loc[:, wanted]
    data_frame_column_by_name.to_csv(output_file, index=False)


def main(argv=None):
    """Run the column selection using command-line style arguments."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit('Usage: 7csv_reader_column_by_name.py <input_csv> <output_csv>')
    select_columns_by_name(args[0], args[1])


if __name__ == '__main__':
    main()
#结果
python C:\Users\4201.HJSC\PycharmProjects\pythonProject\7csv_reader_column_by_name.py \
C:\Users\4201.HJSC\Desktop\Python_exercise\supplier_data.csv \
C:\Users\4201.HJSC\Desktop\Python_exercise\9output_csv.csv
more 9output_csv.csv
Invoice Number,Purchase Date
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
50-9501,1/30/2014
50-9501,1/30/2014
50-9505,2/3/2014
50-9505,2/3/2014
920-4803,2/3/2014
920-4804,2/10/2014
920-4805,2/17/2014
920-4806,2/24/2014
3.总结
通过 loc(按列名)和 iloc(按位置)索引器来选取列值。
data_frame.iloc[:,[0,3]] #取第一列和第四列
data_frame.loc[:,['Invoice Number','Purchase Date']] #取'Invoice Number'列和'Purchase Date'列的值。