14.Python 通过列标题取列所有值

1.基础python 语法

vi 7csv_reader_column_by_name.py 
#!/usr/bin/env python3
#encoding=utf-8
import csv 
import sys 
# Header names of the columns to extract; in supplier_data.csv these are the
# 2nd and 5th columns (zero-based indexes 1 and 4).
my_columns = ['Invoice Number', 'Purchase Date']


def select_columns_by_name(input_file, output_file, columns=None):
    """Copy the columns whose header is in ``columns`` from one CSV to another.

    Args:
        input_file: Path of the CSV file to read; its first row is the header.
        output_file: Path of the CSV file to write (overwritten if it exists).
        columns: Header names to keep. Defaults to the module-level
            ``my_columns``.

    The selected columns are written in the order they appear in the input
    file. Names in ``columns`` that are absent from the header are ignored.
    An input file with no rows produces an empty output file, and blank
    lines in the input are skipped.
    """
    wanted = set(my_columns if columns is None else columns)
    # In Python 3 the csv module needs text-mode files opened with
    # newline=''; binary mode ('rb'/'wb') makes the reader and writer raise.
    with open(input_file, 'r', newline='') as csv_in_file, \
            open(output_file, 'w', newline='') as csv_out_file:
        filereader = csv.reader(csv_in_file)
        filewriter = csv.writer(csv_out_file)
        header = next(filereader, None)
        if header is None:
            # Empty input: there is no header row to match against.
            return
        # Positions of the wanted columns within the header row.
        my_columns_index = [
            index for index, name in enumerate(header) if name in wanted
        ]
        # Build the output header from the matched positions so it always
        # lines up with the data rows written below.
        filewriter.writerow([header[index] for index in my_columns_index])
        for row_list in filereader:
            if not row_list:
                # csv.reader yields an empty list for a blank line.
                continue
            filewriter.writerow([row_list[index] for index in my_columns_index])


if __name__ == '__main__':
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    select_columns_by_name(input_file, output_file)

#结果 
[root@mysql51 python_scripts]# python  7csv_reader_column_by_name.py supplier_data.csv 9output_csv.csv 
[root@mysql51 python_scripts]# 
[root@mysql51 python_scripts]# 
[root@mysql51 python_scripts]# more 9output_csv.csv
Invoice Number,Purchase Date
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
50-9501,1/30/2014
50-9501,1/30/2014
50-9505,2/3/2014
50-9505,2/3/2014
920-4803,2/3/2014
920-4804,2/10/2014
920-4805,2/17/2014
920-4806,2/24/2014

2.pandas 实现特定标题列的值。

vi 7csv_reader_column_by_name.py 
#!/usr/bin/env python3
#encoding=utf-8
import pandas as pd 
import sys 
# Command-line arguments: source CSV path first, destination CSV path second.
input_file, output_file = sys.argv[1], sys.argv[2]

# Column labels to keep, in the order they should appear in the output.
wanted_columns = ['Invoice Number', 'Purchase Date']

# Load the whole CSV, then select every row (:) of the wanted columns by label.
data_frame = pd.read_csv(input_file)
data_frame_column_by_name = data_frame.loc[:, wanted_columns]

# index=False keeps the DataFrame's row index out of the output file.
data_frame_column_by_name.to_csv(output_file, index=False)


#结果 
python C:\Users\4201.HJSC\PycharmProjects\pythonProject\7csv_reader_column_by_name.py \
C:\Users\4201.HJSC\Desktop\Python_exercise\supplier_data.csv \
C:\Users\4201.HJSC\Desktop\Python_exercise\9output_csv.csv
more 9output_csv.csv
Invoice Number,Purchase Date
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
001-1001,1/20/2014
50-9501,1/30/2014
50-9501,1/30/2014
50-9505,2/3/2014
50-9505,2/3/2014
920-4803,2/3/2014
920-4804,2/10/2014
920-4805,2/17/2014
920-4806,2/24/2014

3.总结

通过 iloc(按列位置)和 loc(按列标题)索引器来选取列值。
data_frame.iloc[:,[0,3]] #取第一列和第四列
data_frame.loc[:,['Invoice Number','Purchase Date']] #取'Invoice Number'列和'Purchase Date'列的值。