Python 从多个 csv 文件里提取指定列,汇总到一个新生成的 csv 文件
发布网友
发布时间:2022-04-23 12:00
我来回答
共3个回答
热心网友
时间:2022-04-10 16:52
#!/usr/bin/env python
# coding: utf-8
import os
import re
def parserln(ln, patt):
    """Match one line against a compiled regular expression.

    Args:
        ln: The text line to parse.
        patt: A compiled pattern; it is applied with ``match``, i.e.
            anchored at the start of the line.

    Returns:
        The dict of named groups when the line matches, otherwise None.
    """
    result = patt.match(ln)
    return result.groupdict() if result else None
def getdata(filename, parser, callback=None):
    """Parse a text file line by line and return the processed records.

    Every line of ``filename`` is handed to ``parser``. Lines for which the
    parser returns a falsy value (for example ``None`` for "no match") are
    dropped; the remaining parsed values are passed through ``callback``.

    Args:
        filename: Path of the text file to read.
        parser: Callable taking one line (trailing newline included) and
            returning the parsed value, or a falsy value to skip the line.
        callback: Optional callable applied to each parsed value. When it
            is None the parsed values are returned unchanged.

    Returns:
        A list of processed values, in file order.
    """
    with open(filename, 'rt') as handle:
        # Materialize the result while the file is still open. On Python 3
        # map/filter are lazy, so returning them from inside the ``with``
        # block would hand the caller an iterator over a closed file.
        parsed = [item for item in map(parser, handle) if item]
    if callback is None:
        # map(None, ...) is only valid on Python 2; treat None as identity.
        return parsed
    return [callback(item) for item in parsed]
def storage(filename, dataserial, spliter=','):
    """Write a sequence of rows to a text file, one row per line.

    Args:
        filename: Path of the file to (over)write.
        dataserial: Iterable of rows; each row is an iterable of values.
        spliter: Separator placed between the values of a row.

    Each value is converted with ``str`` before joining, and every row is
    terminated with a newline.
    """
    # The file is opened before the rows are consumed, as in the original.
    with open(filename, 'wt') as handle:
        lines = []
        for item in dataserial:
            fields = [str(field) for field in item]
            lines.append(spliter.join(fields) + "\n")
        handle.writelines(lines)
if __name__ == "__main__":
    # One record per line: "<month>,<amount>,<usage>", each a run of digits,
    # with optional trailing whitespace. re.X makes the line breaks inside
    # the pattern insignificant.
    patt = re.compile(
        r"""^
(?P<month>\d+),
(?P<amount>\d+),
(?P<usage>\d+)
\s*$""",
        re.I | re.U | re.X)
    datapath = 'datasource'
    # Name of the per-directory data file. The same name is used for the
    # existence check and for reading; the original checked "usage.txt" but
    # read "usage.csv", so no directory ever qualified.
    datafile = "usage.csv"
    # Every sub-directory of datapath that contains a "usage.csv" file.
    # Sorted because os.listdir returns entries in arbitrary order, which
    # would make the column order of the output non-deterministic.
    subpaths = sorted(
        os.path.join(datapath, path)
        for path in os.listdir(datapath)
        if (os.path.isdir(os.path.join(datapath, path))
            and os.path.exists(os.path.join(datapath, path, datafile)))
    )
    # One column of integer "amount" values per data file.
    columns = [
        getdata(
            os.path.join(path, datafile),
            # Parse each line with patt.
            parser=lambda ln: parserln(ln, patt),
            # Keep only the "amount" field, converted to an integer.
            callback=lambda x: int(x["amount"]),
        )
        for path in subpaths
    ]
    # zip(*columns) transposes the columns into rows: row i holds the i-th
    # amount of every file. zip stops at the shortest column.
    storage('store.csv', zip(*columns))
$ tail -n 12 datasource/*/*.csv
==> datasource/2014/usage.csv <==
1,4234,423
2,3523,432
3,4352,438
4,4792,458
5,4823,834
6,5093,734
7,4743,832
8,5152,859
9,4932,810
10,4993,802
11,4999,810
12,5052,850
==> datasource/2015/usage.csv <==
1,5234,423
2,4523,432
3,5352,438
4,5792,458
5,6823,834
6,6093,734
7,6743,832
8,7152,859
9,6932,810
10,6993,802
11,6999,810
12,7052,850
(venv)tim@crunchbang:~/workspace/$
$ cat store.csv
4234,5234
3523,4523
4352,5352
4792,5792
4823,6823
5093,6093
4743,6743
5152,7152
4932,6932
4993,6993
4999,6999
5052,7052
$
热心网友
时间:2022-04-10 18:10
1.首先下载python 2.7,进行安装
2.教你一个简单的csv操作的例子
如下面为一csv文件:
Title,Release Date,Director
And Now For Something Completely Different,1971,Ian MacNaughton
Monty Python And The Holy Grail,1975,Terry Gilliam and Terry Jones
Monty Python's Life Of Brian,1979,Terry Jones
Monty Python Live At The Hollywood Bowl,1982,Terry Hughes
Monty Python's The Meaning Of Life,1983,Terry Jones
打印发行日期及标题。
逐行处理:
# Process the file line by line, printing the release year and the title.
with open("samples/sample.csv") as handle:
    for line in handle:
        # Split on "," into exactly three parts: title, year, director.
        title, year, director = line.split(",")
        # Formatting one string gives the same output on Python 2 and 3.
        print("%s %s" % (year, title))
使用csv模块处理:
import csv

# Let the csv module split each row into its fields.
# A forward slash is used in the path: "\s" is an invalid escape sequence,
# and "/" is accepted as a path separator on Windows as well.
with open("samples/sample.csv") as handle:
    reader = csv.reader(handle)
    for title, year, director in reader:
        # Formatting one string gives the same output on Python 2 and 3.
        print("%s %s" % (year, title))
改变分隔符
热心网友
时间:2022-04-10 19:45
csv 可以直接当作文本文件来读,它的格式是每行由逗号隔开的若干列。
像读文本文件一样读入 csv 后,用逗号分隔出各列,然后将您需要的那一列写到新的文件里就可以了。
只提供思路,我就不写代码了,可能会用到 open、split、readline。