# path: root/python/read_excel.py
# blob: ce2f5c4321361da63b50ae2596bb6f35be5dfb73
import os
from xlrd import open_workbook
import csv

# Directory whose sub-directories are scanned for .xls workbooks.
rootdir = "/home/share/rmbs/data/finra/"


# Sub-directories of rootdir to process. Entries with "zip" in their name are
# skipped, as are plain files (os.listdir on a file would raise).
dir_list = [d for d in os.listdir(rootdir)
            if "zip" not in d and os.path.isdir(os.path.join(rootdir, d))]

# For every .xls workbook found, write one sheet out as a sibling .csv file
# with the same base name.
for d in dir_list:
    dir_path = os.path.join(rootdir, d)
    for f in os.listdir(dir_path):
        base, ext = os.path.splitext(f)
        if ext != ".xls":
            continue
        wb = open_workbook(os.path.join(dir_path, f))
        # Workbooks with "PXTABLES" in the file name are exported from their
        # fourth sheet (index 3); every other workbook from its first sheet.
        # The sheet is picked before the output file is opened so a workbook
        # with too few sheets does not leave an empty .csv behind.
        s = wb.sheets()[3 if "PXTABLES" in f else 0]
        with open(os.path.join(dir_path, base + ".csv"), "w") as fh:
            wbcsv = csv.writer(fh, dialect=csv.excel)
            for row in range(s.nrows):
                this_row = []
                for col in range(s.ncols):
                    val = s.cell_value(row, col)
                    # NOTE: `unicode` only exists on Python 2; text cells are
                    # encoded to UTF-8 byte strings before reaching csv.
                    if isinstance(val, unicode):
                        val = val.encode('utf8')
                    # Floats are written using their str() representation.
                    if isinstance(val, float):
                        val = str(val)
                    this_row.append(val)
                wbcsv.writerow(this_row)