# labtools, Copyright (C) 2017 Jerry Fowler and Paul Scheet.
# This program comes with ABSOLUTELY NO WARRANTY. It is licensed under
# GNU GPL Version 3. License and warranty may be viewed in the manual.
'''
A toolkit for working with xlsx files.
It demonstrates a bit of fundamental learning about the way openpyxl works.
NB: Row 1 is the header, and column 1 is A.
This means that the first element of the matrix, (0, 0), is coordinate 'A1'.
labtools-1.6.7 dealt with a somewhat different behavior that was exhibited
by openpyxl 1.8. The openpyxl 2 rev is more consistent. labtools 1.6.8
is somewhat changed to account for that.
Here's openpyxl locally:
$TEAM_ROOT/3rdparty/$OS/anaconda/lib/python2.7/site-packages/openpyxl/cell.py
'''
import os
import sys
from collections import defaultdict, OrderedDict
import re
from labtools import const
from labtools import misc
from labtools import labexceptions
from labtools import reflection
import openpyxl as xlsx
from openpyxl import cell, Workbook, worksheet
from openpyxl.cell import Cell
try:
Workbook().active.max_column
except Exception as e:
raise labexceptions.LabtoolsWarning('''\
Your openpyxl library seems to be of 1.8 vintage.
Please update openpyxl or use labtools-1.6.7.
''')
RANGE_REGEX = r'(\D+)(\d+):(\D+)(\d+)'
[docs]def cell_name(c):
'''This is just for documenting how to do it.'''
return c.coordinate
[docs]def last_column(ws):
'''Return the coordinate of the last column in worksheet ws'''
return cell.get_column_letter(ws.max_column)
[docs]def last_row(ws):
'''Return the id of the last row in worksheet ws'''
return str(ws.max_row)
def next_row_index(ws):
return ws.max_row+1
def column_coordinate(idx):
if idx < 1:
raise labexceptions.LabtoolsWarning('%s called %s(%r) with a non-positive value' %
(reflection.my_callername(), reflection.my_methodname(), idx))
return cell.get_column_letter(idx)
def make_coordinate(col, row):
if isinstance(row, int):
row = str(row)
if isinstance(col, int):
col = cell.get_column_letter(col)
return col+row
[docs]def last_cell_coordinate(ws):
'''Return the coordinate of the last column in worksheet ws'''
return make_coordinate(ws.max_column, last_row(ws))
[docs]def column_headers(ws):
'''
return the top row of cells
'''
# range returns a tuple of row-tuples, so we want tuple[0]
return ws.rows[0]
[docs]def column_cells(ws, name=None, columnletter=None, fromrow=1, torow=None):
'''
Return a list of the values in the named column
'''
if name is not None:
header = column_headers(ws)
found = [c.coordinate for c in header if c.value == name]
if not found:
return []
elif columnletter is None:
found = ['A1']
else:
found = [columnletter+'1']
col = cell.column_index_from_string(ws.cell(found[0]).column)
return [ws.cell(row=r, column=col) for r in range(1,next_row_index(ws))[fromrow:torow]]
[docs]def column_values(ws, name=None, columnletter=None, fromrow=1, torow=None):
'''Return a list of the values of the named column in worksheet ws'''
return [str(c.value) if c.value is not None else const.EMPTY
for c in column_cells(ws, name, columnletter, fromrow, torow)]
[docs]def column_index(ws, name):
'''Return the index of a column name in worksheet ws'''
if name not in column_names(ws):
raise UserWarning('"%s" is not a valid column index' % (name))
return cell.get_column_letter(column_names(ws).index(name)+1)
[docs]def row_cells(ws, row, fromcol=1, tocol=None):
'''
Return a list of the values in the row
'''
row = int(row)
return [ws.cell(row=row, column=col) for col in range(1,ws.max_column+1)[fromcol-1:tocol]]
[docs]def row_values(ws, row, fromcol=1, tocol=None):
'''
Return a list of the values in the row
'''
return [const.EMPTY if c.value is None else c.value for c in row_cells(ws, row, fromcol, tocol)]
def row_is_hidden(ws, row):
return ws.row_dimensions[row].hidden
[docs]def hide_row(ws, row, mode='hide'):
'''
Make a row invisible. Height 0 as well, I think.
'''
from openpyxl.utils.units import DEFAULT_ROW_HEIGHT
if isinstance(row, str):
row = int(row)
ws.row_dimensions[row].hidden = True if mode == 'hide' else False
if mode == 'hide':
ws.row_dimensions[row].height = 0
elif ws.row_dimensions[row].height == 0:
ws.row_dimensions[row].height = DEFAULT_ROW_HEIGHT
[docs]def range_overlap(range1=None, range2=None):
'''
Test whether two ranges overlap
'''
if not (range1 and range2):
raise labexceptions.LabtoolsWarning('Must give at least coordinate or range for each set')
if const.COLON not in range1:
range1 = const.COLON.join([range1]*2)
if const.COLON not in range2:
range2 = const.COLON.join([range2]*2)
m = re.match(RANGE_REGEX, range1)
if not m:
raise labexceptions.LabtoolsWarning('%s(range1) is invalid: %s' % (reflection.my_methodname(), range1))
cola1, cola2, rowa1, rowa2 = m.group(1,3,2,4)
m = re.match(RANGE_REGEX, range2)
if not m:
raise labexceptions.LabtoolsWarning('%s(range2) is invalid: %s' % (reflection.my_methodname(), range2))
colb1, colb2, rowb1, rowb2 = m.group(1,3,2,4)
if not (colb1 <= cola1 <= colb2 or colb1 <= cola2 <= colb2 or
cola1 <= colb2 <= cola2 or cola1 <= colb2 <= cola2):
return False
rowa1, rowb1, rowa2, rowb2 = int(rowa1), int(rowb1), int(rowa2), int(rowb2)
if not (rowb1 <= rowa1 <= rowb2 or rowb1 <= rowa2 <= rowb2 or
rowa1 <= rowb2 <= rowa2 or rowa1 <= rowb2 <= rowa2):
return False
return True
[docs]def add_cell(ws, cell, force=False):
"""
Behave like worksheet internal _get_cell() but add a newly created cell.
"""
coordinate = (cell.row, cell.col_idx)
if coordinate in ws._cells and not force:
raise labexceptions.LabtoolsWarning("Won't overwrite cell without force=True")
ws._add_cell(cell)
return ws._cells[coordinate]
def delete_cell(ws, coord=None, row=None, col=None):
if coord:
row, col = worksheet.coordinate_to_tuple(coord)
coordinate = (row, col)
if coordinate in ws._cells:
del ws._cells[coordinate]
[docs]def delete_rows(ws, fromrow, torow=None):
'''
Remove a row or set of rows altogether.
'''
from itertools import chain
if isinstance(fromrow, str):
fromrow = int(fromrow)
if not torow:
torow = fromrow
elif isinstance(torow, str):
torow = int(torow)
if torow < fromrow:
fromrow, torow = torow, fromrow
shrinkage = 1 + torow - fromrow # Nothing to do with George Costanza at the beach.
target = ws.get_squared_range(1, fromrow, ws.max_column, torow)
if any([range_overlap(const.COLON.join([make_coordinate(1, fromrow), make_coordinate(ws.max_column, torow)]), r)
for r in merged_ranges(ws)]):
raise labexceptions.LabtoolsWarning('%s %r %s in merged cells' %
(misc.plural(shrinkage, 'Row'),
fromrow if shrinkage == 1 else (fromrow, torow),
(misc.plural(shrinkage, 'is', 'are'))))
for row in target:
for cell in row:
delete_cell(ws, cell.coordinate)
shrinkable = ws.get_squared_range(1, fromrow+shrinkage, ws.max_column, ws.max_row)
doctorable = []
for r in merged_ranges(ws):
if range_overlap(r, const.COLON.join([make_coordinate(1, fromrow+shrinkage),
make_coordinate(ws.max_column, ws.max_row)])):
doctorable.append(r)
print('Unmerging %r' % (r))
merge_cells(ws, r, mode='unmerge')
for row in shrinkable:
row_attributes_set = False
for cell in row:
oldrow = cell.row
newrow = cell.row - shrinkage
if not row_attributes_set:
row_attributes_set = True
ws.row_dimensions[newrow].hidden = ws.row_dimensions[oldrow].hidden
ws.row_dimensions[newrow].height = ws.row_dimensions[oldrow].height
n = add_cell(ws, Cell(ws, row=newrow, col_idx=cell.col_idx, style_array=cell._style,
value=cell.value))
delete_cell(ws, cell.coordinate)
ws._garbage_collect()
for d in doctorable:
m = re.match(RANGE_REGEX, d)
col1, col2, row1, row2 = m.group(1, 3, 2, 4)
row1, row2 = int(row1), int(row2)
doctored = '%s%d:%s%d' % (col1, row1-shrinkage, col2, row2-shrinkage)
print('Re-merging %r' % (const.COLON.join([make_coordinate(col1, row1-shrinkage),
make_coordinate(col2, row2-shrinkage)])))
ws.merge_cells(doctored)
[docs]def hide_column(ws, column, mode='hide'):
'''
Make a column invisible. Width 0 as well, I think.
'''
if isinstance(column, int):
column = column_coordinate(column)
from openpyxl.utils.units import DEFAULT_COLUMN_WIDTH
ws.column_dimensions[column].hidden = str(True if mode == 'hide' else False)
ws.column_dimensions[column].width = 0 if mode == 'hide' else DEFAULT_COLUMN_WIDTH
[docs]def set_column_widths(ws, widths, fromcol=1):
'''
Set the column widths of a list of columns
'''
for idx in range(len(widths)):
if widths[idx]:
ws.column_dimensions[cell.get_column_letter(fromcol+idx)].width = str(widths[idx])
def copy_style(ws, tocell, fromcell, ignore_exception=True, noisy=True):
from openpyxl.styles.styleable import StyleArray
try:
ws[tocell]._style = StyleArray(ws[fromcell]._style)
except TypeError as t:
if ignore_exception:
if noisy:
sys.stderr.write('Failed to copy style from %s to %s' % (fromcell, tocell))
else:
raise labexceptions.LabtoolsWarning('Failed to copy style from %s to %s: %s' %
(fromcell, tocell, t.message))
[docs]def column_names(ws):
'''Return a list of the column names in worksheet ws'''
return [c.value for c in column_headers(ws)]
def set_sheetname(ws, title):
ws.title = title
[docs]def sheet_names(wb):
'''Return a list of the worksheet names in workbook wb'''
return wb.get_sheet_names()
def get_sheet(wb, name):
return wb.get_sheet_by_name(name)
def load_workbook(filename):
return xlsx.load_workbook(filename)
def new_workbook():
return Workbook()
def allow_insert(ws, allow=True):
ws.protection.insert_rows = allow
def merge_cells(ws, range, mode='merge'):
if mode == 'merge':
ws.merge_cells(range)
else:
ws.unmerge_cells(range)
def merged_ranges(ws):
return ws._merged_cells
def merged_rowsets(ws):
return [[row for row in worksheet.rows_from_range(r)] for r in merged_ranges(ws)]
def active_sheet(wb):
return wb.active
def new_sheet(wb, name, index=0):
s = wb.create_sheet(name, index)
return s
def store_workbook(wb, filename):
wb.save(filename)
def read_template(template):
return load_workbook(template)
[docs]def build_row(ws, newvalues, rowid=None, fromcol=None, tocol=None):
'''
Put newvalues into row rowname of worksheet ws.
Use column headers if newvalues is a dict, use array offsets if a list.
If rowid is None, add the row at the bottom.
fromcol and tocol apply to list inputs only
'''
if rowid is None:
rowid = next_row_index(ws)
elif isinstance(rowid, str):
rowid = int(rowid)
if rowid > 1:
cnames = column_names(ws)
fromcol = fromcol if fromcol else 1
else:
cnames = newvalues
fromcol = 1
if isinstance(newvalues, dict):
invalid = set(newvalues.keys()) - set(cnames)
if invalid:
raise UserWarning('Invalid column names in newvalues: %r' % (sorted(invalid)))
for key in list(newvalues.keys()):
# must be found or the warning would have triggered
hidx = column_index(ws, key)
coordinate = make_coordinate(hidx, rowid)
ws[coordinate] = newvalues[key]
elif isinstance(newvalues, list):
startcol = fromcol if fromcol else cnames[0]
if misc.one_is_true([isinstance(startcol, t) for t in [str, str]]):
if startcol not in cnames:
raise UserWarning('Invalid column name in fromcol: %s' % (fromcol))
startcol = cnames.index(startcol)+1
if not 1 <= startcol <= len(cnames)+1:
raise UserWarning('column %s not in range (1, %d)' %
(fromcol if fromcol else startcol, len(cnames)+1))
if tocol is None:
tocol = cnames[-1]
if misc.one_is_true([isinstance(tocol, t) for t in [str, str]]):
if tocol not in cnames:
raise UserWarning('Invalid column name in tocol: %s' % (tocol))
tocol = cnames.index(tocol)+2
else:
tocol = tocol if tocol>0 else len(cnames)+1
if not 2 <= tocol <= len(cnames)+1:
raise UserWarning('column tocol not in range (2, %d): %s' %
(len(cnames)+1, tocol))
for cidx in range(fromcol, min(tocol,len(newvalues)+1)):
coord = make_coordinate(cidx, rowid)
ws[coord] = newvalues[cidx-fromcol]
else:
raise labexceptions.ProgrammingFlawWarning('newvalues must be either a list or a dict, not %s' % (type(newvalues)))
return cell_name(ws[last_cell_coordinate(ws)])
[docs]def build_column(ws, newvalues, columnid=None):
'''
Put newvalues into column columnid of worksheet ws.
If newvalues is a dict, treat keys as row indices, if a list, lay them out in order.
If columnid is None, add the column at the right.
'''
if columnid is None:
columnid = cell.get_column_letter(ws.max_column+1)
if isinstance(newvalues, list):
pass
else:
raise labexceptions.StupidProgrammerWarning('newvalues must be either a list or a dict, not %s' % (type(newvalues)))
[docs]def excelfile_rows(xlsfile, sheetname=None):
'''
Read an excel file and build something that looks like the input stream of
a pipe-style file.
'''
xlsx = load_workbook(xlsfile)
sheetnames = sheet_names(xlsx)
if sheetname:
if sheetname not in sheetnames:
raise UserWarning ('Sheet %s not found in %s (has %r)' %
sheetname, xlsfile, sheetnames)
else:
sheetname = sheetnames[0]
ws = get_sheet(xlsx, sheetname)
for rowidx in range(1, next_row_index(ws)):
yield row_values(ws, rowidx)