#! /usr/bin/env python3 """ TabXLSX reads and writes Excel xlsx files. It does not depend on other libraries. The output can be piped as a markdown table or csv-like data as well. A number of output format options are available but less than the tabtotext.py module. If the input contains only one table then it is used, otherwise specify which should be printed.""" __copyright__ = "(C) 2023-2025 Guido Draheim, licensed under the Apache License 2.0""" __version__ = "1.6.4023" from typing import Union, List, Dict, cast, Tuple, Optional, TextIO, Iterable, NamedTuple, Mapping, TypeVar, Generic, Iterator from collections import OrderedDict from datetime import date as Date from datetime import datetime as Time from datetime import timedelta as Plus from datetime import timezone as TimeZone from io import StringIO, TextIOWrapper from zipfile import ZipFile, ZIP_DEFLATED from xml.etree import ElementTree as ET import os.path as fs import os import re import sys # The functions in this script mimic those of openpyxl - we only implement what we need for tabtoxlsx # (actually, we make an export with openpyxl and then we adapt the code here to generate the same bytes) # from openpyxl import Workbook, load_workbook # from openpyxl.worksheet.worksheet import Worksheet # from openpyxl.styles.cell_style import CellStyle as Style # from openpyxl.styles.alignment import Alignment # from openpyxl.utils import get_column_letter # (have a look at 'make_workbook' for the generation part) from logging import getLogger, basicConfig, ERROR logg = getLogger("TABXLSX") SECTION = "data" DATEFMT = "%Y-%m-%d" TIMEFMT = "%Y-%m-%d.%H%M" FLOATFMT = "%4.2f" MINWIDTH = 5 MAXCOL = 1000 MAXROWS = 100000 NIX = "" def get_column_letter(num: int) -> str: return chr(ord('A') + (num - 1)) class Alignment: horizontal: str def __init__(self, *, horizontal: str = NIX) -> None: self.horizontal = horizontal class CellStyle: alignment: Alignment number_format: str protection: str def __init__(self, *, number_format: str = NIX, protection: str = NIX) -> None: self.alignment = Alignment() self.number_format = number_format self.protection = protection CellValue = Union[None, bool, int, float, str, Time, Date] class Cell: value: CellValue data_type: str alignment: Optional[Alignment] number_format: Optional[str] protection: Optional[str] _xf: int _numFmt: int def __init__(self) -> None: self.value = None self.data_type = NIX self.alignment = None self.number_format = None self.protection = None self._xf = 0 self._numFmt = 0 def __str__(self) -> str: return str(self.value) def __repr__(self) -> str: return str(self.value) class Dimension: width: int def __init__(self, *, width: int = 8) -> None: self.width = width class DimensionsHolder: columns: Dict[str, Dimension] def __init__(self) -> None: self.columns = {} def __getitem__(self, column: str) -> Dimension: if column not in self.columns: self.columns[column] = Dimension() return self.columns[column] class Worksheet: rows: List[Dict[str, Cell]] title: str column_dimensions: DimensionsHolder _mindim: str _maxdim: str def __init__(self, title: str = NIX) -> None: self.title = title self.rows = [] self.column_dimensions = DimensionsHolder() def cell(self, row: int, column: int) -> Cell: atrow = row - 1 name = get_column_letter(column) + str(row) while atrow >= len(self.rows): self.rows.append({}) if name not in self.rows[atrow]: self.rows[atrow][name] = Cell() return self.rows[atrow][name] def __getitem__(self, name: str) -> Cell: m = re.match("([A-Z]+)([0-9]+)", name) if not m: logg.error("can not check %s", name) raise ValueError(name) atrow = int(m.group(2)) - 1 while atrow >= len(self.rows): self.rows.append({}) if name not in self.rows[atrow]: self.rows[atrow][name] = Cell() return self.rows[atrow][name] class Workbook: _sheets: List[Worksheet] _active_sheet_index: int def __init__(self) -> None: self._sheets = [Worksheet()] self._active_sheet_index = 0 @property def worksheets(self) -> List[Worksheet]: return self._sheets @property def active(self) -> Worksheet: return self._sheets[self._active_sheet_index] def save(self, filename: str) -> None: save_workbook(filename, self) def create_sheet(self) -> Worksheet: # pragma: no cover ws = Worksheet() self._active_sheet_index = len(self._sheets) self._sheets.append(ws) return ws def get_sheet_names(self) -> List[str]: # pragma: no cover names: List[str] = [] for ws in self._sheets: names += [ws.title] return names def get_sheet_by_name(self, name: str) -> Worksheet: # pragma: no cover for ws in self._sheets: if name == ws.title: return ws raise KeyError("Worksheet does not exist") def __getitem__(self, key: str) -> Worksheet: # pragma: no cover return self.get_sheet_by_name(key) @property def sheetnames(self) -> List[str]: # pragma: no cover return self.get_sheet_names() def save_workbook(filename: str, workbook: Workbook) -> None: xmlns = "http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns_r = "http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns_p = "http://schemas.openxmlformats.org/package/2006/relationships" xmlns_w = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet" xmlns_s = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" xmlns_t = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme" xmlns_c = "http://schemas.openxmlformats.org/package/2006/content-types" NUMFMT = 164 numFmts: List[str] = ["yyyy-mm-dd h:mm:ss"] for sheet in workbook.worksheets: sheet._mindim = "" sheet._maxdim = "" for row in sheet.rows: for cellname, cell in row.items(): if not sheet._mindim: sheet._mindim = cellname sheet._maxdim = cellname if cellname < sheet._mindim: sheet._mindim = cellname if cellname > sheet._maxdim: sheet._maxdim = cellname if cell.number_format: if cell.number_format in ["General"]: continue if cell.number_format not in numFmts: numFmts.append(cell.number_format) cell._numFmt = NUMFMT + numFmts.index(cell.number_format) cellXfs: List[str] = [] for sheet in workbook.worksheets: for row in sheet.rows: for cell in row.values(): numFmtId = cell._numFmt applyAlignment = 0 xml_alignment = "" if cell.alignment and cell.alignment.horizontal: applyAlignment = 1 horizontal = cell.alignment.horizontal xml_alignment = F'' xml_xf = F'' if xml_xf not in cellXfs: cellXfs.append(xml_xf) cell._xf = cellXfs.index(xml_xf) + 1 style_xml = F'' style_xml += F'' for num, fmtCode in enumerate(numFmts): numFmtId = NUMFMT + num style_xml += F'' style_xml += F'' style_xml += F'' style_xml += F'' style_xml += F'' # style_xml += f'' style_xml += f'' style_xml += F'' style_xml += F'' style_xml += F'' style_xml += F'' for xf in cellXfs: style_xml += xf style_xml += F'' style_xml += F'' style_xml += F'' style_xml += F'' workbook_xml = F'' workbook_xml += F'' workbook_xml += F'' # workbook_xml += F'' workbook_xml += F'' workbook_xml += F'' worksheets: List[str] = [] for sheet in workbook.worksheets: wxml = F'' wxml += '' wxml += F'' wxml += '' wxml += '' if sheet.column_dimensions.columns: wxml += F'' for nam, col in sheet.column_dimensions.columns.items(): wxml += F'' wxml += F'' wxml += F'' for num, row in enumerate(sheet.rows): if not row: continue # empty wxml += F'' for r, cell in row.items(): if cell.value is None: continue elif isinstance(cell.value, str): if cell.data_type in ["", "f"] and cell.value.startswith("="): s = cell._xf f = cell.value[1:] wxml += F'' wxml += F'{f}' wxml += F'' else: s = cell._xf t = "inlineStr" wxml += F'' wxml += F'{cell.value}' wxml += F'' else: value: Union[int, float] t = "n" if isinstance(cell.value, bool): value = 1 if cell.value else 0 t = 'b' elif isinstance(cell.value, Time): value = cell.value.toordinal() - 693594. seconds = cell.value.hour * 3600 + cell.value.minute * 60 + cell.value.second value += seconds / 86400. elif isinstance(cell.value, Date): value = cell.value.toordinal() - 693594. else: value = cell.value s = cell._xf # wxml += F'' wxml += F'' wxml += F'{value}' wxml += F'' wxml += F'' wxml += F'' wxml += F'' wxml += F'' worksheets.append(wxml) workbook_xml += F'' workbook_xml += F'' workbook_xml += F'' workbook_xml += F'' theme_xml = F'' + "\n" theme_xml = F'' theme_xml = F'' with ZipFile(filename, "w", compression=ZIP_DEFLATED) as zipfile: worksheetfilelist = [] rels_xml = F'' for num, xml in enumerate(worksheets): worksheetfile = F'worksheets/sheet{num+1}.xml' worksheet_Id = F'rId{num+1}' rels_xml += F'' with zipfile.open("xl/" + worksheetfile, "w") as xmlfile: xmlfile.write(xml.encode('utf-8')) worksheetfilelist += [worksheetfile] stylefile = F"styles.xml" style_Id = F'rId{len(worksheets)+1}' rels_xml += F'' with zipfile.open("xl/" + stylefile, "w") as xmlfile: xmlfile.write(style_xml.encode('utf-8')) themefile = F"theme/theme1.xml" theme_Id = F'rId{len(worksheets)+2}' rels_xml += F'' with zipfile.open("xl/" + themefile, "w") as xmlfile: xmlfile.write(theme_xml.encode('utf-8')) rels_xml += F'' workbookfile = "workbook.xml" with zipfile.open("xl/" + workbookfile, "w") as xmlfile: xmlfile.write(workbook_xml.encode('utf-8')) relsfile = "_rels/workbook.xml.rels" with zipfile.open("xl/" + relsfile, "w") as xmlfile: xmlfile.write(rels_xml.encode('utf-8')) apps_xml = F'Microsoft Excel3.0' appsfile = "docProps/app.xml" core_xml = F'openpyxl2024-07-09T21:58:37Z2024-07-09T21:58:37Z' corefile = "docProps/core.xml" with zipfile.open(appsfile, "w") as xmlfile: xmlfile.write(apps_xml.encode('utf-8')) with zipfile.open(corefile, "w") as xmlfile: xmlfile.write(core_xml.encode('utf-8')) init_xml = F'' init_xml += F'' init_xml += F'' init_xml += F'' init_xml += F'' initfile = "_rels/.rels" with zipfile.open(initfile, "w") as xmlfile: xmlfile.write(init_xml.encode('utf-8')) content_xml = F'' content_xml += '' content_xml += '' content_xml += '' content_xml += '' content_xml += '' content_xml += '' # content_xml += '' for worksheetfile in worksheetfilelist: content_xml += F'' content_xml += '' content_xml += '' contentfile = "[Content_Types].xml" with zipfile.open(contentfile, "w") as xmlfile: xmlfile.write(content_xml.encode('utf-8')) _dateformats = ['d.mm.yy', 'yyyy-mm-dd'] _timeformats = ['yyyy-mm-dd hh:mm', 'yyyy-mm-dd h:mm:ss'] def load_workbook(filename: str) -> Workbook: workbook = Workbook() ws = workbook.active with ZipFile(filename) as zipfile: sharedStrings: List[str] = [] try: with zipfile.open("xl/sharedStrings.xml") as xmlfile: xml = ET.parse(xmlfile) for item in xml.getroot(): if ("}" + item.tag).endswith("}si"): text = "" for block in item: if ("}" + block.tag).endswith("}t"): text += block.text or "" sharedStrings += [text] except KeyError as e: logg.debug("do not use sharedStrings.xml: %s", e) formatcodes: Dict[str, str] = {} numberformat: Dict[str, str] = {} with zipfile.open("xl/styles.xml") as xmlfile: xml = ET.parse(xmlfile) for item in xml.getroot(): if ("}" + item.tag).endswith("numFmts"): for fmt in item: numFmtId = fmt.get("numFmtId", "?") formatcode = fmt.get("formatCode", "?") logg.debug("numFmtId %s formatCode %s", numFmtId, formatcode) formatcodes[numFmtId] = formatcode if ("}" + item.tag).endswith("cellXfs"): style = 0 for xfs in item: numFmtId = xfs.get("numFmtId", "?") logg.debug("numFmtId %s", numFmtId) if numFmtId in formatcodes: numberformat[str(style)] = formatcodes[numFmtId] style += 1 sheetnames: Dict[str, str] = {} with zipfile.open("xl/workbook.xml") as xmlfile: xml = ET.parse(xmlfile) for item in xml.getroot(): if ("}" + item.tag).endswith("}sheets"): for sheet in item: sheetname = sheet.get("name", "") sheetId = sheet.get("sheetId", "") if sheetId and sheetname: sheetnames[sheetId] = sheetname namelist = zipfile.namelist() for sheetnumber in range(1, 99): sheetId = str(sheetnumber) sheetfilename = F"xl/worksheets/sheet{sheetId}.xml" if sheetnumber > 1: if sheetfilename not in namelist: break ws = Worksheet() workbook._sheets.append(ws) if sheetId in sheetnames: ws.title = sheetnames[sheetId] with zipfile.open(sheetfilename) as xmlfile: logg.debug("load %s:%s", filename, sheetfilename) xml = ET.parse(xmlfile) for item in xml.getroot(): if ("}" + item.tag).endswith("}sheetData"): for rowdata in item: row = int(rowdata.get("row", "0")) for cell in rowdata: value: CellValue = None t = cell.get("t", "n") s = cell.get("s", "0") r = cell.get("r") v = "" x = "" for data in cell: if ("}" + data.tag).endswith("}v"): v = data.text or "" elif ("}" + data.tag).endswith("}is"): for block in data: x += block.text or "" elif ("}" + data.tag).endswith("}f"): x = "=" + (data.text or "") t = "f" logg.debug("r = %s | s = %s | t =%s | v = %s| x = %s", r, s, t, v, x) if t in ["b"]: value = True if v == "1" else False elif t in ["f", "inlineStr", ]: value = x elif t in ["s"]: value = sharedStrings[int(v)] # elif v in [""]: # value = "" else: if "." not in v: value = int(v) value1 = float(value) else: value1 = float(v) value = value1 if s in numberformat: numfmt = numberformat[s] logg.debug("value %s numberformat %s", value, numfmt) if numfmt in _timeformats: value0 = int(value1) value2 = Time.fromordinal(value0 + 693594) value3 = int(((value1 - value0) * 86400) + 0.4) value = value2 + Plus(seconds=value3) t = "d" elif numfmt in _dateformats: value0 = int(value1) value2 = Time.fromordinal(value0 + 693594) value = value2.date() t = "d" else: logg.debug("%s no datetime format", s) else: logg.debug("%s has no numbeformt", s) if r: ws[r].value = value ws[r].data_type = t return workbook # ..................................................................... # Files can contain multiple tables which get represented as a list of sheets where # each sheet remembers the title and the order columns in the original table. This allows # to convert file formats with the order of tables, columns (and rows) being preserved. class TabSheet(NamedTuple): data: List[Dict[str, CellValue]] headers: List[str] title: str def tablistfor(tabdata: Dict[str, List[Dict[str, CellValue]]]) -> List[TabSheet]: tablist: List[TabSheet] = [] for name, data in tabdata.items(): tablist += [TabSheet(data, [], name)] return tablist def tablistitems(tablist: List[TabSheet]) -> Iterator[Tuple[str, List[Dict[str, CellValue]]]]: for tabsheet in tablist: yield tabsheet.title, tabsheet.data def tablistmap(tablist: List[TabSheet]) -> Dict[str, List[Dict[str, CellValue]]]: tabdata: Dict[str, List[Dict[str, CellValue]]] = OrderedDict() for name, data in tablistitems(tablist): tabdata[name] = data return tabdata def tablistfileXLSX(filename: str) -> List[TabSheet]: workbook = load_workbook(filename) return tablist_workbook(workbook) def tablist_workbook(workbook: Workbook, section: str = NIX) -> List[TabSheet]: tab = [] for ws in workbook.worksheets: title = ws.title cols: List[str] = [] for col in range(MAXCOL): header = ws.cell(row=1, column=col + 1) if header.value is None: break name = header.value if name is None: break cols.append(str(name)) logg.debug("xlsx found %s cols\n\t%s", len(cols), cols) data: List[Dict[str, CellValue]] = [] for atrow in range(MAXROWS): record = [] found = 0 for atcol in range(len(cols)): cell = ws.cell(row=atrow + 2, column=atcol + 1) if cell.data_type in ["f"]: continue value = cell.value # logg.debug("[%i,%si] cell.value = %s", atcol, atrow, value) if value is not None: found += 1 if isinstance(value, str) and value == " ": value = "" record.append(value) if not found: break newrow = dict(zip(cols, record)) data.append(newrow) # type: ignore[arg-type] tab.append(TabSheet(data, cols, title)) return tab def currency() -> str: """ make dependent on locale ? """ currency_dollar = 0x024 currency_pound = 0x0A3 currency_symbol = 0x0A4 # in iso-8859-1 it shows the euro sign currency_yen = 0x0A5 currency_euro = 0x20AC return chr(currency_euro) def tablistmake_workbook(tablist: List[TabSheet], selected: List[str] = [], minwidth: int = 0) -> Optional[Workbook]: workbook: Optional[Workbook] = None for tabsheet in tablist: if workbook is not None: workbook.create_sheet() work = tabto_workbook(tabsheet.data, tabsheet.headers, selected, minwidth=minwidth, section=tabsheet.title, workbook=workbook) if workbook is None: workbook = work return workbook def tabtoXLSX(filename: str, data: Iterable[Dict[str, CellValue]], headers: List[str] = [], selected: List[str] = [], minwidth: int = 0, section: str = NIX) -> str: workbook = tabto_workbook(data, headers, selected, minwidth, section) save_workbook(filename, workbook) return "TABXLSX" def tabto_workbook(data: Iterable[Dict[str, CellValue]], headers: List[str] = [], selected: List[str] = [], minwidth: int = 0, section: str = NIX, workbook: Optional[Workbook] = None) -> Workbook: minwidth = minwidth or MINWIDTH logg.debug("tabtoXLSX:") renameheaders: Dict[str, str] = {} showheaders: List[str] = [] sortheaders: List[str] = [] formats: Dict[str, str] = {} combine: Dict[str, List[str]] = {} for header in headers: combines = "" for selheader in header.split("|"): if "@" in selheader: selcol, rename = selheader.split("@", 1) else: selcol, rename = selheader, "" if ":" in selcol: name, form = selcol.split(":", 1) if isinstance(formats, dict): fmts = form if "{" in form else ("{:" + form + "}") formats[name] = fmts.replace("i}", "n}").replace("u}", "n}").replace("r}", "s}").replace("a}", "s}") else: name = selcol showheaders += [name] # headers make a default column order if rename: sortheaders += [name] # headers does not sort anymore if not combines: combines = name elif combines not in combine: combine[combines] = [name] elif name not in combine[combines]: combine[combines] += [name] if rename: renameheaders[name] = rename logg.debug("renameheaders = %s", renameheaders) logg.debug("sortheaders = %s", sortheaders) logg.debug("formats = %s", formats) logg.debug("combine = %s", combine) combined: Dict[str, List[str]] = {} renaming: Dict[str, str] = {} selcols: List[str] = [] for selecheader in selected: combines = "" for selec in selecheader.split("|"): if "@" in selec: selcol, rename = selec.split("@", 1) else: selcol, rename = selec, "" if ":" in selcol: name, form = selcol.split(":", 1) if isinstance(formats, dict): fmts = form if "{" in form else ("{:" + form + "}") formats[name] = fmts.replace("i}", "n}").replace("u}", "n}").replace("r}", "s}").replace("a}", "s}") else: name = selcol selcols.append(name) if rename: renaming[name] = rename if not combines: combines = name elif combines not in combined: combined[combines] = [name] elif combines not in combined[combines]: combined[combines] += [name] logg.debug("combined = %s", combined) logg.debug("renaming = %s", renaming) logg.debug("selcols = %s", selcols) if not selected: combined = combine # argument renaming = renameheaders logg.debug("combined : %s", combined) logg.debug("renaming : %s", renaming) newsorts: Dict[str, str] = {} colnames: Dict[str, str] = {} for name, rename in renaming.items(): if "@" in rename: newname, newsort = rename.split("@", 1) elif rename and rename[0].isalpha(): newname, newsort = rename, "" else: newname, newsort = "", rename if newname: colnames[name] = newname if newsort: newsorts[name] = newsort logg.debug("newsorts = %s", newsorts) logg.debug("colnames = %s", colnames) sortcolumns = [(name if name not in colnames else colnames[name]) for name in (selcols or sortheaders)] if newsorts: for num, name in enumerate(sortcolumns): if name not in newsorts: newsorts[name] = ("@" * len(str(num)) + str(num)) sortcolumns = sorted(newsorts, key=lambda x: newsorts[x]) logg.debug("sortcolumns : %s", sortcolumns) if selcols: selheaders = [(name if name not in colnames else colnames[name]) for name in (selcols)] else: selheaders = [(name if name not in colnames else colnames[name]) for name in (showheaders)] def strNone(value: CellValue) -> str: if isinstance(value, Time): return value.strftime(TIMEFMT) if isinstance(value, Date): return value.strftime(DATEFMT) return str(value) def sortkey(header: str) -> str: if header in selheaders: num = selheaders.index(header) return ("@" * len(str(num)) + str(num)) return header def sortrow(row: Dict[str, CellValue]) -> str: def asdict(item: Dict[str, CellValue]) -> Dict[str, CellValue]: if hasattr(item, "_asdict"): return item._asdict() # type: ignore[union-attr, no-any-return, arg-type, attr-defined] return item item = asdict(row) sorts = sortcolumns if sorts: # numbers before empty before strings sortvalue = "" for sort in sorts: if sort in item: value = item[sort] if value is None: sortvalue += "\n?" elif value is False: sortvalue += "\n" elif value is True: sortvalue += "\n!" elif isinstance(value, int): val = "%i" % value sortvalue += "\n" + (":" * len(val)) + val elif isinstance(value, float): val = "%.6f" % value sortvalue += "\n" + (":" * val.index(".")) + val elif isinstance(value, Time): sortvalue += "\n" + value.strftime("%Y%m%d.%H%MS") elif isinstance(value, Date): sortvalue += "\n" + value.strftime("%Y%m%d") else: sortvalue += "\n" + str(value) else: sortvalue += "\n?" return sortvalue return "" rows: List[Dict[str, CellValue]] = [] cols: Dict[str, int] = {} for num, item in enumerate(data): row: Dict[str, CellValue] = {} if "#" in headers: item["#"] = num + 1 cols["#"] = len(str(num + 1)) for name, value in item.items(): selname = name if name in renameheaders and renameheaders[name] in selcols: selname = renameheaders[name] if selcols and selname not in selcols and "*" not in selcols: continue colname = selname if selname not in colnames else colnames[selname] row[colname] = value # do not format the value here! oldlen = cols[colname] if colname in cols else max(minwidth, len(colname)) cols[colname] = max(oldlen, len(strNone(value))) rows.append(row) sortedrows = list(sorted(rows, key=sortrow)) sortedcols = list(sorted(cols.keys(), key=sortkey)) return make_workbook(sortedrows, sortedcols, cols, formats, section=section, workbook=workbook) def make_workbook(rows: List[Dict[str, CellValue]], cols: List[str], colwidth: Dict[str, int], formats: Dict[str, str], section: str = NIX, workbook: Optional[Workbook] = None) -> Workbook: row = 0 workbook = workbook or Workbook() ws = workbook.active ws.title = section or SECTION col = 0 for name in cols: ws.cell(row=1, column=col + 1).value = name ws.cell(row=1, column=col + 1).alignment = Alignment(horizontal="right") if name in colwidth: ws.column_dimensions[get_column_letter(col + 1)].width = colwidth[name] col += 1 for item in rows: row += 1 values: Dict[str, CellValue] = dict([(name, "") for name in cols]) for name, value in item.items(): values[name] = value col = 0 for name in cols: value = values[name] at = {"column": col + 1, "row": row + 1} if value is None: ws.cell(**at).value = "" ws.cell(**at).alignment = Alignment(horizontal="left") ws.cell(**at).number_format = "General" elif isinstance(value, Time): ws.cell(**at).value = value ws.cell(**at).alignment = Alignment(horizontal="right") ws.cell(**at).number_format = "yyyy-mm-dd hh:mm" elif isinstance(value, Date): ws.cell(**at).value = value ws.cell(**at).alignment = Alignment(horizontal="right") ws.cell(**at).number_format = "yyyy-mm-dd" elif isinstance(value, int): ws.cell(**at).value = value ws.cell(**at).alignment = Alignment(horizontal="right") ws.cell(**at).number_format = "#,##0" elif isinstance(value, float): ws.cell(**at).value = value ws.cell(**at).alignment = Alignment(horizontal="right") ws.cell(**at).number_format = "#,##0.00" if name in formats and "$}" in formats[name]: ws.cell(**at).number_format = "#,##0.00" + currency() else: ws.cell(**at).value = value ws.cell(**at).alignment = Alignment(horizontal="left") ws.cell(**at).number_format = "General" col += 1 return workbook # ........................................................... def sec_usec(sec: Optional[str]) -> Tuple[int, int]: """ split float value to seconds and microsecond integers""" if not sec: return 0, 0 if "." in sec: x = float(sec) s = int(x) u = int((x - s) * 1000000) return s, u return int(sec), 0 class StrToDate: """ parsing iso8601 day formats""" def __init__(self, datedelim: str = "-") -> None: self.delim = datedelim self.is_date = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)[.]?$".replace('-', datedelim)) self.is_part = re.compile(r"(\d\d\d\d)-(\d\d)-(\d\d)[^\d].*".replace('-', datedelim)) def date(self, value: str) -> Optional[Date]: got = self.is_date.match(value) if got: y, m, d = got.group(1), got.group(2), got.group(3) return Date(int(y), int(m), int(d)) return None def datepart(self, value: str) -> Optional[Date]: got = self.is_part.match(value) if got: y, m, d = got.group(1), got.group(2), got.group(3) return Date(int(y), int(m), int(d)) return None def __call__(self, value: str) -> Union[str, Date, Time]: d = self.date(value) if d: return d p = self.datepart(value) if p: return p return value class StrToTime(StrToDate): """ parsing iso8601 day or day-and-time formats with zone offsets""" def __init__(self, datedelim: str = "-") -> None: StrToDate.__init__(self, datedelim) self.is_localtime = re.compile( r"(\d\d\d\d)-(\d\d)-(\d\d)[.T ](\d\d)[:]?(\d\d)(?:[:](\d\d(?:[.]\d*)?))?$".replace('-', datedelim)) self.is_zonetime = re.compile( r"(\d\d\d\d)-(\d\d)-(\d\d)[.T ](\d\d)[:]?(\d\d)(?:[:](\d\d(?:[.]\d*)?))?[ ]*(Z|UTC|[+-][0-9][0-9])(?:[:]?([0-9][0-9]))?$".replace('-', datedelim)) def time(self, value: str) -> Optional[Time]: got = self.is_localtime.match(value) if got: y, m, d, H, M, S = got.group(1), got.group(2), got.group(3), got.group(4), got.group(5), got.group(6) return Time(int(y), int(m), int(d), int(H), int(M), *sec_usec(S)) got = self.is_zonetime.match(value) if got: hh, mm = got.group(7), got.group(8) if hh in ["Z", "UTC"]: plus = TimeZone.utc else: plus = TimeZone(Plus(hours=int(hh), minutes=int(mm or 0))) y, m, d, H, M, S = got.group(1), got.group(2), got.group(3), got.group(4), got.group(5), got.group(6) return Time(int(y), int(m), int(d), int(H), int(M), *sec_usec(S), tzinfo=plus) return None def __call__(self, value: str) -> Union[str, Date, Time]: d = self.date(value) if d: return d t = self.time(value) if t: return t return value _atformats = ["@json", "@jsn", "@markdown", "@md", "@md2", "@md3", "@md4", "@md5", "@md6", "@wide", "@read", "@txt", "@text", "@tabs", "@tab", "@data", "@ifs", "@dat", "@csv", "@scsv", "@xls", "@xlsx"] def fmt_selected(selected: List[str]) -> str: for sel in selected: if sel in _atformats: return sel[1:] return NIX def tabtotext(data: Iterable[Dict[str, CellValue]], # .. headers: List[str] = [], selected: List[str] = [], *, fmt: str = "", tab: Optional[str] = None, padding: Optional[str] = None, minwidth: int = 0, section: str = NIX, noheaders: bool = False, unique: bool = False, defaultformat: str = "") -> str: stream = StringIO() print_tabtotext(stream, data, headers, selected, # .. tab=tab, padding=padding, minwidth=minwidth, section=section, noheaders=noheaders, unique=unique, defaultformat=(fmt or defaultformat)) return stream.getvalue() def print_tabtotext(output: Union[TextIO, str], data: Iterable[Dict[str, CellValue]], # .. headers: List[str] = [], selected: List[str] = [], *, tab: Optional[str] = None, padding: Optional[str] = None, minwidth: int = 0, section: str = NIX, noheaders: bool = False, unique: bool = False, defaultformat: str = "") -> str: """ This code is supposed to be copy-n-paste into other files. You can safely try-import from tabtotext or tabtoxlsx to override this function. Only a subset of features is supported. """ spec: Dict[str, str] = dict(cast(Tuple[str, str], (x, "") if "=" not in x else x.split("=", 1)) for x in selected if x.startswith("@")) selected_fmt = fmt_selected(selected) selected = [x for x in selected if not x.startswith("@")] minwidth = minwidth or MINWIDTH padding = " " if padding is None else padding tab = "|" if tab is None else tab def extension(filename: str) -> Optional[str]: _, ext = fs.splitext(filename.lower()) if ext: return ext[1:] return None # if isinstance(output, TextIO) or isinstance(output, StringIO): out = output fmt = defaultformat or selected_fmt done = "stream" elif "." in output: fmt = extension(output) or defaultformat if fmt in ["xls", "xlsx"]: tabtoXLSX(output, data, headers, selected, section=section) return "XLSX" out = open(output, "wt", encoding="utf-8") done = output else: fmt = output or defaultformat or selected_fmt out = sys.stdout done = output # if fmt in ["md", "markdown"]: fmt = "GFM" # nopep8 if fmt in ["md2"]: fmt = "GFM" minwidth = 2 # nopep8 if fmt in ["md3"]: fmt = "GFM" minwidth = 3 # nopep8 if fmt in ["md4"]: fmt = "GFM" minwidth = 4 # nopep8 if fmt in ["md5"]: fmt = "GFM" minwidth = 5 # nopep8 if fmt in ["md6"]: fmt = "GFM" minwidth = 6 # nopep8 if fmt in ["wide"]: fmt = "GFM" tab = "" # nopep8 if fmt in ["read"]: fmt = "GFM" tab = " " # nopep8 padding = "" noheaders = True if fmt in ["txt"]: fmt = "GFM" padding = "" # nopep8 if fmt in ["text"]: fmt = "GFM" padding = "" noheaders = True # nopep8 if fmt in ["tabs"]: fmt = "GFM" tab = "\t" padding = "" # nopep8 if fmt in ["tab"]: fmt = "CSV" tab = "\t" # nopep8 if fmt in ["data"]: fmt = "CSV" tab = "\t" noheaders = True # nopep8 if fmt in ["ifs"]: fmt = "CSV" tab = os.environ.get("IFS", "\t") # nopep8 if fmt in ["dat"]: fmt = "CSV" tab = os.environ.get("IFS", "\t") noheaders = True # nopep8 if fmt in ["csv", "scsv"]: fmt = "CSV" tab = ";" # nopep8 if fmt in ["list"]: fmt = "CSV" tab = ";" noheaders = True # nopep8 if fmt in ["json"]: fmt = "JSON" if fmt in ["jsn"]: fmt = "JSON" padding = "" if fmt in ["xlsx", "xls"]: fmt = "XLS" tab = "," # nopep8 # override if "@tab" in spec: tab = spec["@tab"] if "@notab" in spec: tab = "" if "@nopadding" in spec: padding = "" if "@noheaders" in spec: noheaders = True if "@unique" in spec: unique = True # none_string = "~" true_string = "(yes)" false_string = "(no)" floatfmt = "%4.2f" noright = fmt in ["data"] noheaders = noheaders or fmt in ["data", "text", "list"] formatleft = re.compile("[{]:[^{}]*<[^{}]*[}]") formatright = re.compile("[{]:[^{}]*>[^{}]*[}]") formatnumber = re.compile("[{]:[^{}]*[defghDEFGHMQR$%][}]") # parsing the microsyntax of headers and selected columns ["colname:{:.2f}$@newname"] formats: Dict[str, str] = {} renameheaders: Dict[str, str] = {} showheaders: List[str] = [] sortheaders: List[str] = [] for header in headers: for selheader in header.split("|"): if "@" in selheader: selcol, rename = selheader.split("@", 1) else: selcol, rename = selheader, "" if ":" in selcol: name, form = selcol.split(":", 1) fmts = form if "{" in form else ("{:" + form + "}") formats[name] = fmts.replace("i}", "n}").replace("u}", "n}").replace("r}", "s}").replace("a}", "s}") else: name = selcol showheaders += [name] if rename: sortheaders += [name] # default sort by named headers (rows) if rename: renameheaders[name] = rename renaming: Dict[str, str] = {} selcols: List[str] = [] for selecheader in selected: for selec in selecheader.split("|"): if "@" in selec: selcol, rename = selec.split("@", 1) else: selcol, rename = selec, "" if ":" in selcol: name, form = selcol.split(":", 1) fmts = form if "{" in form else ("{:" + form + "}") formats[name] = fmts.replace("i}", "n}").replace("u}", "n}").replace("r}", "s}").replace("a}", "s}") else: name = selcol selcols.append(name) if rename: renaming[name] = rename if not selected: renaming = renameheaders logg.debug("sortheaders = %s | formats = %s", sortheaders, formats) newsorts: Dict[str, str] = {} colnames: Dict[str, str] = {} for name, rename in renaming.items(): if "@" in rename: newname, newsort = rename.split("@", 1) elif rename and rename[0].isalpha(): newname, newsort = rename, "" else: newname, newsort = "", rename if newname: colnames[name] = newname if name in formats: formats[newname] = formats[name] if newsort: newsorts[name] = newsort sortcolumns = [(name if name not in colnames else colnames[name]) for name in (selcols or sortheaders)] if newsorts: for num, name in enumerate(sortcolumns): if name not in newsorts: newsorts[name] = ("@" * len(str(num)) + str(num)) sortcolumns = sorted(newsorts, key=lambda x: newsorts[x]) logg.debug("sortcolumns : %s", sortcolumns) else: logg.debug("sortcolumns = %s", sortcolumns) selcolumns = [(name if name not in colnames else colnames[name]) for name in (selcols)] selheaders = [(name if name not in colnames else colnames[name]) for name in (showheaders)] # ....................................... def rightalign(col: str) -> bool: if col in formats and not noright: if formats[col].startswith(" "): return True if formats[col].startswith("{: "): return True if formatleft.search(formats[col]): return False if formatright.search(formats[col]): return True if formatnumber.search(formats[col]): return True return False def strValue(value: CellValue) -> str: if value is None: return none_string if value is False: return false_string if value is True: return true_string if isinstance(value, Time): return value.strftime("%Y-%m-%d.%H%M") # TIMEFMT if isinstance(value, Date): return value.strftime("%Y-%m-%d") # DATEFMT return str(value) def format(name: str, val: CellValue) -> str: if name in formats: fmt = formats[name] if fmt.startswith("{:") and fmt[-1] == "}" and "%s" in fmt: fmt = fmt[2:-1].replace("%s", "{:s}") if fmt.startswith("{:%") and fmt[-1] == "}" and fmt[-2] in "sf": fmt = fmt.replace("{:%", "{:") if "{:" in fmt: try: return fmt.format(val) except Exception as e: logg.debug("format <%s> does not apply: %s", fmt, e) if isinstance(val, float): return floatfmt % val return strValue(val) def asdict(item: Dict[str, CellValue]) -> Dict[str, CellValue]: if hasattr(item, "_asdict"): return item._asdict() # type: ignore[union-attr, no-any-return, arg-type, attr-defined] return item rows: List[Dict[str, CellValue]] = [] cols: Dict[str, int] = {} for num, item in enumerate(data): row: Dict[str, CellValue] = {} if "#" in selcols: row["#"] = num + 1 cols["#"] = len(str(num + 1)) for name, value in asdict(item).items(): selname = name if name in renameheaders and renameheaders[name] in selcols: selname = renameheaders[name] if selcols and selname not in selcols and "*" not in selcols: continue colname = selname if selname not in colnames else colnames[selname] row[colname] = value oldlen = cols[colname] if colname in cols else max(minwidth, len(colname)) cols[colname] = max(oldlen, len(format(colname, value))) rows.append(row) def sortkey(header: str) -> str: headers = selcolumns or selheaders if header in headers: num = headers.index(header) return ("@" * len(str(num)) + str(num)) return header def sortrow(row: Dict[str, CellValue]) -> str: item = asdict(row) sorts = sortcolumns if sorts: sortvalue = "" for sort in sorts: if sort in item: value = item[sort] if value is None: sortvalue += "\n?" elif value is False: sortvalue += "\n" elif value is True: sortvalue += "\n!" elif isinstance(value, int): val = "%i" % value sortvalue += "\n" + (":" * len(val)) + val elif isinstance(value, float): val = "%.6f" % value sortvalue += "\n" + (":" * val.index(".")) + val else: sortvalue += "\n" + strValue(value) else: sortvalue += "\n?" return sortvalue return "" # print .......................................... colo = tuple(sorted(cols.keys(), key=sortkey)) # ordered column names same = [] # JSON if fmt in ["JSON"]: import json pad = " " * len(padding) comma = "," + pad lines: List[str] = [] for row in sorted(rows, key=sortrow): line: List[str] = [] for name in colo: if name in row: value = row[name] if isinstance(value, Date) or isinstance(value, Time): line += ['"%s":%s"%s"' % (name, pad, str(value))] else: line += ['"%s":%s%s' % (name, pad, json.dumps(value))] lines.append(" {" + comma.join(line) + "}") newlist = "[\n" endlist = "\n]" if section and not noheaders: newlist = '{"%s":%s[\n' % (section.replace('"', "'"), pad) endlist = "\n]}" out.write(newlist + ",\n".join(lines) + endlist) return "JSON" # CSV if fmt in ["CSV"]: tab1 = tab if tab else ";" import csv writer = csv.DictWriter(out, fieldnames=colo, restval='~', quoting=csv.QUOTE_MINIMAL, delimiter=tab1) if not noheaders: writer.writeheader() old: Dict[str, str] = {} for row in sorted(rows, key=sortrow): rowvalues: Dict[str, str] = {} for name, value in asdict(row).items(): rowvalues[name] = format(name, value) if unique: same = [sel for sel in selcols if sel in rowvalues and sel in old and rowvalues[sel] == old[sel]] if not selcols or same != selcols: writer.writerow(rowvalues) old = rowvalues return "CSV" # GFM ws = ("", " ", " ", " ", " ", " ", " ", " ", " ") # " "*(0...8) colw = tuple((cols[col] for col in colo)) # widths of cols ordered colr = tuple((rightalign(col) for col in colo)) # rightalign of cols ordered tab2 = (tab + padding if tab else "") esc1 = "\\" esc2 = "\\\\" esc3 = tab[0] if tab else "\\" esc4 = "\\" + tab[0] if tab else "\\" esc7 = "\n" esc8 = "\\\n" if section and not noheaders: print(F"\n## {section}", file=out) if not noheaders: hpad = [(ws[w] if w < 9 else (" " * w)) for w in ((colw[m] - len(col)) for m, col in enumerate(colo))] line = [tab2 + (hpad[m] + col if colr[m] else col + hpad[m]) for m, col in enumerate(colo)] print(padding.join(line).rstrip(), file=out) if tab and padding: seps = ["-" * colw[m] for m, col in enumerate(colo)] seperators = [tab2 + (seps[m][:-1] + ":" if colr[m] else seps[m]) for m, col in enumerate(colo)] print(padding.join(seperators).rstrip(), file=out) oldvalues: Dict[str, str] = {} for item in sorted(rows, key=sortrow): values: Dict[str, str] = {} for name, value in asdict(item).items(): values[name] = format(name, value).replace(esc1, esc2).replace(esc3, esc4).replace(esc7, esc8) vals = [values.get(col, none_string) for col in colo] vpad = [(ws[w] if w < 9 else (" " * w)) for w in ((colw[m] - len(vals[m])) for m, col in enumerate(colo))] line = [tab2 + (vpad[m] + vals[m] if colr[m] else vals[m] + vpad[m]) for m, col in enumerate(colo)] if unique: same = [sel for sel in selcols if sel in values and sel in oldvalues and values[sel] == oldvalues[sel]] if not selcols or same != selcols: print((padding.join(line)).rstrip(), file=out) oldvalues = values return "GFM" def tablistfile(input: Union[TextIO, str], *, tab: Optional[str] = None, defaultformat: str = "") -> List[TabSheet]: def extension(filename: str) -> Optional[str]: _, ext = fs.splitext(filename.lower()) if ext: return ext[1:] return None # if isinstance(input, TextIO) or isinstance(input, StringIO): inp = input fmt = defaultformat done = "stream" elif "." in input: fmt = extension(input) or defaultformat if fmt in ["xls", "xlsx"]: return tablistfileXLSX(input) inp = open(input, "rt", encoding="utf-8") done = input else: fmt = input or defaultformat inp = sys.stdin done = input # tab = '|' if tab is None else tab if fmt in ["wide", "text"]: tab = '' if fmt in ["read"]: tab = ' ' if fmt in ["tabs", "tab", "dat", "ifs", "data"]: tab = '\t' if fmt in ["csv", "scsv", "list"]: tab = ';' if fmt in ["xls", "sxlx"]: tab = ',' # none_string = "~" true_string = "(yes)" false_string = "(no)" tabs: List[TabSheet] = [] if fmt in ["jsn", "json"]: import json time = StrToTime() jsondata = json.load(inp) if isinstance(jsondata, dict): jsondict = jsondata else: jsondict = {"data": jsondata} for listname, jsonlist in jsondict.items(): listdata: List[Dict[str, CellValue]] = [] if isinstance(jsonlist, Iterable): for nextgroup in jsonlist: if isinstance(nextgroup, dict): newgroup: Dict[str, CellValue] = {} for nam, jsonval in nextgroup.items(): if isinstance(jsonval, str): newgroup[nam] = time(jsonval) else: newgroup[nam] = jsonval listdata.append(newgroup) tabs.append(TabSheet(listdata, [], listname)) return tabs time = StrToTime() data: List[Dict[str, CellValue]] = [] if fmt in ["csv", "scsv", "tab"]: import csv reader = csv.DictReader(inp, delimiter=tab) for nextrecord in reader: # newrecord: Dict[str, CellValue] = cast(Dict[str, CellValue], nextrecord.copy()) newrecord: Dict[str, CellValue] = {} for nam, val in nextrecord.items(): v = val.strip() if v == none_string: newrecord[nam] = None elif v == false_string: newrecord[nam] = False elif v == true_string: newrecord[nam] = True else: try: newrecord[nam] = int(v) except: try: newrecord[nam] = float(v) except: newrecord[nam] = time(v) data.append(newrecord) return [TabSheet(data, list(reader.fieldnames if reader.fieldnames else []), SECTION)] # must have headers lookingfor = "headers" headers: List[str] = [] title = "" igs = chr(0x1D) # ascii/ebcdic group seperator pre = "" for line in inp: if "\\" in line: esc = line.rstrip().split("\\") if esc[-1] == "": pre = line.rstrip() # line continuation continue if pre: line = pre + "\n" + line pre = "" if "\\" in line: groups = [("\\" if not g else igs + g[1:] if g.startswith(tab) else g) for g in ("\n" + line).split("\\")] line = ("".join(groups))[1:] # check decoded row logg.debug("line = %s", line.replace(igs, "{tab}").replace("\n", "{br}")) if not line.rstrip() or (tab and not line.startswith(tab)): if headers: if not title: title = "-%s" % (len(tabs) + 1) tabs.append(TabSheet(data, headers, title)) title = "" headers = [] data = [] lookingfor = "headers" if line.startswith("## "): title = line[3:].strip().replace(igs, tab) continue vals = [tad.strip().replace(igs, tab) for tad in line.split(tab)] if tab: del vals[0] if lookingfor == "headers": headers = [header.strip() for header in vals] lookingfor = "divider" continue elif lookingfor == "divider": lookingfor = "data" if re.match(r"^ *:*--*:* *$", vals[0]): continue record: Dict[str, CellValue] = {} for col, val in enumerate(vals): v = val.strip() if col >= len(headers): continue colname = headers[col] if v == none_string: record[colname] = None elif v == false_string: record[colname] = False elif v == true_string: record[colname] = True else: try: record[colname] = int(v) except: try: record[colname] = float(v) except Exception as e: record[colname] = time(v) data.append(record) if headers: if not title: title = "-%s" % (len(tabs) + 1) tabs.append(TabSheet(data, headers, title)) return tabs def print_tablist(output: Union[TextIO, str], tablist: List[TabSheet] = [], selected: List[str] = [], # .. *, tab: Optional[str] = None, padding: Optional[str] = None, minwidth: int = 0, section: str = NIX, page: int = 0, noheaders: bool = False, unique: bool = False, defaultformat: str = "") -> str: def extension(filename: str) -> Optional[str]: _, ext = fs.splitext(filename.lower()) if ext: return ext[1:] return None if page: if page > len(tablist): logg.error("selected -%i page, but input has only %s pages", page, len(tablist)) tabsheets = [] else: tabsheets = [tablist[page - 1]] elif section: tabsheets = [] tabsheetnames = [] for tabsheet in tablist: tabsheetnames += [tabsheet.title] if tabsheet.title == section: tabsheets += [tabsheet] if not tabsheets: logg.error("selected '-: %s' page, but input has only -: %s", section, " ".join(tabsheetnames)) else: tabsheets = tablist if len(tabsheets) == 1: if tabsheets[0].title: logg.info(" ## %s", tabsheets[0].title) title = section if isinstance(section, str) else NIX return print_tabtotext(output, tabsheets[0].data, tabsheets[0].headers, selected, tab=tab, padding=padding, minwidth=minwidth, section=title, noheaders=noheaders, unique=unique, defaultformat=defaultformat) selected_fmt = fmt_selected(selected) if isinstance(output, TextIO) or isinstance(output, StringIO): out = output fmt = defaultformat or selected_fmt done = "stream" elif "." in output: fmt = extension(output) or defaultformat or selected_fmt if fmt in ["xls", "xlsx", "XLS", "XLSX"]: wb1 = tablistmake_workbook(tabsheets, selected) # type: ignore[arg-type] if wb1: wb1.save(output) return "tabxlsx (%s tables)" % len(wb1.worksheets) return "tabxlsx" out = open(output, "wt", encoding="utf-8") done = output else: fmt = output or defaultformat or selected_fmt out = sys.stdout done = output result: List[str] = [] for tabsheet in tabsheets: if tabsheet.title: logg.info(" ## %s", tabsheet.title) text = tabtotext(tabsheet.data, tabsheet.headers, selected, fmt=fmt, tab=tab, padding=padding, minwidth=minwidth, section=tabsheet.title, noheaders=noheaders, unique=unique, defaultformat=defaultformat) result.append(text) if fmt in ["jsn", "json", "JSN", "JSON"]: for part in range(len(result) - 1): if result[part].endswith("]}"): result[part] = result[part][:-1] + "," for part in range(1, len(result)): if result[part].startswith('{"'): result[part] = result[part][1:] for lines in result: for line in lines: out.write(line) if noheaders or "@noheaders" in selected or "@dat" in selected: return "" return ": %s results %s (%s tables)" % (len(result), done, len(tabsheets)) if __name__ == "__main__": from optparse import OptionParser, Option import sys def numbered_option(option: Option, arg: str, value: str, parser: OptionParser) -> None: setattr(parser.values, (option.dest or "numbered"), int(arg[1:])) prog = os.path.basename(__file__) cmdline = OptionParser(prog + " [-options] input(.xlsx|.csv) [:page] [column...] [@list]", epilog=__doc__) cmdline.formatter.max_help_position = 29 cmdline.add_option("--tables", "--sheetnames", "--sectionnames", "--listnames", "--onlypages", dest="onlypages", action="store_true") cmdline.add_option("-:", "--sheet", "--section", "--listname", "--page", metavar="NAME", dest="section") cmdline.add_option("-1", "-2", "-3", "-4", "-5", "-6", dest="page", action="callback", callback=numbered_option, help="numbered page instead of ':name' or '-: name'") cmdline.add_option("-v", "--verbose", action="count", default=0, help="increase logging level") cmdline.add_option("-^", "--quiet", action="count", default=0, help="decrease logging level") cmdline.add_option("-m", "--minwidth", metavar="N", default=0, help="override minwith of cells for format") cmdline.add_option("-p", "--padding", metavar="C", default=None, help="override cell padding for format") cmdline.add_option("-t", "--tabulator", metavar="C", default=None, help="override tabulator for format") cmdline.add_option("-T", "--asciitab", action="store_true", default=False, help="use ascii HT tabulator (csv,md,tab,wide)") cmdline.add_option("-N", "--notab", action="store_true", default=False, help="do not use tabulator (csv,md,tab,wide)") cmdline.add_option("-P", "--nopadding", action="store_true", default=False, help="do not use padding (csv,md,tab,wide)") cmdline.add_option("-D", "--noheaders", action="store_true", default=False, help="do not print headers (csv,md,tab,wide)") cmdline.add_option("-U", "--unique", action="store_true", default=False, help="remove same lines in sorted --labels") cmdline.add_option("-f", "--file", metavar="INPUT", dest="files", action="append", default=[], help="combine tables (instead of first argument)") cmdline.add_option("-i", "--input", metavar="CSV", dest="inputformat", default="", help="fix input format (instead of autodetection)") cmdline.add_option("-o", "--output", metavar="CSV", default="", help="data|text|md|tab|csv or file.csv (see below)") cmdline.add_option("--ifs", action="store_true", help="-o ifs: $IFS-seperated table (with headers)") cmdline.add_option("--dat", action="store_true", help="-o dat: $IFS-seperated table (without headers)") cmdline.add_option("--data", action="store_true", help="-o data: tab-seperated without headers") cmdline.add_option("--text", action="store_true", help="-o text: space-seperated without headers") cmdline.add_option("--list", action="store_true", help="-o text: semicolon-seperated without headers") cmdline.add_option("--wide", action="store_true", help="-o wide: aligned space-separated table") cmdline.add_option("--read", action="store_true", help="-o read: aligned esc-space-separated no-headers") cmdline.add_option("--md", action="store_true", help="-o md: aligned markdown table (with '|' delim)") cmdline.add_option("--markdown", action="store_true", help="-o markdown: markdown with extra '|' at end") cmdline.add_option("--tabs", action="store_true", help="-o tabs: aligned tab-seperated table (not '|')") cmdline.add_option("--tab", action="store_true", help="-o tab: aligned tab-seperated table (like --dat)") cmdline.add_option("--csv", "--scsv", action="store_true", help="-o csv: semicolon-seperated csv table") cmdline.add_option("--xls", "--xlsx", action="store_true", help="-o xls: for filename.xlsx (else comma-csv)") opt, args = cmdline.parse_args() basicConfig(level=max(0, ERROR - 10 * opt.verbose + 10 * opt.quiet)) filenames: List[str] = opt.files if not filenames and args: filenames = [args[0]] args = args[1:] page: int = int(opt.page or 0) section: str = opt.section or "" if not section and args and args[0].startswith(":"): section = args[0][1:].strip() args = args[1:] selected = args minwidth = int(opt.minwidth) padding = opt.padding if not opt.nopadding else "" tab = "\t" if opt.asciitab else opt.tabulator if not opt.notab else "" if "." in opt.output: output = opt.output defaultformat = "" else: output = "" defaultformat = opt.output if not defaultformat: if opt.ifs: defaultformat = "ifs" if opt.dat: defaultformat = "dat" if opt.data: defaultformat = "data" if opt.text: defaultformat = "text" if opt.list: defaultformat = "list" if opt.wide: defaultformat = "wide" if opt.md: defaultformat = "md" if opt.markdown: defaultformat = "markdown" if opt.tabs: defaultformat = "tabs" if opt.tab: defaultformat = "tab" if opt.csv: defaultformat = "csv" if opt.xls: defaultformat = "xls" inputformat = opt.inputformat or "xslx" tablist: List[TabSheet] = [] for filename in filenames: tablist += tablistfile(filename, defaultformat=inputformat) if opt.onlypages: for tabsheet0 in tablist: print(tabsheet0.title) else: print_tablist(output, tablist, selected, padding=padding, tab=tab, noheaders=opt.noheaders, unique=opt.unique, minwidth=minwidth, section=section, page=page, defaultformat=defaultformat)