1857 lines
59 KiB
Python
1857 lines
59 KiB
Python
###############################################################################
|
|
#
|
|
# Workbook - A class for writing the Excel XLSX Workbook file.
|
|
#
|
|
# SPDX-License-Identifier: BSD-2-Clause
|
|
#
|
|
# Copyright (c) 2013-2025, John McNamara, jmcnamara@cpan.org
|
|
#
|
|
|
|
# Standard packages.
|
|
import operator
|
|
import os
|
|
import re
|
|
import time
|
|
from datetime import datetime, timezone
|
|
from decimal import Decimal
|
|
from fractions import Fraction
|
|
from warnings import warn
|
|
from zipfile import ZIP_DEFLATED, LargeZipFile, ZipFile, ZipInfo
|
|
|
|
# Package imports.
|
|
from . import xmlwriter
|
|
from .chart_area import ChartArea
|
|
from .chart_bar import ChartBar
|
|
from .chart_column import ChartColumn
|
|
from .chart_doughnut import ChartDoughnut
|
|
from .chart_line import ChartLine
|
|
from .chart_pie import ChartPie
|
|
from .chart_radar import ChartRadar
|
|
from .chart_scatter import ChartScatter
|
|
from .chart_stock import ChartStock
|
|
from .chartsheet import Chartsheet
|
|
from .exceptions import (
|
|
DuplicateWorksheetName,
|
|
FileCreateError,
|
|
FileSizeError,
|
|
InvalidWorksheetName,
|
|
)
|
|
from .format import Format
|
|
from .packager import Packager
|
|
from .sharedstrings import SharedStringTable
|
|
from .utility import _get_image_properties, xl_cell_to_rowcol
|
|
from .worksheet import Worksheet
|
|
|
|
|
|
class Workbook(xmlwriter.XMLwriter):
|
|
"""
|
|
A class for writing the Excel XLSX Workbook file.
|
|
|
|
|
|
"""
|
|
|
|
###########################################################################
|
|
#
|
|
# Public API.
|
|
#
|
|
###########################################################################
|
|
chartsheet_class = Chartsheet
|
|
worksheet_class = Worksheet
|
|
|
|
def __init__(self, filename=None, options=None):
|
|
"""
|
|
Constructor.
|
|
|
|
"""
|
|
if options is None:
|
|
options = {}
|
|
|
|
super().__init__()
|
|
|
|
self.filename = filename
|
|
|
|
self.tmpdir = options.get("tmpdir", None)
|
|
self.date_1904 = options.get("date_1904", False)
|
|
self.strings_to_numbers = options.get("strings_to_numbers", False)
|
|
self.strings_to_formulas = options.get("strings_to_formulas", True)
|
|
self.strings_to_urls = options.get("strings_to_urls", True)
|
|
self.nan_inf_to_errors = options.get("nan_inf_to_errors", False)
|
|
self.default_date_format = options.get("default_date_format", None)
|
|
self.constant_memory = options.get("constant_memory", False)
|
|
self.in_memory = options.get("in_memory", False)
|
|
self.excel2003_style = options.get("excel2003_style", False)
|
|
self.remove_timezone = options.get("remove_timezone", False)
|
|
self.use_future_functions = options.get("use_future_functions", False)
|
|
self.default_format_properties = options.get("default_format_properties", {})
|
|
|
|
self.max_url_length = options.get("max_url_length", 2079)
|
|
if self.max_url_length < 255:
|
|
self.max_url_length = 2079
|
|
|
|
if options.get("use_zip64"):
|
|
self.allow_zip64 = True
|
|
else:
|
|
self.allow_zip64 = False
|
|
|
|
self.worksheet_meta = WorksheetMeta()
|
|
self.selected = 0
|
|
self.fileclosed = 0
|
|
self.filehandle = None
|
|
self.internal_fh = 0
|
|
self.sheet_name = "Sheet"
|
|
self.chart_name = "Chart"
|
|
self.sheetname_count = 0
|
|
self.chartname_count = 0
|
|
self.worksheets_objs = []
|
|
self.charts = []
|
|
self.drawings = []
|
|
self.sheetnames = {}
|
|
self.formats = []
|
|
self.xf_formats = []
|
|
self.xf_format_indices = {}
|
|
self.dxf_formats = []
|
|
self.dxf_format_indices = {}
|
|
self.palette = []
|
|
self.font_count = 0
|
|
self.num_formats = []
|
|
self.defined_names = []
|
|
self.named_ranges = []
|
|
self.custom_colors = []
|
|
self.doc_properties = {}
|
|
self.custom_properties = []
|
|
self.createtime = datetime.now(timezone.utc)
|
|
self.num_vml_files = 0
|
|
self.num_comment_files = 0
|
|
self.x_window = 240
|
|
self.y_window = 15
|
|
self.window_width = 16095
|
|
self.window_height = 9660
|
|
self.tab_ratio = 600
|
|
self.str_table = SharedStringTable()
|
|
self.vba_project = None
|
|
self.vba_project_is_stream = False
|
|
self.vba_project_signature = None
|
|
self.vba_project_signature_is_stream = False
|
|
self.vba_codename = None
|
|
self.image_types = {}
|
|
self.images = []
|
|
self.border_count = 0
|
|
self.fill_count = 0
|
|
self.drawing_count = 0
|
|
self.calc_mode = "auto"
|
|
self.calc_on_load = True
|
|
self.calc_id = 124519
|
|
self.has_comments = False
|
|
self.read_only = 0
|
|
self.has_metadata = False
|
|
self.has_embedded_images = False
|
|
self.has_dynamic_functions = False
|
|
self.has_embedded_descriptions = False
|
|
self.embedded_images = EmbeddedImages()
|
|
self.feature_property_bags = set()
|
|
|
|
# We can't do 'constant_memory' mode while doing 'in_memory' mode.
|
|
if self.in_memory:
|
|
self.constant_memory = False
|
|
|
|
# Add the default cell format.
|
|
if self.excel2003_style:
|
|
self.add_format({"xf_index": 0, "font_family": 0})
|
|
else:
|
|
self.add_format({"xf_index": 0})
|
|
|
|
# Add a default URL format.
|
|
self.default_url_format = self.add_format({"hyperlink": True})
|
|
|
|
# Add the default date format.
|
|
if self.default_date_format is not None:
|
|
self.default_date_format = self.add_format(
|
|
{"num_format": self.default_date_format}
|
|
)
|
|
|
|
def __enter__(self):
|
|
"""Return self object to use with "with" statement."""
|
|
return self
|
|
|
|
def __exit__(self, type, value, traceback):
|
|
# pylint: disable=redefined-builtin
|
|
"""Close workbook when exiting "with" statement."""
|
|
self.close()
|
|
|
|
def add_worksheet(self, name=None, worksheet_class=None):
|
|
"""
|
|
Add a new worksheet to the Excel workbook.
|
|
|
|
Args:
|
|
name: The worksheet name. Defaults to 'Sheet1', etc.
|
|
|
|
Returns:
|
|
Reference to a worksheet object.
|
|
|
|
"""
|
|
if worksheet_class is None:
|
|
worksheet_class = self.worksheet_class
|
|
|
|
return self._add_sheet(name, worksheet_class=worksheet_class)
|
|
|
|
def add_chartsheet(self, name=None, chartsheet_class=None):
|
|
"""
|
|
Add a new chartsheet to the Excel workbook.
|
|
|
|
Args:
|
|
name: The chartsheet name. Defaults to 'Sheet1', etc.
|
|
|
|
Returns:
|
|
Reference to a chartsheet object.
|
|
|
|
"""
|
|
if chartsheet_class is None:
|
|
chartsheet_class = self.chartsheet_class
|
|
|
|
return self._add_sheet(name, worksheet_class=chartsheet_class)
|
|
|
|
def add_format(self, properties=None):
|
|
"""
|
|
Add a new Format to the Excel Workbook.
|
|
|
|
Args:
|
|
properties: The format properties.
|
|
|
|
Returns:
|
|
Reference to a Format object.
|
|
|
|
"""
|
|
format_properties = self.default_format_properties.copy()
|
|
|
|
if self.excel2003_style:
|
|
format_properties = {"font_name": "Arial", "font_size": 10, "theme": 1 * -1}
|
|
|
|
if properties:
|
|
format_properties.update(properties)
|
|
|
|
xf_format = Format(
|
|
format_properties, self.xf_format_indices, self.dxf_format_indices
|
|
)
|
|
|
|
# Store the format reference.
|
|
self.formats.append(xf_format)
|
|
|
|
return xf_format
|
|
|
|
def add_chart(self, options):
|
|
"""
|
|
Create a chart object.
|
|
|
|
Args:
|
|
options: The chart type and subtype options.
|
|
|
|
Returns:
|
|
Reference to a Chart object.
|
|
|
|
"""
|
|
|
|
# Type must be specified so we can create the required chart instance.
|
|
chart_type = options.get("type")
|
|
if chart_type is None:
|
|
warn("Chart type must be defined in add_chart()")
|
|
return None
|
|
|
|
if chart_type == "area":
|
|
chart = ChartArea(options)
|
|
elif chart_type == "bar":
|
|
chart = ChartBar(options)
|
|
elif chart_type == "column":
|
|
chart = ChartColumn(options)
|
|
elif chart_type == "doughnut":
|
|
chart = ChartDoughnut()
|
|
elif chart_type == "line":
|
|
chart = ChartLine(options)
|
|
elif chart_type == "pie":
|
|
chart = ChartPie()
|
|
elif chart_type == "radar":
|
|
chart = ChartRadar(options)
|
|
elif chart_type == "scatter":
|
|
chart = ChartScatter(options)
|
|
elif chart_type == "stock":
|
|
chart = ChartStock()
|
|
else:
|
|
warn(f"Unknown chart type '{chart_type}' in add_chart()")
|
|
return None
|
|
|
|
# Set the embedded chart name if present.
|
|
if "name" in options:
|
|
chart.chart_name = options["name"]
|
|
|
|
chart.embedded = True
|
|
chart.date_1904 = self.date_1904
|
|
chart.remove_timezone = self.remove_timezone
|
|
|
|
self.charts.append(chart)
|
|
|
|
return chart
|
|
|
|
def add_vba_project(self, vba_project, is_stream=False):
|
|
"""
|
|
Add a vbaProject binary to the Excel workbook.
|
|
|
|
Args:
|
|
vba_project: The vbaProject binary file name.
|
|
is_stream: vba_project is an in memory byte stream.
|
|
|
|
Returns:
|
|
0 on success.
|
|
|
|
"""
|
|
if not is_stream and not os.path.exists(vba_project):
|
|
warn(f"VBA project binary file '{vba_project}' not found.")
|
|
return -1
|
|
|
|
if self.vba_codename is None:
|
|
self.vba_codename = "ThisWorkbook"
|
|
|
|
self.vba_project = vba_project
|
|
self.vba_project_is_stream = is_stream
|
|
|
|
return 0
|
|
|
|
def add_signed_vba_project(
|
|
self, vba_project, signature, project_is_stream=False, signature_is_stream=False
|
|
):
|
|
"""
|
|
Add a vbaProject binary and a vbaProjectSignature binary to the
|
|
Excel workbook.
|
|
|
|
Args:
|
|
vba_project: The vbaProject binary file name.
|
|
signature: The vbaProjectSignature binary file name.
|
|
project_is_stream: vba_project is an in memory byte stream.
|
|
signature_is_stream: signature is an in memory byte stream.
|
|
|
|
Returns:
|
|
0 on success.
|
|
|
|
"""
|
|
if self.add_vba_project(vba_project, project_is_stream) == -1:
|
|
return -1
|
|
|
|
if not signature_is_stream and not os.path.exists(signature):
|
|
warn(f"VBA project signature binary file '{signature}' not found.")
|
|
return -1
|
|
|
|
self.vba_project_signature = signature
|
|
self.vba_project_signature_is_stream = signature_is_stream
|
|
|
|
return 0
|
|
|
|
def close(self):
|
|
"""
|
|
Call finalization code and close file.
|
|
|
|
Args:
|
|
None.
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
# pylint: disable=raise-missing-from
|
|
if not self.fileclosed:
|
|
try:
|
|
self._store_workbook()
|
|
except IOError as e:
|
|
raise FileCreateError(e)
|
|
except LargeZipFile:
|
|
raise FileSizeError(
|
|
"Filesize would require ZIP64 extensions. "
|
|
"Use workbook.use_zip64()."
|
|
)
|
|
|
|
self.fileclosed = True
|
|
|
|
# Ensure all constant_memory temp files are closed.
|
|
if self.constant_memory:
|
|
for worksheet in self.worksheets():
|
|
worksheet._opt_close()
|
|
|
|
else:
|
|
warn("Calling close() on already closed file.")
|
|
|
|
def set_size(self, width, height):
|
|
"""
|
|
Set the size of a workbook window.
|
|
|
|
Args:
|
|
width: Width of the window in pixels.
|
|
height: Height of the window in pixels.
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
# Convert the width/height to twips at 96 dpi.
|
|
if width:
|
|
self.window_width = int(width * 1440 / 96)
|
|
else:
|
|
self.window_width = 16095
|
|
|
|
if height:
|
|
self.window_height = int(height * 1440 / 96)
|
|
else:
|
|
self.window_height = 9660
|
|
|
|
def set_tab_ratio(self, tab_ratio=None):
|
|
"""
|
|
Set the ratio between worksheet tabs and the horizontal slider.
|
|
|
|
Args:
|
|
tab_ratio: The tab ratio, 0 <= tab_ratio <= 100
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
if tab_ratio is None:
|
|
return
|
|
|
|
if tab_ratio < 0 or tab_ratio > 100:
|
|
warn(f"Tab ratio '{tab_ratio}' outside: 0 <= tab_ratio <= 100")
|
|
else:
|
|
self.tab_ratio = int(tab_ratio * 10)
|
|
|
|
def set_properties(self, properties):
|
|
"""
|
|
Set the document properties such as Title, Author etc.
|
|
|
|
Args:
|
|
properties: Dictionary of document properties.
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
self.doc_properties = properties
|
|
|
|
def set_custom_property(self, name, value, property_type=None):
|
|
"""
|
|
Set a custom document property.
|
|
|
|
Args:
|
|
name: The name of the custom property.
|
|
value: The value of the custom property.
|
|
property_type: The type of the custom property. Optional.
|
|
|
|
Returns:
|
|
0 on success.
|
|
|
|
"""
|
|
if name is None or value is None:
|
|
warn(
|
|
"The name and value parameters must be non-None in "
|
|
"set_custom_property()"
|
|
)
|
|
return -1
|
|
|
|
if property_type is None:
|
|
# Determine the property type from the Python type.
|
|
if isinstance(value, bool):
|
|
property_type = "bool"
|
|
elif isinstance(value, datetime):
|
|
property_type = "date"
|
|
elif isinstance(value, int):
|
|
property_type = "number_int"
|
|
elif isinstance(value, (float, int, Decimal, Fraction)):
|
|
property_type = "number"
|
|
else:
|
|
property_type = "text"
|
|
|
|
if property_type == "date":
|
|
value = value.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
|
|
if property_type == "text" and len(value) > 255:
|
|
warn(
|
|
f"Length of 'value' parameter exceeds Excel's limit of 255 "
|
|
f"characters in set_custom_property(): '{value}'"
|
|
)
|
|
|
|
if len(name) > 255:
|
|
warn(
|
|
f"Length of 'name' parameter exceeds Excel's limit of 255 "
|
|
f"characters in set_custom_property(): '{name}'"
|
|
)
|
|
|
|
self.custom_properties.append((name, value, property_type))
|
|
|
|
return 0
|
|
|
|
def set_calc_mode(self, mode, calc_id=None):
|
|
"""
|
|
Set the Excel calculation mode for the workbook.
|
|
|
|
Args:
|
|
mode: String containing one of:
|
|
* manual
|
|
* auto_except_tables
|
|
* auto
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
self.calc_mode = mode
|
|
|
|
if mode == "manual":
|
|
self.calc_on_load = False
|
|
elif mode == "auto_except_tables":
|
|
self.calc_mode = "autoNoTable"
|
|
|
|
# Leave undocumented for now. Rarely required.
|
|
if calc_id:
|
|
self.calc_id = calc_id
|
|
|
|
def define_name(self, name, formula):
|
|
# Create a defined name in Excel. We handle global/workbook level
|
|
# names and local/worksheet names.
|
|
"""
|
|
Create a defined name in the workbook.
|
|
|
|
Args:
|
|
name: The defined name.
|
|
formula: The cell or range that the defined name refers to.
|
|
|
|
Returns:
|
|
0 on success.
|
|
|
|
"""
|
|
sheet_index = None
|
|
sheetname = ""
|
|
|
|
# Remove the = sign from the formula if it exists.
|
|
if formula.startswith("="):
|
|
formula = formula.lstrip("=")
|
|
|
|
# Local defined names are formatted like "Sheet1!name".
|
|
sheet_parts = re.compile(r"^([^!]+)!([^!]+)$")
|
|
match = sheet_parts.match(name)
|
|
|
|
if match:
|
|
sheetname = match.group(1)
|
|
name = match.group(2)
|
|
sheet_index = self._get_sheet_index(sheetname)
|
|
|
|
# Warn if the sheet index wasn't found.
|
|
if sheet_index is None:
|
|
warn(f"Unknown sheet name '{sheetname}' in defined_name()")
|
|
return -1
|
|
else:
|
|
# Use -1 to indicate global names.
|
|
sheet_index = -1
|
|
|
|
# Warn if the defined name contains invalid chars as defined by Excel.
|
|
if not re.match(r"^[\w\\][\w\\.]*$", name, re.UNICODE) or re.match(
|
|
r"^\d", name
|
|
):
|
|
warn(f"Invalid Excel characters in defined_name(): '{name}'")
|
|
return -1
|
|
|
|
# Warn if the defined name looks like a cell name.
|
|
if re.match(r"^[a-zA-Z][a-zA-Z]?[a-dA-D]?\d+$", name):
|
|
warn(f"Name looks like a cell name in defined_name(): '{name}'")
|
|
return -1
|
|
|
|
# Warn if the name looks like a R1C1 cell reference.
|
|
if re.match(r"^[rcRC]$", name) or re.match(r"^[rcRC]\d+[rcRC]\d+$", name):
|
|
warn(f"Invalid name '{name}' like a RC cell ref in defined_name()")
|
|
return -1
|
|
|
|
self.defined_names.append([name, sheet_index, formula, False])
|
|
|
|
return 0
|
|
|
|
def worksheets(self):
|
|
"""
|
|
Return a list of the worksheet objects in the workbook.
|
|
|
|
Args:
|
|
None.
|
|
|
|
Returns:
|
|
A list of worksheet objects.
|
|
|
|
"""
|
|
return self.worksheets_objs
|
|
|
|
def get_worksheet_by_name(self, name):
|
|
"""
|
|
Return a worksheet object in the workbook using the sheetname.
|
|
|
|
Args:
|
|
name: The name of the worksheet.
|
|
|
|
Returns:
|
|
A worksheet object or None.
|
|
|
|
"""
|
|
return self.sheetnames.get(name)
|
|
|
|
def get_default_url_format(self):
|
|
"""
|
|
Get the default url format used when a user defined format isn't
|
|
specified with write_url(). The format is the hyperlink style defined
|
|
by Excel for the default theme.
|
|
|
|
Args:
|
|
None.
|
|
|
|
Returns:
|
|
A format object.
|
|
|
|
"""
|
|
return self.default_url_format
|
|
|
|
def use_zip64(self):
|
|
"""
|
|
Allow ZIP64 extensions when writing xlsx file zip container.
|
|
|
|
Args:
|
|
None.
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
self.allow_zip64 = True
|
|
|
|
def set_vba_name(self, name=None):
|
|
"""
|
|
Set the VBA name for the workbook. By default the workbook is referred
|
|
to as ThisWorkbook in VBA.
|
|
|
|
Args:
|
|
name: The VBA name for the workbook.
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
if name is not None:
|
|
self.vba_codename = name
|
|
else:
|
|
self.vba_codename = "ThisWorkbook"
|
|
|
|
def read_only_recommended(self):
|
|
"""
|
|
Set the Excel "Read-only recommended" option when saving a file.
|
|
|
|
Args:
|
|
None.
|
|
|
|
Returns:
|
|
Nothing.
|
|
|
|
"""
|
|
self.read_only = 2
|
|
|
|
###########################################################################
|
|
#
|
|
# Private API.
|
|
#
|
|
###########################################################################
|
|
|
|
def _assemble_xml_file(self):
|
|
# Assemble and write the XML file.
|
|
|
|
# Prepare format object for passing to Style.pm.
|
|
self._prepare_format_properties()
|
|
|
|
# Write the XML declaration.
|
|
self._xml_declaration()
|
|
|
|
# Write the workbook element.
|
|
self._write_workbook()
|
|
|
|
# Write the fileVersion element.
|
|
self._write_file_version()
|
|
|
|
# Write the fileSharing element.
|
|
self._write_file_sharing()
|
|
|
|
# Write the workbookPr element.
|
|
self._write_workbook_pr()
|
|
|
|
# Write the bookViews element.
|
|
self._write_book_views()
|
|
|
|
# Write the sheets element.
|
|
self._write_sheets()
|
|
|
|
# Write the workbook defined names.
|
|
self._write_defined_names()
|
|
|
|
# Write the calcPr element.
|
|
self._write_calc_pr()
|
|
|
|
# Close the workbook tag.
|
|
self._xml_end_tag("workbook")
|
|
|
|
# Close the file.
|
|
self._xml_close()
|
|
|
|
def _store_workbook(self):
|
|
# pylint: disable=consider-using-with
|
|
# Create the xlsx/zip file.
|
|
try:
|
|
xlsx_file = ZipFile(
|
|
self.filename,
|
|
"w",
|
|
compression=ZIP_DEFLATED,
|
|
allowZip64=self.allow_zip64,
|
|
)
|
|
except IOError as e:
|
|
raise e
|
|
|
|
# Assemble worksheets into a workbook.
|
|
packager = self._get_packager()
|
|
|
|
# Add a default worksheet if non have been added.
|
|
if not self.worksheets():
|
|
self.add_worksheet()
|
|
|
|
# Ensure that at least one worksheet has been selected.
|
|
if self.worksheet_meta.activesheet == 0:
|
|
self.worksheets_objs[0].selected = 1
|
|
self.worksheets_objs[0].hidden = 0
|
|
|
|
# Set the active sheet.
|
|
for sheet in self.worksheets():
|
|
if sheet.index == self.worksheet_meta.activesheet:
|
|
sheet.active = 1
|
|
|
|
# Set the sheet vba_codename the workbook has a vbaProject binary.
|
|
if self.vba_project:
|
|
for sheet in self.worksheets():
|
|
if sheet.vba_codename is None:
|
|
sheet.set_vba_name()
|
|
|
|
# Convert the SST strings data structure.
|
|
self._prepare_sst_string_data()
|
|
|
|
# Prepare the worksheet VML elements such as comments and buttons.
|
|
self._prepare_vml()
|
|
|
|
# Set the defined names for the worksheets such as Print Titles.
|
|
self._prepare_defined_names()
|
|
|
|
# Prepare the drawings, charts and images.
|
|
self._prepare_drawings()
|
|
|
|
# Add cached data to charts.
|
|
self._add_chart_data()
|
|
|
|
# Prepare the worksheet tables.
|
|
self._prepare_tables()
|
|
|
|
# Prepare the metadata file links.
|
|
self._prepare_metadata()
|
|
|
|
# Package the workbook.
|
|
packager._add_workbook(self)
|
|
packager._set_tmpdir(self.tmpdir)
|
|
packager._set_in_memory(self.in_memory)
|
|
xml_files = packager._create_package()
|
|
|
|
# Free up the Packager object.
|
|
packager = None
|
|
|
|
# Add XML sub-files to the Zip file with their Excel filename.
|
|
for file_id, file_data in enumerate(xml_files):
|
|
os_filename, xml_filename, is_binary = file_data
|
|
|
|
if self.in_memory:
|
|
# Set sub-file timestamp to Excel's timestamp of 1/1/1980.
|
|
zipinfo = ZipInfo(xml_filename, (1980, 1, 1, 0, 0, 0))
|
|
|
|
# Copy compression type from parent ZipFile.
|
|
zipinfo.compress_type = xlsx_file.compression
|
|
|
|
if is_binary:
|
|
xlsx_file.writestr(zipinfo, os_filename.getvalue())
|
|
else:
|
|
xlsx_file.writestr(zipinfo, os_filename.getvalue().encode("utf-8"))
|
|
else:
|
|
# The sub-files are tempfiles on disk, i.e, not in memory.
|
|
|
|
# Set sub-file timestamp to 31/1/1980 due to portability
|
|
# issues setting it to Excel's timestamp of 1/1/1980.
|
|
timestamp = time.mktime((1980, 1, 31, 0, 0, 0, 0, 0, -1))
|
|
os.utime(os_filename, (timestamp, timestamp))
|
|
|
|
try:
|
|
xlsx_file.write(os_filename, xml_filename)
|
|
os.remove(os_filename)
|
|
except LargeZipFile as e:
|
|
# Close open temp files on zipfile.LargeZipFile exception.
|
|
for i in range(file_id, len(xml_files) - 1):
|
|
os.remove(xml_files[i][0])
|
|
raise e
|
|
|
|
xlsx_file.close()
|
|
|
|
def _add_sheet(self, name, worksheet_class=None):
|
|
# Utility for shared code in add_worksheet() and add_chartsheet().
|
|
|
|
if worksheet_class:
|
|
worksheet = worksheet_class()
|
|
else:
|
|
worksheet = self.worksheet_class()
|
|
|
|
sheet_index = len(self.worksheets_objs)
|
|
name = self._check_sheetname(name, isinstance(worksheet, Chartsheet))
|
|
|
|
# Initialization data to pass to the worksheet.
|
|
init_data = {
|
|
"name": name,
|
|
"index": sheet_index,
|
|
"str_table": self.str_table,
|
|
"worksheet_meta": self.worksheet_meta,
|
|
"constant_memory": self.constant_memory,
|
|
"tmpdir": self.tmpdir,
|
|
"date_1904": self.date_1904,
|
|
"strings_to_numbers": self.strings_to_numbers,
|
|
"strings_to_formulas": self.strings_to_formulas,
|
|
"strings_to_urls": self.strings_to_urls,
|
|
"nan_inf_to_errors": self.nan_inf_to_errors,
|
|
"default_date_format": self.default_date_format,
|
|
"default_url_format": self.default_url_format,
|
|
"workbook_add_format": self.add_format,
|
|
"excel2003_style": self.excel2003_style,
|
|
"remove_timezone": self.remove_timezone,
|
|
"max_url_length": self.max_url_length,
|
|
"use_future_functions": self.use_future_functions,
|
|
"embedded_images": self.embedded_images,
|
|
}
|
|
|
|
worksheet._initialize(init_data)
|
|
|
|
self.worksheets_objs.append(worksheet)
|
|
self.sheetnames[name] = worksheet
|
|
|
|
return worksheet
|
|
|
|
def _check_sheetname(self, sheetname, is_chartsheet=False):
|
|
# Check for valid worksheet names. We check the length, if it contains
|
|
# any invalid chars and if the sheetname is unique in the workbook.
|
|
invalid_char = re.compile(r"[\[\]:*?/\\]")
|
|
|
|
# Increment the Sheet/Chart number used for default sheet names below.
|
|
if is_chartsheet:
|
|
self.chartname_count += 1
|
|
else:
|
|
self.sheetname_count += 1
|
|
|
|
# Supply default Sheet/Chart sheetname if none has been defined.
|
|
if sheetname is None or sheetname == "":
|
|
if is_chartsheet:
|
|
sheetname = self.chart_name + str(self.chartname_count)
|
|
else:
|
|
sheetname = self.sheet_name + str(self.sheetname_count)
|
|
|
|
# Check that sheet sheetname is <= 31. Excel limit.
|
|
if len(sheetname) > 31:
|
|
raise InvalidWorksheetName(
|
|
f"Excel worksheet name '{sheetname}' must be <= 31 chars."
|
|
)
|
|
|
|
# Check that sheetname doesn't contain any invalid characters.
|
|
if invalid_char.search(sheetname):
|
|
raise InvalidWorksheetName(
|
|
f"Invalid Excel character '[]:*?/\\' in sheetname '{sheetname}'."
|
|
)
|
|
|
|
# Check that sheetname doesn't start or end with an apostrophe.
|
|
if sheetname.startswith("'") or sheetname.endswith("'"):
|
|
raise InvalidWorksheetName(
|
|
f'Sheet name cannot start or end with an apostrophe "{sheetname}".'
|
|
)
|
|
|
|
# Check that the worksheet name doesn't already exist since this is a
|
|
# fatal Excel error. The check must be case insensitive like Excel.
|
|
for worksheet in self.worksheets():
|
|
if sheetname.lower() == worksheet.name.lower():
|
|
raise DuplicateWorksheetName(
|
|
f"Sheetname '{sheetname}', with case ignored, is already in use."
|
|
)
|
|
|
|
return sheetname
|
|
|
|
def _prepare_format_properties(self):
|
|
# Prepare all Format properties prior to passing them to styles.py.
|
|
|
|
# Separate format objects into XF and DXF formats.
|
|
self._prepare_formats()
|
|
|
|
# Set the font index for the format objects.
|
|
self._prepare_fonts()
|
|
|
|
# Set the number format index for the format objects.
|
|
self._prepare_num_formats()
|
|
|
|
# Set the border index for the format objects.
|
|
self._prepare_borders()
|
|
|
|
# Set the fill index for the format objects.
|
|
self._prepare_fills()
|
|
|
|
def _prepare_formats(self):
|
|
# Iterate through the XF Format objects and separate them into
|
|
# XF and DXF formats. The XF and DF formats then need to be sorted
|
|
# back into index order rather than creation order.
|
|
xf_formats = []
|
|
dxf_formats = []
|
|
|
|
# Sort into XF and DXF formats.
|
|
for xf_format in self.formats:
|
|
if xf_format.xf_index is not None:
|
|
xf_formats.append(xf_format)
|
|
|
|
if xf_format.dxf_index is not None:
|
|
dxf_formats.append(xf_format)
|
|
|
|
# Pre-extend the format lists.
|
|
self.xf_formats = [None] * len(xf_formats)
|
|
self.dxf_formats = [None] * len(dxf_formats)
|
|
|
|
# Rearrange formats into index order.
|
|
for xf_format in xf_formats:
|
|
index = xf_format.xf_index
|
|
self.xf_formats[index] = xf_format
|
|
|
|
for dxf_format in dxf_formats:
|
|
index = dxf_format.dxf_index
|
|
self.dxf_formats[index] = dxf_format
|
|
|
|
def _set_default_xf_indices(self):
|
|
# Set the default index for each format. Only used for testing.
|
|
|
|
formats = list(self.formats)
|
|
|
|
# Delete the default url format.
|
|
del formats[1]
|
|
|
|
# Skip the default date format if set.
|
|
if self.default_date_format is not None:
|
|
del formats[1]
|
|
|
|
# Set the remaining formats.
|
|
for xf_format in formats:
|
|
xf_format._get_xf_index()
|
|
|
|
def _prepare_fonts(self):
|
|
# Iterate through the XF Format objects and give them an index to
|
|
# non-default font elements.
|
|
fonts = {}
|
|
index = 0
|
|
|
|
for xf_format in self.xf_formats:
|
|
key = xf_format._get_font_key()
|
|
if key in fonts:
|
|
# Font has already been used.
|
|
xf_format.font_index = fonts[key]
|
|
xf_format.has_font = 0
|
|
else:
|
|
# This is a new font.
|
|
fonts[key] = index
|
|
xf_format.font_index = index
|
|
xf_format.has_font = 1
|
|
index += 1
|
|
|
|
self.font_count = index
|
|
|
|
# For DXF formats we only need to check if the properties have changed.
|
|
for xf_format in self.dxf_formats:
|
|
# The only font properties that can change for a DXF format are:
|
|
# color, bold, italic, underline and strikethrough.
|
|
if (
|
|
xf_format.font_color
|
|
or xf_format.bold
|
|
or xf_format.italic
|
|
or xf_format.underline
|
|
or xf_format.font_strikeout
|
|
):
|
|
xf_format.has_dxf_font = 1
|
|
|
|
def _prepare_num_formats(self):
|
|
# User defined records in Excel start from index 0xA4.
|
|
unique_num_formats = {}
|
|
num_formats = []
|
|
index = 164
|
|
|
|
for xf_format in self.xf_formats + self.dxf_formats:
|
|
num_format = xf_format.num_format
|
|
|
|
# Check if num_format is an index to a built-in number format.
|
|
if not isinstance(num_format, str):
|
|
num_format = int(num_format)
|
|
|
|
# Number format '0' is indexed as 1 in Excel.
|
|
if num_format == 0:
|
|
num_format = 1
|
|
|
|
xf_format.num_format_index = num_format
|
|
continue
|
|
|
|
if num_format == "0":
|
|
# Number format '0' is indexed as 1 in Excel.
|
|
xf_format.num_format_index = 1
|
|
continue
|
|
|
|
if num_format == "General":
|
|
# The 'General' format has an number format index of 0.
|
|
xf_format.num_format_index = 0
|
|
continue
|
|
|
|
if num_format in unique_num_formats:
|
|
# Number xf_format has already been used.
|
|
xf_format.num_format_index = unique_num_formats[num_format]
|
|
else:
|
|
# Add a new number xf_format.
|
|
unique_num_formats[num_format] = index
|
|
xf_format.num_format_index = index
|
|
index += 1
|
|
|
|
# Only increase font count for XF formats (not DXF formats).
|
|
if xf_format.xf_index:
|
|
num_formats.append(num_format)
|
|
|
|
self.num_formats = num_formats
|
|
|
|
def _prepare_borders(self):
|
|
# Iterate through the XF Format objects and give them an index to
|
|
# non-default border elements.
|
|
borders = {}
|
|
index = 0
|
|
|
|
for xf_format in self.xf_formats:
|
|
key = xf_format._get_border_key()
|
|
|
|
if key in borders:
|
|
# Border has already been used.
|
|
xf_format.border_index = borders[key]
|
|
xf_format.has_border = 0
|
|
else:
|
|
# This is a new border.
|
|
borders[key] = index
|
|
xf_format.border_index = index
|
|
xf_format.has_border = 1
|
|
index += 1
|
|
|
|
self.border_count = index
|
|
|
|
# For DXF formats we only need to check if the properties have changed.
|
|
has_border = re.compile(r"[^0:]")
|
|
|
|
for xf_format in self.dxf_formats:
|
|
key = xf_format._get_border_key()
|
|
|
|
if has_border.search(key):
|
|
xf_format.has_dxf_border = 1
|
|
|
|
def _prepare_fills(self):
|
|
# Iterate through the XF Format objects and give them an index to
|
|
# non-default fill elements.
|
|
# The user defined fill properties start from 2 since there are 2
|
|
# default fills: patternType="none" and patternType="gray125".
|
|
fills = {}
|
|
index = 2 # Start from 2. See above.
|
|
|
|
# Add the default fills.
|
|
fills["0:0:0"] = 0
|
|
fills["17:0:0"] = 1
|
|
|
|
# Store the DXF colors separately since them may be reversed below.
|
|
for xf_format in self.dxf_formats:
|
|
if xf_format.pattern or xf_format.bg_color or xf_format.fg_color:
|
|
xf_format.has_dxf_fill = 1
|
|
xf_format.dxf_bg_color = xf_format.bg_color
|
|
xf_format.dxf_fg_color = xf_format.fg_color
|
|
|
|
for xf_format in self.xf_formats:
|
|
# The following logical statements jointly take care of special
|
|
# cases in relation to cell colors and patterns:
|
|
# 1. For a solid fill (_pattern == 1) Excel reverses the role of
|
|
# foreground and background colors, and
|
|
# 2. If the user specifies a foreground or background color
|
|
# without a pattern they probably wanted a solid fill, so we fill
|
|
# in the defaults.
|
|
if (
|
|
xf_format.pattern == 1
|
|
and xf_format.bg_color != 0
|
|
and xf_format.fg_color != 0
|
|
):
|
|
tmp = xf_format.fg_color
|
|
xf_format.fg_color = xf_format.bg_color
|
|
xf_format.bg_color = tmp
|
|
|
|
if (
|
|
xf_format.pattern <= 1
|
|
and xf_format.bg_color != 0
|
|
and xf_format.fg_color == 0
|
|
):
|
|
xf_format.fg_color = xf_format.bg_color
|
|
xf_format.bg_color = 0
|
|
xf_format.pattern = 1
|
|
|
|
if (
|
|
xf_format.pattern <= 1
|
|
and xf_format.bg_color == 0
|
|
and xf_format.fg_color != 0
|
|
):
|
|
xf_format.pattern = 1
|
|
|
|
key = xf_format._get_fill_key()
|
|
|
|
if key in fills:
|
|
# Fill has already been used.
|
|
xf_format.fill_index = fills[key]
|
|
xf_format.has_fill = 0
|
|
else:
|
|
# This is a new fill.
|
|
fills[key] = index
|
|
xf_format.fill_index = index
|
|
xf_format.has_fill = 1
|
|
index += 1
|
|
|
|
self.fill_count = index
|
|
|
|
def _has_feature_property_bags(self):
|
|
# Check for any format properties that require a feature bag. Currently
|
|
# this only applies to checkboxes.
|
|
if not self.feature_property_bags:
|
|
for xf_format in self.formats:
|
|
if xf_format.checkbox:
|
|
self.feature_property_bags.add("XFComplements")
|
|
|
|
if xf_format.dxf_index is not None and xf_format.checkbox:
|
|
self.feature_property_bags.add("DXFComplements")
|
|
|
|
return self.feature_property_bags
|
|
|
|
def _prepare_defined_names(self):
|
|
# Iterate through the worksheets and store any defined names in
|
|
# addition to any user defined names. Stores the defined names
|
|
# for the Workbook.xml and the named ranges for App.xml.
|
|
defined_names = self.defined_names
|
|
|
|
for sheet in self.worksheets():
|
|
# Check for Print Area settings.
|
|
if sheet.autofilter_area:
|
|
hidden = 1
|
|
sheet_range = sheet.autofilter_area
|
|
# Store the defined names.
|
|
defined_names.append(
|
|
["_xlnm._FilterDatabase", sheet.index, sheet_range, hidden]
|
|
)
|
|
|
|
# Check for Print Area settings.
|
|
if sheet.print_area_range:
|
|
hidden = 0
|
|
sheet_range = sheet.print_area_range
|
|
# Store the defined names.
|
|
defined_names.append(
|
|
["_xlnm.Print_Area", sheet.index, sheet_range, hidden]
|
|
)
|
|
|
|
# Check for repeat rows/cols referred to as Print Titles.
|
|
if sheet.repeat_col_range or sheet.repeat_row_range:
|
|
hidden = 0
|
|
sheet_range = ""
|
|
if sheet.repeat_col_range and sheet.repeat_row_range:
|
|
sheet_range = sheet.repeat_col_range + "," + sheet.repeat_row_range
|
|
else:
|
|
sheet_range = sheet.repeat_col_range + sheet.repeat_row_range
|
|
# Store the defined names.
|
|
defined_names.append(
|
|
["_xlnm.Print_Titles", sheet.index, sheet_range, hidden]
|
|
)
|
|
|
|
defined_names = self._sort_defined_names(defined_names)
|
|
self.defined_names = defined_names
|
|
self.named_ranges = self._extract_named_ranges(defined_names)
|
|
|
|
def _sort_defined_names(self, names):
|
|
# Sort the list of list of internal and user defined names in
|
|
# the same order as used by Excel.
|
|
|
|
# Add a normalize name string to each list for sorting.
|
|
for name_list in names:
|
|
(defined_name, _, sheet_name, _) = name_list
|
|
|
|
# Normalize the defined name by removing any leading '_xmln.'
|
|
# from internal names and lowercasing the string.
|
|
defined_name = defined_name.replace("_xlnm.", "").lower()
|
|
|
|
# Normalize the sheetname by removing the leading quote and
|
|
# lowercasing the string.
|
|
sheet_name = sheet_name.lstrip("'").lower()
|
|
|
|
name_list.append(defined_name + "::" + sheet_name)
|
|
|
|
# Sort based on the normalized key.
|
|
names.sort(key=operator.itemgetter(4))
|
|
|
|
# Remove the extra key used for sorting.
|
|
for name_list in names:
|
|
name_list.pop()
|
|
|
|
return names
|
|
|
|
def _prepare_drawings(self):
|
|
# Iterate through the worksheets and set up chart and image drawings.
|
|
chart_ref_id = 0
|
|
ref_id = 0
|
|
drawing_id = 0
|
|
image_ids = {}
|
|
header_image_ids = {}
|
|
background_ids = {}
|
|
|
|
# Store the image types for any embedded images.
|
|
for image_data in self.embedded_images.images:
|
|
image_type = image_data[1]
|
|
self.image_types[image_type] = True
|
|
if image_data[3]:
|
|
self.has_embedded_descriptions = True
|
|
|
|
image_ref_id = len(self.embedded_images.images)
|
|
|
|
for sheet in self.worksheets():
|
|
chart_count = len(sheet.charts)
|
|
image_count = len(sheet.images)
|
|
shape_count = len(sheet.shapes)
|
|
|
|
header_image_count = len(sheet.header_images)
|
|
footer_image_count = len(sheet.footer_images)
|
|
has_background = sheet.background_image
|
|
has_drawing = False
|
|
|
|
if not (
|
|
chart_count
|
|
or image_count
|
|
or shape_count
|
|
or header_image_count
|
|
or footer_image_count
|
|
or has_background
|
|
):
|
|
continue
|
|
|
|
# Don't increase the drawing_id header/footer images.
|
|
if chart_count or image_count or shape_count:
|
|
drawing_id += 1
|
|
has_drawing = True
|
|
|
|
# Prepare the background images.
|
|
if sheet.background_image:
|
|
if sheet.background_bytes:
|
|
filename = ""
|
|
image_data = sheet.background_image
|
|
else:
|
|
filename = sheet.background_image
|
|
image_data = None
|
|
|
|
(
|
|
image_type,
|
|
_,
|
|
_,
|
|
_,
|
|
_,
|
|
_,
|
|
digest,
|
|
) = _get_image_properties(filename, image_data)
|
|
|
|
self.image_types[image_type] = True
|
|
|
|
if digest in background_ids:
|
|
ref_id = background_ids[digest]
|
|
else:
|
|
image_ref_id += 1
|
|
ref_id = image_ref_id
|
|
background_ids[digest] = image_ref_id
|
|
self.images.append([filename, image_type, image_data])
|
|
|
|
sheet._prepare_background(ref_id, image_type)
|
|
|
|
# Prepare the worksheet images.
|
|
for index in range(image_count):
|
|
filename = sheet.images[index][2]
|
|
image_data = sheet.images[index][10]
|
|
(
|
|
image_type,
|
|
width,
|
|
height,
|
|
name,
|
|
x_dpi,
|
|
y_dpi,
|
|
digest,
|
|
) = _get_image_properties(filename, image_data)
|
|
|
|
self.image_types[image_type] = True
|
|
|
|
if digest in image_ids:
|
|
ref_id = image_ids[digest]
|
|
else:
|
|
image_ref_id += 1
|
|
ref_id = image_ref_id
|
|
image_ids[digest] = image_ref_id
|
|
self.images.append([filename, image_type, image_data])
|
|
|
|
sheet._prepare_image(
|
|
index,
|
|
ref_id,
|
|
drawing_id,
|
|
width,
|
|
height,
|
|
name,
|
|
image_type,
|
|
x_dpi,
|
|
y_dpi,
|
|
digest,
|
|
)
|
|
|
|
# Prepare the worksheet charts.
|
|
for index in range(chart_count):
|
|
chart_ref_id += 1
|
|
sheet._prepare_chart(index, chart_ref_id, drawing_id)
|
|
|
|
# Prepare the worksheet shapes.
|
|
for index in range(shape_count):
|
|
sheet._prepare_shape(index, drawing_id)
|
|
|
|
# Prepare the header images.
|
|
for index in range(header_image_count):
|
|
filename = sheet.header_images[index][0]
|
|
image_data = sheet.header_images[index][1]
|
|
position = sheet.header_images[index][2]
|
|
|
|
(
|
|
image_type,
|
|
width,
|
|
height,
|
|
name,
|
|
x_dpi,
|
|
y_dpi,
|
|
digest,
|
|
) = _get_image_properties(filename, image_data)
|
|
|
|
self.image_types[image_type] = True
|
|
|
|
if digest in header_image_ids:
|
|
ref_id = header_image_ids[digest]
|
|
else:
|
|
image_ref_id += 1
|
|
ref_id = image_ref_id
|
|
header_image_ids[digest] = image_ref_id
|
|
self.images.append([filename, image_type, image_data])
|
|
|
|
sheet._prepare_header_image(
|
|
ref_id,
|
|
width,
|
|
height,
|
|
name,
|
|
image_type,
|
|
position,
|
|
x_dpi,
|
|
y_dpi,
|
|
digest,
|
|
)
|
|
|
|
# Prepare the footer images.
|
|
for index in range(footer_image_count):
|
|
filename = sheet.footer_images[index][0]
|
|
image_data = sheet.footer_images[index][1]
|
|
position = sheet.footer_images[index][2]
|
|
|
|
(
|
|
image_type,
|
|
width,
|
|
height,
|
|
name,
|
|
x_dpi,
|
|
y_dpi,
|
|
digest,
|
|
) = _get_image_properties(filename, image_data)
|
|
|
|
self.image_types[image_type] = True
|
|
|
|
if digest in header_image_ids:
|
|
ref_id = header_image_ids[digest]
|
|
else:
|
|
image_ref_id += 1
|
|
ref_id = image_ref_id
|
|
header_image_ids[digest] = image_ref_id
|
|
self.images.append([filename, image_type, image_data])
|
|
|
|
sheet._prepare_header_image(
|
|
ref_id,
|
|
width,
|
|
height,
|
|
name,
|
|
image_type,
|
|
position,
|
|
x_dpi,
|
|
y_dpi,
|
|
digest,
|
|
)
|
|
|
|
if has_drawing:
|
|
drawing = sheet.drawing
|
|
self.drawings.append(drawing)
|
|
|
|
# Remove charts that were created but not inserted into worksheets.
|
|
for chart in self.charts[:]:
|
|
if chart.id == -1:
|
|
self.charts.remove(chart)
|
|
|
|
# Sort the workbook charts references into the order that the were
|
|
# written to the worksheets above.
|
|
self.charts = sorted(self.charts, key=lambda chart: chart.id)
|
|
|
|
self.drawing_count = drawing_id
|
|
|
|
def _extract_named_ranges(self, defined_names):
|
|
# Extract the named ranges from the sorted list of defined names.
|
|
# These are used in the App.xml file.
|
|
named_ranges = []
|
|
|
|
for defined_name in defined_names:
|
|
name = defined_name[0]
|
|
index = defined_name[1]
|
|
sheet_range = defined_name[2]
|
|
|
|
# Skip autoFilter ranges.
|
|
if name == "_xlnm._FilterDatabase":
|
|
continue
|
|
|
|
# We are only interested in defined names with ranges.
|
|
if "!" in sheet_range:
|
|
sheet_name, _ = sheet_range.split("!", 1)
|
|
|
|
# Match Print_Area and Print_Titles xlnm types.
|
|
if name.startswith("_xlnm."):
|
|
xlnm_type = name.replace("_xlnm.", "")
|
|
name = sheet_name + "!" + xlnm_type
|
|
elif index != -1:
|
|
name = sheet_name + "!" + name
|
|
|
|
named_ranges.append(name)
|
|
|
|
return named_ranges
|
|
|
|
def _get_sheet_index(self, sheetname):
|
|
# Convert a sheet name to its index. Return None otherwise.
|
|
sheetname = sheetname.strip("'")
|
|
|
|
if sheetname in self.sheetnames:
|
|
return self.sheetnames[sheetname].index
|
|
|
|
return None
|
|
|
|
def _prepare_vml(self):
|
|
# Iterate through the worksheets and set up the VML objects.
|
|
comment_id = 0
|
|
vml_drawing_id = 0
|
|
vml_data_id = 1
|
|
vml_header_id = 0
|
|
vml_shape_id = 1024
|
|
vml_files = 0
|
|
comment_files = 0
|
|
|
|
for sheet in self.worksheets():
|
|
if not sheet.has_vml and not sheet.has_header_vml:
|
|
continue
|
|
|
|
vml_files += 1
|
|
|
|
if sheet.has_vml:
|
|
if sheet.has_comments:
|
|
comment_files += 1
|
|
comment_id += 1
|
|
self.has_comments = True
|
|
|
|
vml_drawing_id += 1
|
|
|
|
count = sheet._prepare_vml_objects(
|
|
vml_data_id, vml_shape_id, vml_drawing_id, comment_id
|
|
)
|
|
|
|
# Each VML should start with a shape id incremented by 1024.
|
|
vml_data_id += 1 * int((1024 + count) / 1024)
|
|
vml_shape_id += 1024 * int((1024 + count) / 1024)
|
|
|
|
if sheet.has_header_vml:
|
|
vml_header_id += 1
|
|
vml_drawing_id += 1
|
|
sheet._prepare_header_vml_objects(vml_header_id, vml_drawing_id)
|
|
|
|
self.num_vml_files = vml_files
|
|
self.num_comment_files = comment_files
|
|
|
|
def _prepare_tables(self):
|
|
# Set the table ids for the worksheet tables.
|
|
table_id = 0
|
|
seen = {}
|
|
|
|
for sheet in self.worksheets():
|
|
table_count = len(sheet.tables)
|
|
|
|
if not table_count:
|
|
continue
|
|
|
|
sheet._prepare_tables(table_id + 1, seen)
|
|
table_id += table_count
|
|
|
|
def _prepare_metadata(self):
|
|
# Set the metadata rel link.
|
|
self.has_embedded_images = self.embedded_images.has_images()
|
|
self.has_metadata = self.has_embedded_images
|
|
|
|
for sheet in self.worksheets():
|
|
if sheet.has_dynamic_arrays:
|
|
self.has_metadata = True
|
|
self.has_dynamic_functions = True
|
|
|
|
def _add_chart_data(self):
|
|
# Add "cached" data to charts to provide the numCache and strCache
|
|
# data for series and title/axis ranges.
|
|
worksheets = {}
|
|
seen_ranges = {}
|
|
charts = []
|
|
|
|
# Map worksheet names to worksheet objects.
|
|
for worksheet in self.worksheets():
|
|
worksheets[worksheet.name] = worksheet
|
|
|
|
# Build a list of the worksheet charts including any combined charts.
|
|
for chart in self.charts:
|
|
charts.append(chart)
|
|
if chart.combined:
|
|
charts.append(chart.combined)
|
|
|
|
for chart in charts:
|
|
for c_range in chart.formula_ids.keys():
|
|
r_id = chart.formula_ids[c_range]
|
|
|
|
# Skip if the series has user defined data.
|
|
if chart.formula_data[r_id] is not None:
|
|
if c_range not in seen_ranges or seen_ranges[c_range] is None:
|
|
data = chart.formula_data[r_id]
|
|
seen_ranges[c_range] = data
|
|
continue
|
|
|
|
# Check to see if the data is already cached locally.
|
|
if c_range in seen_ranges:
|
|
chart.formula_data[r_id] = seen_ranges[c_range]
|
|
continue
|
|
|
|
# Convert the range formula to a sheet name and cell range.
|
|
(sheetname, cells) = self._get_chart_range(c_range)
|
|
|
|
# Skip if we couldn't parse the formula.
|
|
if sheetname is None:
|
|
continue
|
|
|
|
# Handle non-contiguous ranges like:
|
|
# (Sheet1!$A$1:$A$2,Sheet1!$A$4:$A$5).
|
|
# We don't try to parse them. We just return an empty list.
|
|
if sheetname.startswith("("):
|
|
chart.formula_data[r_id] = []
|
|
seen_ranges[c_range] = []
|
|
continue
|
|
|
|
# Warn if the name is unknown since it indicates a user error
|
|
# in a chart series formula.
|
|
if sheetname not in worksheets:
|
|
warn(
|
|
f"Unknown worksheet reference '{sheetname}' in range "
|
|
f"'{c_range}' passed to add_series()"
|
|
)
|
|
chart.formula_data[r_id] = []
|
|
seen_ranges[c_range] = []
|
|
continue
|
|
|
|
# Find the worksheet object based on the sheet name.
|
|
worksheet = worksheets[sheetname]
|
|
|
|
# Get the data from the worksheet table.
|
|
data = worksheet._get_range_data(*cells)
|
|
|
|
# Add the data to the chart.
|
|
chart.formula_data[r_id] = data
|
|
|
|
# Store range data locally to avoid lookup if seen again.
|
|
seen_ranges[c_range] = data
|
|
|
|
def _get_chart_range(self, c_range):
|
|
# Convert a range formula such as Sheet1!$B$1:$B$5 into a sheet name
|
|
# and cell range such as ( 'Sheet1', 0, 1, 4, 1 ).
|
|
|
|
# Split the range formula into sheetname and cells at the last '!'.
|
|
pos = c_range.rfind("!")
|
|
if pos > 0:
|
|
sheetname = c_range[:pos]
|
|
cells = c_range[pos + 1 :]
|
|
else:
|
|
return None, None
|
|
|
|
# Split the cell range into 2 cells or else use single cell for both.
|
|
if cells.find(":") > 0:
|
|
(cell_1, cell_2) = cells.split(":", 1)
|
|
else:
|
|
(cell_1, cell_2) = (cells, cells)
|
|
|
|
# Remove leading/trailing quotes and convert escaped quotes to single.
|
|
sheetname = sheetname.strip("'")
|
|
sheetname = sheetname.replace("''", "'")
|
|
|
|
try:
|
|
# Get the row, col values from the Excel ranges. We do this in a
|
|
# try block for ranges that can't be parsed such as defined names.
|
|
(row_start, col_start) = xl_cell_to_rowcol(cell_1)
|
|
(row_end, col_end) = xl_cell_to_rowcol(cell_2)
|
|
except AttributeError:
|
|
return None, None
|
|
|
|
# We only handle 1D ranges.
|
|
if row_start != row_end and col_start != col_end:
|
|
return None, None
|
|
|
|
return sheetname, [row_start, col_start, row_end, col_end]
|
|
|
|
def _prepare_sst_string_data(self):
|
|
# Convert the SST string data from a dict to a list.
|
|
self.str_table._sort_string_data()
|
|
|
|
def _get_packager(self):
|
|
# Get and instance of the Packager class to create the xlsx package.
|
|
# This allows the default packager to be over-ridden.
|
|
return Packager()
|
|
|
|
###########################################################################
|
|
#
|
|
# XML methods.
|
|
#
|
|
###########################################################################
|
|
|
|
def _write_workbook(self):
|
|
# Write <workbook> element.
|
|
|
|
schema = "http://schemas.openxmlformats.org"
|
|
xmlns = schema + "/spreadsheetml/2006/main"
|
|
xmlns_r = schema + "/officeDocument/2006/relationships"
|
|
|
|
attributes = [
|
|
("xmlns", xmlns),
|
|
("xmlns:r", xmlns_r),
|
|
]
|
|
|
|
self._xml_start_tag("workbook", attributes)
|
|
|
|
def _write_file_version(self):
|
|
# Write the <fileVersion> element.
|
|
|
|
app_name = "xl"
|
|
last_edited = 4
|
|
lowest_edited = 4
|
|
rup_build = 4505
|
|
|
|
attributes = [
|
|
("appName", app_name),
|
|
("lastEdited", last_edited),
|
|
("lowestEdited", lowest_edited),
|
|
("rupBuild", rup_build),
|
|
]
|
|
|
|
if self.vba_project:
|
|
attributes.append(("codeName", "{37E998C4-C9E5-D4B9-71C8-EB1FF731991C}"))
|
|
|
|
self._xml_empty_tag("fileVersion", attributes)
|
|
|
|
def _write_file_sharing(self):
|
|
# Write the <fileSharing> element.
|
|
if self.read_only == 0:
|
|
return
|
|
|
|
attributes = [("readOnlyRecommended", 1)]
|
|
|
|
self._xml_empty_tag("fileSharing", attributes)
|
|
|
|
def _write_workbook_pr(self):
|
|
# Write <workbookPr> element.
|
|
default_theme_version = 124226
|
|
attributes = []
|
|
|
|
if self.vba_codename:
|
|
attributes.append(("codeName", self.vba_codename))
|
|
if self.date_1904:
|
|
attributes.append(("date1904", 1))
|
|
|
|
attributes.append(("defaultThemeVersion", default_theme_version))
|
|
|
|
self._xml_empty_tag("workbookPr", attributes)
|
|
|
|
def _write_book_views(self):
|
|
# Write <bookViews> element.
|
|
self._xml_start_tag("bookViews")
|
|
self._write_workbook_view()
|
|
self._xml_end_tag("bookViews")
|
|
|
|
def _write_workbook_view(self):
|
|
# Write <workbookView> element.
|
|
attributes = [
|
|
("xWindow", self.x_window),
|
|
("yWindow", self.y_window),
|
|
("windowWidth", self.window_width),
|
|
("windowHeight", self.window_height),
|
|
]
|
|
|
|
# Store the tabRatio attribute when it isn't the default.
|
|
if self.tab_ratio != 600:
|
|
attributes.append(("tabRatio", self.tab_ratio))
|
|
|
|
# Store the firstSheet attribute when it isn't the default.
|
|
if self.worksheet_meta.firstsheet > 0:
|
|
firstsheet = self.worksheet_meta.firstsheet + 1
|
|
attributes.append(("firstSheet", firstsheet))
|
|
|
|
# Store the activeTab attribute when it isn't the first sheet.
|
|
if self.worksheet_meta.activesheet > 0:
|
|
attributes.append(("activeTab", self.worksheet_meta.activesheet))
|
|
|
|
self._xml_empty_tag("workbookView", attributes)
|
|
|
|
def _write_sheets(self):
|
|
# Write <sheets> element.
|
|
self._xml_start_tag("sheets")
|
|
|
|
id_num = 1
|
|
for worksheet in self.worksheets():
|
|
self._write_sheet(worksheet.name, id_num, worksheet.hidden)
|
|
id_num += 1
|
|
|
|
self._xml_end_tag("sheets")
|
|
|
|
def _write_sheet(self, name, sheet_id, hidden):
|
|
# Write <sheet> element.
|
|
attributes = [
|
|
("name", name),
|
|
("sheetId", sheet_id),
|
|
]
|
|
|
|
if hidden == 1:
|
|
attributes.append(("state", "hidden"))
|
|
elif hidden == 2:
|
|
attributes.append(("state", "veryHidden"))
|
|
|
|
attributes.append(("r:id", "rId" + str(sheet_id)))
|
|
|
|
self._xml_empty_tag("sheet", attributes)
|
|
|
|
def _write_calc_pr(self):
|
|
# Write the <calcPr> element.
|
|
attributes = [("calcId", self.calc_id)]
|
|
|
|
if self.calc_mode == "manual":
|
|
attributes.append(("calcMode", self.calc_mode))
|
|
attributes.append(("calcOnSave", "0"))
|
|
elif self.calc_mode == "autoNoTable":
|
|
attributes.append(("calcMode", self.calc_mode))
|
|
|
|
if self.calc_on_load:
|
|
attributes.append(("fullCalcOnLoad", "1"))
|
|
|
|
self._xml_empty_tag("calcPr", attributes)
|
|
|
|
def _write_defined_names(self):
|
|
# Write the <definedNames> element.
|
|
if not self.defined_names:
|
|
return
|
|
|
|
self._xml_start_tag("definedNames")
|
|
|
|
for defined_name in self.defined_names:
|
|
self._write_defined_name(defined_name)
|
|
|
|
self._xml_end_tag("definedNames")
|
|
|
|
def _write_defined_name(self, defined_name):
|
|
# Write the <definedName> element.
|
|
name = defined_name[0]
|
|
sheet_id = defined_name[1]
|
|
sheet_range = defined_name[2]
|
|
hidden = defined_name[3]
|
|
|
|
attributes = [("name", name)]
|
|
|
|
if sheet_id != -1:
|
|
attributes.append(("localSheetId", sheet_id))
|
|
if hidden:
|
|
attributes.append(("hidden", 1))
|
|
|
|
self._xml_data_element("definedName", sheet_range, attributes)
|
|
|
|
|
|
# A metadata class to share data between worksheets.
|
|
class WorksheetMeta:
|
|
"""
|
|
A class to track worksheets data such as the active sheet and the
|
|
first sheet.
|
|
|
|
"""
|
|
|
|
def __init__(self):
|
|
self.activesheet = 0
|
|
self.firstsheet = 0
|
|
|
|
|
|
# A helper class to share embedded images between worksheets.
|
|
class EmbeddedImages:
|
|
"""
|
|
A class to track duplicate embedded images between worksheets.
|
|
|
|
"""
|
|
|
|
def __init__(self):
|
|
self.images = []
|
|
self.image_indexes = {}
|
|
|
|
def get_image_index(self, image, digest):
|
|
"""
|
|
Get the index of an embedded image.
|
|
|
|
Args:
|
|
image: The image to lookup.
|
|
digest: The digest of the image.
|
|
|
|
Returns:
|
|
The image index.
|
|
|
|
"""
|
|
image_index = self.image_indexes.get(digest)
|
|
|
|
if image_index is None:
|
|
self.images.append(image)
|
|
image_index = len(self.images)
|
|
self.image_indexes[digest] = image_index
|
|
|
|
return image_index
|
|
|
|
def has_images(self):
|
|
"""
|
|
Check if the worksheet has embedded images.
|
|
|
|
Args:
|
|
None.
|
|
|
|
Returns:
|
|
Boolean.
|
|
|
|
"""
|
|
return len(self.images) > 0
|