############################################################################### # # Packager - A class for writing the Excel XLSX Worksheet file. # # SPDX-License-Identifier: BSD-2-Clause # # Copyright (c) 2013-2025, John McNamara, jmcnamara@cpan.org # # Standard packages. import os import stat import tempfile from io import BytesIO, StringIO from shutil import copy # Package imports. from .app import App from .comments import Comments from .contenttypes import ContentTypes from .core import Core from .custom import Custom from .exceptions import EmptyChartSeries from .feature_property_bag import FeaturePropertyBag from .metadata import Metadata from .relationships import Relationships from .rich_value import RichValue from .rich_value_rel import RichValueRel from .rich_value_structure import RichValueStructure from .rich_value_types import RichValueTypes from .sharedstrings import SharedStrings from .styles import Styles from .table import Table from .theme import Theme from .vml import Vml class Packager: """ A class for writing the Excel XLSX Packager file. This module is used in conjunction with XlsxWriter to create an Excel XLSX container file. From Wikipedia: The Open Packaging Conventions (OPC) is a container-file technology initially created by Microsoft to store a combination of XML and non-XML files that together form a single entity such as an Open XML Paper Specification (OpenXPS) document. http://en.wikipedia.org/wiki/Open_Packaging_Conventions. At its simplest an Excel XLSX file contains the following elements:: ____ [Content_Types].xml | |____ docProps | |____ app.xml | |____ core.xml | |____ xl | |____ workbook.xml | |____ worksheets | | |____ sheet1.xml | | | |____ styles.xml | | | |____ theme | | |____ theme1.xml | | | |_____rels | |____ workbook.xml.rels | |_____rels |____ .rels The Packager class coordinates the classes that represent the elements of the package and writes them into the XLSX file. """ ########################################################################### # # Public API. # ########################################################################### def __init__(self): """ Constructor. """ super().__init__() self.tmpdir = "" self.in_memory = False self.workbook = None self.worksheet_count = 0 self.chartsheet_count = 0 self.chart_count = 0 self.drawing_count = 0 self.table_count = 0 self.num_vml_files = 0 self.num_comment_files = 0 self.named_ranges = [] self.filenames = [] ########################################################################### # # Private API. # ########################################################################### def _set_tmpdir(self, tmpdir): # Set an optional user defined temp directory. self.tmpdir = tmpdir def _set_in_memory(self, in_memory): # Set the optional 'in_memory' mode. self.in_memory = in_memory def _add_workbook(self, workbook): # Add the Excel::Writer::XLSX::Workbook object to the package. self.workbook = workbook self.chart_count = len(workbook.charts) self.drawing_count = len(workbook.drawings) self.num_vml_files = workbook.num_vml_files self.num_comment_files = workbook.num_comment_files self.named_ranges = workbook.named_ranges for worksheet in self.workbook.worksheets(): if worksheet.is_chartsheet: self.chartsheet_count += 1 else: self.worksheet_count += 1 def _create_package(self): # Write the xml files that make up the XLSX OPC package. self._write_content_types_file() self._write_root_rels_file() self._write_workbook_rels_file() self._write_worksheet_files() self._write_chartsheet_files() self._write_workbook_file() self._write_chart_files() self._write_drawing_files() self._write_vml_files() self._write_comment_files() self._write_table_files() self._write_shared_strings_file() self._write_styles_file() self._write_custom_file() self._write_theme_file() self._write_worksheet_rels_files() self._write_chartsheet_rels_files() self._write_drawing_rels_files() self._write_rich_value_rels_files() self._add_image_files() self._add_vba_project() self._add_vba_project_signature() self._write_vba_project_rels_file() self._write_core_file() self._write_app_file() self._write_metadata_file() self._write_feature_bag_property() self._write_rich_value_files() return self.filenames def _filename(self, xml_filename): # Create a temp filename to write the XML data to and store the Excel # filename to use as the name in the Zip container. if self.in_memory: os_filename = StringIO() else: (fd, os_filename) = tempfile.mkstemp(dir=self.tmpdir) os.close(fd) self.filenames.append((os_filename, xml_filename, False)) return os_filename def _write_workbook_file(self): # Write the workbook.xml file. workbook = self.workbook workbook._set_xml_writer(self._filename("xl/workbook.xml")) workbook._assemble_xml_file() def _write_worksheet_files(self): # Write the worksheet files. index = 1 for worksheet in self.workbook.worksheets(): if worksheet.is_chartsheet: continue if worksheet.constant_memory: worksheet._opt_reopen() worksheet._write_single_row() worksheet._set_xml_writer( self._filename("xl/worksheets/sheet" + str(index) + ".xml") ) worksheet._assemble_xml_file() index += 1 def _write_chartsheet_files(self): # Write the chartsheet files. index = 1 for worksheet in self.workbook.worksheets(): if not worksheet.is_chartsheet: continue worksheet._set_xml_writer( self._filename("xl/chartsheets/sheet" + str(index) + ".xml") ) worksheet._assemble_xml_file() index += 1 def _write_chart_files(self): # Write the chart files. if not self.workbook.charts: return index = 1 for chart in self.workbook.charts: # Check that the chart has at least one data series. if not chart.series: raise EmptyChartSeries( f"Chart{index} must contain at least one " f"data series. See chart.add_series()." ) chart._set_xml_writer( self._filename("xl/charts/chart" + str(index) + ".xml") ) chart._assemble_xml_file() index += 1 def _write_drawing_files(self): # Write the drawing files. if not self.drawing_count: return index = 1 for drawing in self.workbook.drawings: drawing._set_xml_writer( self._filename("xl/drawings/drawing" + str(index) + ".xml") ) drawing._assemble_xml_file() index += 1 def _write_vml_files(self): # Write the comment VML files. index = 1 for worksheet in self.workbook.worksheets(): if not worksheet.has_vml and not worksheet.has_header_vml: continue if worksheet.has_vml: vml = Vml() vml._set_xml_writer( self._filename("xl/drawings/vmlDrawing" + str(index) + ".vml") ) vml._assemble_xml_file( worksheet.vml_data_id, worksheet.vml_shape_id, worksheet.comments_list, worksheet.buttons_list, ) index += 1 if worksheet.has_header_vml: vml = Vml() vml._set_xml_writer( self._filename("xl/drawings/vmlDrawing" + str(index) + ".vml") ) vml._assemble_xml_file( worksheet.vml_header_id, worksheet.vml_header_id * 1024, None, None, worksheet.header_images_list, ) self._write_vml_drawing_rels_file(worksheet, index) index += 1 def _write_comment_files(self): # Write the comment files. index = 1 for worksheet in self.workbook.worksheets(): if not worksheet.has_comments: continue comment = Comments() comment._set_xml_writer(self._filename("xl/comments" + str(index) + ".xml")) comment._assemble_xml_file(worksheet.comments_list) index += 1 def _write_shared_strings_file(self): # Write the sharedStrings.xml file. sst = SharedStrings() sst.string_table = self.workbook.str_table if not self.workbook.str_table.count: return sst._set_xml_writer(self._filename("xl/sharedStrings.xml")) sst._assemble_xml_file() def _write_app_file(self): # Write the app.xml file. properties = self.workbook.doc_properties app = App() # Add the Worksheet parts. worksheet_count = 0 for worksheet in self.workbook.worksheets(): if worksheet.is_chartsheet: continue # Don't write/count veryHidden sheets. if worksheet.hidden != 2: app._add_part_name(worksheet.name) worksheet_count += 1 # Add the Worksheet heading pairs. app._add_heading_pair(["Worksheets", worksheet_count]) # Add the Chartsheet parts. for worksheet in self.workbook.worksheets(): if not worksheet.is_chartsheet: continue app._add_part_name(worksheet.name) # Add the Chartsheet heading pairs. app._add_heading_pair(["Charts", self.chartsheet_count]) # Add the Named Range heading pairs. if self.named_ranges: app._add_heading_pair(["Named Ranges", len(self.named_ranges)]) # Add the Named Ranges parts. for named_range in self.named_ranges: app._add_part_name(named_range) app._set_properties(properties) app.doc_security = self.workbook.read_only app._set_xml_writer(self._filename("docProps/app.xml")) app._assemble_xml_file() def _write_core_file(self): # Write the core.xml file. properties = self.workbook.doc_properties core = Core() core._set_properties(properties) core._set_xml_writer(self._filename("docProps/core.xml")) core._assemble_xml_file() def _write_metadata_file(self): # Write the metadata.xml file. if not self.workbook.has_metadata: return metadata = Metadata() metadata.has_dynamic_functions = self.workbook.has_dynamic_functions metadata.num_embedded_images = len(self.workbook.embedded_images.images) metadata._set_xml_writer(self._filename("xl/metadata.xml")) metadata._assemble_xml_file() def _write_feature_bag_property(self): # Write the featurePropertyBag.xml file. feature_property_bags = self.workbook._has_feature_property_bags() if not feature_property_bags: return property_bag = FeaturePropertyBag() property_bag.feature_property_bags = feature_property_bags property_bag._set_xml_writer( self._filename("xl/featurePropertyBag/featurePropertyBag.xml") ) property_bag._assemble_xml_file() def _write_rich_value_files(self): if not self.workbook.embedded_images.has_images(): return self._write_rich_value() self._write_rich_value_types() self._write_rich_value_structure() self._write_rich_value_rel() def _write_rich_value(self): # Write the rdrichvalue.xml file. filename = self._filename("xl/richData/rdrichvalue.xml") xml_file = RichValue() xml_file.embedded_images = self.workbook.embedded_images.images xml_file._set_xml_writer(filename) xml_file._assemble_xml_file() def _write_rich_value_types(self): # Write the rdRichValueTypes.xml file. filename = self._filename("xl/richData/rdRichValueTypes.xml") xml_file = RichValueTypes() xml_file._set_xml_writer(filename) xml_file._assemble_xml_file() def _write_rich_value_structure(self): # Write the rdrichvaluestructure.xml file. filename = self._filename("xl/richData/rdrichvaluestructure.xml") xml_file = RichValueStructure() xml_file.has_embedded_descriptions = self.workbook.has_embedded_descriptions xml_file._set_xml_writer(filename) xml_file._assemble_xml_file() def _write_rich_value_rel(self): # Write the richValueRel.xml file. filename = self._filename("xl/richData/richValueRel.xml") xml_file = RichValueRel() xml_file.num_embedded_images = len(self.workbook.embedded_images.images) xml_file._set_xml_writer(filename) xml_file._assemble_xml_file() def _write_custom_file(self): # Write the custom.xml file. properties = self.workbook.custom_properties custom = Custom() if not properties: return custom._set_properties(properties) custom._set_xml_writer(self._filename("docProps/custom.xml")) custom._assemble_xml_file() def _write_content_types_file(self): # Write the ContentTypes.xml file. content = ContentTypes() content._add_image_types(self.workbook.image_types) self._get_table_count() worksheet_index = 1 chartsheet_index = 1 for worksheet in self.workbook.worksheets(): if worksheet.is_chartsheet: content._add_chartsheet_name("sheet" + str(chartsheet_index)) chartsheet_index += 1 else: content._add_worksheet_name("sheet" + str(worksheet_index)) worksheet_index += 1 for i in range(1, self.chart_count + 1): content._add_chart_name("chart" + str(i)) for i in range(1, self.drawing_count + 1): content._add_drawing_name("drawing" + str(i)) if self.num_vml_files: content._add_vml_name() for i in range(1, self.table_count + 1): content._add_table_name("table" + str(i)) for i in range(1, self.num_comment_files + 1): content._add_comment_name("comments" + str(i)) # Add the sharedString rel if there is string data in the workbook. if self.workbook.str_table.count: content._add_shared_strings() # Add vbaProject (and optionally vbaProjectSignature) if present. if self.workbook.vba_project: content._add_vba_project() if self.workbook.vba_project_signature: content._add_vba_project_signature() # Add the custom properties if present. if self.workbook.custom_properties: content._add_custom_properties() # Add the metadata file if present. if self.workbook.has_metadata: content._add_metadata() # Add the metadata file if present. if self.workbook._has_feature_property_bags(): content._add_feature_bag_property() # Add the RichValue file if present. if self.workbook.embedded_images.has_images(): content._add_rich_value() content._set_xml_writer(self._filename("[Content_Types].xml")) content._assemble_xml_file() def _write_styles_file(self): # Write the style xml file. xf_formats = self.workbook.xf_formats palette = self.workbook.palette font_count = self.workbook.font_count num_formats = self.workbook.num_formats border_count = self.workbook.border_count fill_count = self.workbook.fill_count custom_colors = self.workbook.custom_colors dxf_formats = self.workbook.dxf_formats has_comments = self.workbook.has_comments styles = Styles() styles._set_style_properties( [ xf_formats, palette, font_count, num_formats, border_count, fill_count, custom_colors, dxf_formats, has_comments, ] ) styles._set_xml_writer(self._filename("xl/styles.xml")) styles._assemble_xml_file() def _write_theme_file(self): # Write the theme xml file. theme = Theme() theme._set_xml_writer(self._filename("xl/theme/theme1.xml")) theme._assemble_xml_file() def _write_table_files(self): # Write the table files. index = 1 for worksheet in self.workbook.worksheets(): table_props = worksheet.tables if not table_props: continue for table_props in table_props: table = Table() table._set_xml_writer( self._filename("xl/tables/table" + str(index) + ".xml") ) table._set_properties(table_props) table._assemble_xml_file() index += 1 def _get_table_count(self): # Count the table files. Required for the [Content_Types] file. for worksheet in self.workbook.worksheets(): for _ in worksheet.tables: self.table_count += 1 def _write_root_rels_file(self): # Write the _rels/.rels xml file. rels = Relationships() rels._add_document_relationship("/officeDocument", "xl/workbook.xml") rels._add_package_relationship("/metadata/core-properties", "docProps/core.xml") rels._add_document_relationship("/extended-properties", "docProps/app.xml") if self.workbook.custom_properties: rels._add_document_relationship("/custom-properties", "docProps/custom.xml") rels._set_xml_writer(self._filename("_rels/.rels")) rels._assemble_xml_file() def _write_workbook_rels_file(self): # Write the _rels/.rels xml file. rels = Relationships() worksheet_index = 1 chartsheet_index = 1 for worksheet in self.workbook.worksheets(): if worksheet.is_chartsheet: rels._add_document_relationship( "/chartsheet", "chartsheets/sheet" + str(chartsheet_index) + ".xml" ) chartsheet_index += 1 else: rels._add_document_relationship( "/worksheet", "worksheets/sheet" + str(worksheet_index) + ".xml" ) worksheet_index += 1 rels._add_document_relationship("/theme", "theme/theme1.xml") rels._add_document_relationship("/styles", "styles.xml") # Add the sharedString rel if there is string data in the workbook. if self.workbook.str_table.count: rels._add_document_relationship("/sharedStrings", "sharedStrings.xml") # Add vbaProject if present. if self.workbook.vba_project: rels._add_ms_package_relationship("/vbaProject", "vbaProject.bin") # Add the metadata file if required. if self.workbook.has_metadata: rels._add_document_relationship("/sheetMetadata", "metadata.xml") # Add the RichValue files if present. if self.workbook.embedded_images.has_images(): rels._add_rich_value_relationship() # Add the checkbox/FeaturePropertyBag file if present. if self.workbook._has_feature_property_bags(): rels._add_feature_bag_relationship() rels._set_xml_writer(self._filename("xl/_rels/workbook.xml.rels")) rels._assemble_xml_file() def _write_worksheet_rels_files(self): # Write data such as hyperlinks or drawings. index = 0 for worksheet in self.workbook.worksheets(): if worksheet.is_chartsheet: continue index += 1 external_links = ( worksheet.external_hyper_links + worksheet.external_drawing_links + worksheet.external_vml_links + worksheet.external_background_links + worksheet.external_table_links + worksheet.external_comment_links ) if not external_links: continue # Create the worksheet .rels dirs. rels = Relationships() for link_data in external_links: rels._add_document_relationship(*link_data) # Create .rels file such as /xl/worksheets/_rels/sheet1.xml.rels. rels._set_xml_writer( self._filename("xl/worksheets/_rels/sheet" + str(index) + ".xml.rels") ) rels._assemble_xml_file() def _write_chartsheet_rels_files(self): # Write the chartsheet .rels files for links to drawing files. index = 0 for worksheet in self.workbook.worksheets(): if not worksheet.is_chartsheet: continue index += 1 external_links = ( worksheet.external_drawing_links + worksheet.external_vml_links ) if not external_links: continue # Create the chartsheet .rels xlsx_dir. rels = Relationships() for link_data in external_links: rels._add_document_relationship(*link_data) # Create .rels file such as /xl/chartsheets/_rels/sheet1.xml.rels. rels._set_xml_writer( self._filename("xl/chartsheets/_rels/sheet" + str(index) + ".xml.rels") ) rels._assemble_xml_file() def _write_drawing_rels_files(self): # Write the drawing .rels files for worksheets with charts or drawings. index = 0 for worksheet in self.workbook.worksheets(): if worksheet.drawing: index += 1 if not worksheet.drawing_links: continue # Create the drawing .rels xlsx_dir. rels = Relationships() for drawing_data in worksheet.drawing_links: rels._add_document_relationship(*drawing_data) # Create .rels file such as /xl/drawings/_rels/sheet1.xml.rels. rels._set_xml_writer( self._filename("xl/drawings/_rels/drawing" + str(index) + ".xml.rels") ) rels._assemble_xml_file() def _write_vml_drawing_rels_file(self, worksheet, index): # Write the vmlDdrawing .rels files for worksheets with images in # headers or footers. # Create the drawing .rels dir. rels = Relationships() for drawing_data in worksheet.vml_drawing_links: rels._add_document_relationship(*drawing_data) # Create .rels file such as /xl/drawings/_rels/vmlDrawing1.vml.rels. rels._set_xml_writer( self._filename("xl/drawings/_rels/vmlDrawing" + str(index) + ".vml.rels") ) rels._assemble_xml_file() def _write_vba_project_rels_file(self): # Write the vbaProject.rels xml file if signed macros exist. vba_project_signature = self.workbook.vba_project_signature if not vba_project_signature: return # Create the vbaProject .rels dir. rels = Relationships() rels._add_ms_package_relationship( "/vbaProjectSignature", "vbaProjectSignature.bin" ) rels._set_xml_writer(self._filename("xl/_rels/vbaProject.bin.rels")) rels._assemble_xml_file() def _write_rich_value_rels_files(self): # Write the richValueRel.xml.rels for embedded images. if not self.workbook.embedded_images.has_images(): return # Create the worksheet .rels dirs. rels = Relationships() index = 1 for image_data in self.workbook.embedded_images.images: file_type = image_data[1] image_file = f"../media/image{index}.{file_type}" rels._add_document_relationship("/image", image_file) index += 1 # Create .rels file such as /xl/worksheets/_rels/sheet1.xml.rels. rels._set_xml_writer(self._filename("/xl/richData/_rels/richValueRel.xml.rels")) rels._assemble_xml_file() def _add_image_files(self): # pylint: disable=consider-using-with # Write the /xl/media/image?.xml files. workbook = self.workbook index = 1 images = workbook.embedded_images.images + workbook.images for image in images: filename = image[0] ext = "." + image[1] image_data = image[2] xml_image_name = "xl/media/image" + str(index) + ext if not self.in_memory: # In file mode we just write or copy the image file. os_filename = self._filename(xml_image_name) if image_data: # The data is in a byte stream. Write it to the target. os_file = open(os_filename, mode="wb") os_file.write(image_data.getvalue()) os_file.close() else: copy(filename, os_filename) # Allow copies of Windows read-only images to be deleted. try: os.chmod( os_filename, os.stat(os_filename).st_mode | stat.S_IWRITE ) except OSError: pass else: # For in-memory mode we read the image into a stream. if image_data: # The data is already in a byte stream. os_filename = image_data else: image_file = open(filename, mode="rb") image_data = image_file.read() os_filename = BytesIO(image_data) image_file.close() self.filenames.append((os_filename, xml_image_name, True)) index += 1 def _add_vba_project_signature(self): # pylint: disable=consider-using-with # Copy in a vbaProjectSignature.bin file. vba_project_signature = self.workbook.vba_project_signature vba_project_signature_is_stream = self.workbook.vba_project_signature_is_stream if not vba_project_signature: return xml_vba_signature_name = "xl/vbaProjectSignature.bin" if not self.in_memory: # In file mode we just write or copy the VBA project signature file. os_filename = self._filename(xml_vba_signature_name) if vba_project_signature_is_stream: # The data is in a byte stream. Write it to the target. os_file = open(os_filename, mode="wb") os_file.write(vba_project_signature.getvalue()) os_file.close() else: copy(vba_project_signature, os_filename) else: # For in-memory mode we read the vba into a stream. if vba_project_signature_is_stream: # The data is already in a byte stream. os_filename = vba_project_signature else: vba_file = open(vba_project_signature, mode="rb") vba_data = vba_file.read() os_filename = BytesIO(vba_data) vba_file.close() self.filenames.append((os_filename, xml_vba_signature_name, True)) def _add_vba_project(self): # pylint: disable=consider-using-with # Copy in a vbaProject.bin file. vba_project = self.workbook.vba_project vba_project_is_stream = self.workbook.vba_project_is_stream if not vba_project: return xml_vba_name = "xl/vbaProject.bin" if not self.in_memory: # In file mode we just write or copy the VBA file. os_filename = self._filename(xml_vba_name) if vba_project_is_stream: # The data is in a byte stream. Write it to the target. os_file = open(os_filename, mode="wb") os_file.write(vba_project.getvalue()) os_file.close() else: copy(vba_project, os_filename) else: # For in-memory mode we read the vba into a stream. if vba_project_is_stream: # The data is already in a byte stream. os_filename = vba_project else: vba_file = open(vba_project, mode="rb") vba_data = vba_file.read() os_filename = BytesIO(vba_data) vba_file.close() self.filenames.append((os_filename, xml_vba_name, True))