In [1]:
import os
from xml.dom import minidom
import zipfile
# Build a minimal .docx package by hand: write a bare <w:document> part into
# the staging directory, zip that directory, then give the archive a .docx name.
startdir = "build-word/test"

# makedirs(exist_ok=True) instead of mkdir: the cell must work on a fresh
# checkout (no build-word/test yet) and must not fail when it is re-run.
os.makedirs(os.path.join(startdir, "_rels"), exist_ok=True)

xml1 = minidom.Document()
wdoc = xml1.createElement("w:document")
wdoc.setAttribute("xmlns:w", "http://schemas.openxmlformats.org/wordprocessingml/2006/main")
xml1.appendChild(wdoc)
# NOTE(review): this text node is created but never appended to the tree, so
# it does not end up in document.xml. Left unattached on purpose here; decide
# where (or whether) it belongs before wiring it in.
text = xml1.createTextNode("openxml")

# The XML declaration written below announces UTF-8, so the file itself has to
# be encoded as UTF-8 rather than with the platform default encoding.
with open(os.path.join(startdir, "document.xml"), "w", encoding="utf-8") as fp:
    xml1.writexml(fp, " ", "", "\n", "UTF-8")

with zipfile.ZipFile('build-word/test.zip', 'w', zipfile.ZIP_DEFLATED) as f:
    for dirpath, dirnames, filenames in os.walk(startdir):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            # Archive names are paths relative to the package root. relpath
            # computes exactly that; str.replace would strip every occurrence
            # of the prefix and leave a leading separator behind.
            f.write(filepath, os.path.relpath(filepath, startdir))

# os.replace overwrites an existing test.docx on every platform, whereas
# os.rename raises on Windows when the destination already exists.
os.replace("build-word/test.zip", "build-word/test.docx")
In [4]:
from pathlib import Path
from zipfile import ZipFile
class WordPackage:
    """Stage and zip the directory layout of a Word (.docx) package.

    The package contents are staged in ``<root_dir>/<document_name>/`` and
    zipped into ``<root_dir>/<document_name>.docx``.
    """

    def __init__(self, root_dir, document_name):
        """Record the paths used by the package; nothing is created on disk.

        Args:
            root_dir: Directory that holds the staging directory and the
                resulting ``.docx`` file. Resolved to an absolute path.
            document_name: Name of the staging directory and stem of the
                ``.docx`` file.
        """
        self.root_dir = Path(root_dir).resolve()
        self.document_name = document_name
        self.docx_file = self.root_dir / '{0}.docx'.format(document_name)
        # Known up front so package_document() can also be used on a staging
        # directory that already exists, without calling prepare_structure().
        self.docx_dir = self.root_dir / document_name

    def _make_subdir(self, parent_dir, subdir_name):
        """Create ``parent_dir / subdir_name`` if needed and return its path."""
        subdir = parent_dir / subdir_name
        # parents=True: root_dir itself may not exist yet.
        # exist_ok=True: keeps repeated calls (re-running a cell) harmless.
        subdir.mkdir(parents=True, exist_ok=True)
        return subdir

    def prepare_structure(self):
        """Create the staging directory with its ``_rels`` and ``word`` subdirs.

        Sets ``docx_dir``, ``docx_rels_dir``, ``docx_word_dir`` and
        ``content_types_xml`` (the path only; the file is not written here).
        """
        self.docx_dir = self._make_subdir(self.root_dir, self.document_name)
        self.docx_rels_dir = self._make_subdir(self.docx_dir, '_rels')
        self.docx_word_dir = self._make_subdir(self.docx_dir, 'word')
        self.content_types_xml = self.docx_dir / '[Content_Types].xml'

    def package_document(self):
        """Zip every file below the staging directory into the ``.docx`` file.

        Returns:
            Path of the written ``.docx`` file.

        Raises:
            FileNotFoundError: If the staging directory does not exist.
        """
        # Imported here so the method does not depend on an ``import zipfile``
        # executed by some other cell.
        from zipfile import ZIP_DEFLATED

        if not self.docx_dir.is_dir():
            raise FileNotFoundError(
                'staging directory not found: {0}'.format(self.docx_dir))

        with ZipFile(self.docx_file, 'w', ZIP_DEFLATED) as docx:
            # rglob descends into _rels/ and word/; iterating only the top
            # level and skipping directories would leave those parts out.
            # sorted() makes the archive order deterministic.
            for packaged_path in sorted(self.docx_dir.rglob('*')):
                if packaged_path.is_dir():
                    continue
                arcname = packaged_path.relative_to(self.docx_dir).as_posix()
                docx.write(packaged_path, arcname)
        return self.docx_file
def main():
    """Stage and package the ``test2`` document under ``./build-word``."""
    package = WordPackage(root_dir='./build-word', document_name='test2')
    package.prepare_structure()
    package.package_document()
In [5]:
# Run the packaging demo: main() builds a WordPackage for 'test2' under
# ./build-word, prepares its directory structure and packages it.
main()
In [10]:
import xml.etree.ElementTree as et
import io
# Build a small content-types XML tree (a <Types> root with one <Default>
# entry for the "xml" extension) and serialize it to UTF-8 bytes.
CONTENT_TYPES_NS = "http://schemas.openxmlformats.org/package/2006/content-types"

# The namespace is emitted as a plain ``xmlns`` attribute on the root element;
# the ``default_namespace`` option of write() is deliberately not passed.
types = et.Element('Types', {'xmlns': CONTENT_TYPES_NS})
default = et.SubElement(types, 'Default', Extension="xml", ContentType="application/xml")
content_types = et.ElementTree(types)

output = io.BytesIO()
content_types.write(output, encoding='utf-8', xml_declaration=True)
ct_str = output.getvalue()
# Bare last expression so the notebook displays the serialized bytes.
ct_str
Out[10]:
In [13]:
# Maps "xmlns:<prefix>" attribute names to namespace URIs (WordprocessingML,
# math, relationships, Office/VML and DrawingML). Built from (prefix, URI)
# pairs so each prefix is written once; insertion order follows the pairs.
document_ns_map = {
    'xmlns:' + prefix: uri
    for prefix, uri in (
        ('w', "http://schemas.openxmlformats.org/wordprocessingml/2006/main"),
        ('m', "http://schemas.openxmlformats.org/officeDocument/2006/math"),
        ('r', "http://schemas.openxmlformats.org/officeDocument/2006/relationships"),
        ('o', "urn:schemas-microsoft-com:office:office"),
        ('v', "urn:schemas-microsoft-com:vml"),
        ('w10', "urn:schemas-microsoft-com:office:word"),
        ('a', "http://schemas.openxmlformats.org/drawingml/2006/main"),
        ('pic', "http://schemas.openxmlformats.org/drawingml/2006/picture"),
        ('wp', "http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"),
    )
}
In [ ]: