This is the test record: http://cds.cern.ch/record/2058156
In [29]:
# MARCXML export of CDS record 2058156 (CERN-PHOTO-201510-197), used as the
# input for the splitter.
# Well-formedness notes:
#   * the backslash after the opening quotes suppresses the leading newline,
#     because the XML declaration must be the very first thing in the document;
#   * literal angle brackets inside field values (see field 923 $r) are
#     written as &lt; / &gt; so they are not parsed as markup.
CDS_PHOTO_RECORD = """\
<?xml version="1.0" encoding="UTF-8"?>
<collection xmlns="http://www.loc.gov/MARC21/slim">
<record>
<controlfield tag="001">2058156</controlfield>
<controlfield tag="005">20151008225323.0</controlfield>
<datafield tag="024" ind1="8" ind2=" ">
<subfield code="a">oai:cds.cern.ch:2058156</subfield>
<subfield code="p">cerncds:FULLTEXT</subfield>
</datafield>
<datafield tag="037" ind1=" " ind2=" ">
<subfield code="a">CERN-PHOTO-201510-197</subfield>
</datafield>
<datafield tag="100" ind1=" " ind2=" ">
<subfield code="a">Bennett, Sophia Elizabeth</subfield>
<subfield code="0">AUTHOR|(SzGeCERN)780240</subfield>
<subfield code="u">CERN</subfield>
<subfield code="m">sophia.bennett@cern.ch</subfield>
</datafield>
<datafield tag="245" ind1=" " ind2=" ">
<subfield code="a">Big Bang Passport - New Location</subfield>
</datafield>
<datafield tag="260" ind1=" " ind2=" ">
<subfield code="c">2015</subfield>
</datafield>
<datafield tag="269" ind1=" " ind2=" ">
<subfield code="a">Geneva</subfield>
<subfield code="b">CERN</subfield>
<subfield code="c">2015-10-08</subfield>
</datafield>
<datafield tag="500" ind1=" " ind2=" ">
<subfield code="a">General Photo</subfield>
</datafield>
<datafield tag="506" ind1=" " ind2=" ">
<subfield code="a">public</subfield>
</datafield>
<datafield tag="520" ind1=" " ind2=" ">
<subfield code="a">New loaction</subfield>
</datafield>
<datafield tag="542" ind1=" " ind2=" ">
<subfield code="d">CERN</subfield>
<subfield code="g">2015</subfield>
</datafield>
<datafield tag="595" ind1=" " ind2=" ">
<subfield code="a">CERN EDS</subfield>
<subfield code="s">PHOTOLAB</subfield>
</datafield>
<datafield tag="650" ind1="1" ind2="7">
<subfield code="2">SzGeCERN</subfield>
<subfield code="a">Photolab</subfield>
</datafield>
<datafield tag="650" ind1="2" ind2="7">
<subfield code="2">SzGeCERN</subfield>
<subfield code="a">Life at CERN</subfield>
</datafield>
<datafield tag="653" ind1="1" ind2=" ">
<subfield code="a">Life at CERN</subfield>
<subfield code="9">CERN</subfield>
</datafield>
<datafield tag="690" ind1="C" ind2=" ">
<subfield code="a">CERN</subfield>
</datafield>
<datafield tag="690" ind1="C" ind2=" ">
<subfield code="a">PHOTO</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150739</subfield>
<subfield code="s">20888154</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0388.JPG</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150740</subfield>
<subfield code="s">18005815</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0390.JPG</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150741</subfield>
<subfield code="s">24104669</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0396.JPG</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150742</subfield>
<subfield code="s">21212927</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0399.JPG</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150743</subfield>
<subfield code="s">21257737</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0407.JPG</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150739</subfield>
<subfield code="s">1406947</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0388.jpg?subformat=icon-1440</subfield>
<subfield code="x">icon-1440</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150739</subfield>
<subfield code="s">342545</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0388.jpg?subformat=icon-640</subfield>
<subfield code="x">icon-640</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150739</subfield>
<subfield code="s">73531</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0388.jpg?subformat=icon-180</subfield>
<subfield code="x">icon-180</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150740</subfield>
<subfield code="s">1282195</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0390.jpg?subformat=icon-1440</subfield>
<subfield code="x">icon-1440</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150740</subfield>
<subfield code="s">345928</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0390.jpg?subformat=icon-640</subfield>
<subfield code="x">icon-640</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150740</subfield>
<subfield code="s">79720</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0390.jpg?subformat=icon-180</subfield>
<subfield code="x">icon-180</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150741</subfield>
<subfield code="s">1558062</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0396.jpg?subformat=icon-1440</subfield>
<subfield code="x">icon-1440</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150741</subfield>
<subfield code="s">384736</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0396.jpg?subformat=icon-640</subfield>
<subfield code="x">icon-640</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150741</subfield>
<subfield code="s">78661</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0396.jpg?subformat=icon-180</subfield>
<subfield code="x">icon-180</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150742</subfield>
<subfield code="s">1410785</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0399.jpg?subformat=icon-1440</subfield>
<subfield code="x">icon-1440</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150742</subfield>
<subfield code="s">366776</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0399.jpg?subformat=icon-640</subfield>
<subfield code="x">icon-640</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150742</subfield>
<subfield code="s">81339</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0399.jpg?subformat=icon-180</subfield>
<subfield code="x">icon-180</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150743</subfield>
<subfield code="s">1298236</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0407.jpg?subformat=icon-1440</subfield>
<subfield code="x">icon-1440</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150743</subfield>
<subfield code="s">327592</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0407.jpg?subformat=icon-640</subfield>
<subfield code="x">icon-640</subfield>
</datafield>
<datafield tag="856" ind1="4" ind2=" ">
<subfield code="8">1150743</subfield>
<subfield code="s">76882</subfield>
<subfield code="u">http://cds.cern.ch/record/2058156/files/MAX_0407.jpg?subformat=icon-180</subfield>
<subfield code="x">icon-180</subfield>
</datafield>
<datafield tag="859" ind1=" " ind2=" ">
<subfield code="f">maximilien.brice@cern.ch</subfield>
</datafield>
<datafield tag="859" ind1=" " ind2=" ">
<subfield code="f">Francois.Briard@cern.ch</subfield>
</datafield>
<datafield tag="916" ind1=" " ind2=" ">
<subfield code="s">n</subfield>
<subfield code="w">201541</subfield>
</datafield>
<datafield tag="923" ind1=" " ind2=" ">
<subfield code="p">CERN</subfield>
<subfield code="r">Briard Francois &lt;Francois.Briard@cern.ch&gt;</subfield>
</datafield>
<datafield tag="960" ind1=" " ind2=" ">
<subfield code="a">86</subfield>
</datafield>
<datafield tag="963" ind1=" " ind2=" ">
<subfield code="a">PUBLIC</subfield>
</datafield>
<datafield tag="963" ind1=" " ind2=" ">
<subfield code="b">VISIBLE</subfield>
</datafield>
<datafield tag="980" ind1=" " ind2=" ">
<subfield code="a">PHOTOLABCERN</subfield>
</datafield>
</record>
</collection>
"""
Let's create the splitter!
In [30]:
# PhotoSplitter lives in the CDS record_split extension; it is instantiated
# here with no arguments and reused by the cells below.
from cds.ext.record_split.photo import PhotoSplitter
splitter = PhotoSplitter()
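Now, let's split the photo record. We want to create one album record and five photo records (the MARCXML above lists five full-size images in its 856 fields; the remaining 856 entries are icon subformats of those images). The album should contain references to the photo records, and each photo record should contain a reference back to the album. A few notes: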
In [31]:
# split() receives the raw MARCXML string defined above; its result is
# indexed as records[0][0] and records[0][1] in the next cell.
records = splitter.split(CDS_PHOTO_RECORD)
In [32]:
# Take the first entry of the split result: position 0 is kept as the album
# record and position 1 as the image records.
# NOTE(review): presumably one entry per input record -- confirm against
# PhotoSplitter.split.
first_split = records[0]
album_record = first_split[0]
image_records = first_split[1]
Let's set up a pretty-printer so the records are easy to read:
In [33]:
# Shared pretty-printer for the display cells below; indent=2 sets the
# per-level indentation of nested containers.
import pprint
pp = pprint.PrettyPrinter(indent=2)
In [16]:
# Display the album record taken from the split result.
pp.pprint(album_record)
In [34]:
# Display all image records taken from the split result.
pp.pprint(image_records)
In [35]:
# Number of image records produced by the split. The parenthesised form
# prints the same value on Python 2 and is valid syntax on Python 3.
print(len(image_records))
In [36]:
# Inspect only the first image record.
pp.pprint(image_records[0])
In [ ]: