In [1]:
import xml.etree.ElementTree as et
import pandas as pd

In this example, we are parsing an XML file full of information on country names and ISO 3166 codes. Inside the file's root node there are elements that look like this:

    <iso_3166_entry
            alpha_2_code="AL"
            alpha_3_code="ALB"
            numeric_code="008"
            name="Albania"
            official_name="Republic of Albania" />

In [2]:
# Location of the ISO 3166 country-code XML document to load.
xml_path = '/data/iso_3166.xml'

# Parse the document once; keep both the tree and its root element available
# to later cells.
tree = et.parse(xml_path)
root = tree.getroot()

# Display the root element as the cell's output.
root


Out[2]:
<Element 'iso_3166_entries' at 0x7fa8bd7e8f98>

In [3]:
# Collect every <iso_3166_entry> element that is a direct child of the root.
entries = root.findall("iso_3166_entry")

# Turn each element into a five-item row:
# alpha-2 code, alpha-3 code, numeric code, name, official name.
data = [
    [
        node.attrib["alpha_2_code"],
        node.attrib["alpha_3_code"],
        node.attrib["numeric_code"],
        node.attrib["name"],
        # Entries that carry no "official_name" attribute reuse the plain name.
        (
            node.attrib["official_name"]
            if "official_name" in node.attrib
            else node.attrib["name"]
        ),
    ]
    for node in entries
]

# Show the rows as a DataFrame (columns are left with default integer labels).
pd.DataFrame(data)


Out[3]:
0 1 2 3 4
0 AF AFG 004 Afghanistan Islamic Republic of Afghanistan
1 AX ALA 248 Åland Islands Åland Islands
2 AL ALB 008 Albania Republic of Albania
3 DZ DZA 012 Algeria People's Democratic Republic of Algeria
4 AS ASM 016 American Samoa American Samoa
5 AD AND 020 Andorra Principality of Andorra
6 AO AGO 024 Angola Republic of Angola
7 AI AIA 660 Anguilla Anguilla
8 AQ ATA 010 Antarctica Antarctica
9 AG ATG 028 Antigua and Barbuda Antigua and Barbuda
10 AR ARG 032 Argentina Argentine Republic
11 AM ARM 051 Armenia Republic of Armenia
12 AW ABW 533 Aruba Aruba
13 AU AUS 036 Australia Australia
14 AT AUT 040 Austria Republic of Austria
15 AZ AZE 031 Azerbaijan Republic of Azerbaijan
16 BS BHS 044 Bahamas Commonwealth of the Bahamas
17 BH BHR 048 Bahrain Kingdom of Bahrain
18 BD BGD 050 Bangladesh People's Republic of Bangladesh
19 BB BRB 052 Barbados Barbados
20 BY BLR 112 Belarus Republic of Belarus
21 BE BEL 056 Belgium Kingdom of Belgium
22 BZ BLZ 084 Belize Belize
23 BJ BEN 204 Benin Republic of Benin
24 BM BMU 060 Bermuda Bermuda
25 BT BTN 064 Bhutan Kingdom of Bhutan
26 BO BOL 068 Bolivia, Plurinational State of Plurinational State of Bolivia
27 BQ BES 535 Bonaire, Sint Eustatius and Saba Bonaire, Sint Eustatius and Saba
28 BA BIH 070 Bosnia and Herzegovina Republic of Bosnia and Herzegovina
29 BW BWA 072 Botswana Republic of Botswana
... ... ... ... ... ...
219 TZ TZA 834 Tanzania, United Republic of United Republic of Tanzania
220 TH THA 764 Thailand Kingdom of Thailand
221 TL TLS 626 Timor-Leste Democratic Republic of Timor-Leste
222 TG TGO 768 Togo Togolese Republic
223 TK TKL 772 Tokelau Tokelau
224 TO TON 776 Tonga Kingdom of Tonga
225 TT TTO 780 Trinidad and Tobago Republic of Trinidad and Tobago
226 TN TUN 788 Tunisia Republic of Tunisia
227 TR TUR 792 Turkey Republic of Turkey
228 TM TKM 795 Turkmenistan Turkmenistan
229 TC TCA 796 Turks and Caicos Islands Turks and Caicos Islands
230 TV TUV 798 Tuvalu Tuvalu
231 UG UGA 800 Uganda Republic of Uganda
232 UA UKR 804 Ukraine Ukraine
233 AE ARE 784 United Arab Emirates United Arab Emirates
234 GB GBR 826 United Kingdom United Kingdom of Great Britain and Northern I...
235 US USA 840 United States United States of America
236 UM UMI 581 United States Minor Outlying Islands United States Minor Outlying Islands
237 UY URY 858 Uruguay Eastern Republic of Uruguay
238 UZ UZB 860 Uzbekistan Republic of Uzbekistan
239 VU VUT 548 Vanuatu Republic of Vanuatu
240 VE VEN 862 Venezuela, Bolivarian Republic of Bolivarian Republic of Venezuela
241 VN VNM 704 Viet Nam Socialist Republic of Viet Nam
242 VG VGB 092 Virgin Islands, British British Virgin Islands
243 VI VIR 850 Virgin Islands, U.S. Virgin Islands of the United States
244 WF WLF 876 Wallis and Futuna Wallis and Futuna
245 EH ESH 732 Western Sahara Western Sahara
246 YE YEM 887 Yemen Republic of Yemen
247 ZM ZMB 894 Zambia Republic of Zambia
248 ZW ZWE 716 Zimbabwe Republic of Zimbabwe

249 rows × 5 columns


In [ ]: