# Understanding ICSD data

In [9]:

from __future__ import division, print_function

# import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pymatgen.core import Element, Composition

%matplotlib inline

In [4]:

import csv

# Load every row of the ICSD ternaries export into memory. Column index 2 of
# each row is the formula string that gets parsed by pymatgen below.
with open("../../ICSD/icsd-ternaries.csv", "r") as f:
    # The reader has to be created from the open handle before rows can be
    # iterated; without it `csv_reader` is an undefined name.
    csv_reader = csv.reader(f)
    data = [line for line in csv_reader]

formulas = [line[2] for line in data]
# Use a loop name other than `f` so the file handle above is not shadowed
# (on Python 2 the comprehension variable leaks into the cell namespace).
compositions = [Composition(formula) for formula in formulas]

## Structure Types

Structure types are assigned by hand by ICSD curators.

In [5]:

# How many ternaries have been assigned a structure type?
# Column index 3 holds the structure type; an empty string means "unassigned".
# Compare by value (`!=`): `is not ''` tests object identity, which only works
# by accident of string interning and raises a SyntaxWarning on Python >= 3.8.
structure_types = [line[3] for line in data if line[3] != '']
unique_structure_types = set(structure_types)
print("There are {} ICSD ternaries entries.".format(len(data)))
print("Structure types are assigned for {} entries.".format(len(structure_types)))
print("There are {} unique structure types.".format(len(unique_structure_types)))

There are 68064 ICSD ternaries entries.
Structure types are assigned for 59117 entries.
There are 4201 unique structure types.

Filter for stoichiometric compounds only:

In [6]:

def is_stoichiometric(composition):
    """Return whether every amount in ``composition`` is a whole number.

    Parameters
    ----------
    composition : mapping
        Any object exposing ``values()`` that yields numeric amounts
        (the notebook passes pymatgen ``Composition`` objects).

    Returns
    -------
    numpy.bool_
        True when all amounts have a zero remainder modulo 1. An empty
        mapping yields True.
    """
    # Materialise the values first: on Python 3 ``values()`` is a view object
    # that NumPy cannot broadcast over, so ``np.mod`` would raise a TypeError.
    amounts = np.array(list(composition.values()), dtype=float)
    return np.all(np.mod(amounts, 1) == 0)

In [7]:

# Keep the compositions whose amounts are all integers, then collapse
# duplicates by comparing their `formula` strings.
stoichiometric_compositions = list(filter(is_stoichiometric, compositions))
print("Number of stoichiometric compositions: {}".format(
    len(stoichiometric_compositions)))

ternaries = {comp.formula for comp in stoichiometric_compositions}
print("Number of unique stoichiometric compositions: {}".format(
    len(ternaries)))

Number of stoichiometric compositions: 49893
Number of unique stoichiometric compositions: 26255

In [10]:

# Row-level version of the stoichiometric filter: keep the full CSV rows
# (not just the parsed compositions) whose formula in column 2 passes.
data_stoichiometric = [
    row
    for row in data
    if is_stoichiometric(Composition(row[2]))
]

In [11]:

from collections import Counter

# Tally how many stoichiometric rows carry each structure type (column 3),
# skipping rows where it is blank. Compare by value: `is not ''` is an
# identity test that only works through string interning.
struct_type_freq = Counter(x[3] for x in data_stoichiometric if x[3] != '')

In [15]:

# Rank-frequency plot on log-log axes: x is the rank of a structure type
# (1 = most common), y is the number of rows assigned to it.
frequencies_desc = sorted(struct_type_freq.values(), reverse=True)
ranks = range(1, len(frequencies_desc) + 1)

plt.loglog(ranks, frequencies_desc, 'o')
plt.xlabel("Structure Type")
plt.ylabel("Structure Type Frequency")
plt.title("Distribution of Frequencies of Structure Types")

In [55]:

# Display every (structure type, count) pair, most frequent first.
struct_type_freq.most_common()

In [16]:

# Collapse duplicate rows: a "phase" is the triple
# (spacegroup, normalised formula, structure type) taken from columns 1-3.
uniq_phases = set()
for row in data_stoichiometric:
    spacegroup, formula, struct_type = row[1:4]
    # Use Composition(...).formula as the formula key, matching how the
    # unique stoichiometric compositions are counted elsewhere in the notebook.
    phase = (spacegroup, Composition(formula).formula, struct_type)
    # Record the phase; without this the set stays empty and every
    # downstream count and plot is empty as well.
    uniq_phases.add(phase)

In [18]:

# Count unique phases per structure type (third item of each phase tuple),
# ignoring phases with no assigned type. Compare by value rather than with
# `is not ''`, which is an identity test.
uniq_struct_type_freq = Counter(x[2] for x in uniq_phases if x[2] != '')
# List of (structure type, count) pairs, most frequent first.
uniq_struct_type_freq_sorted = sorted(uniq_struct_type_freq.items(), key = lambda x: x[1], reverse = True)

In [20]:

# Same rank-frequency plot as before, but counting unique phases rather
# than raw rows. The list is already sorted in descending order.
uniq_frequencies = [count for _, count in uniq_struct_type_freq_sorted]
uniq_ranks = range(1, len(uniq_struct_type_freq_sorted) + 1)

plt.loglog(uniq_ranks, uniq_frequencies, 'o')
plt.xlabel("Structure Type")
plt.ylabel("Structure Type Frequency")
plt.title("Distribution of Frequencies of Structure Types")

In [21]:

# Display the full (structure type, unique-phase count) list, most frequent first.
# NOTE(review): this dumps every entry; consider slicing (e.g. [:20]) to keep the output short.
uniq_struct_type_freq_sorted

In [22]:

# For each of the ten most common structure types, print its unique-phase
# count followed by 20 example formulas laid out as four rows of five.
row_template = "  ".join(["{:14}"] * 5)

for struct_type, freq in uniq_struct_type_freq_sorted[:10]:
    print("{} : {}".format(struct_type, freq))
    example_formulas = [
        phase[1] for phase in uniq_phases if phase[2] == struct_type
    ]
    # Each row needs exactly five formulas; a structure type with fewer
    # than 20 phases would make format() raise an IndexError.
    for start in (0, 5, 10, 15):
        print(row_template.format(*example_formulas[start:start + 5]))

ThCr2Si2 : 676
Sm1 Ni2 Sb2     Yb1 Si2 Ag2     Pr1 Fe2 Si2     Np1 Si2 Au2     Tb1 Co2 B2
Ce1 Al3 Cu1     Nd1 P2 Pd2      Yb1 Co2 Ge2     Np1 Ni2 Ge2     U1 Si2 Ir2
Gd1 Ni2 Sb2     Ho1 Si2 Pt2     Sm1 Fe2 Si2     Dy1 Mn2 Ge2     Er1 Mn2 Si2
Pr1 Co2 B2      La1 Mn2 Ge2     Ba1 Ag2 Ge2     Ba1 Al2 Si2     Sm1 Fe2 Ge2
TiNiSi-MgSrSi : 586
Ti1 As1 Rh1     Gd1 Zn1 Rh1     Pr1 Ge1 Rh1     Hf1 Si1 Rh1     Er1 Ga1 Pd1
Gd1 Ge1 Rh1     Nb1 Fe1 Si1     Yb1 Ni1 Sn1     Sr1 Li1 Bi1     Yb1 Mg1 Pd1
Ti1 Co1 P1      U1 Co1 Ge1      Gd1 Ge1 Ir1     Tm1 Ga1 Pt1     Tb1 Ni1 Ge1
Y1 Si1 Pt1      Ti1 Si1 Ru1     Ho1 Ge1 Ru1     Sm1 Ge1 Pt1     Co1 Mo1 P1
ZrNiAl-Fe2P : 511
Gd1 Zn1 Pd1     Dy1 Mg1 Pd1     Cr1 Ni1 P1      Ho6 Mn1 Te2     Gd1 Pd1 Pb1
U1 Al1 Rh1      Sm1 Cd1 Cu1     Nb1 Fe1 B1      Ce1 Cd1 Pt1     Pu1 Al1 Ni1
Dy1 Sn1 Pt1     Ho1 Al1 Cu1     Na1 Sr1 P1      La1 Mg1 Tl1     Th1 Al1 Pd1
Ho1 Mg1 In1     Gd1 In1 Rh1     Yb1 Cd1 Ge1     Zr1 Ga1 Pt1     Tm1 Ag1 Sn1
Perovskite-CaTiO3 : 439
Eu1 D3 Pd1      Th1 Ta1 N3      Cs1 I1 O3       Rb1 U1 O3       La1 B1 Rh3
Ca1 Zr1 O3      Ba1 Nb1 O3      Y3 Tl1 C1       Ti3 In1 N1      Ho3 Tl1 C1
Gd3 In1 N1      K3 Cl1 O1       Nd3 Tl1 C1      Ca3 Au1 N1      Sc3 In1 N1
Ho1 B1 Rh3      Ce3 Tl1 C1      Ta1 Sn1 O3      Ti3 In1 C1      Ca3 Sn1 O1
Heusler-AlCu2Mn : 343
Sc1 Al1 Au2     Zr1 In1 Pt2     Li2 Mg1 Sb1     Ce4 Mg9 Fe3     Ce1 In1 Cu2
Sc1 In1 Pt2     Li2 Mg1 In1     Hf1 Co2 Sn1     Dy1 In1 Au2     Mn1 Ga1 Fe2
Li2 Sn1 Pt1     Ho1 In1 Au2     Li2 Cd1 Sb1     V1 Co2 Si1      As1 Pd6 Pb1
Y1 Sb1 Pd2      Mn1 Ga1 Ir2     Cu1 Sn1 Rh2     Li2 Cd1 Ge1     Sc2 Mn1 Si1
Mn12Th : 339
Ce1 In7 Cu5     Gd1 V2 Fe10     Tm1 Al6 Cu6     Er1 Mn4 Al8     Gd1 Mn6 Al6
Ho1 Al8 Cr4     Gd1 Fe10 Si2    Ho1 V2 Fe10     Tb1 Fe11 Mo1    Yb1 Mn6 Al6
Ce3 Fe20 Mo3    Y1 Mn6 Co6      Sm1 Al8 Fe4     Ce1 Al8 Fe4     Sm1 Ti1 Fe11
La1 Al8 Cr4     Tb1 Si2 Ni10    Yb1 Al6 Cu6     Lu1 Al8 Cu4     Sm1 V2 Fe10
PbClF/Cu2Sb : 334
Tb1 S1 F1       Na1 Zn1 P1      Ba1 Mn1 Ge1     La1 Co1 Ge1     U1 Ge1 S1
K1 Li1 S1       Tb1 Co1 Si1     Mg1 Co1 Ge1     Mn1 Cr1 As1     Dy1 Fe1 Si1
Ba1 H1 Cl1      Bi1 I1 O1       U1 Te1 As1      Pr1 Sb1 Te1     Rb1 Ca1 As1
Cs1 Mn1 Sb1     Ca1 Mn1 Ge1     Na1 Zn1 P1      Sr1 Cl1 F1      Ac1 Br1 O1
Perovskite-GdFeO3 : 314
La1 Ga1 O3      Cd1 Tc1 O3      Tb1 Cr1 O3      Lu1 V1 O3       Ba1 Pu1 O3
K1 Ca1 F3       Ho1 Ni1 O3      Na1 Fe1 F3      Y1 Mn1 O3       La1 Sc1 O3
La1 Tm1 O3      Y1 Fe1 O3       Ba1 Ce1 O3      Nd1 Rh1 O3      Tb1 Rh1 O3
Sr1 Ru1 O3      Dy1 Co1 O3      Sm1 Co1 O3      Lu1 Rh1 O3      Ca1 Nb1 O3
Spinel-Al2MgO4 : 305
Tm2 Cd1 Se4     Li1 V2 O4       Mn1 In2 S4      Mn3 Fe3 O8      Cr1 Fe2 O4
Zn1 In2 O4      In2 Fe1 S4      Cd1 In2 O4      Sc2 Mn1 Se4     Zn1 Cr2 Se4
Yb2 Cd1 S4      Fe2 Cu1 O4      Mg1 Rh2 O4      Sc2 Cd1 S4      Li2 Cd1 Cl4
Mg1 Fe2 O4      Ho2 Cd1 S4      V1 Zn2 O4       Lu2 Mn1 Se4     Zn1 Ga2 O4
Delafossite-NaCrS2 : 278
Na1 Y1 S2       Cs1 Lu1 S2      Cu1 Rh1 O2      Na1 Dy1 S2      K1 Cr1 O2
Eu1 Cu1 O2      Tl1 Bi1 Se2     Cs1 Nd1 S2      Eu2 P1 I1       Sr2 Br1 N1
Mg2 N1 Cl1      Rb1 Tl1 O2      K1 Er1 S2       Sm1 Tl1 Te2     Rb1 Sm1 O2
Na1 Sm1 Se2     Rb1 Gd1 S2      Li2 Mo1 O3      Y1 Tl1 Te2      Li1 Cr1 O2

## Long Formulas

In [6]:

# What are the longest formulas?
# Order the raw formula strings by character count, longest first, and
# print the top 20.
formulas_by_length = sorted(formulas, key=len, reverse=True)
for formula in formulas_by_length[:20]:
    print(formula)

((Si (C H3)2)2 (Si (Si (C H3)3)2) (Si (C H3) (Si (C H3)3)) (Si (C H3) (Si (C H3)2 (Si (Si (C H3)3)3))))
((Si (C H3)2)2 (Si (Si (C H3)3)2) (Si (C H3) (Si (C H3)3)) (Si (C H3) (Si (Si (C H3)3)3)))
(((C H3) ((C H3)3 Si)2 Si (C H3)2 Si)2 Si (C H3) Si (C H3)2 Si (Si (C H3)3)2 Si (C H3)2)2
((((C H3)3 Si)3 Si (C H3)2 Si)2 Si (C H3) Si (C H3)2 Si (Si (C H3)3)2 Si (C H3)2)2
((Si (C H3)2)2 (Si (Si (C H3)3)2)2 (Si (C H3) (Si (C H3)2 (Si (Si (C H3)3)3))))
((Si (C H3)2)3 (Si (Si (C H3)3)2) (Si (C H3) (Si (C H3)2 (Si (Si (C H3)3)3))))
((Si (C H3)2)3 (Si (Si (C H3)3)2) (Si (C H3) (Si (Si (C H3)3)3)))
((Si (C H3)2)2 (Si (Si (C H3)3)2)2 (Si (C H3) (Si (C H3)3)))
((Si (C H3)2)4 (Si (C H3) (Si (C H3)3)) (Si (Si (C H3)3)2))
Ba8 (Si2.63 Al3.28) (Si11.3 Al4.32) (Si16.9 Al6.48)
(Mo138 O410 (O H)20 (H2 O)46) (H3 O)40 (H2 O)108
(Mo138 O410 (O H)20 (H2 O)38) (H3 O)40 (H2 O)78
(Mo138 O406 (O H)16 (H2 O)46) (H3 O)28 (H2 O)88
(Mo142 O400 (O H)52 (H2 O)38) (H3 O)28 (H2 O)34
(Mo148 O436 (O H)15 (H2 O)58) (H3 O)27 (H2 O)86
(Mo150 O451 (O H)5 (H2 O)61) (H3 O)35 (H2 O)139
Al1.5 (Al0.805 Nb0.195) (Al1.5 Ir1.5) Ir0.5 Nb3
(Li0.508 Fe0.192) (Li1.192 Fe0.808) (Fe2 O4)
(Si (C H3)3)4 ((C H3)11 Si6)2 (Si4 (C H3)4)
((Si (C H3)3)3 (C H3)8 Si6)2  (Si2 (C H3)4)

Two key insights:

1. Just because there are three elements in the formula doesn't mean the compound is fundamentally a ternary. There are doped binaries that masquerade as ternaries, and there are doped ternaries that masquerade as quaternaries or even quinaries. Because I only asked for compositions with three elements, those doped ternaries are missing from this dataset.
2. ICSD has strategically placed parentheses in the formulas which give hints as to logical groupings. For example: (Ho1.3 Ti0.7) ((Ti0.64 Ho1.36) O6.67) is in fact in the pyrochlore family, A2B2O7.

## Intermetallics

How many intermetallics does the ICSD contain? Here a compound is counted as an intermetallic only if every element in its formula is a transition metal.

In [8]:

def filter_in_set(compound, universe):
    """Return True when every element of ``compound`` belongs to ``universe``.

    ``compound`` is passed to ``Composition``; the items produced by
    iterating the result are each tested for membership in ``universe``.
    Returns False as soon as one item is not found.
    """
    for element in Composition(compound):
        if element not in universe:
            return False
    return True

# Every Element flagged as a transition metal.
transition_metals = list(filter(lambda el: el.is_transition_metal, Element))

# Formulas built exclusively from transition metals.
tm_ternaries = [
    formula
    for formula in formulas
    if filter_in_set(formula, transition_metals)
]
print("Number of intermetallics:", len(tm_ternaries))

Number of intermetallics: 1021

``````
# Deduplicate the transition-metal-only formulas by their normalised
# `Composition(...).formula` string.
unique_tm_ternaries = {Composition(formula).formula for formula in tm_ternaries}
print("Number of unique intermetallics:", len(unique_tm_ternaries))

``````
``````
# Display the set of unique transition-metal-only formulas.
# NOTE(review): this prints the whole set; consider showing a sorted slice instead.
unique_tm_ternaries

