This notebook builds the classic "student" Bayesian network with pgmpy and runs inference queries on it.


In [143]:
# Create an empty Bayesian network model; nodes, edges and CPDs are added in later cells.
from pgmpy.models import BayesianModel
student_model = BayesianModel()

Add nodes and edges


In [144]:
# Register the five random variables of the student network as nodes.
student_model.add_nodes_from([
    'difficulty',
    'intelligence',
    'grade',
    'sat',
    'letter',
])

In [145]:
# Display the nodes currently registered in the model.
student_model.nodes()


Out[145]:
['grade', 'letter', 'sat', 'intelligence', 'difficulty']

In [146]:
# Each directed edge is written as a (parent, child) pair.
student_model.add_edges_from([
    ('difficulty', 'grade'),
    ('intelligence', 'grade'),
    ('intelligence', 'sat'),
    ('grade', 'letter'),
])

In [147]:
# Display the directed edges currently in the model.
student_model.edges()


Out[147]:
[('grade', 'letter'),
 ('intelligence', 'grade'),
 ('intelligence', 'sat'),
 ('difficulty', 'grade')]

In a Bayesian network, each node has an associated CPD (conditional probability distribution).


In [148]:
from pgmpy.factors import TabularCPD

In [149]:
# Nodes without parents: one row per state, a single column of prior probabilities.
cpd_difficulty = TabularCPD('difficulty', 2, [[0.6], [0.4]])
cpd_intelligence = TabularCPD('intelligence', 2, [[0.7], [0.3]])

# sat conditioned on intelligence: rows are sat states, one column per
# intelligence state; every column sums to 1.
cpd_sat = TabularCPD(
    'sat', 2,
    [[0.95, 0.2],
     [0.05, 0.8]],
    evidence=['intelligence'],
    evidence_card=[2],
)

# grade conditioned on intelligence and difficulty: rows are the three grade
# states, one column per (intelligence, difficulty) combination; every column
# sums to 1.
cpd_grade = TabularCPD(
    'grade', 3,
    [[0.3, 0.05, 0.9, 0.5],
     [0.4, 0.25, 0.08, 0.3],
     [0.3, 0.7, 0.02, 0.2]],
    evidence=['intelligence', 'difficulty'],
    evidence_card=[2, 2],
)

# letter conditioned on grade: rows are letter states, one column per grade state.
cpd_letter = TabularCPD(
    'letter', 2,
    [[0.1, 0.4, 0.99],
     [0.9, 0.6, 0.01]],
    evidence=['grade'],
    evidence_card=[3],
)

In [150]:
# Attach one CPD per node to the model.
student_model.add_cpds(cpd_difficulty, cpd_intelligence, cpd_sat, cpd_grade, cpd_letter)

In [151]:
# Print every CPD; the trailing comments give the meaning of each state index.
# (The previous bare `student_model.get_cpds()` call was dropped: it was not the
# cell's last expression, so its return value was never displayed.)
print(cpd_difficulty)    # 0: easy, 1: hard
print(cpd_intelligence)  # 0: low, 1: high
print(cpd_grade)         # 0: A, 1: B, 2: C
print(cpd_sat)           # 0: low, 1: high
print(cpd_letter)        # 0: weak, 1: strong


╒══════════════╤═════╕
│ difficulty_0 │ 0.6 │
├──────────────┼─────┤
│ difficulty_1 │ 0.4 │
╘══════════════╧═════╛
╒════════════════╤═════╕
│ intelligence_0 │ 0.7 │
├────────────────┼─────┤
│ intelligence_1 │ 0.3 │
╘════════════════╧═════╛
╒══════════════╤════════════════╤════════════════╤════════════════╤════════════════╕
│ intelligence │ intelligence_0 │ intelligence_0 │ intelligence_1 │ intelligence_1 │
├──────────────┼────────────────┼────────────────┼────────────────┼────────────────┤
│ difficulty   │ difficulty_0   │ difficulty_1   │ difficulty_0   │ difficulty_1   │
├──────────────┼────────────────┼────────────────┼────────────────┼────────────────┤
│ grade_0      │ 0.3            │ 0.05           │ 0.9            │ 0.5            │
├──────────────┼────────────────┼────────────────┼────────────────┼────────────────┤
│ grade_1      │ 0.4            │ 0.25           │ 0.08           │ 0.3            │
├──────────────┼────────────────┼────────────────┼────────────────┼────────────────┤
│ grade_2      │ 0.3            │ 0.7            │ 0.02           │ 0.2            │
╘══════════════╧════════════════╧════════════════╧════════════════╧════════════════╛
╒══════════════╤════════════════╤════════════════╕
│ intelligence │ intelligence_0 │ intelligence_1 │
├──────────────┼────────────────┼────────────────┤
│ sat_0        │ 0.95           │ 0.2            │
├──────────────┼────────────────┼────────────────┤
│ sat_1        │ 0.05           │ 0.8            │
╘══════════════╧════════════════╧════════════════╛
╒══════════╤═════════╤═════════╤═════════╕
│ grade    │ grade_0 │ grade_1 │ grade_2 │
├──────────┼─────────┼─────────┼─────────┤
│ letter_0 │ 0.1     │ 0.4     │ 0.99    │
├──────────┼─────────┼─────────┼─────────┤
│ letter_1 │ 0.9     │ 0.6     │ 0.01    │
╘══════════╧═════════╧═════════╧═════════╛

Check the consistency of the model and its associated CPDs, then list the independencies implied by the network.


In [152]:
# Validate the model together with its attached CPDs. The return value is not
# shown because only a cell's last expression is displayed.
student_model.check_model()
# Displayed as the cell output: the independence assertions of the network.
student_model.get_independencies()


Out[152]:
(grade _|_ difficulty, intelligence, sat | letter)
(grade _|_ letter, intelligence, difficulty | sat)
(grade _|_ letter, difficulty | intelligence)
(grade _|_ letter, intelligence, sat | difficulty)
(letter _|_ grade, intelligence, difficulty | sat)
(letter _|_ grade, difficulty | intelligence)
(letter _|_ grade, intelligence, sat | difficulty)
(sat _|_ intelligence, difficulty | grade)
(sat _|_ grade, intelligence, difficulty | letter)
(sat _|_ grade, letter, intelligence | difficulty)
(intelligence _|_ sat, difficulty | grade)
(intelligence _|_ sat, grade, difficulty | letter)
(intelligence _|_ grade, letter | sat)
(intelligence _|_ grade, letter, sat | difficulty)
(difficulty _|_ intelligence, sat | grade)
(difficulty _|_ grade, intelligence, sat | letter)
(difficulty _|_ grade, letter | sat)
(difficulty _|_ grade, letter | intelligence)

If influence can flow along a trail in the network, that trail is known as an active trail.


In [153]:
# With nothing observed: is there an active trail between difficulty and intelligence?
student_model.is_active_trail('difficulty', 'intelligence')


Out[153]:
False

In [154]:
# Same trail check, this time with 'grade' observed.
student_model.is_active_trail(
    'difficulty', 'intelligence', observed='grade'
)


Out[154]:
True

You can query the network as follows: query(variables, evidence=None, elimination_order=None)

  • variables: list : list of variables for which you want to compute the probability
  • evidence: dict : a dict of key–value pairs of the form {var: state_of_var_observed}; None if there is no evidence
  • elimination_order: list : order of variable eliminations; if none is provided, the order is computed automatically

In [155]:
from pgmpy.inference import VariableElimination

# Build a variable-elimination inference engine over the student model.
student_infer = VariableElimination(student_model)

# Query grade and letter with no evidence; the result is indexed by variable name.
probs = student_infer.query(variables=['grade', 'letter'])
print(probs['grade'])
print(probs['letter'])


╒═════════╤══════════════╕
│ grade   │   phi(grade) │
╞═════════╪══════════════╡
│ grade_0 │       0.4470 │
├─────────┼──────────────┤
│ grade_1 │       0.2714 │
├─────────┼──────────────┤
│ grade_2 │       0.2816 │
╘═════════╧══════════════╛
╒══════════╤═══════════════╕
│ letter   │   phi(letter) │
╞══════════╪═══════════════╡
│ letter_0 │        0.4320 │
├──────────┼───────────────┤
│ letter_1 │        0.5680 │
╘══════════╧═══════════════╛

Direct Causal Influence


In [156]:
# Baseline: grade distribution with no evidence about course difficulty or intelligence
# (reuses `probs` from the earlier no-evidence query).
print(probs['grade'])


╒═════════╤══════════════╕
│ grade   │   phi(grade) │
╞═════════╪══════════════╡
│ grade_0 │       0.4470 │
├─────────┼──────────────┤
│ grade_1 │       0.2714 │
├─────────┼──────────────┤
│ grade_2 │       0.2816 │
╘═════════╧══════════════╛

In [157]:
# Grade distribution given that the course is hard (difficulty = 1).
prob_grade_hard = student_infer.query(
    variables=['grade'],
    evidence={'difficulty': 1},
)
print(prob_grade_hard['grade'])


╒═════════╤══════════════╕
│ grade   │   phi(grade) │
╞═════════╪══════════════╡
│ grade_0 │       0.7800 │
├─────────┼──────────────┤
│ grade_1 │       0.1460 │
├─────────┼──────────────┤
│ grade_2 │       0.0740 │
╘═════════╧══════════════╛

In [158]:
# Grade distribution knowing the course is easy (difficulty = 0) and
# intelligence is high (intelligence = 1).
prob_grade_easy_smart = student_infer.query(['grade'], {'difficulty':0, 'intelligence':1})
print(prob_grade_easy_smart['grade'])


╒═════════╤══════════════╕
│ grade   │   phi(grade) │
╞═════════╪══════════════╡
│ grade_0 │       0.0500 │
├─────────┼──────────────┤
│ grade_1 │       0.2500 │
├─────────┼──────────────┤
│ grade_2 │       0.7000 │
╘═════════╧══════════════╛

Indirect Causal Influence


In [159]:
# Baseline: letter distribution with no evidence
# (reuses `probs` from the earlier no-evidence query).
print(probs['letter'])


╒══════════╤═══════════════╕
│ letter   │   phi(letter) │
╞══════════╪═══════════════╡
│ letter_0 │        0.4320 │
├──────────┼───────────────┤
│ letter_1 │        0.5680 │
╘══════════╧═══════════════╛

In [160]:
# Letter distribution given that the course is hard (difficulty = 1).
prob_letter_hard = student_infer.query(
    variables=['letter'],
    evidence={'difficulty': 1},
)
print(prob_letter_hard['letter'])


╒══════════╤═══════════════╕
│ letter   │   phi(letter) │
╞══════════╪═══════════════╡
│ letter_0 │        0.2097 │
├──────────┼───────────────┤
│ letter_1 │        0.7903 │
╘══════════╧═══════════════╛

Direct Evidential Influence


In [ ]: