In [1]:
!pip install pgmpy
In [78]:
from pgmpy.factors import TabularCPD
In [79]:
# Declare a CPD for P(G | I, D).
# Columns run over the evidence combinations (I=0,D=0), (I=0,D=1), (I=1,D=0), (I=1,D=1);
# rows are the three grade states G=0, G=1, G=2.
grade_cpd = TabularCPD(variable="G",
                       variable_card=3,
                       values=[[0.3, 0.05, 0.9, 0.5],
                               [0.4, 0.25, 0.08, 0.3],
                               [0.3, 0.7, 0.02, 0.2]],
                       evidence=["I", "D"],
                       evidence_card=[2, 2])
grade_cpd
Out[79]:
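The CPD object can be inspected directly to confirm that the table was laid out as intended. A minimal sketch, assuming the pgmpy version imported above (the exact printed layout may differ between releases):

# Render the CPD as a table and look at its variables and raw probability array
print(grade_cpd)
print(grade_cpd.variables)
print(grade_cpd.values)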
In [80]:
# Declare the student model in pgmpy
from pgmpy.models import BayesianModel
from pgmpy.factors import TabularCPD

# Define nodes and edges
student_model = BayesianModel([("D", "G"),
                               ("I", "G"),
                               ("G", "L"),
                               ("I", "S")])

# Define CPDs
grade_cpd = TabularCPD(
    variable="G",
    variable_card=3,
    values=[[0.3, 0.05, 0.9, 0.5],
            [0.4, 0.25, 0.08, 0.3],
            [0.3, 0.7, 0.02, 0.2]],
    evidence=["I", "D"],
    evidence_card=[2, 2])
difficulty_cpd = TabularCPD(
    variable="D",
    variable_card=2,
    values=[[0.6, 0.4]])
intel_cpd = TabularCPD(
    variable="I",
    variable_card=2,
    values=[[0.7, 0.3]])
letter_cpd = TabularCPD(
    variable="L",
    variable_card=2,
    values=[[0.1, 0.4, 0.99],
            [0.9, 0.6, 0.01]],
    evidence=["G"],
    evidence_card=[3])
sat_cpd = TabularCPD(
    variable="S",
    variable_card=2,
    values=[[0.95, 0.2],
            [0.05, 0.8]],
    evidence=["I"],
    evidence_card=[2])

# Add CPDs to nodes and edges
student_model.add_cpds(grade_cpd, difficulty_cpd,
                       intel_cpd, letter_cpd,
                       sat_cpd)
grade_cpd
Out[80]:
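Before querying the network it is worth confirming that the CPDs attached above are consistent with the graph structure. A short sketch using pgmpy's built-in check:

# check_model() verifies that every node has a CPD, that each CPD sums to one,
# and that the CPD evidence matches the node's parents in the graph
print(student_model.check_model())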
In [81]:
student_model.get_cpds('G')
Out[81]:
In [85]:
student_model.get_parents('G')
Out[85]:
In [56]:
from pgmpy.inference import VariableElimination
student_infer = VariableElimination(student_model)
prob_G = student_infer.query(variables=['G'])
print(prob_G['G'])
In [57]:
# Grade distribution for a high-intelligence student (I=1) in an easy course (D=0)
prob_G = student_infer.query(variables=['G'], evidence={'I': 1, 'D': 0})
print(prob_G['G'])
In [58]:
# Grade distribution for a low-intelligence student (I=0) in a difficult course (D=1)
prob_G = student_infer.query(variables=['G'], evidence={'I': 0, 'D': 1})
print(prob_G['G'])
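Besides full posterior distributions, VariableElimination can also return the single most likely assignment via a MAP query. A brief sketch, reusing the student_infer object defined above:

# Most probable grade for a low-intelligence student (I=0) in an easy course (D=0)
print(student_infer.map_query(variables=['G'], evidence={'I': 0, 'D': 0}))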
In [67]:
# Train the model from data
from pgmpy.models import BayesianModel
import pandas as pd
import numpy as np

# Assuming that each variable has only two states,
# we can generate some random binary data.
raw_data = np.random.randint(low=0, high=2, size=(1000, 5))
data = pd.DataFrame(raw_data, columns=["D", "I", "G", "L", "S"])
print(data[: int(data.shape[0] * 0.75)])
data_train = data[: int(data.shape[0] * 0.75)]
student_model = BayesianModel([("D", "G"), ("I", "G"), ("I", "S"), ("G", "L")])
student_model.fit(data_train)
student_model.get_cpds('D')
Out[67]:
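fit with no estimator argument uses maximum likelihood estimation for the CPDs. The estimator can also be chosen explicitly; for small data sets a Bayesian estimator with pseudo-counts avoids zero probabilities. A hedged sketch, assuming BayesianEstimator is available in the installed pgmpy release (student_model_bayes is just a name for this sketch, kept separate so the MLE-fitted model used below is untouched):

from pgmpy.estimators import BayesianEstimator

# Same structure, fitted with Bayesian parameter estimation and a BDeu prior
student_model_bayes = BayesianModel([("D", "G"), ("I", "G"), ("I", "S"), ("G", "L")])
student_model_bayes.fit(data_train, estimator=BayesianEstimator, prior_type="BDeu")
print(student_model_bayes.get_cpds('G'))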
In [60]:
student_model.get_cpds('L')
Out[60]:
In [25]:
student_model.active_trail_nodes('D')
Out[25]:
In [26]:
student_model.local_independencies('G')
Out[26]:
In [27]:
student_model.get_independencies()
Out[27]:
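active_trail_nodes also accepts an observed argument, which makes it easy to see how conditioning changes the active trails (for example, observing G activates the v-structure D -> G <- I). A small sketch, assuming this keyword is supported by the installed version:

# With G observed, influence can flow between D and I through the collider at G
print(student_model.active_trail_nodes('D', observed='G'))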
In [77]:
# Hold out the last 25% of the rows, drop the G column, and predict it from the model.
# The .copy() avoids a pandas SettingWithCopyWarning from the in-place drop on a slice.
data_test = data[int(0.75 * data.shape[0]):].copy()
data_test.drop('G', axis=1, inplace=True)
student_model.predict(data_test)
Out[77]:
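predict fills in only the most likely state for the dropped column. If the full posterior over G is wanted for each test row instead, pgmpy also provides predict_probability (a sketch, assuming the method exists in this release):

# Per-row probabilities for each state of G rather than a single hard prediction
print(student_model.predict_probability(data_test))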