In [0]:
#Description: This is a 'self learning' chatbot program
In [0]:
#Install the package NLTK.
#The %pip magic installs into the environment of the running kernel, and does not
#depend on IPython's automagic setting the way a bare "pip install" line does.
%pip install nltk
In [0]:
#Install the package newspaper3k (provides the "newspaper" module imported below).
#The %pip magic installs into the environment of the running kernel, and does not
#depend on IPython's automagic setting the way a bare "pip install" line does.
%pip install newspaper3k
In [0]:
#Import Libraries
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings
In [0]:
#Ignore any warning messages.
#No category or module is given, so this filter applies to every warning raised
#after this cell has run.
warnings.filterwarnings('ignore')
In [0]:
#Download the packages from NLTK.
#'punkt' and 'punkt_tab' both hold the tokenizer models used by nltk.sent_tokenize
#and nltk.word_tokenize: newer NLTK releases look for 'punkt_tab', older ones look
#for 'punkt', so both are requested to keep the notebook working on either.
for nltk_resource in ('punkt', 'punkt_tab', 'wordnet'):
    nltk.download(nltk_resource, quiet=True)
In [0]:
#Get the article URL (kept in one constant so the knowledge source is easy to swap)
ARTICLE_URL = 'https://www.mayoclinic.org/diseases-conditions/coronavirus/symptoms-causes/syc-20479963'
#Download and parse the article; its plain text becomes the chatbot's corpus
article = Article(ARTICLE_URL)
article.download()
article.parse()
article.nlp()
corpus = article.text
#Fail early with a clear message when no text was extracted: with an empty corpus
#there are no sentences to answer from and response() cannot pick a best match.
if not corpus.strip():
    raise ValueError("No article text could be extracted from " + ARTICLE_URL)
#Print the corpus/text
print(corpus)
In [0]:
#Tokenization: split the article text into sentences.
#'text' is a second name for the corpus string; a later cell passes it to LemNormalize.
text = corpus
sent_tokens = nltk.sent_tokenize(text) #List of sentence strings; response() picks its answers from this list
#Print the list of sentences
print(sent_tokens)
In [0]:
#Create a translation table (key:value pairs of code point -> None) so that
#str.translate() deletes every punctuation character
remove_punct_dict = dict.fromkeys(map(ord, string.punctuation))
#Print the punctuations
print(string.punctuation)
#Print the dictionary
print(remove_punct_dict)
In [0]:
#Create a function to return a list of lower case words after removing punctuations
def LemNormalize(text):
    """Normalize a string into a list of word tokens.

    The text is lower-cased, every character listed in remove_punct_dict is
    deleted, and the result is split with nltk.word_tokenize.
    Note: despite the function's name, no lemmatization step is applied.

    Parameters:
        text (str): the raw text to normalize.

    Returns:
        list: the word tokens produced by nltk.word_tokenize.
    """
    lowered = text.lower()
    without_punctuation = lowered.translate(remove_punct_dict)
    return nltk.word_tokenize(without_punctuation)
#Print the tokenized text: the whole article as lower-case word tokens with punctuation removed
print(LemNormalize(text))
In [0]:
#Keyword Matching
#Greeting Inputs
GREETING_INPUTS = ["hi","hello","hola","greetings","wassup","hey"]
#Greeting Responses back to user
GREETING_RESPONSES = ["howdy","hi","hello","hey","whatsup","hey there"]

#Function to return a random greeting response to a user greeting
def greeting(sentence):
    """Return a random greeting if the sentence contains a greeting word.

    The sentence is split on whitespace and each word is compared
    case-insensitively against GREETING_INPUTS. Punctuation attached to a word
    is ignored, so "Hello!" and "hi," are recognised as greetings.

    Parameters:
        sentence (str): the text typed by the user.

    Returns:
        str or None: a random entry of GREETING_RESPONSES when a greeting word
        is found, otherwise None.
    """
    for word in sentence.split():
        #Strip leading/trailing punctuation before the lookup
        if word.strip(string.punctuation).lower() in GREETING_INPUTS:
            return random.choice(GREETING_RESPONSES)
    #No greeting word found
    return None
In [0]:
#Generate the response
def response(user_response):
    """Return the corpus sentence that is most similar to the user's query.

    The sentences in sent_tokens plus the lower-cased query are converted to
    TF-IDF vectors (tokenized with LemNormalize, English stop words removed).
    The query vector is then compared with every corpus sentence using cosine
    similarity and the best-scoring sentence is returned.

    The shared sent_tokens list is never modified: the query is added to a
    local copy only, so an error during vectorization cannot leave the query
    behind in the corpus.

    Parameters:
        user_response (str): the user's query.

    Returns:
        str: the best matching sentence from sent_tokens (the first one when
        several share the top score), or an apology message when the corpus is
        empty or no sentence has any similarity with the query.
    """
    no_match_message = "I apologise, I don't understand, kindly rephrase your questions."
    #Make the response lower case
    user_response = user_response.lower()
    #Without any corpus sentence there is nothing to match against
    if not sent_tokens:
        return no_match_message
    #Vectorize the corpus together with the query (query is the last document)
    documents = sent_tokens + [user_response]
    #Create a TfidfVectorizer Object
    TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words='english')
    #Convert the text to a matrix of TF-IDF features
    tfidf = TfidfVec.fit_transform(documents)
    #Similarity of the query (last row) with the corpus rows only, so the query
    #can never be selected as its own best match
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    #Get the index of the most similar sentence to the user response
    idx = scores.argmax()
    #If the best score is 0 then no sentence shares a term with the users response
    if scores[idx] == 0:
        return no_match_message
    return sent_tokens[idx]
In [0]:
#Chat loop: read user input until the user says bye or thanks
flag = True
print("Stella: I am an ALIFE Air Health Bot. I will help you understand all you need about COVID 19. You may exit anytime, just type Bye!")
while flag:
    #Strip surrounding whitespace so that e.g. "bye " still ends the chat
    user_response = input().strip().lower()
    if user_response == 'bye':
        flag = False
        print("Stella: See you later !")
    elif user_response in ('thanks', 'thank you'):
        flag = False
        print("Stella: You are welcome !")
    else:
        #Call greeting() once and reuse the result, instead of drawing a second
        #random reply just for printing
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("Stella: "+greeting_reply)
        else:
            print("Stella: "+response(user_response))