In [0]:
#Description: This is a 'self learning' chatbot program

In [0]:
#Install the package NLTK
pip install nltk

In [0]:
#Install the package newspaper3k
pip install newspaper3k

In [0]:
#Import Libraries
from newspaper import Article
import random
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
import numpy as np
import warnings

In [0]:
#Ignore any warning messages
warnings.filterwarnings('ignore')

In [0]:
#Download the NLTK data packages used by this notebook
nltk.download('punkt', quiet=True)
#Newer NLTK releases load the sentence tokenizer from 'punkt_tab' rather than 'punkt';
#fetching both keeps sent_tokenize/word_tokenize working on old and new versions
nltk.download('punkt_tab', quiet=True)
nltk.download('wordnet', quiet=True)

In [0]:
#Load the source article: build an Article for the Mayo Clinic coronavirus page,
#download it, parse it and keep its text as the chatbot corpus
article = Article('https://www.mayoclinic.org/diseases-conditions/coronavirus/symptoms-causes/syc-20479963')
article.download()
article.parse()
#NOTE(review): only article.text is read in this notebook - confirm whether the nlp() step is needed
article.nlp()
corpus = article.text

#Print the corpus/text
print(corpus)

In [0]:
#Tokenization: split the article text into sentences
text = corpus #Alias of the corpus; also passed to LemNormalize in a later cell
sent_tokens = nltk.sent_tokenize(text) #Convert the text into a list of sentences

#Print the list of sentences
print(sent_tokens)

In [0]:
#Create a dictionary (code point : None) used with str.translate to remove punctuation
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

#Print the punctuation characters
print(string.punctuation)

#Print the dictionary
print(remove_punct_dict)

In [0]:
#Create a function to return a list of lower case word tokens after removing punctuation
def LemNormalize(text):
  """Lower-case ``text``, drop punctuation characters and split it into word tokens.

  Punctuation is removed with the module-level ``remove_punct_dict`` translation
  table and the result is tokenized with ``nltk.word_tokenize``.
  Note: despite the function name, no lemmatization step is applied here.

  Args:
    text: The string to normalize.

  Returns:
    The list of tokens produced by ``nltk.word_tokenize``.
  """
  lowered = text.lower()
  without_punct = lowered.translate(remove_punct_dict)
  return nltk.word_tokenize(without_punct)

#Print the tokenized text
print(LemNormalize(text))

In [0]:
#Keyword Matching

#Greeting Inputs
GREETING_INPUTS = ["hi","hello","hola","greetings","wassup","hey"]

#Greeting Responses back to user
GREETING_RESPONSES = ["howdy","hi","hello","hey","whatsup","hey there"]

#Function to return a random greeting response to a user greeting
def greeting(sentence):
  """Return a random greeting if ``sentence`` contains a greeting word.

  Each whitespace-separated word is lower-cased and stripped of leading and
  trailing punctuation before it is compared with ``GREETING_INPUTS``, so
  inputs such as "Hello!" or "hi, what is covid?" are recognised.

  Args:
    sentence: The text typed by the user.

  Returns:
    A randomly chosen entry of ``GREETING_RESPONSES`` when a greeting word is
    found, otherwise ``None``.
  """
  for word in sentence.split():
    #Strip surrounding punctuation so "hello!" / "hi," still match
    if word.strip(string.punctuation).lower() in GREETING_INPUTS:
      return random.choice(GREETING_RESPONSES)
  return None

In [0]:
#Generate the response
def response(user_response):
  """Return the corpus sentence that best matches the user's query.

  The query is lower-cased and vectorised with TF-IDF together with the
  sentences of the module-level ``sent_tokens`` list (tokenized by
  ``LemNormalize``). The query vector is then compared with every corpus
  sentence using cosine similarity.

  Args:
    user_response: The text typed by the user.

  Returns:
    The most similar sentence from ``sent_tokens``, or an apology message when
    the best similarity score is 0 or the corpus is empty.
  """
  no_match_response = "I apologise, I don't understand, kindly rephrase your questions."

  user_response = user_response.lower() #Make the response lower case

  #Without any corpus sentences there is nothing to match against
  if not sent_tokens:
    return no_match_response

  #Work on a new list (corpus + query) so the shared sentence list is never
  #modified, even if vectorisation raises part-way through
  documents = sent_tokens + [user_response]

  #Create a TfidfVectorizer Object
  TfidfVec = TfidfVectorizer(tokenizer = LemNormalize, stop_words='english')

  #Convert the text to a matrix of TF-IDF features
  tfidf = TfidfVec.fit_transform(documents)

  #Score the query (last row) against the corpus rows only; leaving the query
  #out of the candidates means it can never be returned as its own best match
  vals = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()

  #Index and score of the most similar corpus sentence
  idx = vals.argmax()
  score = vals[idx]

  #A score of 0 means no corpus sentence shares a term with the query
  if score == 0:
    return no_match_response
  return sent_tokens[idx]

In [0]:
#Chat loop: keeps answering until the user says bye or thanks
flag = True
print("Stella: I am an ALIFE Air Health Bot. I will help you understand all you need about COVID 19. You may exit anytime, just type Bye!")
while flag:
  user_response = input().lower()
  #Compare exit/thanks keywords on a cleaned copy so that "Bye!" (as suggested
  #in the banner), " bye " or "Thanks." are recognised as well
  command = user_response.strip(string.punctuation + string.whitespace)
  if command == 'bye':
    flag = False
    print("Stella: See you later !")
  elif command == 'thanks' or command == 'thank you':
    flag = False
    print("Stella: You are welcome !")
  else:
    #Call greeting() only once: it picks a random reply on every call
    reply = greeting(user_response)
    if reply is None:
      reply = response(user_response)
    print("Stella: "+reply)