Using pandas on various datasets for statistical operations.
Using various Python visualization libraries to improve our understanding of the data.
In [71]:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import os
import time

# SECURITY FIX: the consumer key/secret and access token/secret were committed
# here in plain text.  Those values must be revoked/rotated on
# https://dev.twitter.com/apps and supplied via environment variables instead.
ckey = os.environ.get('TWITTER_CONSUMER_KEY', '')
csecret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
# The access tokens can be found on your application's Details
# page located at https://dev.twitter.com/apps (located
# under "Your access token")
atoken = os.environ.get('TWITTER_ACCESS_TOKEN', '')
asecret = os.environ.get('TWITTER_ACCESS_SECRET', '')

# Tweepy
class listener(StreamListener):
    """Minimal stream listener: print each raw tweet payload and collect it."""

    def on_data(self, data):
        print(data)
        # FIX: Stream.filter() returns None, so the original
        # ls.append(twitterStream.filter(...)) only ever collected None;
        # collect the raw JSON payloads here instead.
        ls.append(data)
        return True  # keep the stream connection open

    def on_error(self, status):
        # Print the HTTP status code on error (e.g. 420 = rate limited).
        print(status)

auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

RUN_SECONDS = 0.009  # time budget kept from the original; raise it to capture more tweets
start_time = time.time()
ls = []
while True:
    twitterStream = Stream(auth, listener())
    twitterStream.filter(track=["car"])  # blocks until the stream disconnects
    # FIX: the original condition was `< 0.009`, i.e. it broke out only when
    # almost no time had elapsed; the loop should stop once the budget is spent.
    if (time.time() - start_time) >= RUN_SECONDS:
        break
In [2]:
import nltk
from nltk.probability import ELEProbDist, FreqDist, DictionaryProbDist
from nltk import NaiveBayesClassifier
from nltk import FreqDist, ConditionalFreqDist
from nltk import BigramAssocMeasures
from collections import defaultdict
In [5]:
# Hand-labelled training data: (raw tweet text, sentiment label) pairs.
# NOTE(review): 'positive' appears to mean pro-meat-ban / pro-vegetarian
# sentiment and 'negative' anti-ban -- TODO confirm the labelling convention.
pos_tweets = [('Attention all Nature Lovers - Cattle Cruelty in India & Rescues', 'positive'),
              ('Are you having debate on Poojari Lynching for try to save cow slaughter?', 'positive'),
              ('Hope People start loving all animals like this & not show', 'positive'),
              ('if slaughter houses had glass walls everyone would be a vegetarian', 'positive'),
              ('BanBeef SayNOtoMEATexport PlunderOfIndia SaveGauVansh Cow Vegetarian JagoBharathJag', 'positive'),
              ('I will eat beef and pork for religious reasons, occasionally. I will be vegetarian for ethical reasons, frequently. #meatban #vegetarian','positive')
              ]
neg_tweets = [('Let try to ban hunger before we ban meat?', 'negative'),
              ('meatban causing price of pulses at 200+/kg', 'negative'),
              ('Where is Indian Politics heading to? Chicken, mutton or beef now parliament will approve the dinner', 'negative'),
              ('There is something truly secretly delicious about having your mouth enjoy a perfect BLT in a country with meat ban', 'negative'),
              ('We will let loose 100 pigs in Jama Masjid if the meatban was not enforced on 9 days of Navratri', 'negative')]
# Single held-out example used to sanity-check feature extraction below.
test_tweet = [('A Question: Can someone please tell me how jhatka came into being in answer to halal in India? Please enlighten','negative')]
def _tokenize(text):
    """Lowercase a tweet and keep only tokens of length >= 3 (drops 'a', 'is', 'RT', ...)."""
    return [e.lower() for e in text.split() if len(e) >= 3]

# Tokenize the labelled training tweets: (raw_text, label) -> (tokens, label).
# (The original repeated the same filtering loop for both lists; a single
# helper removes the duplication.)
tweets = [(_tokenize(words), sentiment) for (words, sentiment) in pos_tweets + neg_tweets]
#print(tweets)

# Same preprocessing for the held-out test tweet(s).
test_tweets = [(_tokenize(words), sentiment) for (words, sentiment) in test_tweet]
#print(test_tweets)
def get_words_in_tweets(tweets):
    """Flatten a list of (tokens, sentiment) pairs into a single token list."""
    return [word for (words, _sentiment) in tweets for word in words]
def get_word_features(wordlist):
    """Return the vocabulary (unique words) of `wordlist` as a keys view.

    The original built an nltk.FreqDist only to throw the counts away and keep
    `.keys()`; an order-preserving dedup yields the same vocabulary without the
    nltk call.  NOTE(review): on nltk 3 FreqDist.keys() iterates in first-seen
    (Counter) order, which this matches; very old nltk sorted keys by
    frequency, which only affected display order of the feature names.
    """
    return dict.fromkeys(wordlist).keys()
# Global vocabulary consumed by extract_features() below.
word_features = get_word_features(get_words_in_tweets(tweets))
#print(word_features)
def extract_features(document, vocabulary=None):
    """Map a token list to NLTK's boolean 'contains(word)' feature dict.

    Parameters
    ----------
    document : iterable of str
        Tokens of one tweet.
    vocabulary : iterable of str, optional
        Feature words to test for.  Defaults to the module-level
        `word_features` (backward compatible with the original, which read
        the global directly).
    """
    if vocabulary is None:
        vocabulary = word_features
    document_words = set(document)  # set for O(1) membership tests
    return {'contains(%s)' % word: (word in document_words) for word in vocabulary}
# BUG FIX: the original called extract_features(test_tweet), i.e. on a list of
# (raw_text, label) tuples rather than on tokens, so every 'contains(...)'
# feature came out False.  Use the tokenized words of the test tweet.
a = extract_features(test_tweets[0][0])
print(a)
# Lazily apply the feature extractor over the labelled training tokens.
training_set = nltk.classify.apply_features(extract_features, tweets)
print(training_set)
classifier = nltk.NaiveBayesClassifier.train(training_set)
def train(labeled_featuresets, estimator= ELEProbDist):
    """Sketch of NLTK's NaiveBayesClassifier.train, kept for reference only.

    NOT functional: the '...' placeholders elide the real implementation, and
    NOTE(review): as written `estimator` would be handed a ConditionalFreqDist
    where a FreqDist of label counts is expected -- do not call this.
    """
    ...
    # Create the P(label) distribution
    label_freqdist = ConditionalFreqDist()
    label_probdist = estimator(label_freqdist)
    ...
    # Create the P(fval|label, fname) distribution
    feature_probdist = {}
    ...
    return NaiveBayesClassifier(label_probdist, feature_probdist)
#!print(label_probdist.prob('positive'))
#!print(feature_probdist)
# FIX: show_most_informative_features() prints its table itself and returns
# None, so wrapping it in print() also emitted a stray 'None' line.
classifier.show_most_informative_features(32)
#tweet = 'Meat Ban reminds me of TV Ban. If one of your siblings was taking the board exams, you cannot watch too'
#'And I support Cow,Buffalo #meatban if #India returns to #Swadeshi #agriculture Invest in Agriculture to #SaveIndia'
#print(classifier.classify(extract_features(tweet.split())))
In [13]:
import os
# NOTE(review): execution counts are non-sequential throughout this notebook
# (In [13] after In [71]); these cwd-juggling cells only work in the order they
# were last run, not under Restart & Run All.
#os.chdir("/home/archimedeas/wrkspc/anaconda/the-visual-verdict/visualizations/1_the_senate/datasets")
os.getcwd()
Out[13]:
In [17]:
# Inspect the current directory to locate the dataset folders.
os.listdir()
Out[17]:
In [15]:
# Move up one level...
os.chdir('..')
In [19]:
os.getcwd()
# ...then into the senate datasets folder (fails on re-run if the kernel
# is already inside it).
os.chdir('the_senate_datasets')
In [20]:
os.listdir()
Out[20]:
In [21]:
import pandas as pd
# Senate age distribution in 5-year spans; leaving the frame as the last
# expression displays it inline.
df_men = pd.read_csv("1_age_group_5yr_span.csv")
df_men
Out[21]:
In [28]:
# Render matplotlib figures inline and set a default style / figure size.
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (10.0, 8.0)
In [29]:
# Column 0 -> labels, column 1 -> values; row 0 is deliberately skipped
# (same behaviour as the original index loop over iat).
ls_labels_men = [str(v) for v in df_men.iloc[1:, 0]]
ls_values_men = [float(v) for v in df_men.iloc[1:, 1]]
In [30]:
import numpy as np
import matplotlib.pyplot as plt

# Grouped-bar layout: one bar per age group.
n_groups = len(ls_labels_men)
bar_positions = np.arange(n_groups)  # the x locations for the groups
bar_width = 0.35                     # the width of the bars

fig, ax = plt.subplots()
men_bars = ax.bar(bar_positions, ls_values_men, bar_width, color='red', alpha=0.6)
#rects2 = ax.bar(ind + width, ls_values_men, width )

# Add text for labels, title and axes ticks.
ax.set_ylabel('Scores')
ax.set_title('Scores by group and gender')
ax.set_xticks(bar_positions + bar_width)
ax.set_xticklabels(ls_labels_men)
#ax.legend((rects1[0], rects2[0]), ('Men', 'Women'))
plt.show()
In [31]:
import pandas as pd
# NOTE(review): this overwrites df_men from the age-group cells above with the
# educational-background table -- the earlier displayed output is now stale.
df_men = pd.read_csv("4_educational_background.csv")
In [32]:
df_men
Out[32]:
In [33]:
# Education categories (col 0) and their values (col 1), skipping row 0
# exactly as the original iat loop did.
ls_labels_men = [str(entry) for entry in df_men.iloc[1:, 0]]
ls_values_men = [float(entry) for entry in df_men.iloc[1:, 1]]
In [34]:
import matplotlib.pyplot as plt
import pandas as pd

plt.rcParams['figure.figsize'] = (10.0, 10.0)
# The slices will be ordered and plotted counter-clockwise.
labels = ls_labels_men
sizes = ls_values_men
# matplotlib cycles this list if there are more slices than colours.
colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral', 'red', 'lightgreen']
# FIX: explode was a hard-coded 6-tuple and raised whenever the CSV did not
# have exactly 6 categories; derive it from the number of slices instead.
explode = tuple(0.1 for _ in sizes)
p, text = plt.pie(sizes, colors = colors, explode = explode, shadow=True, startangle=90 )
# Set aspect ratio to be equal so that pie is drawn as a circle.
plt.axis('equal')
#plt.title("Educational Background", fontsize = 50, loc = 'right')
plt.legend(p, labels, loc= 'lower right')
plt.show()
In [47]:
os.getcwd()
Out[47]:
In [50]:
# Back out of the senate datasets folder...
os.chdir('..')
In [45]:
os.getcwd()
Out[45]:
In [52]:
# ...and into the processed census data.
os.chdir('processed_census_datasets')
In [53]:
import pandas as pd
# Census population/education table (C-10 SC sheet exported to CSV).
df = pd.read_csv("processed-population-and-education_DDW-0000C-10.xlsx - C-10 SC.csv")
df
Out[53]:
In [54]:
# Sanity check on row/column counts.
df.shape
Out[54]:
In [55]:
os.listdir()
Out[55]:
In [30]:
# NOTE(review): bokeh._legacy_charts is a private module that was removed from
# bokeh long ago -- this cell only runs against the old bokeh version it was
# written for.
from bokeh._legacy_charts import Bar
from bokeh.io import output_notebook, show
# get the countries and we group the data by medal type
states = ['delhi', 'assam']
# Per-state response splits; only the first row of each ([0]) is plotted below.
delhi = [ [56,46],
          [23,77],
          [45,55],
          [60,40],
          [35,15,25,25]
        ]
assam = [ [30,10],
          [33,67],
          [75,25],
          [50,50],
          [75,5,10,10]
        ]
output_notebook()
# bar2 is built but never shown (only `bar` is displayed).
bar = Bar([delhi[0],assam[0]], states, title="Stacked bars", stacked=True)
bar2 = Bar([delhi[0],assam[0]], states, title="Stacked bars")
show(bar)
In [58]:
# Burtin-style annular chart (adapted from bokeh's classic "burtin" example):
# one large background wedge per attitude row, with three nested wedges for the
# Positive / Negative / Dont_care values drawn on a log-scaled radius.
from collections import OrderedDict
from math import log, sqrt
import numpy as np
import pandas as pd
from six.moves import cStringIO as StringIO
from bokeh.plotting import figure
from bokeh.io import output_notebook, show

# Inline CSV: per-region attitude percentages plus a 'gram' grouping column
# (naming reused from the antibiotics example; it colours the big wedges).
antibiotics = """
attitude, Positive, Negative, Dont_care, gram
ASSAM, 80, 5, 5, negative
DELHI, 10, 0.8, 0.09, negative
INDIA, 3, 0.1, 0.1, positive"""

# Colour per measure (small wedges)...
drug_color = OrderedDict([
    ("Dont_care", "#0d3362"),
    ("Positive", "#c64737"),
    ("Negative", "black" ),
])
# ...and per 'gram' group (big background wedges).
gram_color = {
    "positive" : "#aeaeb8",
    "negative" : "#e69584",
}

df = pd.read_csv(StringIO(antibiotics),
                 skiprows=1,
                 skipinitialspace=True,
                 engine='python')

width = 800
height = 800
inner_radius = 90
outer_radius = 300 - 10

# Radial mapping r = a*sqrt(log(value*1e4)) + b, taking the value range
# [0.001, 1000] onto [outer_radius, inner_radius].
minr = sqrt(log(.001 * 1E4))
maxr = sqrt(log(1000 * 1E4))
a = (outer_radius - inner_radius) / (minr - maxr)
b = inner_radius - a * maxr

def rad(mic):
    # Radius for a measured value on the log scale defined above.
    return a * np.sqrt(np.log(mic * 1E4)) + b

big_angle = 2.0 * np.pi / (len(df) + 1)  # one big wedge per row, plus one gap
small_angle = big_angle / 7              # seven slots inside each big wedge

# All wedges are centred on the origin.
x = np.zeros(len(df))
y = np.zeros(len(df))

#output_file("burtin.html", title="burtin.py example")
output_notebook()

# NOTE(review): background_fill/border_fill are the old (pre-0.12) bokeh
# property names -- confirm the installed bokeh version before re-running.
p = figure(plot_width=width, plot_height=height, title="",
           x_axis_type=None, y_axis_type=None,
           x_range=[-420, 420], y_range=[-420, 420],
           min_border=0, outline_line_color="black",
           background_fill="#f0e1d2", border_fill="#f0e1d2")

p.line(x+1, y+1, alpha=0)

# annular wedges (big background wedges, one per row)
angles = np.pi/2 - big_angle/2 - df.index.to_series()*big_angle
colors = [gram_color[gram] for gram in df.gram]
p.annular_wedge(
    x, y, inner_radius, outer_radius, -big_angle+angles, angles, color=colors,
)

# small wedges (the three measures, each in its own angular slot)
p.annular_wedge(x, y, inner_radius, rad(df.Dont_care),
                -big_angle+angles+5*small_angle, -big_angle+angles+6*small_angle,
                color=drug_color['Dont_care'])
p.annular_wedge(x, y, inner_radius, rad(df.Positive),
                -big_angle+angles+3*small_angle, -big_angle+angles+4*small_angle,
                color=drug_color['Positive'])
p.annular_wedge(x, y, inner_radius, rad(df.Negative),
                -big_angle+angles+1*small_angle, -big_angle+angles+2*small_angle,
                color=drug_color['Negative'])

# circular axes and labels (decade rings for 10^-3 .. 10^3)
labels = np.power(10.0, np.arange(-3, 4))
radii = a * np.sqrt(np.log(labels * 1E4)) + b
p.circle(x, y, radius=radii, fill_color=None, line_color="white")
p.text(x[:-1], radii[:-1], [str(r) for r in labels[:-1]],
       text_font_size="8pt", text_align="center", text_baseline="middle")

# radial axes (spokes between the big wedges)
p.annular_wedge(x, y, inner_radius-10, outer_radius+10,
                -big_angle+angles, -big_angle+angles, color="black")

# attitude labels, placed on the outermost ring
xr = radii[0]*np.cos(np.array(-big_angle/2 + angles))
yr = radii[0]*np.sin(np.array(-big_angle/2 + angles))
label_angle=np.array(-big_angle/2+angles)
label_angle[label_angle < -np.pi/2] += np.pi # easier to read labels on the left side
p.text(xr, yr, df.attitude, angle=label_angle,
       text_font_size="9pt", text_align="center", text_baseline="middle")

# OK, these hand drawn legends are pretty clunky, will be improved in future release
p.circle([-40, -40], [-370, -390], color=list(gram_color.values()), radius=5)
p.text([-30, -30], [-370, -390], text=[ "National", "States" ],
       text_font_size="7pt", text_align="left", text_baseline="middle")
p.rect([-40, -40, -40], [18, 0, -18], width=30, height=13,
       color=list(drug_color.values()))
p.text([-15, -15, -15], [18, 0, -18], text=list(drug_color.keys()),
       text_font_size="9pt", text_align="left", text_baseline="middle")

p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None

show(p)
In [46]:
from bokeh.io import output_file, show
# Additionally save the last bokeh figure (p, from the cell above) to a
# standalone HTML file.
output_file('tale_of_cities.html')
show(p)
In [32]:
from IPython.display import Image
# Pre-load the screenshot images used in the write-up below.
image0 = Image('IndiaSelected.jpg')
image1 = Image('BanSectors.jpg')
image2 = Image('StatesSelected.jpg')
image3 = Image('BanTimeLine.jpg')
In [37]:
# Display the ban-timeline screenshot (last expression renders inline).
image3
Out[37]:
In [19]:
from IPython.display import Image, HTML, display
from glob import glob

# Render every .jpg in the current directory as a row of 180px thumbnails.
_thumb_template = "<img style='width: 180px; margin: 0px; float: left; border: 1px solid black;' src='%s' />"
imagesList = ''.join(_thumb_template % str(s) for s in glob('*.jpg'))
display(HTML(imagesList))
In [67]:
# NOTE(review): hardcoded absolute Windows path -- breaks on any other machine;
# prefer a configurable Path / DATA_DIR constant near the top of the notebook.
os.chdir('C:\\Users\\user\\Desktop\\7th Sem Project\\Jupyter')
In [68]:
os.listdir()
Out[68]:
In [70]:
from glob import glob
# List the exported figure PNGs in the project folder.
glob('*.png')
Out[70]:
In [ ]: