notebook.community

Edit and run



In [ ]:

    
Use the artist layer of Matplotlib to replicate the bar chart below to visualize the percentage of the respondents' interest in the different data science topics surveyed.


To create this bar chart, follow these steps:

Sort the dataframe in descending order of Very interested.
Convert the numbers into percentages of the total number of respondents. Recall that 2,233 respondents completed the survey. Round percentages to 2 decimal places.

As for the chart:


Display the percentages above the bars as shown above, and remove the left, top, and right borders.

Once you are satisfied with your chart, please upload a screenshot of your plot. (5 marks)



In [27]:

    
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Survey results: one row per data science topic, one column per interest
# level. Source: https://cocl.us/datascience_survey_data
TOTAL_RESPONDENTS = 2233  # number of respondents who completed the survey

data = pd.read_csv("https://cocl.us/datascience_survey_data", index_col=0)

# Order topics by "Very interested" (largest first), then convert the raw
# counts into percentages of all respondents, rounded to 2 decimal places.
data = data.sort_values("Very interested", ascending=False)
data = ((data * 100) / TOTAL_RESPONDENTS).round(2)

ax = data.plot(
    kind='bar',
    width=0.8,
    figsize=(20, 8),
    legend=True,
    fontsize=14,
    color=['#5cb85c', '#5bc0de', '#d9534f'],
)
ax.set_title("Percentage of Respondent's Interest in Data Science Areas", fontsize=16)
ax.set_ylabel('Percentage of Respondent\'s Interest', fontsize=14)

# The task asks for the left, top, and right borders to be removed; only the
# bottom spine (the category axis line) stays visible.
for side in ('left', 'top', 'right'):
    ax.spines[side].set_visible(False)

# Write each bar's percentage just above it, centred on the bar rather than
# anchored at its left edge so labels of neighbouring bars do not drift right.
for bar in ax.patches:
    height = bar.get_height()
    ax.text(
        bar.get_x() + bar.get_width() / 2,
        height + 2,
        str(round(height, 2)) + '%',
        ha='center',
        fontsize=10,
        color='black',
        bbox=dict(facecolor='dimgrey', alpha=0.5),
    )

plt.show()



In [36]:

    
# Load the San Francisco crime dataset, then show the number of non-null
# Category entries per PdDistrict, sorted from the smallest count to the largest.
data2 = pd.read_csv('https://cocl.us/sanfran_crime_dataset')
(
    data2
    .groupby('PdDistrict')[['Category']]
    .count()
    .sort_values(by='Category')
)









    Out[36]:







  
    
      
      Category
    
    
      PdDistrict
      
    
  
  
    
      PARK
      8699
    
    
      RICHMOND
      8922
    
    
      TENDERLOIN
      9942
    
    
      TARAVAL
      11325
    
    
      INGLESIDE
      11594
    
    
      BAYVIEW
      14303
    
    
      CENTRAL
      17666
    
    
      MISSION
      19503
    
    
      NORTHERN
      20100
    
    
      SOUTHERN
      28445



In [6]:

    
# Task 4
import folium
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# GeoJSON with the district boundaries; the choropleth joins the data to it
# through each feature's properties.DISTRICT value.
san_geo = 'https://cocl.us/sanfran_geojson'

df = pd.read_csv('https://cocl.us/sanfran_crime_dataset')

# One row per police district with its number of records in the dataset.
df_select = df.groupby('PdDistrict').size().reset_index(name='count')

# Use every district. Slicing to a fixed nine rows would drop one of the ten
# districts present in the data and leave it without a value on the map.
df_final = df_select

# plot it

sf_geo = r'sf.geojson'  # local geojson path; not used below (the map reads san_geo)
sf_map = folium.Map(location=[37.773972, -122.431297], zoom_start=12)

# folium.Choropleth is the supported replacement for the deprecated
# Map.choropleth method and takes the same keyword arguments.
folium.Choropleth(
    geo_data=san_geo,
    data=df_final,
    columns=['PdDistrict', 'count'],
    key_on='feature.properties.DISTRICT',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Crime Rates in San Francisco'
).add_to(sf_map)

# display map
sf_map









    Out[6]:



In [7]:

    
import requests
import io

# Download the CSV explicitly. The timeout keeps a stalled connection from
# hanging the cell, and raise_for_status stops an HTTP error page from being
# parsed as if it were CSV data.
r = requests.get("https://cocl.us/sanfran_crime_dataset", timeout=30)
r.raise_for_status()
read_me_csv = io.StringIO(r.text)
df2 = pd.read_csv(read_me_csv)

# Number of records per police district, as a two-column frame for folium.
district_counts = df2['PdDistrict'].value_counts()
test_me = pd.DataFrame({'district': district_counts.index, 'count': district_counts.values})

world_map = folium.Map(location=[37.775421,-122.403405], zoom_start=12)

# san_geo (the district GeoJSON URL) is defined in the earlier Task 4 cell.
# folium.Choropleth is the supported replacement for the deprecated
# Map.choropleth method and takes the same keyword arguments.
folium.Choropleth(
    geo_data=san_geo,
    data=test_me,
    columns=['district', 'count'],
    key_on='feature.properties.DISTRICT',
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Reported Crimes in San Francisco'
).add_to(world_map)

# display world map
world_map









    Out[7]:



In [ ]:



In [ ]:

	Category
PdDistrict
PARK	8699
RICHMOND	8922
TENDERLOIN	9942
TARAVAL	11325
INGLESIDE	11594
BAYVIEW	14303
CENTRAL	17666
MISSION	19503
NORTHERN	20100
SOUTHERN	28445