In [1]:

    
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib as mplc
import matplotlib.pyplot as plt
from bokeh import mpl
from bokeh.charts import Bar, Scatter, defaults, Histogram
from bokeh.plotting import figure, show
from bokeh.io import output_notebook









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/matplotlib/__init__.py:872: UserWarning: axes.color_cycle is deprecated and replaced with axes.prop_cycle; please use the latter.
  warnings.warn(self.msg_depr % (key, alt_key))



In [2]:

    
output_notebook()









    




    


    

    
        
        BokehJS successfully loaded.

Playing with google play store datasets using pandas, bokeh and jupyter

@idwaker

Data Sample



In [3]:

    
apps = pd.DataFrame()
apps = pd.read_csv("../gapps.csv", index_col=None, parse_dates=['app_updated'], thousands=',')
apps.head()









    Out[3]:






  
    
      
      app_downloads_max
      app_updated
      app_rating_count
      app_name
      developer_address
      developer_email
      app_content_rating
      app_downloads_min
      app_genre
      developer_name
      developer_website
      app_rating
      app_price
      app_size
      app_link
      id
      app_min_os
    
  
  
    
      0
      5000000
      2015-09-29
      31139
      Disney Junior Play
      500 South Buena Vista Street\nBurbank, CA 9152...
      support@disneymobile.com
      Everyone
      1000000
      Educational
      Disney
      http://help.disney.com
      3.7
      0
      36M
      https://play.google.com/store/apps/details?id=...
      com.disney.disneyjuniorplay_goo
      4.0
    
    
      1
      5000000
      2015-08-12
      33760
      Animals Farm For Kids
      Poland,\nPoznań 61-697 os. Wichrowe Wzgórze 28...
      kidsgamesprojects@gmail.com
      Everyone
      1000000
      Educational
      Kids Games Projects
      http://kidsgameprojects.com
      4.1
      0
      29M
      https://play.google.com/store/apps/details?id=...
      pl.kidsgameprojects.com.AnimalsFarmForKids
      2.3
    
    
      2
      50000
      2014-10-13
      199
      Pet Link DELUXE
      NaN
      phanhaiduong80@gmail.com
      Everyone
      10000
      Board
      phanhaiduong80
      NaN
      4.2
      0
      4.1M
      https://play.google.com/store/apps/details?id=...
      com.fungalaxy.petlinkdeluxe
      2.1
    
    
      3
      50000
      2015-06-08
      131
      Aesops Fables stories for kids
      610, Shiromani complex,\nAbove kandoi bhogilal...
      pratikmachchar@gmail.com
      Everyone
      10000
      Books & Reference
      Pratik Machchar
      http://vyaap.com
      4.3
      0
      9.2M
      https://play.google.com/store/apps/details?id=...
      com.pratik.mobileapps.aesopfables.shortstories...
      2.3.3
    
    
      4
      5000
      2015-05-25
      16
      Маша и Медведь: Кто икнул?
      Bachemer Str. 210\n50935 Köln
      games@mashabear.ru
      Everyone
      1000
      Educational
      tekkon IPM GmbH
      http://games.mashabear.ru/
      3.4
      $1.00
      21M
      https://play.google.com/store/apps/details?id=...
      air.ru.mashabear.hiccup
      2.2

Notes

app distribution with categories
top 20 app developers according to number of apps published
top 20 app developers according to aggregated mean downloads
top 20 app developers according to average rating of all apps
top 20 app developers with highest number of rating counts
top 20 most expensive apps
average ratings comparision between 2 or more categories



In [4]:

    
defaults.width = 800
defaults.height = 420



In [5]:

    
apps.describe()









    Out[5]:






  
    
      
      app_downloads_max
      app_rating_count
      app_downloads_min
      app_rating
    
  
  
    
      count
      7.922800e+04
      75333.000000
      7.922800e+04
      75333.000000
    
    
      mean
      2.463210e+06
      14616.394515
      5.946835e+05
      4.053591
    
    
      std
      6.674247e+07
      253880.087074
      1.394023e+07
      0.597489
    
    
      min
      5.000000e+00
      1.000000
      1.000000e+00
      0.000000
    
    
      25%
      1.000000e+03
      17.000000
      5.000000e+02
      3.800000
    
    
      50%
      1.000000e+04
      125.000000
      5.000000e+03
      4.100000
    
    
      75%
      1.000000e+05
      1002.000000
      5.000000e+04
      4.400000
    
    
      max
      5.000000e+09
      31881332.000000
      1.000000e+09
      5.000000



In [6]:

    
# create series with genre and counts
genre = apps.app_genre.value_counts()

# convert series to dataframe
genre = pd.DataFrame(genre)
genre.head()









    Out[6]:






  
    
      
      app_genre
    
  
  
    
      Personalization
      8472
    
    
      Books & Reference
      7303
    
    
      Sports
      5266
    
    
      Tools
      5161
    
    
      Entertainment
      4797



In [7]:

    
genre = genre.reset_index()
genre.columns = ["Categories", "Counts"]
genre.head()









    Out[7]:






  
    
      
      Categories
      Counts
    
  
  
    
      0
      Personalization
      8472
    
    
      1
      Books & Reference
      7303
    
    
      2
      Sports
      5266
    
    
      3
      Tools
      5161
    
    
      4
      Entertainment
      4797



In [8]:

    
plot = Bar(genre, "Categories", values="Counts", color="wheat", 
           title="App distribution with categories")
show(plot)









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [9]:

    
devs_apps_count = apps.developer_name.value_counts()
devs_apps_count.head()









    Out[9]:





Oceanhouse Media, Inc.    73
Google Inc.               71
MobiSystems               71
ZT.art                    69
romeLab                   65
Name: developer_name, dtype: int64



In [10]:

    
# convert series to dataframe
devs_apps_count = pd.DataFrame(devs_apps_count)
devs_apps_count = devs_apps_count.reset_index()
devs_apps_count.head()









    Out[10]:






  
    
      
      index
      developer_name
    
  
  
    
      0
      Oceanhouse Media, Inc.
      73
    
    
      1
      Google Inc.
      71
    
    
      2
      MobiSystems
      71
    
    
      3
      ZT.art
      69
    
    
      4
      romeLab
      65



In [11]:

    
devs_apps_count.columns = ["Developers", "Count"]
devs_apps_count = devs_apps_count.head(20)
plot = Bar(devs_apps_count, "Developers", values="Count", color="wheat",
          title="Top 20 Developers with highest number of apps uploaded")
show(plot)









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [ ]:

    
# developer | average min downloads | average max downloads
# dev_average_downloads = apps[["developer_name", "app_downloads_min", "app_downloads_max"]]
# dev_average_downloads.head()



In [ ]:

    
# from bokeh.charts import Histogram
# p = Histogram(dev_average_downloads.app_downloads_min, title="Download distribution")
# show(p)



In [ ]:

    
# dev_average_downloads = dev_average_downloads.groupby('developer_name')



In [ ]:

    
# average_downloads = pd.DataFrame()

# for n, g in dev_average_downloads:
#     data = {"developer": n}
#     lower = g.app_downloads_min.sum()
#     higher = g.app_downloads_max.sum()
#     data["downloads"] = np.mean((lower, higher))
#     average_downloads.append(data, ignore_index=True)



In [ ]:

    
# ["2.3": "2.2", "2.3.3": "4.0", "2.1", "4.0.3", "3.0", "1.6", "4.1", "1.5", "2.0", "4.3", "3.2"]
# df = apps.app_min_os.value_counts()[:14]
# df = pd.DataFrame(df)
# df = df.reset_index()
# df.columns = ["version", "count"]
# df = df.set_index("version")
# #df[["version"]] = df[["version"]].astype(float)
# df



In [12]:

    
apps.app_min_os.describe()









    Out[12]:





count     79874
unique       70
top         2.3
freq      15745
Name: app_min_os, dtype: object



In [13]:

    
%matplotlib inline



In [14]:

    
df = apps.app_min_os.value_counts()[:14]
df = pd.DataFrame(df)
df = df.reset_index()
df.columns = ["version", "count"]
df = df.set_index("version")
# #df[["version"]] = df[["version"]].astype(float)
df









    Out[14]:






  
    
      
      count
    
    
      version
      
    
  
  
    
      2.3
      15745
    
    
      2.2
      13996
    
    
      2.3.3
      10285
    
    
      4.0
      9149
    
    
      2.1
      7299
    
    
      4.0.3
      4350
    
    
      3.0
      3928
    
    
      1.6
      3541
    
    
      Varies with device
      2599
    
    
      4.1
      2491
    
    
      1.5
      2177
    
    
      2.0
      851
    
    
      4.3
      636
    
    
      3.2
      587



In [15]:

    
df = apps[["app_rating", "app_genre", "app_rating_count"]]
df = df.groupby('app_genre').mean()
df = df.reset_index()
df[["app_rating_count"]] = df[["app_rating_count"]] / 1000
df









    Out[15]:






  
    
      
      app_genre
      app_rating
      app_rating_count
    
  
  
    
      0
      Action
      4.011181
      104.134200
    
    
      1
      Adventure
      4.097447
      28.195745
    
    
      2
      Arcade
      4.041518
      59.328449
    
    
      3
      Board
      4.120513
      13.869318
    
    
      4
      Books & Reference
      4.173396
      1.924583
    
    
      5
      Business
      3.997228
      6.026630
    
    
      6
      Card
      4.037453
      29.860734
    
    
      7
      Casino
      4.206897
      31.942126
    
    
      8
      Casual
      3.929950
      55.386864
    
    
      9
      Comics
      4.105493
      2.125908
    
    
      10
      Communication
      4.021654
      108.735026
    
    
      11
      Education
      4.040881
      2.530699
    
    
      12
      Educational
      3.948350
      1.991588
    
    
      13
      Entertainment
      4.005387
      10.359418
    
    
      14
      Finance
      4.007984
      2.669704
    
    
      15
      Health & Fitness
      4.002206
      5.344012
    
    
      16
      Libraries & Demo
      4.065567
      1.117482
    
    
      17
      Lifestyle
      4.046205
      4.349274
    
    
      18
      Media & Video
      3.848709
      14.090607
    
    
      19
      Medical
      4.007736
      0.263632
    
    
      20
      Music
      3.991935
      2.353355
    
    
      21
      Music & Audio
      4.107416
      14.102717
    
    
      22
      News & Magazines
      4.033912
      6.457827
    
    
      23
      Personalization
      4.204609
      3.927539
    
    
      24
      Photography
      4.007900
      41.435332
    
    
      25
      Productivity
      4.083363
      20.985893
    
    
      26
      Puzzle
      4.066897
      17.188863
    
    
      27
      Racing
      3.934063
      139.357741
    
    
      28
      Role Playing
      4.100704
      38.847852
    
    
      29
      Shopping
      3.974662
      29.719652
    
    
      30
      Simulation
      3.804110
      52.400362
    
    
      31
      Social
      4.058929
      84.588328
    
    
      32
      Sports
      4.061368
      6.603193
    
    
      33
      Strategy
      4.137278
      112.142562
    
    
      34
      Tools
      4.046820
      13.443610
    
    
      35
      Transportation
      3.953732
      1.060029
    
    
      36
      Travel & Local
      3.981950
      14.982025
    
    
      37
      Trivia
      4.041410
      42.324718
    
    
      38
      Weather
      4.026106
      2.673707
    
    
      39
      Word
      4.105648
      34.428100



In [16]:

    
plot = Scatter(df, x='app_rating', y='app_rating_count', color='app_genre', xlabel='Mean Rating', 
               ylabel='Mean Rating Count (in Thousands)', legend='top_left', title='Frequency Distribution of Categories with Rating')









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [17]:

    
show(plot)



In [18]:

    
df = apps[["app_content_rating", "app_downloads_min", "app_rating"]]
df = df.groupby('app_content_rating').mean()
df = df.reset_index()
df[["app_downloads_min"]] = df[["app_downloads_min"]] / 1000
df.columns = ["Content Rating", "Mean Downloads", "Mean App Rating"]
df









    Out[18]:






  
    
      
      Content Rating
      Mean Downloads
      Mean App Rating
    
  
  
    
      0
      Adults only 18+
      463.273077
      3.900000
    
    
      1
      Everyone
      598.845695
      4.098742
    
    
      2
      Everyone 10+
      1710.798305
      4.136165
    
    
      3
      Mature 17+
      921.755529
      4.126981
    
    
      4
      Teen
      3148.191777
      4.094936
    
    
      5
      Unrated
      71.411608
      3.907865



In [19]:

    
plot = Scatter(df, x='Mean App Rating', y='Mean Downloads', color='Content Rating',
              legend='top_left', title="Relation of Content Rating with Downloads and App Rating")









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [20]:

    
show(plot)



In [21]:

    
df = apps.app_min_os.convert_objects(convert_numeric=True).dropna()









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: convert_objects is deprecated.  Use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  if __name__ == '__main__':



In [22]:

    
plt.rcParams['figure.figsize'] = (12.0, 6.0)

Kernel Density Plot for Minimum os requirement on apps



In [23]:

    
sns.kdeplot(df, shade=True)









    Out[23]:





<matplotlib.axes._subplots.AxesSubplot at 0x106503908>

App Rating Histogram



In [24]:

    
df = apps[["app_rating"]]
p = Histogram(df, xlabel="Rating", ylabel="Count", bins=10, title="App Rating Histogram")
show(p)



In [25]:

    
df = apps[["app_rating", "app_genre"]]
df = df[(df["app_genre"] == "Educational") | (df["app_genre"] == "Books & Reference")]
df = df.dropna()
# df.head()
p = Histogram(df, values="app_rating", color="app_genre", legend=True,
              xlabel="Rating", ylabel="Count", bins=10, title="Educational and Books & Reference comparision")









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [26]:

    
show(p)



In [27]:

    
disney = apps[apps["developer_name"] == "Disney"]
disney.head()









    Out[27]:






  
    
      
      app_downloads_max
      app_updated
      app_rating_count
      app_name
      developer_address
      developer_email
      app_content_rating
      app_downloads_min
      app_genre
      developer_name
      developer_website
      app_rating
      app_price
      app_size
      app_link
      id
      app_min_os
    
  
  
    
      0
      5000000
      2015-09-29
      31139
      Disney Junior Play
      500 South Buena Vista Street\nBurbank, CA 9152...
      support@disneymobile.com
      Everyone
      1000000
      Educational
      Disney
      http://help.disney.com
      3.7
      0
      36M
      https://play.google.com/store/apps/details?id=...
      com.disney.disneyjuniorplay_goo
      4.0
    
    
      560
      100000
      2015-05-14
      1532
      Where's My Mickey? XL
      500 South Buena Vista Street\nBurbank, CA 9152...
      support@disneymobile.com
      Everyone
      50000
      Puzzle
      Disney
      http://help.disney.com
      4.2
      $2.99
      83M
      https://play.google.com/store/apps/details?id=...
      com.disney.wheresmymickey_tab_goo
      2.3.3
    
    
      570
      100000
      2013-12-04
      1264
      Disney Fairies: Lost & Found
      500 South Buena Vista Street\nBurbank, CA 9152...
      support@disneymobile.com
      Everyone
      50000
      Puzzle
      Disney
      http://help.disney.com
      3.6
      $0.99
      88M
      https://play.google.com/store/apps/details?id=...
      com.disney.lostandfound
      2.3.3
    
    
      573
      5000000
      2014-01-14
      80202
      Where's My Water? T-Mo Edition
      500 South Buena Vista Street\nBurbank, CA 9152...
      support@disneymobile.com
      Everyone
      1000000
      Puzzle
      Disney
      http://help.disney.com
      4.4
      0
      49M
      https://play.google.com/store/apps/details?id=...
      com.disney.WMW_TMobile
      4.0
    
    
      580
      100000000
      2015-05-21
      1300424
      Where's My Water? 2
      500 South Buena Vista Street\nBurbank, CA 9152...
      support@disneymobile.com
      Everyone
      50000000
      Puzzle
      Disney
      http://help.disney.com
      4.1
      0
      69M
      https://play.google.com/store/apps/details?id=...
      com.disney.wheresmywater2_goo
      2.3.3



In [28]:

    
dfree = disney[["app_price", "app_rating"]]
dfree.ix[dfree.app_price == "0", "app_price"] = "Free"
dfree.ix[dfree.app_price != "Free", "app_price"] = "Paid"
dfree









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/pandas/core/indexing.py:426: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s






    Out[28]:






  
    
      
      app_price
      app_rating
    
  
  
    
      0
      Free
      3.7
    
    
      560
      Paid
      4.2
    
    
      570
      Paid
      3.6
    
    
      573
      Free
      4.4
    
    
      580
      Free
      4.1
    
    
      608
      Paid
      4.1
    
    
      609
      Paid
      4.6
    
    
      610
      Paid
      4.7
    
    
      1035
      Free
      3.7
    
    
      1036
      Free
      4.4
    
    
      1191
      Free
      4.2
    
    
      1192
      Free
      4.4
    
    
      1193
      Free
      4.2
    
    
      1194
      Free
      3.8
    
    
      1195
      Free
      4.1
    
    
      1196
      Free
      3.9
    
    
      1197
      Free
      4.3
    
    
      1233
      Paid
      4.2
    
    
      1481
      Paid
      4.4
    
    
      1770
      Paid
      3.5
    
    
      1772
      Paid
      4.3
    
    
      2831
      Free
      4.0
    
    
      3559
      Free
      4.0
    
    
      5377
      Free
      4.3
    
    
      5861
      Free
      4.4
    
    
      46455
      Free
      4.5
    
    
      60829
      Paid
      3.5
    
    
      65047
      Free
      4.1



In [29]:

    
p = Histogram(dfree, values="app_rating", color="app_price", legend=True, bins=10,
             title="Ratings on Free and Paid by Disney")









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [30]:

    
show(p)



In [31]:

    
df = apps.app_updated.dt.month.value_counts()
df = df.sort_index()
df = pd.DataFrame(df)
df









    Out[31]:






  
    
      
      app_updated
    
  
  
    
      1
      4091
    
    
      2
      3774
    
    
      3
      4412
    
    
      4
      4516
    
    
      5
      4861
    
    
      6
      5544
    
    
      7
      6759
    
    
      8
      7678
    
    
      9
      8525
    
    
      10
      11140
    
    
      11
      14284
    
    
      12
      4290



In [32]:

    
cat = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
df.index = cat
df = df.reset_index()
df.columns = ["Month", "Count"]
df



In [33]:

    
p = Bar(df, "Month", values="Count", xlabel="Month", ylabel="Number of Updates", 
        title="App updates by Month of Year")









    



/Users/diwaker/miniconda3/envs/notebook/lib/python3.5/site-packages/bokeh/charts/_attributes.py:78: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  df = df.sort(columns=columns)



In [34]:

    
show(p)



In [ ]:

    
### Work in Progress



In [ ]:

	app_downloads_max	app_updated	app_rating_count	app_name	developer_address	developer_email	app_content_rating	app_downloads_min	app_genre	developer_name	developer_website	app_rating	app_price	app_size	app_link	id	app_min_os
0	5000000	2015-09-29	31139	Disney Junior Play	500 South Buena Vista Street\nBurbank, CA 9152...	support@disneymobile.com	Everyone	1000000	Educational	Disney	http://help.disney.com	3.7	0	36M	https://play.google.com/store/apps/details?id=...	com.disney.disneyjuniorplay_goo	4.0
1	5000000	2015-08-12	33760	Animals Farm For Kids	Poland,\nPoznań 61-697 os. Wichrowe Wzgórze 28...	kidsgamesprojects@gmail.com	Everyone	1000000	Educational	Kids Games Projects	http://kidsgameprojects.com	4.1	0	29M	https://play.google.com/store/apps/details?id=...	pl.kidsgameprojects.com.AnimalsFarmForKids	2.3
2	50000	2014-10-13	199	Pet Link DELUXE	NaN	phanhaiduong80@gmail.com	Everyone	10000	Board	phanhaiduong80	NaN	4.2	0	4.1M	https://play.google.com/store/apps/details?id=...	com.fungalaxy.petlinkdeluxe	2.1
3	50000	2015-06-08	131	Aesops Fables stories for kids	610, Shiromani complex,\nAbove kandoi bhogilal...	pratikmachchar@gmail.com	Everyone	10000	Books & Reference	Pratik Machchar	http://vyaap.com	4.3	0	9.2M	https://play.google.com/store/apps/details?id=...	com.pratik.mobileapps.aesopfables.shortstories...	2.3.3
4	5000	2015-05-25	16	Маша и Медведь: Кто икнул?	Bachemer Str. 210\n50935 Köln	games@mashabear.ru	Everyone	1000	Educational	tekkon IPM GmbH	http://games.mashabear.ru/	3.4	$1.00	21M	https://play.google.com/store/apps/details?id=...	air.ru.mashabear.hiccup	2.2

	app_downloads_max	app_rating_count	app_downloads_min	app_rating
count	7.922800e+04	75333.000000	7.922800e+04	75333.000000
mean	2.463210e+06	14616.394515	5.946835e+05	4.053591
std	6.674247e+07	253880.087074	1.394023e+07	0.597489
min	5.000000e+00	1.000000	1.000000e+00	0.000000
25%	1.000000e+03	17.000000	5.000000e+02	3.800000
50%	1.000000e+04	125.000000	5.000000e+03	4.100000
75%	1.000000e+05	1002.000000	5.000000e+04	4.400000
max	5.000000e+09	31881332.000000	1.000000e+09	5.000000

	app_genre
Personalization	8472
Books & Reference	7303
Sports	5266
Tools	5161
Entertainment	4797

	index	developer_name
0	Oceanhouse Media, Inc.	73
1	Google Inc.	71
2	MobiSystems	71
3	ZT.art	69
4	romeLab	65

	count
version
2.3	15745
2.2	13996
2.3.3	10285
4.0	9149
2.1	7299
4.0.3	4350
3.0	3928
1.6	3541
Varies with device	2599
4.1	2491
1.5	2177
2.0	851
4.3	636
3.2	587

	app_genre	app_rating	app_rating_count
0	Action	4.011181	104.134200
1	Adventure	4.097447	28.195745
2	Arcade	4.041518	59.328449
3	Board	4.120513	13.869318
4	Books & Reference	4.173396	1.924583
5	Business	3.997228	6.026630
6	Card	4.037453	29.860734
7	Casino	4.206897	31.942126
8	Casual	3.929950	55.386864
9	Comics	4.105493	2.125908
10	Communication	4.021654	108.735026
11	Education	4.040881	2.530699
12	Educational	3.948350	1.991588
13	Entertainment	4.005387	10.359418
14	Finance	4.007984	2.669704
15	Health & Fitness	4.002206	5.344012
16	Libraries & Demo	4.065567	1.117482
17	Lifestyle	4.046205	4.349274
18	Media & Video	3.848709	14.090607
19	Medical	4.007736	0.263632
20	Music	3.991935	2.353355
21	Music & Audio	4.107416	14.102717
22	News & Magazines	4.033912	6.457827
23	Personalization	4.204609	3.927539
24	Photography	4.007900	41.435332
25	Productivity	4.083363	20.985893
26	Puzzle	4.066897	17.188863
27	Racing	3.934063	139.357741
28	Role Playing	4.100704	38.847852
29	Shopping	3.974662	29.719652
30	Simulation	3.804110	52.400362
31	Social	4.058929	84.588328
32	Sports	4.061368	6.603193
33	Strategy	4.137278	112.142562
34	Tools	4.046820	13.443610
35	Transportation	3.953732	1.060029
36	Travel & Local	3.981950	14.982025
37	Trivia	4.041410	42.324718
38	Weather	4.026106	2.673707
39	Word	4.105648	34.428100

	Content Rating	Mean Downloads	Mean App Rating
0	Adults only 18+	463.273077	3.900000
1	Everyone	598.845695	4.098742
2	Everyone 10+	1710.798305	4.136165
3	Mature 17+	921.755529	4.126981
4	Teen	3148.191777	4.094936
5	Unrated	71.411608	3.907865

	app_price	app_rating
0	Free	3.7
560	Paid	4.2
570	Paid	3.6
573	Free	4.4
580	Free	4.1
608	Paid	4.1
609	Paid	4.6
610	Paid	4.7
1035	Free	3.7
1036	Free	4.4
1191	Free	4.2
1192	Free	4.4
1193	Free	4.2
1194	Free	3.8
1195	Free	4.1
1196	Free	3.9
1197	Free	4.3
1233	Paid	4.2
1481	Paid	4.4
1770	Paid	3.5
1772	Paid	4.3
2831	Free	4.0
3559	Free	4.0
5377	Free	4.3
5861	Free	4.4
46455	Free	4.5
60829	Paid	3.5
65047	Free	4.1

	app_updated
1	4091
2	3774
3	4412
4	4516
5	4861
6	5544
7	6759
8	7678
9	8525
10	11140
11	14284
12	4290