In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
plt.style.use('ggplot')
%matplotlib inline
In [2]:
app = pd.read_pickle('app_cleaned.pickle')
In [3]:
app = app.drop_duplicates()
In [4]:
app = app.dropna(axis=0)  # drop rows containing NaN values
In [5]:
app.head()
Out[5]:
In [8]:
ratio = app['num_current_rating'] / app['num_overall_rating']  # share of ratings contributed by the current version
In [9]:
# use a histogram to show the distribution of the ratio
plt.hist(ratio, bins=20, alpha=.4, label='ratio')
plt.legend()
plt.show()
According to the histogram, most of the ratios fall below 0.2.
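The visual reading can be checked numerically; a quick sketch, assuming the ratio series computed above:
In [ ]:
# share of apps whose current/overall rating-count ratio is below 0.2
print('share below 0.2: {:.1%}'.format((ratio < 0.2).mean()))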
In [10]:
index = ratio > 0.05  # boolean mask for apps whose ratio exceeds 0.05
In [11]:
appfilter = app.loc[index]  # keep apps whose current-rating count exceeds 5% of the overall-rating count
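It is worth knowing how aggressive the 0.05 cutoff is; a minimal check using the mask and DataFrame defined above:
In [ ]:
# count how many apps survive the ratio filter
print('{} of {} apps kept'.format(index.sum(), len(app)))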
In [12]:
# use a histogram to show the distribution of current_rating - overall_rating
plt.hist(appfilter['current_rating'] - appfilter['overall_rating'], bins=20, alpha=.4, label='diff')
plt.legend()
plt.show()
In [13]:
diff = appfilter['current_rating'] - appfilter['overall_rating']
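Summary statistics complement the histogram; a quick check on the diff series just defined:
In [ ]:
# five-number summary of the current-minus-overall rating difference
diff.describe()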
In [14]:
index2 = diff >= 0.1    # mask for apps whose current rating improved by at least 0.1
index2b = diff <= -0.1  # mask for apps whose current rating dropped by at least 0.1
In [15]:
app_improved = appfilter.loc[index2]
app_declined = appfilter.loc[index2b]
In [16]:
nvd = app_improved['new_version_desc']   # release notes of improved apps
nvdd = app_declined['new_version_desc']  # release notes of declined apps
In [17]:
# compile the release notes into plain lists of documents
doc_complete = nvd.tolist()
doc_complete2 = nvdd.tolist()
In [18]:
# clean the documents: tokenize, stem, and remove stop words
import nltk
from nltk.stem.porter import PorterStemmer
from nltk.corpus import stopwords
import string

stemmer = PorterStemmer().stem
tokenize = nltk.word_tokenize
# English stop words, punctuation, and corpus-specific boilerplate (mostly pre-stemmed release-note terms)
stop = (stopwords.words('english') + list(string.punctuation)
        + ['we', 'new', 'fix', 'io', 'updat', 'improv', 'bug',
           'app', 'featur', 'perform', 'ad', "'s", '--', 'us',
           'minor', 'support', 'iphon', 'issu', 'add', 'enhanc',
           'user', 'pleas', '10', '7', 'experi', 'thank',
           'version', 'screen', "''", '2', '6', 'icon',
           'stabil', 'review', '5', '``'])

def stem(tokens, stemmer=stemmer):
    # stem each non-stop token, then filter again so stemmed forms in the stop list are dropped too
    stemwords = [stemmer(w.lower()) for w in tokens if w not in stop]
    return [w for w in stemwords if w not in stop]

def lemmatize(text):
    return stem(tokenize(text))
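A quick sanity check on the cleaning pipeline; the sample sentence below is made up for illustration:
In [ ]:
# boilerplate terms such as 'updat', 'fix', and 'improv' should be stripped, leaving content-bearing stems
lemmatize('This update fixes several bugs and improves the login interface')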
In [19]:
doc_clean = [lemmatize(doc) for doc in doc_complete]
doc_clean2 = [lemmatize(doc) for doc in doc_complete2]
In [20]:
# Importing Gensim
import gensim
from gensim import corpora
# Create the term dictionary of the corpus, where every unique term is assigned an index.
dictionary = corpora.Dictionary(doc_clean)
dictionary2 = corpora.Dictionary(doc_clean2)
# Convert each list of documents (corpus) into a document-term matrix using the dictionary prepared above.
doc_term_matrix = [dictionary.doc2bow(doc) for doc in doc_clean]
doc_term_matrix2 = [dictionary2.doc2bow(doc) for doc in doc_clean2]
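To see what one bag-of-words row looks like, term ids can be mapped back to tokens; a small sketch, assuming the first cleaned document is non-empty:
In [ ]:
# map term ids back to tokens for the first improved-app description
[(dictionary[term_id], count) for term_id, count in doc_term_matrix[0]]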
In [21]:
# Create the LDA model class handle from gensim
Lda = gensim.models.ldamodel.LdaModel
# Run and train an LDA model on each document-term matrix.
ldamodel = Lda(doc_term_matrix, num_topics=3, id2word=dictionary, passes=50)
ldamodel2 = Lda(doc_term_matrix2, num_topics=3, id2word=dictionary2, passes=50)
In [22]:
print(ldamodel.print_topics(num_topics=3, num_words=3))
print(ldamodel2.print_topics(num_topics=3, num_words=3))
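The choice of three topics is not validated here; topic coherence is one common check, sketched below with gensim's CoherenceModel (c_v coherence, higher is generally better):
In [ ]:
from gensim.models import CoherenceModel
# c_v coherence of the improved-apps model over its own cleaned texts
cm = CoherenceModel(model=ldamodel, texts=doc_clean, dictionary=dictionary, coherence='c_v')
print(cm.get_coherence())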
Improved apps
In [185]:
# mark descriptions that contain the stemmed token 'interfac'
index_interfac = ['interfac' in doc for doc in doc_clean]
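The same keyword-mask pattern recurs in the cells below; a small helper (the name has_token is hypothetical) would condense the repetition:
In [ ]:
def has_token(docs, token):
    # boolean mask: True where a cleaned document contains the given stemmed token
    return [token in doc for doc in docs]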
In [187]:
nvd[index_interfac][1342]
Out[187]:
In [188]:
# mark descriptions that contain the token 'feedback'
index_feedback = ['feedback' in doc for doc in doc_clean]
In [190]:
nvd[index_feedback][193]
Out[190]:
In [192]:
# mark descriptions that contain the token 'store'
index_store = ['store' in doc for doc in doc_clean]
In [241]:
nvd[index_store][1024]
Out[241]:
Patterns among improved apps:
1. improvements to the interface
2. requests for user feedback
3. requests for reviews on the App Store
Worsened apps
In [214]:
# mark descriptions that contain the token 'ipad'
index_ipad = ['ipad' in doc for doc in doc_clean2]
In [220]:
nvdd[index_ipad][1373]
Out[220]:
In [222]:
# mark descriptions that contain the token 'music'
index_music = ['music' in doc for doc in doc_clean2]
In [232]:
nvdd[index_music][2157]
Out[232]:
In [234]:
# mark descriptions that contain the token 'card'
index_card = ['card' in doc for doc in doc_clean2]
In [239]:
nvdd[index_card][646]
Out[239]:
Patterns among worsened apps:
1. added features in the iPad version
2. added features related to music functionality
3. apps designed around cards
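How often these keywords actually occur in each corpus can be tallied directly; a rough sketch using the cleaned documents from above:
In [ ]:
# per-keyword document frequency in the improved vs. worsened corpora
for token in ['interfac', 'feedback', 'store', 'ipad', 'music', 'card']:
    n_up = sum(token in doc for doc in doc_clean)
    n_down = sum(token in doc for doc in doc_clean2)
    print('{:9s} improved: {:4d}   worsened: {:4d}'.format(token, n_up, n_down))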
In [24]:
import pyLDAvis.gensim
In [25]:
pyLDAvis.enable_notebook()
dec_improv = pyLDAvis.gensim.prepare(ldamodel, doc_term_matrix, dictionary)
dec_decrea = pyLDAvis.gensim.prepare(ldamodel2, doc_term_matrix2, dictionary2)
In [26]:
dec_improv
Out[26]:
In [27]:
pyLDAvis.save_html(dec_improv,'improved_apps.html')
In [28]:
dec_decrea
Out[28]:
In [28]:
pyLDAvis.save_html(dec_decrea,'worsen_apps.html')