In [1]:
import sys
# Extend the module search path so the `instabot` import below can resolve when the
# package is not installed (presumably it lives two directories above this notebook -- confirm).
sys.path.append('../../')
from instabot import User, Getter
Log in the users that instabot will use. I suggest adding as many accounts as you have, because all GET requests are parallelized across them to spread the load on Instagram's servers.
In [2]:
# Placeholder (login, password) pairs: replace them with your own accounts and
# never commit real passwords alongside the notebook.
scraper_accounts = [
    ("user_for_scrapping1", "password"),
    ("user_for_scrapping2", "password"),
    ("user_for_scrapping3", "password"),
]
for login, password in scraper_accounts:
    # The instance is discarded into `_`; per the notebook text, creating a User
    # is what makes the account available to the Getter created later.
    _ = User(login, password)
Initialize the Getter class without any parameters. It will use all of the available, successfully logged-in users to parallelize the GET requests to Instagram's servers.
In [3]:
# No arguments needed: per the notebook text, Getter picks up every successfully
# logged-in User on its own.
get = Getter()
In [4]:
# Resolve a human-readable place name to the id returned by `get.geo_id`.
location_name = "МФТИ"
location_id = get.geo_id(location_name)
location_summary = "The id of %s is %d." % (location_name, location_id)
print(location_summary)
For example, suppose you want to know who posts with a specific geotag. You can iterate over the medias and take each author's username.
Get an iterator over the geo medias
In [5]:
# Medias tagged with the location resolved above. The notebook text describes the
# result as an iterator, so it can presumably be consumed only once -- re-run this
# cell before iterating again. `total=10` presumably caps the number of medias (confirm).
geo_medias = get.geo_medias(location_id, total=10)
In [6]:
# Header line first, then one author username per fetched media.
geotag_header = "Users who post with %s geotag:" % location_name
print(geotag_header)
# The loop variable stays named `media` on purpose: the next cell inspects the
# last item it was bound to.
for media in geo_medias:
    author = media["user"]["username"]
    print(author)
All the keys present in a media's JSON response:
In [7]:
# `media` is the last item left over from the loop in the previous cell; this
# raises NameError if that loop never ran an iteration.
media.keys()
Out[7]:
In [8]:
# Fetch the profile once; its "pk" field is what the following cells use as the user id.
username = "ohld"
user_info = get.user_info(username)
user_id = user_info["pk"]
user_id_summary = "The id of '%s' is %d." % (username, user_id)
print(user_id_summary)
In [9]:
def mean(l):
    """Return the arithmetic mean of the numbers in ``l``, or 0 if it is empty.

    The original lambda compared against ``[]``, so only an empty *list* was
    treated as empty; an empty tuple or any other empty sequence fell through to
    a division by zero. Any iterable is accepted here, including generators.

    Args:
        l: Iterable of numbers. The parameter name is kept from the original
            lambda so existing callers keep working.

    Returns:
        The integer 0 for empty input, otherwise the mean as a float.
    """
    # Materialise once so one-shot iterables can be both summed and counted.
    values = list(l)
    if not values:
        return 0
    # Multiplying by 1.0 forces true division on Python 2 as well.
    return sum(values) * 1.0 / len(values)
# Like counts of the feed items returned by `get.user_feed` (requested with total=20).
like_counts = []
for feed_media in get.user_feed(user_id, total=20):
    like_counts.append(feed_media["like_count"])

print("Amount of likes recieved by %s" % username)
print(like_counts)
likes_total = sum(like_counts)
print("Mean: %.2f. Total: %d" % (mean(like_counts), likes_total))
In [10]:
# `tqdm.tqdm_notebook` is deprecated; `tqdm.auto` selects the notebook widget when
# it is available and falls back to the console bar otherwise.
from tqdm.auto import tqdm  # to see the progress of scraping

# Maps each follower's username to the mean like count of the feed items
# returned for that follower (requested with total=5).
mean_likes = {}
followers = get.user_followers(user_id)
for follower in tqdm(followers, total=user_info["follower_count"]):
    # Use a dedicated name so the `like_counts` list computed for `username`
    # in the previous cell is not overwritten.
    follower_like_counts = [
        item["like_count"] for item in get.user_feed(follower["pk"], total=5)
    ]
    mean_likes[follower["username"]] = mean(follower_like_counts)
In [11]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.figure(figsize=(15, 5))
plt.hist([i for i in list(mean_likes.values()) if i > 0], bins=500)
plt.title("Mean likes of %s followers" % username)
plt.xlabel("Mean likes")
plt.ylabel("Frequency")
plt.show()
In [12]:
# Second histogram, restricted to means strictly between 0 and 300.
filtered_likes = []
for follower_mean in mean_likes.values():
    if 0 < follower_mean < 300:
        filtered_likes.append(follower_mean)

fig, ax = plt.subplots(figsize=(15, 5))
ax.hist(filtered_likes, bins=100)
ax.set_title("Mean likes of %s followers" % username)
ax.set_xlabel("Mean likes")
ax.set_ylabel("Frequency")
plt.show()
Let's take a look at the follower with the highest mean likes
In [13]:
# Username whose entry in `mean_likes` is largest; on ties `max` returns the first one it meets.
top_follower = max(mean_likes, key=mean_likes.get)
print("%s has the highest value of mean likes in %s followers." % (top_follower, username))