In [1]:
from github import Github
import configparser
import json
import datetime
import time
import pandas as pd
from pocket import Pocket, PocketException

In [2]:
# Load API credentials from the local secrets file (never hardcode keys).
config = configparser.ConfigParser()
config.read('secrets.ini')

# GitHub client, authenticated with the username/password from secrets.ini.
github_username = config.get('Github', 'username')
github_password = config.get('Github', 'password')
gh = Github(github_username, github_password)

# Pocket client, authenticated with the consumer key + access token.
consumer_key = config.get('Pocket', 'consumer_key')
access_token = config.get('Pocket', 'access_token')
p = Pocket(consumer_key=consumer_key, access_token=access_token)

In [27]:
# NOTE(review): Github.get_repos() pages through *all public repositories on
# GitHub*, not this account's. The sync cell below uses
# gh.get_user().get_repos(), so probe the same collection here.
repos = gh.get_user().get_repos()
repos[0].updated_at


Out[27]:
datetime.datetime(2016, 9, 8, 5, 16, 47)

In [16]:
# NOTE(review): `articles` is assigned in the In[17] cell below — this display
# cell only works out of execution order and fails on Restart-&-Run-All.
articles


Out[16]:
{'complete': 0,
 'error': None,
 'list': [],
 'search_meta': {'search_type': 'normal'},
 'since': 1470348449,
 'status': 2}

In [17]:
# Incrementally sync newly-saved Pocket articles into the local JSON cache.
with open('cache_timestamps.json', 'r') as infile:
    cache_timestamps = json.load(infile)
pocket_cache_datetime = cache_timestamps['pocket']

# Fetch everything saved since the last sync; detailType="complete" includes
# tags, media flags and resolved metadata.
try:
    articles = p.retrieve(since=pocket_cache_datetime, detailType="complete")
except PocketException as e:
    print(e.message)
    # BUG FIX: `articles` was left undefined on failure, so the code below
    # raised NameError. Keep the best-effort behavior by syncing nothing.
    articles = None

pocket_data = load_pocket_data()
if articles and articles['list']:
    for article, data in articles["list"].items():
        if article not in pocket_data:
            try:
                tags = list(data["tags"].keys()) if "tags" in data else []
                # Pocket's has_video/has_image flags use "2" to mean the item
                # itself is a video/image.
                if data["has_video"] == "2":
                    media_type = "video"
                elif data["has_image"] == "2":
                    media_type = "image"
                else:
                    media_type = "article"
                # BUG FIX: this assignment was indented inside the `else`
                # branch above, so video and image items were silently
                # dropped from the cache.
                pocket_data[article] = {'timestamp': data['time_added'],
                                        'title': data["resolved_title"],
                                        'tags': tags,
                                        'media_type': media_type,
                                        'excerpt': data["excerpt"],
                                        'word_count': data["word_count"],
                                        'url': data["resolved_url"]}
            except KeyError as e:
                print(str(e) + " not in article data")

dump_pocket_data(pocket_data)
# Record the sync time so the next run only fetches newer items.
cache_timestamps['pocket'] = str(int(time.time()))
with open('cache_timestamps.json', 'w') as outfile:
    json.dump(cache_timestamps, outfile)

In [13]:
# Flatten the Pocket cache into (tag, timestamp, media_type) rows and count
# tag usage at several time granularities for the dashboard JSON.
pocket_data = load_pocket_data()
pocket_tag_tuples = []
for article, data in pocket_data.items():
    saved_at = datetime.datetime.fromtimestamp(int(data["timestamp"]))
    for tag in data['tags']:
        pocket_tag_tuples.append((tag, saved_at, data["media_type"]))
df = pd.DataFrame(pocket_tag_tuples, columns=['tag', 'timestamp', 'media_type'])

# One strftime format per granularity — replaces four copy-pasted
# apply/groupby/reset_index blocks with a single loop.
period_formats = {'hourly': "%Y-%m-%dT%H",
                  'daily': "%Y-%m-%d",
                  'monthly': "%Y-%m",
                  'yearly': "%Y"}
tag_counts = {}
for period, fmt in period_formats.items():
    period_col = df["timestamp"].apply(lambda ts: ts.strftime(fmt))
    grouped = df.assign(period=period_col).groupby(["period", "media_type", "tag"])
    counts_df = grouped.size().reset_index(name="Group_Count")
    tag_counts[period] = [tuple(x) for x in counts_df.values]

# Keep the original module-level names alive for the display cells below.
hourly_tuples = tag_counts['hourly']
daily_tuples = tag_counts['daily']
monthly_tuples = tag_counts['monthly']
yearly_tuples = tag_counts['yearly']

with open('pocket_tag_tuples.json', 'w') as outfile:
    json.dump(tag_counts, outfile)

In [70]:
# Inspect the monthly (period, media_type, tag, count) rows built in In[13].
monthly_tuples


Out[70]:
[('2011-10', 'article', 'advice', 1),
 ('2011-11', 'article', 'advice', 2),
 ('2011-11', 'article', 'apps', 1),
 ('2011-11', 'article', 'cool', 3),
 ('2011-11', 'article', 'emacs', 1),
 ('2011-11', 'article', 'equations', 2),
 ('2011-11', 'article', 'idea', 1),
 ('2011-11', 'article', 'ipad', 1),
 ('2011-11', 'article', 'lua', 1),
 ('2011-11', 'article', 'music', 1),
 ('2011-11', 'article', 'nasa', 1),
 ('2011-11', 'article', 'picture', 1),
 ('2011-11', 'article', 'programming', 3),
 ('2011-11', 'article', 'space', 1),
 ('2011-11', 'article', 'ted', 1),
 ('2011-11', 'image', 'cool', 2),
 ('2011-11', 'image', 'funny', 1),
 ('2011-11', 'image', 'picture', 3),
 ('2011-11', 'image', 'space', 1),
 ('2011-12', 'article', 'cool', 2),
 ('2011-12', 'article', 'idea', 1),
 ('2011-12', 'article', 'learning resource', 1),
 ('2011-12', 'article', 'picture', 1),
 ('2011-12', 'article', 'programming', 2),
 ('2011-12', 'article', 'sopa', 1),
 ('2011-12', 'article', 'space', 1),
 ('2012-01', 'article', 'hot topic', 1),
 ('2012-01', 'article', 'iran', 1),
 ('2012-01', 'article', 'oil', 1),
 ('2012-01', 'article', 'politics', 1),
 ('2012-01', 'article', 'startups', 1),
 ('2012-01', 'article', 'tech', 1),
 ('2012-01', 'article', 'ted', 1),
 ('2012-01', 'image', 'cool', 1),
 ('2012-01', 'image', 'picture', 1),
 ('2012-03', 'article', 'bank of america', 1),
 ('2012-03', 'article', 'birth control', 1),
 ('2012-03', 'article', 'cool', 2),
 ('2012-03', 'article', 'design', 1),
 ('2012-03', 'article', 'magnet', 1),
 ('2012-03', 'article', 'programming', 1),
 ('2012-03', 'article', 'shuttle launch', 1),
 ('2012-03', 'article', 'space', 1),
 ('2012-03', 'article', 'video', 1),
 ('2012-03', 'image', 'cool', 1),
 ('2012-03', 'image', 'diy', 1),
 ('2012-03', 'image', 'idea', 1),
 ('2012-04', 'article', 'algorithms', 1),
 ('2012-04', 'article', 'apps', 3),
 ('2012-04', 'article', 'asteroid mining', 1),
 ('2012-04', 'article', 'awesome', 1),
 ('2012-04', 'article', 'backend', 1),
 ('2012-04', 'article', 'bitcoin', 1),
 ('2012-04', 'article', 'cheat sheet', 1),
 ('2012-04', 'article', 'coffee', 1),
 ('2012-04', 'article', 'commute', 1),
 ('2012-04', 'article', 'cool', 6),
 ('2012-04', 'article', 'css3', 1),
 ('2012-04', 'article', 'design', 2),
 ('2012-04', 'article', 'diy', 2),
 ('2012-04', 'article', 'hashing', 1),
 ('2012-04', 'article', 'high frequency trading', 2),
 ('2012-04', 'article', 'html', 1),
 ('2012-04', 'article', 'ice', 1),
 ('2012-04', 'article', 'idea', 4),
 ('2012-04', 'article', 'jquery', 1),
 ('2012-04', 'article', 'kids', 1),
 ('2012-04', 'article', 'learning resource', 2),
 ('2012-04', 'article', 'node', 1),
 ('2012-04', 'article', 'oil', 1),
 ('2012-04', 'article', 'pattern matching', 1),
 ('2012-04', 'article', 'picture', 1),
 ('2012-04', 'article', 'presentation', 1),
 ('2012-04', 'article', 'programming', 8),
 ('2012-04', 'article', 'python', 2),
 ('2012-04', 'article', 'rocket', 1),
 ('2012-04', 'article', 'simulation', 1),
 ('2012-04', 'article', 'sorting', 1),
 ('2012-04', 'article', 'space', 1),
 ('2012-04', 'article', 'stocks, trading', 2),
 ('2012-04', 'article', 'tar sands', 1),
 ('2012-04', 'image', 'advice', 1),
 ('2012-04', 'image', 'cool', 2),
 ('2012-04', 'image', 'idea', 1),
 ('2012-04', 'image', 'picture', 2),
 ('2012-04', 'image', 'well said', 1),
 ('2012-05', 'article', 'advice', 2),
 ('2012-05', 'article', 'diy', 1),
 ('2012-05', 'article', 'economy', 1),
 ('2012-05', 'article', 'funny', 1),
 ('2012-05', 'article', 'idea', 1),
 ('2012-05', 'article', 'new grads', 1),
 ('2012-05', 'article', 'programming', 2),
 ('2012-05', 'article', 'python', 1),
 ('2012-05', 'article', 'rocket', 1),
 ('2012-05', 'article', 'startups', 1),
 ('2012-05', 'article', 'well said', 1),
 ('2012-05', 'image', 'awesome', 1),
 ('2012-05', 'image', 'cool', 2),
 ('2012-05', 'image', 'diy', 1),
 ('2012-05', 'image', 'picture', 1),
 ('2012-10', 'article', 'algorithms', 1),
 ('2012-10', 'article', 'cenovus', 1),
 ('2012-10', 'article', 'reddit', 1),
 ('2012-11', 'article', 'big data', 1),
 ('2012-11', 'article', 'mars', 1),
 ('2012-11', 'article', 'musk', 1),
 ('2012-11', 'article', 'programming', 1),
 ('2012-11', 'article', 'spacex', 1),
 ('2012-11', 'article', 'teaching', 1),
 ('2012-11', 'image', 'awesome', 1),
 ('2012-11', 'image', 'cartoon', 1),
 ('2012-11', 'image', 'cool', 2),
 ('2012-11', 'image', 'funny', 1),
 ('2012-11', 'image', 'physics', 1),
 ('2012-11', 'image', 'picture', 3),
 ('2012-12', 'article', '3d printing', 1),
 ('2012-12', 'article', 'advice', 4),
 ('2012-12', 'article', 'algorithms', 1),
 ('2012-12', 'article', 'bosses', 1),
 ('2012-12', 'article', 'career', 1),
 ('2012-12', 'article', 'cool', 1),
 ('2012-12', 'article', 'design', 2),
 ('2012-12', 'article', 'development', 1),
 ('2012-12', 'article', 'hackathon', 1),
 ('2012-12', 'article', 'mobile', 1),
 ('2012-12', 'article', 'movile', 1),
 ('2012-12', 'article', 'physics', 1),
 ('2012-12', 'article', 'picture', 1),
 ('2012-12', 'article', 'presentation', 1),
 ('2012-12', 'article', 'programming', 2),
 ('2012-12', 'article', 'rasberrypi', 1),
 ('2012-12', 'article', 'skills', 1),
 ('2012-12', 'article', 'startup', 1),
 ('2012-12', 'article', 'stats', 1),
 ('2012-12', 'article', 'well said', 1),
 ('2012-12', 'image', 'awesome', 1),
 ('2012-12', 'image', 'cool', 1),
 ('2012-12', 'image', 'picture', 1),
 ('2016-06', 'article', 'bitcoin', 1),
 ('2016-06', 'article', 'blockchain', 1),
 ('2016-06', 'article', 'data models', 1),
 ('2016-06', 'article', 'datascience', 1),
 ('2016-06', 'article', 'design', 1),
 ('2016-06', 'article', 'ethereum', 1),
 ('2016-06', 'article', 'futuorology', 1),
 ('2016-06', 'article', 'google', 1),
 ('2016-06', 'article', 'hack', 1),
 ('2016-06', 'article', 'machine learning', 1),
 ('2016-06', 'article', 'music', 1),
 ('2016-06', 'article', 'photo', 1),
 ('2016-06', 'article', 'physics', 1),
 ('2016-06', 'article', 'programming', 1),
 ('2016-06', 'article', 'project idea', 1),
 ('2016-06', 'article', 'quantum', 1),
 ('2016-06', 'article', 'science', 1),
 ('2016-07', 'article', 'artificial intelligence', 1),
 ('2016-07', 'article', 'brain', 2),
 ('2016-07', 'article', 'business', 1),
 ('2016-07', 'article', 'companies', 1),
 ('2016-07', 'article', 'corporations', 1),
 ('2016-07', 'article', 'd3', 3),
 ('2016-07', 'article', 'data science', 3),
 ('2016-07', 'article', 'datascience', 1),
 ('2016-07', 'article', 'datasource', 2),
 ('2016-07', 'article', 'dataviz', 2),
 ('2016-07', 'article', 'design', 1),
 ('2016-07', 'article', 'driverless cars', 1),
 ('2016-07', 'article', 'economics', 2),
 ('2016-07', 'article', 'exercise', 1),
 ('2016-07', 'article', 'fitness', 1),
 ('2016-07', 'article', 'food', 1),
 ('2016-07', 'article', 'funny', 1),
 ('2016-07', 'article', 'futurology', 1),
 ('2016-07', 'article', 'geek', 1),
 ('2016-07', 'article', 'google', 1),
 ('2016-07', 'article', 'health', 3),
 ('2016-07', 'article', 'kaggle', 1),
 ('2016-07', 'article', 'learning resource', 5),
 ('2016-07', 'article', 'machine learning', 3),
 ('2016-07', 'article', 'mental health', 1),
 ('2016-07', 'article', 'mental models', 1),
 ('2016-07', 'article', 'photo', 1),
 ('2016-07', 'article', 'planetary science', 1),
 ('2016-07', 'article', 'product', 2),
 ('2016-07', 'article', 'productivity', 1),
 ('2016-07', 'article', 'programming', 1),
 ('2016-07', 'article', 'psychology', 2),
 ('2016-07', 'article', 'python', 3),
 ('2016-07', 'article', 'recipes', 1),
 ('2016-07', 'article', 'reddit', 1),
 ('2016-07', 'article', 'satire', 1),
 ('2016-07', 'article', 'science', 1),
 ('2016-07', 'article', 'scipy', 1),
 ('2016-07', 'article', 'space', 2),
 ('2016-07', 'article', 'tech', 2),
 ('2016-07', 'article', 'workout', 1),
 ('2016-08', 'article', 'brain', 1),
 ('2016-08', 'article', 'datascience', 1),
 ('2016-08', 'article', 'dataviz', 1),
 ('2016-08', 'article', 'economics', 1),
 ('2016-08', 'article', 'inequality', 1),
 ('2016-08', 'article', 'javascript', 1),
 ('2016-08', 'article', 'learning', 1),
 ('2016-08', 'article', 'olympics', 1),
 ('2016-08', 'article', 'politics', 1),
 ('2016-08', 'article', 'productivity', 1),
 ('2016-08', 'article', 'programming', 1),
 ('2016-08', 'article', 'project-idea', 1),
 ('2016-08', 'article', 'psychology', 1),
 ('2016-08', 'article', 'python', 1),
 ('2016-08', 'article', 'reddit', 1),
 ('2016-08', 'article', 'sports', 1)]

In [4]:
def load_commit_data():
    """Read the cached GitHub commit data from commit_data.json."""
    with open("commit_data.json", 'r') as infile:
        return json.load(infile)

def dump_commit_data(data):
    """Persist the commit cache to commit_data.json as JSON."""
    with open('commit_data.json', 'w') as cache_file:
        json.dump(data, cache_file)

def load_pocket_data():
    """Read the cached Pocket article data from pocket_data.json.

    Returns:
        dict: article-id -> article metadata, as written by dump_pocket_data.
    """
    # FIX: the local was previously named `commit_data` (copy-paste residue
    # from load_commit_data); behavior is unchanged.
    with open("pocket_data.json", 'r') as infile:
        pocket_data = json.load(infile)
    return pocket_data

def dump_pocket_data(data):
    """Persist the Pocket article cache to pocket_data.json as JSON."""
    with open('pocket_data.json', 'w') as cache_file:
        json.dump(data, cache_file)

In [56]:
def parse_commit_memo(memo):
    """Split a commit message into (tags, memo).

    Tags are whitespace-separated tokens prefixed with '--', e.g.
    "fix chart --d3.js --dataviz" -> (['d3.js', 'dataviz'], 'fix chart').

    BUG FIX: the old implementation removed tag text with str.replace, which
    also deleted matching substrings from the memo body itself (e.g.
    "update python script --python" lost the word "python"). Filtering
    token-wise only drops the '--'-prefixed tokens.
    """
    tokens = memo.split()
    tags = [token[2:] for token in tokens if token.startswith('--')]
    cleaned_memo = " ".join(token for token in tokens if not token.startswith('--'))
    return tags, cleaned_memo

In [58]:
# Incrementally sync new GitHub commits into the local JSON cache.
with open('cache_timestamps.json', 'r') as infile:
    cache_timestamps = json.load(infile)
# GitHub timestamps are stored as ISO-8601 UTC strings with a "Z" suffix.
gh_cache_datetime = datetime.datetime.strptime(cache_timestamps['github'],"%Y-%m-%dT%H:%M:%SZ")

profile = gh.get_user()
repos = profile.get_repos()
commit_data = load_commit_data()
for repo in repos:
    # Skip repos untouched since the last sync to save API calls.
    if repo.pushed_at >= gh_cache_datetime:
        commits = repo.get_commits(since = gh_cache_datetime)
        for commit in commits:
            if commit.sha not in commit_data.keys():
                message = commit.raw_data["commit"]["message"]
                # '--tag' tokens in the message become tags; the rest is the memo.
                tags,memo = parse_commit_memo(message)
                commit_data[commit.sha] = {'timestamp':commit.raw_data['commit']['author']['date'],
                                   'repo':repo.name,
                                   'tags':tags,
                                   'memo':memo,
                                   # NOTE(review): GitHub's list-commits payload may omit
                                   # 'stats' — confirm raw_data includes it here.
                                   'stats':commit.raw_data['stats']}
dump_commit_data(commit_data)
# Record the sync time (UTC) so the next run only inspects newer pushes.
cache_timestamps['github'] = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
with open('cache_timestamps.json', 'w') as outfile:
    json.dump(cache_timestamps, outfile)

In [59]:
# Flatten the commit cache into (tag, timestamp, repo) rows and count tag
# usage at several time granularities, mirroring the Pocket aggregation.
commit_data = load_commit_data()
commit_tag_tuples = []
for commit, data in commit_data.items():
    committed_at = datetime.datetime.strptime(data["timestamp"], "%Y-%m-%dT%H:%M:%SZ")
    for tag in data['tags']:
        commit_tag_tuples.append((tag, committed_at, data["repo"]))
df = pd.DataFrame(commit_tag_tuples, columns=['tag', 'timestamp', 'repo'])

# One strftime format per granularity — replaces four copy-pasted
# apply/groupby/reset_index blocks with a single loop.
period_formats = {'hourly': "%Y-%m-%dT%H",
                  'daily': "%Y-%m-%d",
                  'monthly': "%Y-%m",
                  'yearly': "%Y"}
tag_counts = {}
for period, fmt in period_formats.items():
    period_col = df["timestamp"].apply(lambda ts: ts.strftime(fmt))
    grouped = df.assign(period=period_col).groupby(["period", "repo", "tag"])
    counts_df = grouped.size().reset_index(name="Group_Count")
    tag_counts[period] = [tuple(x) for x in counts_df.values]

# Keep the original module-level names alive for the display cells below.
hourly_tuples = tag_counts['hourly']
daily_tuples = tag_counts['daily']
monthly_tuples = tag_counts['monthly']
yearly_tuples = tag_counts['yearly']

with open('commit_tag_tuples.json', 'w') as outfile:
    json.dump(tag_counts, outfile)

In [60]:
# Inspect the hourly (period, repo, tag, count) rows built in In[59].
hourly_tuples


Out[60]:
[('2016-07-23T02', 'cole-maclean.github.io', 'jupyter', 1),
 ('2016-07-23T02', 'cole-maclean.github.io', 'python', 1),
 ('2016-07-23T03', 'cole-maclean.github.io', 'API', 1),
 ('2016-07-23T03', 'cole-maclean.github.io', 'json', 1),
 ('2016-07-23T03', 'cole-maclean.github.io', 'jupyter', 2),
 ('2016-07-23T03', 'cole-maclean.github.io', 'python', 2),
 ('2016-07-23T04', 'cole-maclean.github.io', 'pandas', 2),
 ('2016-07-23T04', 'cole-maclean.github.io', 'python', 1),
 ('2016-07-23T20', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-07-23T20', 'cole-maclean.github.io', 'dataviz', 1),
 ('2016-07-25T03', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-07-25T20', 'cole-maclean.github.io', 'd3.js', 2),
 ('2016-07-25T20', 'cole-maclean.github.io', 'dataviz', 1),
 ('2016-07-25T21', 'cole-maclean.github.io', 'd3.js', 2),
 ('2016-07-25T22', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-07-26T05', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-07-26T05', 'cole-maclean.github.io', 'dataviz', 1),
 ('2016-07-26T05', 'cole-maclean.github.io', 'debugging', 1),
 ('2016-07-26T05', 'cole-maclean.github.io', 'python', 2),
 ('2016-07-26T06', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-07-26T06', 'cole-maclean.github.io', 'dataviz', 1),
 ('2016-07-27T03', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-07-27T03', 'cole-maclean.github.io', 'dataviz.', 1),
 ('2016-07-27T03', 'cole-maclean.github.io', 'html', 1),
 ('2016-07-27T03', 'cole-maclean.github.io', 'js', 1),
 ('2016-07-27T21', 'cole-maclean.github.io', 'svg', 1),
 ('2016-07-27T22', 'cole-maclean.github.io', 'svg', 1),
 ('2016-08-02T19', 'cole-maclean.github.io', 'd3.js', 2),
 ('2016-08-02T19', 'cole-maclean.github.io', 'dataviz', 2),
 ('2016-08-02T19', 'cole-maclean.github.io', 'design', 1),
 ('2016-08-02T22', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-08-02T22', 'cole-maclean.github.io', 'dataviz', 1),
 ('2016-08-02T22', 'cole-maclean.github.io', 'debugging', 1),
 ('2016-08-02T22', 'cole-maclean.github.io', 'html', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'css', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'd3.js', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'dataviz', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'debugging', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'documentation', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'html', 1),
 ('2016-08-02T23', 'cole-maclean.github.io', 'python', 1),
 ('2016-08-03T21', 'cole-maclean.github.io', 'API', 1),
 ('2016-08-03T21', 'cole-maclean.github.io', 'data-scrapping', 1),
 ('2016-08-03T21', 'cole-maclean.github.io', 'json', 1),
 ('2016-08-03T21', 'cole-maclean.github.io', 'jupyter', 1),
 ('2016-08-03T21', 'cole-maclean.github.io', 'python', 1)]