In [1]:
    
import datetime as dt
from steemdata import SteemData
    
In [7]:
    
db = SteemData().db
    
In [3]:
    
# %load_ext autoreload
# %autoreload 2
    
In [4]:
    
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf
    
In [41]:
    
# go offline for faster workflow
# cf.go_offline()
    
In [106]:
    
# go back online to update charts on website
# cf.go_online()
    
In [ ]:
    
    
In [10]:
    
# restrict the query to posts created within the last 90 days
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=90)}
conditions = {'created': time_constraints}
    
In [11]:
    
posts = list(db['Posts'].find(conditions, projection={'_id': 0, 'created': 1}))
    
In [12]:
    
len(posts)
    
    Out[12]:
In [13]:
    
# tally how many posts were created on each calendar day
from collections import Counter
c = Counter(post['created'].date() for post in posts)
    
In [14]:
    
# build a date-indexed DataFrame of daily post counts
df = (
    pd.DataFrame.from_dict(c, orient='index')
    .reset_index()
    .rename(columns={'index': 'date', 0: 'count'})
    .sort_values('date')
    .set_index('date')
)
    
In [15]:
    
df.tail()
    
    Out[15]:
In [16]:
    
# plot everything but the first and last days — both are partial and would
# show misleadingly low counts
df.iloc[1:-1].iplot(
    title='Daily Post Count',
    colors=['blue'],
    theme='white',
    legend=False,
    filename='steemdata-30d-post-count')
    
    
    
    Out[16]:
In [ ]:
    
    
In [17]:
    
# list op_types
db['AccountOperations'].distinct('type', {})
    
    Out[17]:
In [18]:
    
# restrict to account operations from the last 7 days
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=7)}
conditions = {'timestamp': time_constraints}
    
In [19]:
    
# count all operations matching the 7-day window defined above
db['AccountOperations'].find(conditions).count()
    
    Out[19]:
In [20]:
    
# aggregation pipeline: count how many operations of each type occurred
match_stage = {'$match': conditions}
project_stage = {'$project': {'type': 1}}
group_stage = {'$group': {'_id': '$type', 'total': {'$sum': 1}}}
query = [match_stage, project_stage, group_stage]
results = list(db['AccountOperations'].aggregate(query))
    
In [21]:
    
# construct a Pandas dataframe of per-type counts and percentages
# NOTE(review): renaming columns positionally assumes the dict key order is
# (_id, total) — confirm this holds for the aggregation result documents
df = pd.DataFrame(results)
df.columns = ['type', 'total']
total = df['total'].sum()
# vectorized division instead of a per-row apply() with a lambda
df['pct'] = df['total'] / total * 100
    
In [22]:
    
# check most common operations
df.sort_values('pct', ascending=False).head()
    
    Out[22]:
In [23]:
    
# filter out op_types that are less than 0.05%
df = df[(df['pct'] > 0.05)]
    
In [24]:
    
# render a nice pie chart of the operation-type distribution
pie = go.Pie(
    labels=df['type'].values.tolist(),
    values=df['pct'].values)
layout = go.Layout(title='Blockchain Operations Distribution')
# publishes to plot.ly under this filename (overwrites any existing chart)
py.iplot(go.Figure(data=[pie], layout=layout), filename='steemdata-7d-type-pct')
    
    Out[24]:
In [25]:
    
# py.iplot?
    
In [26]:
    
from steem.converter import Converter
def power_up_down_data(direction='up', days=30, exclude_steemit=False):
    """ Fetch power-up/power-down operations from the last `days` days.

    :param str direction: `up` (transfer_to_vesting) or `down` (fill_vesting_withdraw).
    :param int days: how far back to look.
    :param bool exclude_steemit: when True and direction is `down`, drop the
        'steemit' account from the results.
    :return: list of dicts with `account`, `amount` and `date` keys.
    """
    time_constraints = {
        '$gte': dt.datetime.now() - dt.timedelta(days=days),
    }
    conditions = {
        'type': 'fill_vesting_withdraw' if direction == 'down' else 'transfer_to_vesting',
        'timestamp': time_constraints,
    }
    power_ups = list(db['AccountOperations'].find(conditions))

    def power_down_amount(amount):
        """ If power-down is in VESTS, convert it to STEEM first."""
        if amount['asset'] == 'VESTS':
            return Converter().vests_to_sp(amount['amount'])
        return amount['amount']

    if direction == 'down':
        # plain truthiness instead of the redundant `== True` comparison
        return [{
            'account': x['account'],
            'amount': power_down_amount(x['deposited']),
            'date': x['timestamp'].date()
        } for x in power_ups if not (exclude_steemit and x['account'] == 'steemit')]
    return [{
        'account': x['account'],
        'amount': x['amount']['amount'],
        'date': x['timestamp'].date()
    } for x in power_ups]
    
In [27]:
    
#downs = pd.DataFrame(power_up_down_data(direction='down'))
#downs.sort_values(by='amount', ascending=False).head(30)
# downs[downs.account == 'steemit'].head()
    
In [28]:
    
# daily totals of STEEM powered up over the last 20 days
ups = pd.DataFrame(power_up_down_data(direction='up', days=20))
daily_powerups = ups.groupby('date')[['amount']].sum()
daily_powerups.columns = ['UP']
    
In [29]:
    
# daily totals of STEEM powered down over the last 20 days
downs = pd.DataFrame(power_up_down_data(direction='down', days=20))
daily_powerdowns = downs.groupby('date')[['amount']].sum()
daily_powerdowns.columns = ['DOWN']
    
In [30]:
    
# align power-down and power-up series on their shared dates
combined = (
    daily_powerdowns.reset_index()
    .merge(daily_powerups.reset_index(), on='date')
    .set_index('date')
)
    
In [31]:
    
# grouped bar chart: daily power-ups vs power-downs, published to plot.ly
combined.iplot(kind='bar',
                title='Daily STEEM Power-Up vs Power-Down for past 20 days',
                colors=['blue', 'orange'],
                theme='white',
                filename='steemdata-30d-power-ups')
    
    Out[31]:
In [ ]:
    
    
In [32]:
    
# last 30 days of posts that declare which app published them
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=30)}
conditions = {
    'json_metadata.app': {'$exists': True},
    'created': time_constraints,
}
    
In [33]:
    
# pull each post's app identifier and strip the "/version" suffix
posts = list(db['Posts'].find(conditions, projection={'_id': 0, 'json_metadata.app': 1}))
apps = [post['json_metadata']['app'].split('/')[0] for post in posts]
    
In [34]:
    
# count most common apps (top 10 as (app, post_count) pairs)
c = Counter(apps)
top_apps = c.most_common(10)
    
In [35]:
    
# rank table; the 1-based index doubles as the position column
positions = list(range(1, len(top_apps) + 1))
df = pd.DataFrame(top_apps, index=positions, columns=['App', 'Post Count'])
df.head()
    
    Out[35]:
In [36]:
    
# save table to plotly
from plotly import figure_factory as FF
table = FF.create_table(df, index=True, index_title='Position')
py.iplot(table, filename='steemdata-30d-post-app-types')
    
    Out[36]:
In [ ]:
    
    
In [37]:
    
# accounts created in the last 90 days (classic and delegated creation ops)
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=90)}
conditions = {
    'type': {'$in': ['account_create', 'account_create_with_delegation']},
    'timestamp': time_constraints,
}
projection = {'_id': 0, 'timestamp': 1, 'creator': 1, 'new_account_name': 1}
accounts = list(db['Operations'].find(conditions, projection=projection))
    
In [38]:
    
# keep only the creator, the new account's name, and the creation date
accounts = [{
    'creator': op['creator'],
    'name': op['new_account_name'],
    'date': op['timestamp'].date(),
} for op in accounts]
    
In [39]:
    
accounts[0]
    
    Out[39]:
In [40]:
    
# creator/date frame; account names aren't needed for the counts below
df = pd.DataFrame(accounts).drop('name', axis=1)
df.head()
    
    Out[40]:
In [41]:
    
# daily number of created accounts; trim the first/last (partial) days
df_count = df.groupby('date').count()
df_count.iloc[1:-1].iplot(
    theme='white',
    colors=['blue'],
    legend=False,
    title='Daily Account Creation',
    filename='steemdata-account-creation',
)
    
    Out[41]:
In [42]:
    
# df_affiliate = df.groupby('creator').count().sort_values('date', ascending=False)
# df_affiliate.iloc[:5].iplot(
#     kind='bar',
#     theme='white',
#     colors=['blue'],
#     title='Top Account Creators for past 90 days',
#     filename='steemdata-account-creation-faucet',
# )
    
In [ ]:
    
    
In [43]:
    
# author_reward operations from the last 30 days with a non-dust vesting payout
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=30)}
conditions = {
    'type': 'author_reward',
    'timestamp': time_constraints,
    'vesting_payout.amount': {'$gt': 10000},
}
projection = {
    '_id': 0,
    'timestamp': 1,
    'account': 1,
    'vesting_payout.amount': 1,
    'steem_payout.amount': 1,
}
payouts = list(db['AccountOperations'].find(conditions, projection=projection))
    
In [44]:
    
payouts[0]
    
    Out[44]:
In [45]:
    
def process_payouts(payouts):
    """ Turn author_rewards into normalized VESTS payouts.

    :param list payouts: `author_reward` ops with `vesting_payout`,
        `steem_payout`, `account` and `timestamp` fields.
    :return: list of dicts with `account`, `permlink`, `VESTS` and `date` keys.
    """
    results = []
    for payout in payouts:
        # if this is a 100% power-up post, cut VESTS in half
        vests = int(payout['vesting_payout']['amount'])
        if int(payout['steem_payout']['amount']) == 0:
            # floor division keeps exact integer math; int(vests/2) goes
            # through a float and can lose precision for very large amounts
            vests //= 2

        results.append({
            'account': payout['account'],
            'permlink': payout.get('permlink', ''),
            'VESTS': vests,
            'date': payout['timestamp'].date(),
        })
    return results
    
In [46]:
    
payouts = process_payouts(payouts)
    
In [47]:
    
payouts[0]
    
    Out[47]:
In [48]:
    
df = pd.DataFrame(payouts)
    
In [49]:
    
top_earners = df[['account', 'VESTS']].groupby('account').sum().sort_values('VESTS', ascending=False)
    
In [50]:
    
top_earners.iplot(
    kind='area',
    fill=True,
    title='Distribution of Author Rewards (in vests) for past 30 days',
    colors=['blue', 'orange'],
    theme='white',
    legend=False,
    yTitle='Vests Awarded',
    filename='steemdata-30d-author-rewards'
)
    
    Out[50]:
In [ ]:
    
    
In [51]:
    
# last week of author_reward operations above the dust threshold,
# this time including the permlink for post/comment classification
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=7)}
conditions = {
    'type': 'author_reward',
    'timestamp': time_constraints,
    'vesting_payout.amount': {'$gt': 10000},
}
projection = {
    '_id': 0,
    'timestamp': 1,
    'account': 1,
    'permlink': 1,
    'vesting_payout.amount': 1,
    'steem_payout.amount': 1,
}
payouts = list(db['AccountOperations'].find(conditions, projection=projection))
    
In [52]:
    
payouts2 = process_payouts(payouts)
    
In [53]:
    
# split rewards between comments (permlinks starting with "re-") and root posts
posts_pool = 0
comments_pool = 0
for payout in payouts2:
    if payout['permlink'].startswith('re-'):
        comments_pool += payout['VESTS']
    else:
        posts_pool += payout['VESTS']
    
In [54]:
    
comments_pool/posts_pool*100
    
    Out[54]:
In [55]:
    
## how about rshares ratio?
    
In [56]:
    
# rshares fields of posts created in the last 7 days
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=7)}
conditions = {'created': time_constraints}
projection = {
    '_id': 0,
    'identifier': 1,
    'abs_rshares': 1,
    'children_abs_rshares': 1,
}
posts = list(db['Posts'].find(conditions, projection=projection))
    
In [57]:
    
from funcy.colls import pluck
post_rshares = sum(map(int, pluck('abs_rshares', posts)))
    
In [58]:
    
root_posts = list(pluck('identifier', posts))
    
In [ ]:
    
    
In [59]:
    
# comment operations (posts + replies) from the last 90 days;
# exclude the heavy fields we don't need downstream
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=90)}
conditions = {'type': 'comment', 'timestamp': time_constraints}
projection = {
    '_id': 0,
    'json_metadata': 0,
    'body': 0,
    'trx_id': 0,
    'block_num': 0,
    'type': 0,
}
all_comments = list(db['Operations'].find(conditions, projection=projection))
    
In [60]:
    
all_comments[0]
    
    Out[60]:
In [61]:
    
from steem.utils import is_comment
from funcy import first, complement, pluck
    
In [62]:
    
# split into comments and main posts
comments = [op for op in all_comments if is_comment(op)]
posts = [op for op in all_comments if not is_comment(op)]
    
In [63]:
    
# add a plain `date` key derived from each operation's timestamp
comments = [dict(x, date=x['timestamp'].date()) for x in comments]
posts = [dict(x, date=x['timestamp'].date()) for x in posts]
    
In [64]:
    
def filter_accounts(accounts, min_rep=40, min_sp=100):
    """ Return list of accounts that match minimum rep and SP requirements. """
    match = {
        'name': {'$in': accounts},
        'sp': {'$gt': min_sp},
        'rep': {'$gt': min_rep},
    }
    fields = {'_id': 0, 'name': 1}
    matching = db['Accounts'].find(match, fields)
    return [account['name'] for account in matching]
    
In [65]:
    
# all distinct authors seen in either comments or posts, then keep
# only those meeting the Rep/SP thresholds
authors = set(pluck('author', comments)) | set(pluck('author', posts))
accounts_to_filter = list(authors)
qualifying_accounts = filter_accounts(accounts_to_filter)
len(qualifying_accounts)
    
    Out[65]:
In [66]:
    
def filter_comments(posts_or_comments, qualifying_accounts):
    """ Filter out all posts/comments from disqualified accounts.

    :param list posts_or_comments: dicts with an `author` key.
    :param qualifying_accounts: iterable of account names allowed to remain.
    :return: matching items, original order preserved.
    """
    # set membership is O(1); the original list lookup made this O(n*m)
    qualified = set(qualifying_accounts)
    return [x for x in posts_or_comments if x['author'] in qualified]
    
In [67]:
    
def create_df(posts_or_comments):
    """ Build an (author, date) frame with one row per unique author/permlink pair. """
    frame = pd.DataFrame(posts_or_comments)
    deduped = frame.drop_duplicates(['author', 'permlink'])
    return deduped[['author', 'date']]
    
In [68]:
    
# prepare all dataframes
comments_df = create_df(comments)
posts_df = create_df(posts)
# the "q_" frames include only authors passing the Rep/SP filter
q_comments_df = create_df(filter_comments(comments, qualifying_accounts))
q_posts_df = create_df(filter_comments(posts, qualifying_accounts))
    
In [69]:
    
comments_df.head()
    
    Out[69]:
In [70]:
    
def merge_counts(qualified, unqualified):
    """ Merge all comments and comments from reputable authors into single DataFrame. """
    # daily counts; drop the last (incomplete) day from each series
    reputable = qualified.groupby('date').count().iloc[:-1]
    reputable.columns = ['reputable']
    everyone = unqualified.groupby('date').count().iloc[:-1]
    everyone.columns = ['all']
    # inner join on the shared date index
    return reputable.join(everyone, how='inner')
    
In [71]:
    
# daily comment counts: reputable authors (orange) vs everyone (blue)
merge_counts(q_comments_df, comments_df).iplot(
    theme='white',
    legend=False,
    colors=['orange', 'blue'],
    title='Daily Comments Count',
    filename='steemdata-comments-count',
)
    
    Out[71]:
In [72]:
    
# daily post counts: reputable authors (orange) vs everyone (blue)
merge_counts(q_posts_df, posts_df).iplot(
    theme='white',
    legend=False,
    colors=['orange', 'blue'],
    title='Daily Posts Count',
    filename='steemdata-posts-count-2',
)
    
    Out[72]:
In [88]:
    
# only bother with accounts powering down at least 1kV
conditions = {'vesting_withdraw_rate.amount': {'$gt': 1000}}
projection = {
    '_id': 0,
    'vesting_withdraw_rate.amount': 1,
    'next_vesting_withdrawal': 1,
    'name': 1,
}
data = list(db['Accounts'].find(conditions, projection=projection))
    
In [89]:
    
from steem.converter import Converter
# flatten: account name, VESTS withdraw rate, next withdrawal date
withdrawing_accounts = [{
    'name': account['name'],
    'vests': account['vesting_withdraw_rate']['amount'],
    'date': account['next_vesting_withdrawal'].date(),
} for account in data]
    
In [110]:
    
# how much SP is being powered down right now?
all_vests = sum(pluck('vests', withdrawing_accounts))
# convert VESTS to STEEM power and express it in millions
mvest_down = round(Converter().vests_to_sp(all_vests) / 1000000, 2)
mvest_down
    
    Out[110]:
In [96]:
    
df = pd.DataFrame(withdrawing_accounts)
    
In [104]:
    
# daily VESTS sums by next-withdrawal date, converted to SP;
# build the Converter once instead of once per row inside the lambda
converter = Converter()
pd_sum = df[['vests', 'date']].groupby('date').sum()
pd_sum['sp'] = pd_sum['vests'].apply(converter.vests_to_sp)
pd_sum = pd_sum.drop('vests', axis=1)
    
In [113]:
    
# bar chart of upcoming power-downs grouped by next withdrawal date
pd_sum.iplot(
    kind='bar',
    theme='white',
    colors=['blue'],
    legend=False,
    yTitle='STEEM to be powered down - %sm in total' % mvest_down,
    title='Future Power-Downs',
    filename='steemdata-future-powerdown',
)
    
    Out[113]:
In [ ]:
    
    
In [ ]:
    
    
In [ ]:
    
# accounts treated as exchanges for the flow analysis
exchanges = ['poloniex', 'bittrex', 'blocktrades']
# transfers within the last 60 days
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=60)}
# from an exchange to a regular account
incoming = {'from': {'$in': exchanges}, 'to': {'$nin': exchanges}}
# from a regular account to an exchange
outgoing = {'from': {'$nin': exchanges}, 'to': {'$in': exchanges}}
conditions = {'type': 'transfer', 'timestamp': time_constraints}
    
In [ ]:
    
from steem.utils import keep_in_dict
def simplify_transfers(transfers):
    """ Flatten transfer ops into {asset: amount, 'date': ..., 'to': ..., 'from': ...} dicts.

    NOTE(review): assumes `keep_in_dict` returns a dict restricted to the listed
    keys — confirm against steem.utils.
    """
    data = [keep_in_dict(x, ['amount', 'from', 'to', 'timestamp']) for x in transfers]
    data = [{
        x['amount']['asset']: x['amount']['amount'],
        'date': x['timestamp'].date(),
        **keep_in_dict(x, ['to', 'from']),
    } for x in data]
    return data
    
In [ ]:
    
def exchange_flow(direction='incoming'):
    """ Query exchange transfers: `incoming` (from exchanges) or `outgoing` (to them). """
    flow_filter = incoming if direction == 'incoming' else outgoing
    return simplify_transfers(db['Operations'].find({**conditions, **flow_filter}))
    
In [ ]:
    
incoming = exchange_flow('incoming')
outgoing = exchange_flow('outgoing')
    
In [ ]:
    
incoming[0]
    
In [ ]:
    
# daily STEEM totals; outgoing is negated so the sum below yields the net flow
incoming_df = pd.DataFrame(incoming).groupby('date')['STEEM'].sum()
outgoing_df = pd.DataFrame(outgoing).groupby('date')['STEEM'].sum() * -1
diff_df = incoming_df + outgoing_df # add together because we mult outgoing by -1 above
    
In [ ]:
    
diff_df.head()
    
In [ ]:
    
# outgoing_df.iplot(kind='bar')
    
In [ ]:
    
# overlay the net delta (line) on the incoming/outgoing volumes (bars)
diff = go.Scatter(
    name='Delta',
    mode = 'lines+markers',
    opacity=0.7,
    x=diff_df.index,
    y=diff_df.values
)
incoming = go.Bar(
    name='Incoming STEEM',
    opacity=0.9,
    x=incoming_df.index,
    y=incoming_df.values
)
outgoing = go.Bar(
    name='Outgoing STEEM',
    opacity=0.9,
    x=outgoing_df.index,
    y=outgoing_df.values
)
colors=['blue', 'orange', 'red']
layout = go.Layout(title='STEEM Exchange Flows')
fig = go.Figure(data=[incoming, outgoing, diff], layout=layout)
py.iplot(fig, filename='steemdata-exchange-flows')
# todo, make bar charts share X axis
    
In [ ]:
    
    
In [ ]:
    
# transfer operations (amount + timestamp only) from the last 30 days
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=30)}
conditions = {'type': 'transfer', 'timestamp': time_constraints}
projection = {'_id': 0, 'amount': 1, 'timestamp': 1}
transfers = list(db['Operations'].find(conditions, projection=projection))
    
In [ ]:
    
# flatten each transfer to {asset: amount, 'date': ...}; the original used
# funcy's `omit`, which was never imported — NameError on a fresh kernel
transfers = [
    {
        **{k: v for k, v in x.items() if k not in ('timestamp', 'amount')},
        'date': x['timestamp'].date(),
        x['amount']['asset']: x['amount']['amount'],
    }
    for x in transfers
]
    
In [ ]:
    
transfers[0]
    
In [ ]:
    
df = pd.DataFrame(transfers).fillna(0)
    
In [ ]:
    
df.head()
    
In [ ]:
    
# df.groupby('date').aggregate({
#     'SBD': 'sum',
#     'STEEM': 'sum',
# })
    
In [ ]:
    
# daily sums plus transfer counts (all, and with dust transfers removed)
df_sum = df.groupby('date').sum()
df_sum['count'] = df.groupby('date').count()['SBD']
# dust = transfers of 0.1 or less in both STEEM and SBD
no_dust = df[(df.STEEM > 0.1) | (df.SBD > 0.1)]
df_sum['no_dust'] = no_dust.groupby('date').count()['SBD']
    
In [ ]:
    
df_sum.head()
    
In [ ]:
    
# df_sum.iplot(
#     kind='bar',
#     theme='white',
#     colors=['blue', 'orange'],
#     legend=False,
#     title='Daily Transfer Volume',
#     filename='steemdata-daily-transfer-volume',
# )
    
In [ ]:
    
# bars: STEEM/SBD volumes on the left axis; line: transfer count on a second axis
data = [
    go.Bar(
        name='STEEM Volume',
        opacity=0.9,
        x=df_sum.index,
        y=df_sum['STEEM'].values
    ),
    go.Bar(
        name='SBD Volume',
        opacity=0.9,
        x=df_sum.index,
        y=df_sum['SBD'].values
    ),
    go.Scatter(
        name='Transactions',
        mode = 'lines+markers',
        opacity=0.7,
        x=df_sum.index,
        y=df_sum['no_dust'].values,
        yaxis='y2'
    ),
]
layout = go.Layout(
    title='Daily Transactions',
    yaxis=dict(
        title='Transaction Volume'
    ),
    # secondary right-hand axis for the count series
    yaxis2=dict(
        title='Count (Dust Removed)',
        titlefont=dict(
            color='rgb(148, 103, 189)'
        ),
        tickfont=dict(
            color='rgb(148, 103, 189)'
        ),
        overlaying='y',
        side='right'
    )
)
colors=['blue', 'orange', 'yellow']
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='steemdata-tranfers-2')
    
In [ ]:
    
# daily transaction count
df_count = pd.DataFrame()
df_count['All Transfers'] = df.groupby('date').count()['SBD']
# drop dust (0.1 or less of both assets) for a cleaner count
no_dust = df[(df.STEEM > 0.1) | (df.SBD > 0.1)]
df_count['/wo Dust'] = no_dust.groupby('date').count()['SBD']
    
In [ ]:
    
df_count.head()
    
In [ ]:
    
# df_count.iplot(
#     kind='line',
#     theme='white',
#     colors=['blue', 'orange'],
#     legend=False,
#     title='Daily Transfers Count',
#     filename='steemdata-daily-transfers-count',
# )
    
In [ ]:
    
# fill_convert_request ops (SBD -> STEEM conversions) from the last 30 days
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=30)}
conditions = {'type': 'fill_convert_request', 'timestamp': time_constraints}
projection = {'_id': 0, 'amount_in': 1, 'timestamp': 1}
conversions = list(db['AccountOperations'].find(conditions, projection=projection))
    
In [ ]:
    
# keep only the conversion date and the amount converted, keyed by asset
conversions = [{
    'date': op['timestamp'].date(),
    op['amount_in']['asset']: op['amount_in']['amount'],
} for op in conversions]
    
In [ ]:
    
# daily SBD totals converted
df = pd.DataFrame(conversions)
df_sum = df.groupby('date')[['SBD']].sum()
    
In [ ]:
    
# bar chart of daily SBD -> STEEM conversion volume
df_sum.iplot(
    kind='bar',
    theme='white',
    colors=['blue'],
    legend=False,
    yTitle='SBD Converted',
    title='SBD to STEEM conversions',
    filename='steemdata-sbd-steem-conversions',
)
    
In [ ]: