In [1]:
import datetime as dt
from steemdata import SteemData

In [7]:
db = SteemData().db

In [3]:
# %load_ext autoreload
# %autoreload 2

In [4]:
import pandas as pd
import numpy as np

import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf

In [41]:
# go offline for faster workflow
# cf.go_offline()

In [106]:
# go back online to update charts on website
# cf.go_online()

In [ ]:

Daily Posts Count


In [10]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=90),
}
conditions = {
    'created': time_constraints,
}

In [11]:
posts = list(db['Posts'].find(conditions, projection={'_id': 0, 'created': 1}))

In [12]:
len(posts)


Out[12]:
209976

In [13]:
# count number of daily posts
from collections import Counter
c = Counter([x['created'].date() for x in posts])

In [14]:
# create pandas dataframe
df = pd.DataFrame.from_dict(c, orient='index').reset_index()
df.columns = ['date', 'count']
df = df.sort_values('date')
df.set_index('date', inplace=True)

In [15]:
df.tail()


Out[15]:
count
date
2017-05-24 5856
2017-05-25 6142
2017-05-26 5577
2017-05-27 5129
2017-05-28 1618

In [16]:
# plot everything but last day
df.iloc[1:-1].iplot(
    title='Daily Post Count',
    colors=['blue'],
    theme='white',
    legend=False,
    filename='steemdata-30d-post-count')


/home/user/anaconda3/lib/python3.6/site-packages/cufflinks/plotlytools.py:156: FutureWarning:

pandas.tslib is deprecated and will be removed in a future version.
You can access Timestamp as pandas.Timestamp

High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~plotlyfurion/0 or inside your plot.ly account where it is named 'steemdata-30d-post-count'
Out[16]:

In [ ]:

Type Frequency Breakdown


In [17]:
# list op_types
db['AccountOperations'].distinct('type', {})


Out[17]:
['account_create',
 'account_create_with_delegation',
 'account_update',
 'account_witness_proxy',
 'account_witness_vote',
 'author_reward',
 'cancel_transfer_from_savings',
 'change_recovery_account',
 'claim_reward_balance',
 'comment',
 'comment_benefactor_reward',
 'comment_options',
 'convert',
 'curation_reward',
 'custom',
 'custom_json',
 'delegate_vesting_shares',
 'delete_comment',
 'escrow_approve',
 'escrow_dispute',
 'escrow_release',
 'escrow_transfer',
 'feed_publish',
 'fill_convert_request',
 'fill_order',
 'fill_transfer_from_savings',
 'fill_vesting_withdraw',
 'interest',
 'limit_order_cancel',
 'limit_order_create',
 'limit_order_create2',
 'liquidity_reward',
 'pow',
 'pow2',
 'recover_account',
 'request_account_recovery',
 'return_vesting_delegation',
 'set_withdraw_vesting_route',
 'shutdown_witness',
 'transfer',
 'transfer_from_savings',
 'transfer_to_savings',
 'transfer_to_vesting',
 'vote',
 'withdraw_vesting',
 'witness_update']

In [18]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=7),
}
conditions = {
    'timestamp': time_constraints,
}

In [19]:
# count all operations
db['AccountOperations'].find(conditions).count()


Out[19]:
4040194

In [20]:
# count operations for each type
query = [
    {'$match': conditions},
    {'$project': {'type':1}},
    {'$group': {'_id': '$type', 'total': {'$sum': 1}}}
]
results = list(db['AccountOperations'].aggregate(query))

In [21]:
# construct a Pandas dataframe
df = pd.DataFrame(results)
df.columns = ['type', 'total']
total = df['total'].sum()
df['pct'] = df['total'].apply(lambda x: (x/total*100))

In [22]:
# check most common operations
df.sort_values('pct', ascending=False).head()


Out[22]:
type total pct
12 vote 2531745 62.663946
10 curation_reward 516230 12.777357
18 comment 443224 10.970364
17 custom_json 260748 6.453849
24 author_reward 55332 1.369538

In [23]:
# filter out op_types that are less than 0.05%
df = df[(df['pct'] > 0.05)]

In [24]:
# render a nice pie chart
pie = go.Pie(
    labels=df['type'].values.tolist(),
    values=df['pct'].values)

layout = go.Layout(title='Blockchain Operations Distribution')
py.iplot(go.Figure(data=[pie], layout=layout), filename='steemdata-7d-type-pct')


Out[24]:

In [25]:
# py.iplot?

Daily Power-Ups


In [26]:
from steem.converter import Converter

def power_up_down_data(direction='up', days=30, exclude_steemit=False):
    """ Fetch power-up or power-down operations for the past `days` days.

    direction: `up` or `down`.
    exclude_steemit: when True, drop the `steemit` account from power-downs.
    Returns a list of dicts with `account`, `amount` and `date` keys.
    """
    op_type = 'fill_vesting_withdraw' if direction == 'down' else 'transfer_to_vesting'
    conditions = {
        'type': op_type,
        'timestamp': {'$gte': dt.datetime.now() - dt.timedelta(days=days)},
    }
    operations = list(db['AccountOperations'].find(conditions))

    def to_steem(amount):
        """ If power-down is in VESTS, convert it to STEEM first."""
        if amount['asset'] == 'VESTS':
            return Converter().vests_to_sp(amount['amount'])
        return amount['amount']

    if direction != 'down':
        return [{
            'account': op['account'],
            'amount': op['amount']['amount'],
            'date': op['timestamp'].date(),
        } for op in operations]

    # power-downs: normalize deposited amount and optionally skip @steemit
    return [{
        'account': op['account'],
        'amount': to_steem(op['deposited']),
        'date': op['timestamp'].date(),
    } for op in operations
        if not (exclude_steemit == True and op['account'] == 'steemit')]

In [27]:
#downs = pd.DataFrame(power_up_down_data(direction='down'))
#downs.sort_values(by='amount', ascending=False).head(30)

# downs[downs.account == 'steemit'].head()

In [28]:
ups = pd.DataFrame(power_up_down_data(direction='up', days=20))
daily_powerups = ups[['amount', 'date']].groupby('date').sum()
daily_powerups.columns = ['UP']

In [29]:
downs = pd.DataFrame(power_up_down_data(direction='down', days=20))
daily_powerdowns = downs[['amount', 'date']].groupby('date').sum()
daily_powerdowns.columns = ['DOWN']

In [30]:
combined = pd.merge(daily_powerdowns.reset_index(), daily_powerups.reset_index(), on='date')
combined.set_index('date', inplace=True)

In [31]:
combined.iplot(kind='bar',
                title='Daily STEEM Power-Up vs Power-Down for past 20 days',
                colors=['blue', 'orange'],
                theme='white',
                filename='steemdata-30d-power-ups')


Out[31]:

In [ ]:

Post by App Type


In [32]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
    'json_metadata.app': {'$exists': True},
    'created': time_constraints,
}

In [33]:
posts = list(db['Posts'].find(conditions, projection={'_id': 0, 'json_metadata.app': 1}))
apps = [x['json_metadata']['app'] for x in posts]
# remove version information
apps = [x.split('/')[0] for x in apps]

In [34]:
# count most common apps
c = Counter(apps)
top_apps = c.most_common(10)

In [35]:
df = pd.DataFrame(top_apps, index=list(range(1, len(top_apps)+1)))
df.columns = ['App', 'Post Count']
df.head()


Out[35]:
App Post Count
1 steemit 116954
2 esteem 2141
3 streemian 723
4 pysteem 280
5 steepshot 180

In [36]:
# save table to plotly
from plotly import figure_factory as FF

table = FF.create_table(df, index=True, index_title='Position')
py.iplot(table, filename='steemdata-30d-post-app-types')


Out[36]:

In [ ]:

New Accounts


In [37]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=90),
}
conditions = {
    'type': {'$in': ['account_create', 'account_create_with_delegation']},
    'timestamp': time_constraints,
}
projection = {
    '_id': 0,
    'timestamp': 1,
    'creator': 1,
    'new_account_name': 1,
}
accounts = list(db['Operations'].find(conditions, projection=projection))

In [38]:
accounts = [{
    'creator': x['creator'],
    'name': x['new_account_name'],
    'date': x['timestamp'].date(),
    
} for x in accounts]

In [39]:
accounts[0]


Out[39]:
{'creator': 'steem', 'date': datetime.date(2017, 3, 30), 'name': 'invorfiha'}

In [40]:
df = pd.DataFrame(accounts)
# df.set_index('date', inplace=True)
df.drop('name', axis=1, inplace=True)
df.head()


Out[40]:
creator date
0 steem 2017-03-30
1 steem 2017-03-30
2 steem 2017-03-30
3 steem 2017-03-30
4 steem 2017-03-30

In [41]:
df_count = df.groupby('date').count()
df_count.iloc[1:-1].iplot(
    theme='white',
    colors=['blue'],
    legend=False,
    title='Daily Account Creation',
    filename='steemdata-account-creation',
)


Out[41]:

In [42]:
# df_affiliate = df.groupby('creator').count().sort_values('date', ascending=False)
# df_affiliate.iloc[:5].iplot(
#     kind='bar',
#     theme='white',
#     colors=['blue'],
#     title='Top Account Creators for past 90 days',
#     filename='steemdata-account-creation-faucet',
# )

In [ ]:

Author Reward Distribution


In [43]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
    'type': 'author_reward',
    'timestamp': time_constraints,
    'vesting_payout.amount': {'$gt': 10000},
}
projection = {
    '_id': 0,
    'timestamp': 1,
    'account': 1,
    'vesting_payout.amount': 1,
    'steem_payout.amount': 1,
}
payouts = list(db['AccountOperations'].find(conditions, projection=projection))

In [44]:
payouts[0]


Out[44]:
{'account': 'a-condor',
 'steem_payout': {'amount': 0.0},
 'timestamp': datetime.datetime(2017, 5, 11, 8, 41, 57),
 'vesting_payout': {'amount': 19997.612583}}

In [45]:
def process_payouts(payouts):
    """ Turn author_rewards into normalized VESTS payouts.

    Each result dict carries `account`, `permlink` (defaults to '' when the
    source operation has none), an integer `VESTS` value and the payout `date`.
    """
    normalized = []
    for reward in payouts:
        vests = int(reward['vesting_payout']['amount'])
        # a zero STEEM payout marks a 100% power-up post; halve the VESTS
        # so such rewards stay comparable with the default payout split
        if int(reward['steem_payout']['amount']) == 0:
            vests = int(vests / 2)
        normalized.append({
            'account': reward['account'],
            'permlink': reward.get('permlink', ''),
            'VESTS': vests,
            'date': reward['timestamp'].date(),
        })
    return normalized

In [46]:
payouts = process_payouts(payouts)

In [47]:
payouts[0]


Out[47]:
{'VESTS': 9998,
 'account': 'a-condor',
 'date': datetime.date(2017, 5, 11),
 'permlink': ''}

In [48]:
df = pd.DataFrame(payouts)

In [49]:
top_earners = df[['account', 'VESTS']].groupby('account').sum().sort_values('VESTS', ascending=False)

In [50]:
top_earners.iplot(
    kind='area',
    fill=True,
    title='Distribution of Author Rewards (in vests) for past 30 days',
    colors=['blue', 'orange'],
    theme='white',
    legend=False,
    yTitle='Vests Awarded',
    filename='steemdata-30d-author-rewards'
)


Out[50]:

In [ ]:

How much rewards go to posts vs comments?


In [51]:
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=7),
}
conditions = {
    'type': 'author_reward',
    'timestamp': time_constraints,
    'vesting_payout.amount': {'$gt': 10000},
}
projection = {
    '_id': 0,
    'timestamp': 1,
    'account': 1,
    'permlink': 1,
    'vesting_payout.amount': 1,
    'steem_payout.amount': 1,
}
payouts = list(db['AccountOperations'].find(conditions, projection=projection))

In [52]:
payouts2 = process_payouts(payouts)

In [53]:
# Split total VESTS rewards between root posts and comments.
# Comments are identified by the conventional "re-" permlink prefix.
posts_pool = 0
comments_pool = 0
for payout in payouts2:
    if payout['permlink'].startswith("re-"):
        comments_pool += payout['VESTS']
    else:
        posts_pool += payout['VESTS']

In [54]:
comments_pool/posts_pool*100


Out[54]:
0.15287020670368953

In [55]:
## how about rshares ratio?

In [56]:
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=7),
}
conditions = {
    'created': time_constraints,
}
projection = {
    '_id': 0,
    'identifier': 1,
    'abs_rshares': 1,
    'children_abs_rshares': 1,
}
posts = list(db['Posts'].find(conditions, projection=projection))

In [57]:
from funcy.colls import pluck
post_rshares = sum(map(int, pluck('abs_rshares', posts)))

In [58]:
root_posts = list(pluck('identifier', posts))

In [ ]:

Reputable Users Comments


In [59]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=90),
}
conditions = {
    'type': 'comment',
    'timestamp': time_constraints,
}
projection = {
    '_id': 0,
    'json_metadata': 0,
    'body': 0,
    'trx_id': 0,
    'block_num': 0,
    'type': 0,
}
all_comments = list(db['Operations'].find(conditions, projection=projection))

In [60]:
all_comments[0]


Out[60]:
{'author': 'nerdylab',
 'parent_author': 'marcel1965',
 'parent_permlink': 're-nerdylab-how-to-make-money-online-by-selling-tshirts-2017-guide-20170528t071037746z',
 'permlink': 're-marcel1965-re-nerdylab-how-to-make-money-online-by-selling-tshirts-2017-guide-20170528t101719668z',
 'timestamp': datetime.datetime(2017, 5, 28, 10, 17, 24),
 'title': ''}

In [61]:
from steem.utils import is_comment
from funcy import first, complement, pluck

In [62]:
# split into comments and main posts
comments = list(filter(is_comment, all_comments))
posts = list(filter(complement(is_comment), all_comments))

In [63]:
# turn datetime into dates
comments = [{**x, 'date': x['timestamp'].date()} for x in comments]
posts = [{**x, 'date': x['timestamp'].date()} for x in posts]

In [64]:
def filter_accounts(accounts, min_rep=40, min_sp=100):
    """ Return list of accounts that match minimum rep and SP requirements. """
    query = {
        'name': {'$in': accounts},
        'sp': {'$gt': min_sp},
        'rep': {'$gt': min_rep},
    }
    # only the account name is needed downstream
    fields = {
        '_id': 0,
        'name': 1,
    }
    matches = db['Accounts'].find(query, fields)
    return [match['name'] for match in matches]

In [65]:
# create a list of all accounts with adequate Rep/SP for both comments and posts
accounts_to_filter = list(set(pluck('author', comments)) | set(pluck('author', posts)))
qualifying_accounts = filter_accounts(accounts_to_filter)
len(qualifying_accounts)


Out[65]:
3727

In [66]:
def filter_comments(posts_or_comments, qualifying_accounts):
    """ Filter out all posts/comments from disqualified accounts.

    `qualifying_accounts` may be any iterable of account names; it is
    materialized into a set so the per-item membership test is O(1)
    instead of scanning a several-thousand-element list for every one
    of the (much more numerous) posts/comments.
    """
    qualifying = set(qualifying_accounts)
    return [x for x in posts_or_comments if x['author'] in qualifying]

In [67]:
def create_df(posts_or_comments):
    """ Build an (author, date) DataFrame, de-duplicated on (author, permlink). """
    frame = pd.DataFrame(posts_or_comments)
    deduped = frame.drop_duplicates(['author', 'permlink'])
    return deduped[['author', 'date']]

In [68]:
# prepare all dataframes
comments_df = create_df(comments)
posts_df = create_df(posts)
q_comments_df = create_df(filter_comments(comments, qualifying_accounts))
q_posts_df = create_df(filter_comments(posts, qualifying_accounts))

In [69]:
comments_df.head()


Out[69]:
author date
0 nerdylab 2017-05-28
1 toptimist 2017-05-28
2 condra 2017-05-28
3 erikkartmen 2017-05-28
4 tamaralovelace 2017-05-28

In [70]:
def merge_counts(qualified, unqualified):
    """ Merge all comments and comments from reputable authors into single DataFrame. """
    # count per day; drop the last (incomplete) day from each side
    reputable = qualified.groupby('date').count().iloc[:-1]
    reputable.columns = ['reputable']

    everyone = unqualified.groupby('date').count().iloc[:-1]
    everyone.columns = ['all']

    # inner join on the date index keeps only days present in both
    return pd.merge(reputable, everyone, left_index=True, right_index=True)

In [71]:
merge_counts(q_comments_df, comments_df).iplot(
    theme='white',
    legend=False,
    colors=['orange', 'blue'],
    title='Daily Comments Count',
    filename='steemdata-comments-count',
)


Out[71]:

In [72]:
merge_counts(q_posts_df, posts_df).iplot(
    theme='white',
    legend=False,
    colors=['orange', 'blue'],
    title='Daily Posts Count',
    filename='steemdata-posts-count-2',
)


Out[72]:

Withdrawal Prediction


In [88]:
# only bother with accounts powering down at least 1kV
conditions = {
    'vesting_withdraw_rate.amount': {'$gt': 1000},
}
projection = {
    '_id': 0,
    'vesting_withdraw_rate.amount': 1,
    'next_vesting_withdrawal': 1,
    'name': 1,
}
data = list(db['Accounts'].find(conditions, projection=projection))

In [89]:
from steem.converter import Converter
withdrawing_accounts = [{
    'name': x['name'],
    'vests': x['vesting_withdraw_rate']['amount'],
    'date': x['next_vesting_withdrawal'].date()} for x in data]

In [110]:
# how much SP is being powered down right now?
all_vests = sum(pluck('vests', withdrawing_accounts))
mvest_down = round(Converter().vests_to_sp(all_vests) / 1000000, 2)
mvest_down


Out[110]:
2.85

In [96]:
df = pd.DataFrame(withdrawing_accounts)

In [104]:
# Daily VESTS power-down sums, converted to SP.
# Hoist a single Converter instance out of the per-row apply: the original
# lambda constructed a fresh Converter() for every row.
pd_sum = df[['vests', 'date']].groupby('date').sum()
converter = Converter()
pd_sum['sp'] = pd_sum['vests'].apply(converter.vests_to_sp)
pd_sum.drop('vests', axis=1, inplace=True)

In [113]:
pd_sum.iplot(
    kind='bar',
    theme='white',
    colors=['blue'],
    legend=False,
    yTitle='STEEM to be powered down - %sm in total' % mvest_down,
    title='Future Power-Downs',
    filename='steemdata-future-powerdown',
)


Out[113]:

In [ ]:


In [ ]:

Transfer from exchanges


In [ ]:
exchanges = ['poloniex', 'bittrex', 'blocktrades']
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=60),
}
incoming = {
    'from': {'$in': exchanges},
    'to': {'$nin': exchanges},
}
outgoing = {
    'from': {'$nin': exchanges},
    'to': {'$in': exchanges},
}
conditions = {
    'type': 'transfer',
    'timestamp': time_constraints,
}

In [ ]:
from steem.utils import keep_in_dict
def simplify_transfers(transfers):
    data = [keep_in_dict(x, ['amount', 'from', 'to', 'timestamp']) for x in transfers]
    data = [{
        x['amount']['asset']: x['amount']['amount'],
        'date': x['timestamp'].date(),
        **keep_in_dict(x, ['to', 'from']),
    } for x in data]
    return data

In [ ]:
def exchange_flow(direction='incoming'):
    # Query exchange transfers in one direction and simplify them.
    # Reads the module-level `conditions` dict plus either the `incoming`
    # or `outgoing` filter dict, merged into a single Mongo query.
    #
    # NOTE(review): the cells below rebind the global names `incoming` and
    # `outgoing` to this function's *results*; calling this function again
    # afterwards (or re-running those cells) would splice a list into the
    # query and fail. Consider renaming either the filters or the results.
    if direction == 'incoming':
        return simplify_transfers(db['Operations'].find({**conditions, **incoming}))
    return simplify_transfers(db['Operations'].find({**conditions, **outgoing}))

In [ ]:
incoming = exchange_flow('incoming')
outgoing = exchange_flow('outgoing')

In [ ]:
incoming[0]

In [ ]:
incoming_df = pd.DataFrame(incoming).groupby('date')['STEEM'].sum()
outgoing_df = pd.DataFrame(outgoing).groupby('date')['STEEM'].sum() * -1
diff_df = incoming_df + outgoing_df # add together because we mult outgoing by -1 above

In [ ]:
diff_df.head()

In [ ]:
# outgoing_df.iplot(kind='bar')

In [ ]:
diff = go.Scatter(
    name='Delta',
    mode = 'lines+markers',
    opacity=0.7,
    x=diff_df.index,
    y=diff_df.values
)

incoming = go.Bar(
    name='Incoming STEEM',
    opacity=0.9,
    x=incoming_df.index,
    y=incoming_df.values
)

outgoing = go.Bar(
    name='Outgoing STEEM',
    opacity=0.9,
    x=outgoing_df.index,
    y=outgoing_df.values
)

colors=['blue', 'orange', 'red']
layout = go.Layout(title='STEEM Exchange Flows')
fig = go.Figure(data=[incoming, outgoing, diff], layout=layout)
py.iplot(fig, filename='steemdata-exchange-flows')

# todo, make bar charts share X axis

In [ ]:

Daily Transaction Rates

  • avg tx/s, transaction number, transaction volume (USD implied)

In [ ]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
    'type': 'transfer',
    'timestamp': time_constraints,
}
projection = {
    '_id': 0,
    'amount': 1,
    'timestamp': 1,
}
transfers = list(db['Operations'].find(conditions, projection=projection))

In [ ]:
# fix: `omit` was used without ever being imported (NameError on a fresh
# Restart & Run All). funcy — already used earlier in this notebook —
# provides it.
from funcy import omit

# Flatten each transfer into {'date': ..., <asset symbol>: amount},
# dropping the raw `timestamp` and nested `amount` fields.
transfers = [
    omit(
        {
            **x,
            'date': x['timestamp'].date(),
            x['amount']['asset']: x['amount']['amount'],
        },
        ['timestamp', 'amount'],
    )
    for x in transfers
]

In [ ]:
transfers[0]

In [ ]:
df = pd.DataFrame(transfers).fillna(0)

In [ ]:
df.head()

In [ ]:
# df.groupby('date').aggregate({
#     'SBD': 'sum',
#     'STEEM': 'sum',
# })

In [ ]:
df_sum = df.groupby('date').sum()
df_sum['count'] = df.groupby('date').count()['SBD']
no_dust = df[(df.STEEM > 0.1) | (df.SBD > 0.1)]
df_sum['no_dust'] = no_dust.groupby('date').count()['SBD']

In [ ]:
df_sum.head()

In [ ]:
# df_sum.iplot(
#     kind='bar',
#     theme='white',
#     colors=['blue', 'orange'],
#     legend=False,
#     title='Daily Transfer Volume',
#     filename='steemdata-daily-transfer-volume',
# )

In [ ]:
data = [
    go.Bar(
        name='STEEM Volume',
        opacity=0.9,
        x=df_sum.index,
        y=df_sum['STEEM'].values
    ),
    go.Bar(
        name='SBD Volume',
        opacity=0.9,
        x=df_sum.index,
        y=df_sum['SBD'].values
    ),
    go.Scatter(
        name='Transactions',
        mode = 'lines+markers',
        opacity=0.7,
        x=df_sum.index,
        y=df_sum['no_dust'].values,
        yaxis='y2'
    ),
]

layout = go.Layout(
    title='Daily Transactions',
    yaxis=dict(
        title='Transaction Volume'
    ),
    yaxis2=dict(
        title='Count (Dust Removed)',
        titlefont=dict(
            color='rgb(148, 103, 189)'
        ),
        tickfont=dict(
            color='rgb(148, 103, 189)'
        ),
        overlaying='y',
        side='right'
    )
)

colors=['blue', 'orange', 'yellow']
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='steemdata-tranfers-2')

In [ ]:
# daily transaction count
df_count = pd.DataFrame()
df_count['All Transfers'] = df.groupby('date').count()['SBD']
no_dust = df[(df.STEEM > 0.1) | (df.SBD > 0.1)]
df_count['/wo Dust'] = no_dust.groupby('date').count()['SBD']

In [ ]:
df_count.head()

In [ ]:
# df_count.iplot(
#     kind='line',
#     theme='white',
#     colors=['blue', 'orange'],
#     legend=False,
#     title='Daily Transfers Count',
#     filename='steemdata-daily-transfers-count',
# )

Conversion Requests Fill


In [ ]:
# time constraints
time_constraints = {
    '$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
    'type': 'fill_convert_request',
    'timestamp': time_constraints,
}
projection = {
    '_id': 0,
    'amount_in': 1,
    'timestamp': 1,
}
conversions = list(db['AccountOperations'].find(conditions, projection=projection))

In [ ]:
conversions = [{
    'date': x['timestamp'].date(),
     x['amount_in']['asset']: x['amount_in']['amount']} for x in conversions]

In [ ]:
df = pd.DataFrame(conversions)
df_sum = df[['SBD', 'date']].groupby('date').sum()

In [ ]:
df_sum.iplot(
    kind='bar',
    theme='white',
    colors=['blue'],
    legend=False,
    yTitle='SBD Converted',
    title='SBD to STEEM conversions',
    filename='steemdata-sbd-steem-conversions',
)

In [ ]: