In [1]:
import datetime as dt
from steemdata import SteemData
In [7]:
# open a handle to the SteemData MongoDB database; reused by every query below
db = SteemData().db
In [3]:
# %load_ext autoreload
# %autoreload 2
In [4]:
import pandas as pd
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf
In [41]:
# go offline for faster workflow
# cf.go_offline()
In [106]:
# go back online to update charts on website
# cf.go_online()
In [ ]:
In [10]:
# Query window: only posts created within the last 90 days.
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=90)}
conditions = {'created': time_constraints}
In [11]:
# fetch only the `created` timestamp of each matching post (smallest possible payload)
posts = list(db['Posts'].find(conditions, projection={'_id': 0, 'created': 1}))
In [12]:
len(posts)
Out[12]:
In [13]:
# Tally posts per calendar day.
from collections import Counter
c = Counter(post['created'].date() for post in posts)
In [14]:
# Build a date-indexed daily-count frame from the Counter
# (from_dict yields columns named 'index' and 0 after reset_index).
df = (
    pd.DataFrame.from_dict(c, orient='index')
    .reset_index()
    .rename(columns={'index': 'date', 0: 'count'})
    .sort_values('date')
    .set_index('date')
)
In [15]:
df.tail()
Out[15]:
In [16]:
# plot everything but last day
df.iloc[1:-1].iplot(
title='Daily Post Count',
colors=['blue'],
theme='white',
legend=False,
filename='steemdata-30d-post-count')
Out[16]:
In [ ]:
In [17]:
# list op_types
db['AccountOperations'].distinct('type', {})
Out[17]:
In [18]:
# Query window: only operations from the last 7 days.
time_constraints = {'$gte': dt.datetime.now() - dt.timedelta(days=7)}
conditions = {'timestamp': time_constraints}
In [19]:
# count all operations
db['AccountOperations'].find(conditions).count()
Out[19]:
In [20]:
# count operations for each type
# aggregation pipeline: restrict to the time window, project only `type`
# to shrink the documents, then count documents per distinct type
query = [
{'$match': conditions},
{'$project': {'type':1}},
{'$group': {'_id': '$type', 'total': {'$sum': 1}}}
]
results = list(db['AccountOperations'].aggregate(query))
In [21]:
# construct a Pandas dataframe of per-type counts and their share of the total.
# fix: rename the `_id` column by key instead of assigning `df.columns`
# positionally, which silently mislabels columns if the key order ever differs.
df = pd.DataFrame(results).rename(columns={'_id': 'type'})[['type', 'total']]
total = df['total'].sum()
# vectorized division instead of a Python-level lambda per row
df['pct'] = df['total'] / total * 100
In [22]:
# check most common operations
df.sort_values('pct', ascending=False).head()
Out[22]:
In [23]:
# filter out op_types that are less than 0.05%
df = df[(df['pct'] > 0.05)]
In [24]:
# render a nice pie chart
pie = go.Pie(
labels=df['type'].values.tolist(),
values=df['pct'].values)
layout = go.Layout(title='Blockchain Operations Distribution')
py.iplot(go.Figure(data=[pie], layout=layout), filename='steemdata-7d-type-pct')
Out[24]:
In [25]:
# py.iplot?
In [26]:
from steem.converter import Converter


def power_up_down_data(direction='up', days=30, exclude_steemit=False):
    """Fetch power-up or power-down operations for the recent window.

    :param str direction: `up` (transfer_to_vesting) or `down` (fill_vesting_withdraw).
    :param int days: how many days back to query.
    :param bool exclude_steemit: when powering down, drop the @steemit account.
    :return: list of dicts with `account`, `amount` (in STEEM) and `date`.
    """
    time_constraints = {
        '$gte': dt.datetime.now() - dt.timedelta(days=days),
    }
    conditions = {
        'type': 'fill_vesting_withdraw' if direction == 'down' else 'transfer_to_vesting',
        'timestamp': time_constraints,
    }
    operations = list(db['AccountOperations'].find(conditions))

    # one Converter instance for the whole batch -- the original constructed a
    # fresh Converter for every record inside the helper, which is wasteful
    converter = Converter()

    def power_down_amount(amount):
        """If power-down is in VESTS, convert it to STEEM first."""
        if amount['asset'] == 'VESTS':
            return converter.vests_to_sp(amount['amount'])
        return amount['amount']

    if direction == 'down':
        # idiom fix: `exclude_steemit` is already a bool; no `== True` needed
        return [{
            'account': x['account'],
            'amount': power_down_amount(x['deposited']),
            'date': x['timestamp'].date(),
        } for x in operations if not (exclude_steemit and x['account'] == 'steemit')]
    return [{
        'account': x['account'],
        'amount': x['amount']['amount'],
        'date': x['timestamp'].date(),
    } for x in operations]
In [27]:
#downs = pd.DataFrame(power_up_down_data(direction='down'))
#downs.sort_values(by='amount', ascending=False).head(30)
# downs[downs.account == 'steemit'].head()
In [28]:
ups = pd.DataFrame(power_up_down_data(direction='up', days=20))
daily_powerups = ups[['amount', 'date']].groupby('date').sum()
daily_powerups.columns = ['UP']
In [29]:
downs = pd.DataFrame(power_up_down_data(direction='down', days=20))
daily_powerdowns = downs[['amount', 'date']].groupby('date').sum()
daily_powerdowns.columns = ['DOWN']
In [30]:
# inner-join the two date-indexed frames directly on their shared index
combined = daily_powerdowns.join(daily_powerups, how='inner')
In [31]:
combined.iplot(kind='bar',
title='Daily STEEM Power-Up vs Power-Down for past 20 days',
colors=['blue', 'orange'],
theme='white',
filename='steemdata-30d-power-ups')
Out[31]:
In [ ]:
In [32]:
# time constraints
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
'json_metadata.app': {'$exists': True},
'created': time_constraints,
}
In [33]:
posts = list(db['Posts'].find(conditions, projection={'_id': 0, 'json_metadata.app': 1}))
# grab the posting app name, stripping the "/version" suffix in one pass
apps = [post['json_metadata']['app'].split('/')[0] for post in posts]
In [34]:
# count most common apps
c = Counter(apps)
top_apps = c.most_common(10)
In [35]:
# rank table: 1-based index, one row per (app, post count) pair
df = pd.DataFrame(
    top_apps,
    columns=['App', 'Post Count'],
    index=list(range(1, len(top_apps) + 1)),
)
df.head()
Out[35]:
In [36]:
# save table to plotly
from plotly import figure_factory as FF
table = FF.create_table(df, index=True, index_title='Position')
py.iplot(table, filename='steemdata-30d-post-app-types')
Out[36]:
In [ ]:
In [37]:
# time constraints
# account creations over the last 90 days, matching both creation op variants
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=90),
}
conditions = {
'type': {'$in': ['account_create', 'account_create_with_delegation']},
'timestamp': time_constraints,
}
# fetch only the fields the charts below actually use
projection = {
'_id': 0,
'timestamp': 1,
'creator': 1,
'new_account_name': 1,
}
accounts = list(db['Operations'].find(conditions, projection=projection))
In [38]:
# flatten each creation op to (creator, new account name, calendar date)
accounts = [
    {'creator': op['creator'], 'name': op['new_account_name'], 'date': op['timestamp'].date()}
    for op in accounts
]
In [39]:
accounts[0]
Out[39]:
In [40]:
df = pd.DataFrame(accounts)
# df.set_index('date', inplace=True)
df.drop('name', axis=1, inplace=True)
df.head()
Out[40]:
In [41]:
df_count = df.groupby('date').count()
df_count.iloc[1:-1].iplot(
theme='white',
colors=['blue'],
legend=False,
title='Daily Account Creation',
filename='steemdata-account-creation',
)
Out[41]:
In [42]:
# df_affiliate = df.groupby('creator').count().sort_values('date', ascending=False)
# df_affiliate.iloc[:5].iplot(
# kind='bar',
# theme='white',
# colors=['blue'],
# title='Top Account Creators for past 90 days',
# filename='steemdata-account-creation-faucet',
# )
In [ ]:
In [43]:
# time constraints
# author rewards over the last 30 days; the vesting_payout floor filters out
# dust payouts (threshold is in raw chain units -- TODO confirm the scale)
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
'type': 'author_reward',
'timestamp': time_constraints,
'vesting_payout.amount': {'$gt': 10000},
}
# NOTE(review): `permlink` is not projected here, so process_payouts() below
# will fall back to '' for every record from this query
projection = {
'_id': 0,
'timestamp': 1,
'account': 1,
'vesting_payout.amount': 1,
'steem_payout.amount': 1,
}
payouts = list(db['AccountOperations'].find(conditions, projection=projection))
In [44]:
payouts[0]
Out[44]:
In [45]:
def process_payouts(payouts):
    """Turn author_reward operations into normalized VESTS payout records.

    A zero STEEM payout marks a 100% power-up post, whose VESTS are halved
    to normalize against the default 50/50 reward split.
    """
    normalized = []
    for entry in payouts:
        vests = int(entry['vesting_payout']['amount'])
        if int(entry['steem_payout']['amount']) == 0:
            vests = int(vests / 2)
        normalized.append({
            'account': entry['account'],
            'permlink': entry.get('permlink', ''),
            'VESTS': vests,
            'date': entry['timestamp'].date(),
        })
    return normalized
In [46]:
payouts = process_payouts(payouts)
In [47]:
payouts[0]
Out[47]:
In [48]:
df = pd.DataFrame(payouts)
In [49]:
top_earners = df[['account', 'VESTS']].groupby('account').sum().sort_values('VESTS', ascending=False)
In [50]:
top_earners.iplot(
kind='area',
fill=True,
title='Distribution of Author Rewards (in vests) for past 30 days',
colors=['blue', 'orange'],
theme='white',
legend=False,
yTitle='Vests Awarded',
filename='steemdata-30d-author-rewards'
)
Out[50]:
In [ ]:
In [51]:
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=7),
}
conditions = {
'type': 'author_reward',
'timestamp': time_constraints,
'vesting_payout.amount': {'$gt': 10000},
}
projection = {
'_id': 0,
'timestamp': 1,
'account': 1,
'permlink': 1,
'vesting_payout.amount': 1,
'steem_payout.amount': 1,
}
payouts = list(db['AccountOperations'].find(conditions, projection=projection))
In [52]:
payouts2 = process_payouts(payouts)
In [53]:
# Split reward VESTS between comments and top-level posts.
# Comments are identified by the "re-" permlink prefix; `startswith` is the
# idiomatic form of the original `[:3] == "re-"` slice comparison.
comments_pool = sum(p['VESTS'] for p in payouts2 if p['permlink'].startswith('re-'))
posts_pool = sum(p['VESTS'] for p in payouts2 if not p['permlink'].startswith('re-'))
In [54]:
comments_pool/posts_pool*100
Out[54]:
In [55]:
## how about rshares ratio?
In [56]:
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=7),
}
conditions = {
'created': time_constraints,
}
projection = {
'_id': 0,
'identifier': 1,
'abs_rshares': 1,
'children_abs_rshares': 1,
}
posts = list(db['Posts'].find(conditions, projection=projection))
In [57]:
from funcy.colls import pluck
post_rshares = sum(map(int, pluck('abs_rshares', posts)))
In [58]:
root_posts = list(pluck('identifier', posts))
In [ ]:
In [59]:
# time constraints
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=90),
}
conditions = {
'type': 'comment',
'timestamp': time_constraints,
}
projection = {
'_id': 0,
'json_metadata': 0,
'body': 0,
'trx_id': 0,
'block_num': 0,
'type': 0,
}
all_comments = list(db['Operations'].find(conditions, projection=projection))
In [60]:
all_comments[0]
Out[60]:
In [61]:
from steem.utils import is_comment
from funcy import first, complement, pluck
In [62]:
# split into comments and main posts
comments = list(filter(is_comment, all_comments))
posts = list(filter(complement(is_comment), all_comments))
In [63]:
# turn datetime into dates
comments = [{**x, 'date': x['timestamp'].date()} for x in comments]
posts = [{**x, 'date': x['timestamp'].date()} for x in posts]
In [64]:
def filter_accounts(accounts, min_rep=40, min_sp=100):
    """Return the subset of `accounts` meeting minimum rep and SP thresholds.

    :param accounts: list of account names to check.
    :param min_rep: minimum reputation (exclusive).
    :param min_sp: minimum Steem Power (exclusive).
    """
    match = {
        'name': {'$in': accounts},
        'sp': {'$gt': min_sp},
        'rep': {'$gt': min_rep},
    }
    name_only = {'_id': 0, 'name': 1}
    return [doc['name'] for doc in db['Accounts'].find(match, name_only)]
In [65]:
# create a list of all accounts with adequate Rep/SP for both comments and posts
accounts_to_filter = list(set(pluck('author', comments)) | set(pluck('author', posts)))
qualifying_accounts = filter_accounts(accounts_to_filter)
len(qualifying_accounts)
Out[65]:
In [66]:
def filter_comments(posts_or_comments, qualifying_accounts):
    """ Filter out all posts/comments from disqualified accounts.

    :param posts_or_comments: list of dicts each carrying an `author` key.
    :param qualifying_accounts: iterable of account names allowed through.
    :return: new list preserving input order.
    """
    # set lookup: O(1) membership per item instead of scanning the account
    # list for every post/comment (the original was O(n*m))
    qualifying = set(qualifying_accounts)
    return [x for x in posts_or_comments if x['author'] in qualifying]
In [67]:
def create_df(posts_or_comments):
    """Build an (author, date) frame, dropping duplicate author/permlink pairs."""
    deduped = pd.DataFrame(posts_or_comments).drop_duplicates(['author', 'permlink'])
    return deduped[['author', 'date']]
In [68]:
# prepare all dataframes
comments_df = create_df(comments)
posts_df = create_df(posts)
q_comments_df = create_df(filter_comments(comments, qualifying_accounts))
q_posts_df = create_df(filter_comments(posts, qualifying_accounts))
In [69]:
comments_df.head()
Out[69]:
In [70]:
def merge_counts(qualified, unqualified):
    """Merge daily counts of all items and items from reputable authors.

    The last (partial) day is dropped from both sides before the inner join.
    """
    reputable = qualified.groupby('date').count().iloc[:-1]
    reputable.columns = ['reputable']
    everyone = unqualified.groupby('date').count().iloc[:-1]
    everyone.columns = ['all']
    # inner join on the shared date index (same rows as the original merge)
    return reputable.join(everyone, how='inner')
In [71]:
merge_counts(q_comments_df, comments_df).iplot(
theme='white',
legend=False,
colors=['orange', 'blue'],
title='Daily Comments Count',
filename='steemdata-comments-count',
)
Out[71]:
In [72]:
merge_counts(q_posts_df, posts_df).iplot(
theme='white',
legend=False,
colors=['orange', 'blue'],
title='Daily Posts Count',
filename='steemdata-posts-count-2',
)
Out[72]:
In [88]:
# only bother with accounts powering down at least 1kV
# NOTE(review): the 1000 threshold compares against the raw integer
# `vesting_withdraw_rate.amount` -- confirm this matches "1k VESTS" in
# the Accounts schema's units
conditions = {
'vesting_withdraw_rate.amount': {'$gt': 1000},
}
# only the fields needed to chart future power-downs
projection = {
'_id': 0,
'vesting_withdraw_rate.amount': 1,
'next_vesting_withdrawal': 1,
'name': 1,
}
data = list(db['Accounts'].find(conditions, projection=projection))
In [89]:
from steem.converter import Converter
withdrawing_accounts = [{
'name': x['name'],
'vests': x['vesting_withdraw_rate']['amount'],
'date': x['next_vesting_withdrawal'].date()} for x in data]
In [110]:
# how much SP is being powered down right now?
all_vests = sum(pluck('vests', withdrawing_accounts))
mvest_down = round(Converter().vests_to_sp(all_vests) / 1000000, 2)
mvest_down
Out[110]:
In [96]:
df = pd.DataFrame(withdrawing_accounts)
In [104]:
# daily totals of scheduled power-downs, converted from VESTS to SP.
pd_sum = df[['vests', 'date']].groupby('date').sum()
# fix: build one Converter and reuse it -- the original lambda constructed a
# new Converter for every row
converter = Converter()
pd_sum['sp'] = pd_sum['vests'].apply(converter.vests_to_sp)
pd_sum.drop('vests', axis=1, inplace=True)
In [113]:
pd_sum.iplot(
kind='bar',
theme='white',
colors=['blue'],
legend=False,
yTitle='STEEM to be powered down - %sm in total' % mvest_down,
title='Future Power-Downs',
filename='steemdata-future-powerdown',
)
Out[113]:
In [ ]:
In [ ]:
In [ ]:
exchanges = ['poloniex', 'bittrex', 'blocktrades']
# time constraints
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=60),
}
incoming = {
'from': {'$in': exchanges},
'to': {'$nin': exchanges},
}
outgoing = {
'from': {'$nin': exchanges},
'to': {'$in': exchanges},
}
conditions = {
'type': 'transfer',
'timestamp': time_constraints,
}
In [ ]:
from steem.utils import keep_in_dict


def simplify_transfers(transfers):
    """Reduce raw transfer ops to flat records: {asset: amount, date, to, from}."""
    simplified = []
    for op in transfers:
        trimmed = keep_in_dict(op, ['amount', 'from', 'to', 'timestamp'])
        simplified.append({
            trimmed['amount']['asset']: trimmed['amount']['amount'],
            'date': trimmed['timestamp'].date(),
            **keep_in_dict(trimmed, ['to', 'from']),
        })
    return simplified
In [ ]:
def exchange_flow(direction='incoming'):
    # Query transfers matching the module-level `conditions` merged with the
    # `incoming`/`outgoing` from/to filter dicts defined in the cell above.
    # NOTE(review): a later cell rebinds `incoming`/`outgoing` to this
    # function's results, so calling this again afterwards would build a
    # broken query -- relies on strict top-to-bottom execution.
    if direction == 'incoming':
        return simplify_transfers(db['Operations'].find({**conditions, **incoming}))
    return simplify_transfers(db['Operations'].find({**conditions, **outgoing}))
In [ ]:
# NOTE(review): these assignments shadow the `incoming`/`outgoing` filter
# dicts that exchange_flow() reads -- safe only on a single top-to-bottom run
incoming = exchange_flow('incoming')
outgoing = exchange_flow('outgoing')
In [ ]:
incoming[0]
In [ ]:
incoming_df = pd.DataFrame(incoming).groupby('date')['STEEM'].sum()
outgoing_df = pd.DataFrame(outgoing).groupby('date')['STEEM'].sum() * -1
diff_df = incoming_df + outgoing_df # add together because we mult outgoing by -1 above
In [ ]:
diff_df.head()
In [ ]:
# outgoing_df.iplot(kind='bar')
In [ ]:
diff = go.Scatter(
name='Delta',
mode = 'lines+markers',
opacity=0.7,
x=diff_df.index,
y=diff_df.values
)
incoming = go.Bar(
name='Incoming STEEM',
opacity=0.9,
x=incoming_df.index,
y=incoming_df.values
)
outgoing = go.Bar(
name='Outgoing STEEM',
opacity=0.9,
x=outgoing_df.index,
y=outgoing_df.values
)
colors=['blue', 'orange', 'red']
layout = go.Layout(title='STEEM Exchange Flows')
fig = go.Figure(data=[incoming, outgoing, diff], layout=layout)
py.iplot(fig, filename='steemdata-exchange-flows')
# todo, make bar charts share X axis
In [ ]:
In [ ]:
# time constraints
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
'type': 'transfer',
'timestamp': time_constraints,
}
projection = {
'_id': 0,
'amount': 1,
'timestamp': 1,
}
transfers = list(db['Operations'].find(conditions, projection=projection))
In [ ]:
# Flatten each transfer to {date, <asset>: amount}.
# fix: the original called `omit`, which is never imported anywhere in this
# notebook (NameError on a fresh run). The projection above returns only
# `amount` and `timestamp`, so the record can simply be built directly.
transfers = [{
    'date': x['timestamp'].date(),
    x['amount']['asset']: x['amount']['amount'],
} for x in transfers]
In [ ]:
transfers[0]
In [ ]:
df = pd.DataFrame(transfers).fillna(0)
In [ ]:
df.head()
In [ ]:
# df.groupby('date').aggregate({
# 'SBD': 'sum',
# 'STEEM': 'sum',
# })
In [ ]:
df_sum = df.groupby('date').sum()
df_sum['count'] = df.groupby('date').count()['SBD']
no_dust = df[(df.STEEM > 0.1) | (df.SBD > 0.1)]
df_sum['no_dust'] = no_dust.groupby('date').count()['SBD']
In [ ]:
df_sum.head()
In [ ]:
# df_sum.iplot(
# kind='bar',
# theme='white',
# colors=['blue', 'orange'],
# legend=False,
# title='Daily Transfer Volume',
# filename='steemdata-daily-transfer-volume',
# )
In [ ]:
data = [
go.Bar(
name='STEEM Volume',
opacity=0.9,
x=df_sum.index,
y=df_sum['STEEM'].values
),
go.Bar(
name='SBD Volume',
opacity=0.9,
x=df_sum.index,
y=df_sum['SBD'].values
),
go.Scatter(
name='Transactions',
mode = 'lines+markers',
opacity=0.7,
x=df_sum.index,
y=df_sum['no_dust'].values,
yaxis='y2'
),
]
layout = go.Layout(
title='Daily Transactions',
yaxis=dict(
title='Transaction Volume'
),
yaxis2=dict(
title='Count (Dust Removed)',
titlefont=dict(
color='rgb(148, 103, 189)'
),
tickfont=dict(
color='rgb(148, 103, 189)'
),
overlaying='y',
side='right'
)
)
colors=['blue', 'orange', 'yellow']
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='steemdata-tranfers-2')
In [ ]:
# daily transaction count
df_count = pd.DataFrame()
df_count['All Transfers'] = df.groupby('date').count()['SBD']
no_dust = df[(df.STEEM > 0.1) | (df.SBD > 0.1)]
df_count['/wo Dust'] = no_dust.groupby('date').count()['SBD']
In [ ]:
df_count.head()
In [ ]:
# df_count.iplot(
# kind='line',
# theme='white',
# colors=['blue', 'orange'],
# legend=False,
# title='Daily Transfers Count',
# filename='steemdata-daily-transfers-count',
# )
In [ ]:
# time constraints
time_constraints = {
'$gte': dt.datetime.now() - dt.timedelta(days=30),
}
conditions = {
'type': 'fill_convert_request',
'timestamp': time_constraints,
}
projection = {
'_id': 0,
'amount_in': 1,
'timestamp': 1,
}
conversions = list(db['AccountOperations'].find(conditions, projection=projection))
In [ ]:
# flatten each conversion to {date, <asset>: amount}; the asset key is used
# as a column name by the groupby below (SBD for fill_convert_request ops)
conversions = [{
'date': x['timestamp'].date(),
x['amount_in']['asset']: x['amount_in']['amount']} for x in conversions]
In [ ]:
df = pd.DataFrame(conversions)
df_sum = df[['SBD', 'date']].groupby('date').sum()
In [ ]:
df_sum.iplot(
kind='bar',
theme='white',
colors=['blue'],
legend=False,
yTitle='SBD Converted',
title='SBD to STEEM conversions',
filename='steemdata-sbd-steem-conversions',
)
In [ ]: