In [32]:
import pandas as pd
import numpy as np

In [10]:
!head pvcs.csv


NAME,STATUS,VOLUME,CAPACITY,ACCESSMODES,AGE
claim-redacted-205,Bound,pvc-5643aadf-ddb3-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-1088,Bound,pvc-bc68d752-ecba-11e6-98ef-42010af000c3,10Gi,RWO,2d
claim-redacted-389,Bound,pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-119,Bound,pvc-73c5d644-ddb1-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-64,Bound,pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-192,Bound,pvc-51b672a8-ddb3-11e6-98ef-42010af000c3,10Gi,RWO,21d
claim-redacted-814,Bound,pvc-2b83ae02-dee0-11e6-98ef-42010af000c3,10Gi,RWO,20d
claim-redacted-775,Bound,pvc-ec1d47be-deb0-11e6-98ef-42010af000c3,10Gi,RWO,20d
claim-redacted-954,Bound,pvc-5a0acdc5-e3ee-11e6-98ef-42010af000c3,10Gi,RWO,13d

In [14]:
df = pd.DataFrame.from_csv('pvcs.csv')
df.head()


Out[14]:
STATUS VOLUME CAPACITY ACCESSMODES AGE
NAME
claim-redacted-205 Bound pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 10Gi RWO 21d
claim-redacted-1088 Bound pvc-bc68d752-ecba-11e6-98ef-42010af000c3 10Gi RWO 2d
claim-redacted-389 Bound pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 10Gi RWO 21d
claim-redacted-119 Bound pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 10Gi RWO 21d
claim-redacted-64 Bound pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 10Gi RWO 21d

In [152]:
df['username'] = np.array(df.index.str.extract('\w+-([\w-]+)-\d+$'))
df['id'] = np.array(df.index.str.extract('\w+-\w+-(\d+)$'))
df.head()


/Users/redacted/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:1: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
  if __name__ == '__main__':
/Users/redacted/anaconda3/lib/python3.5/site-packages/ipykernel/__main__.py:2: FutureWarning: currently extract(expand=None) means expand=False (return Index/Series/DataFrame) but in a future version of pandas this will be changed to expand=True (return DataFrame)
  from ipykernel import kernelapp as app
Out[152]:
STATUS VOLUME CAPACITY ACCESSMODES AGE username id
NAME
claim-redacted-205 Bound pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 205
claim-redacted-1088 Bound pvc-bc68d752-ecba-11e6-98ef-42010af000c3 10Gi RWO 2d redacted 1088
claim-redacted-389 Bound pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 389
claim-redacted-119 Bound pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 119
claim-redacted-64 Bound pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 64

In [153]:
df.tail()


Out[153]:
STATUS VOLUME CAPACITY ACCESSMODES AGE username id
NAME
claim-redacted-562 Bound pvc-4418d1d6-de0a-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 562
claim-redacted-400 Bound pvc-2399fe54-ddcf-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 400
claim-redacted-946 Bound pvc-294cb2c7-e35e-11e6-98ef-42010af000c3 10Gi RWO 14d redacted 946
claim-redacted-900 Bound pvc-f6859607-e1c0-11e6-98ef-42010af000c3 10Gi RWO 16d redacted 900
hub-db-dir Bound pvc-ea8a0bef-dd44-11e6-98ef-42010af000c3 10Gi RWO 22d NaN NaN

In [159]:
valids = df[~df['AGE'].str.contains('m')].dropna()
valids = valids[valids['id'] != '14']
valids.head()


Out[159]:
STATUS VOLUME CAPACITY ACCESSMODES AGE username id
NAME
claim-redacted-205 Bound pvc-5643aadf-ddb3-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 205
claim-redacted-1088 Bound pvc-bc68d752-ecba-11e6-98ef-42010af000c3 10Gi RWO 2d redacted 1088
claim-redacted-389 Bound pvc-5d67d43e-ddcb-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 389
claim-redacted-119 Bound pvc-73c5d644-ddb1-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 119
claim-redacted-64 Bound pvc-3c2345e4-dd9f-11e6-98ef-42010af000c3 10Gi RWO 21d redacted 64

In [161]:
valids['username'].head().value_counts()


Out[161]:
redacted              1
redacted    1
redacted              1
redacted            1
redacted         1
Name: username, dtype: int64

In [162]:
valids['id'].value_counts().head()


Out[162]:
400     1
90      1
756     1
488     1
1029    1
Name: id, dtype: int64

In [163]:
len(valids['id'])


Out[163]:
1105

In [55]:
import sqlite3
conn = sqlite3.connect('jupyterhub.sqlite')
c = conn.cursor()

In [94]:
c.execute('PRAGMA TABLE_INFO({})'.format('users'))

c.fetchall()


Out[94]:
[(0, 'id', 'INTEGER', 1, None, 1),
 (1, 'name', 'VARCHAR(1023)', 0, None, 0),
 (2, '_server_id', 'INTEGER', 0, None, 0),
 (3, 'admin', 'BOOLEAN', 0, None, 0),
 (4, 'last_activity', 'DATETIME', 0, None, 0),
 (5, 'cookie_id', 'VARCHAR(1023)', 0, None, 0),
 (6, 'state', 'TEXT', 0, None, 0),
 (7, 'auth_state', 'TEXT', 0, None, 0)]

In [66]:
c.execute('SELECT * FROM users LIMIT 20')
current_users = c.fetchall()
current_users[:2]


Out[66]:
[(1,
  'redacted',
  None,
  1,
  '2017-02-09 09:07:03.936620',
  '8ec09f8def774b668e838d1e70e00329',
  None,
  None),
 (2,
  'redacted',
  None,
  1,
  '2017-02-09 09:07:03.938610',
  'c98f59afc1fe4a70b18b451ac1190d45',
  None,
  None)]

In [164]:
admins = {user[1] for user in current_users if user[3] == 1}
admins


Out[164]:
{'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted',
 'redacted'}

In [73]:
# I just did this manually in the sqlite3 CLI
# c.execute('DROP FROM users where id > -1')
# c.fetchall()


---------------------------------------------------------------------------
OperationalError                          Traceback (most recent call last)
<ipython-input-73-a8eb9e4f2c5d> in <module>()
----> 1 c.execute('DROP FROM users where id > -1')
      2 c.fetchall()

OperationalError: near "FROM": syntax error

In [165]:
import datetime
import itertools

records = list(zip(
    valids['id'].astype(int),
    valids['username'],
    itertools.repeat('NULL'),
    [1 if name in admins else 0 for name in valids['username']],
#     itertools.repeat('NULL'), # Pick a random valid value
    itertools.repeat('2017-02-09 09:07:03.936620'), # Pick a random valid value
    itertools.repeat('thisisadummycookiehopefullyitworks'),
    itertools.repeat('NULL'),
    itertools.repeat('NULL')
))
records[:3]


Out[165]:
[(205,
  'redacted',
  'NULL',
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  'NULL',
  'NULL'),
 (1088,
  'redacted',
  'NULL',
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  'NULL',
  'NULL'),
 (389,
  'redacted',
  'NULL',
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  'NULL',
  'NULL')]

In [166]:
# I hate everything
def record_to_sql(record):
    return "INSERT INTO users VALUES({}, '{}', {}, '{}', '{}', '{}', {}, {})".format(
        *record
    )

sql_statements = [record_to_sql(record) for record in records]
record_to_sql(records[0])


Out[166]:
"INSERT INTO users VALUES(205, 'redacted', NULL, '0', '2017-02-09 09:07:03.936620', 'thisisadummycookiehopefullyitworks', NULL, NULL)"
[(0, 'id', 'INTEGER', 1, None, 1),
 (1, 'name', 'VARCHAR(1023)', 0, None, 0),
 (2, '_server_id', 'INTEGER', 0, None, 0),
 (3, 'admin', 'BOOLEAN', 0, None, 0),
 (4, 'last_activity', 'DATETIME', 0, None, 0),
 (5, 'cookie_id', 'VARCHAR(1023)', 0, None, 0),
 (6, 'state', 'TEXT', 0, None, 0),
 (7, 'auth_state', 'TEXT', 0, None, 0)]

In [167]:
with conn:
    c = conn.cursor()
    for statement in sql_statements:
        c.execute(statement)

In [173]:
c.execute('SELECT * FROM users LIMIT 20')
c.fetchall()


Out[173]:
[(2,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (3,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (4,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (5,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (6,
  'redacted',
  None,
  1,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (7,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (8,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (9,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (10,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (11,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (12,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (13,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (15,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (17,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (18,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (19,
  'redacted',
  None,
  1,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (20,
  'redacted',
  None,
  1,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (21,
  'redacted',
  None,
  1,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (22,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None),
 (24,
  'redacted',
  None,
  0,
  '2017-02-09 09:07:03.936620',
  'thisisadummycookiehopefullyitworks',
  None,
  None)]

In [ ]: