In [12]:
import pymongo as pm

In [13]:
db_address = "afruizc-office.cs.unm.edu"
username = 'populator'
password = 'malware_challenge'

mg = pm.MongoClient(db_address)
if not mg.malware.authenticate(username, password):
    sys.stderr.write("Authentication error. Terminating...")
    sys.stderr.flush()
    

# Obtain the collection
samples = mg.malware.samples

In [14]:
mg.malware.collection_names()


Out[14]:
['reduced', 'samples', 'system.indexes', 'test_samples']

In [191]:
mg.malware.create_collection('reduced')


---------------------------------------------------------------------------
CollectionInvalid                         Traceback (most recent call last)
<ipython-input-191-427dec88c433> in <module>()
----> 1 mg.malware.create_collection('reduced')

/usr/local/lib/python3.4/site-packages/pymongo/database.py in create_collection(self, name, **kwargs)
    237 
    238         if name in self.collection_names():
--> 239             raise CollectionInvalid("collection %s already exists" % name)
    240 
    241         return Collection(self, name, **opts)

CollectionInvalid: collection reduced already exists

In [15]:
reduced = mg.malware.reduced
samples = mg.malware.samples

In [16]:
reduced.create_index('id', unique=True)


Out[16]:
'id_1'

In [17]:
reduced.index_information()


Out[17]:
{'_id_': {'key': [('_id', 1)], 'ns': 'malware.reduced', 'v': 1},
 'id_1': {'key': [('id', 1)], 'ns': 'malware.reduced', 'unique': True, 'v': 1}}

In [71]:


In [18]:
import random

In [26]:
random.sample(range(400), 100)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-26-080df07dff30> in <module>()
----> 1 sort(random.sample(range(400), 100))

NameError: name 'sort' is not defined

In [ ]:


In [21]:
import random
def rand_samp(documents, n=100):
    samp_size = documents.count()
    if n > samp_size:
        n = samp_size
    # makes a 
    _random = random.sample(range(sampe_size), n)
    rands = []
    # for i, doc in enumerate(documents):
    #    if i in _random:
    #        rands.append(doc['id'])
    # return rands  
    return [doc['id'] for i, doc in enumerate(documents) if i in _random]

In [8]:
def clone_doc(namelist, f_collection, to_collection):
    for name in namelist:
        p = f_collection.find_one({'id': name})
        to_collection.drop({'id': name})
        to_collection.insert(p)
        print('inserted %s to %s' % (p['id'], str(to_collection)))

In [9]:
def _clone_docs(label):
    documents = samples.find({'class': str(label)})
    rand = rand_samp(documents)
    clone_doc(rand, samples, reduced)

In [11]:
rand = rand_samp(docs)


---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-53a7b5404289> in <module>()
----> 1 rand = rand_samp(docs)

NameError: name 'docs' is not defined

In [128]:
rand


Out[128]:
['0sM6DQlxcP3oAfd9ZVBT',
 '7QNAUrmZEql3XR1I42jz',
 'AHErPLd2pTyWusBgXZOx',
 'DQ6WlKRIEGM4BF9se1zg',
 'e10PUY97MwByIDjA8LEF',
 'BFmbPXnMtl9oVyI1SYvT',
 '6Ep5PZ3J09edDNo4ahx8',
 'exGy3iaKJmRprdHcB0NO',
 '0Cq4wfhLrKBJiut1lYAZ',
 '0MmZ8j5pn2R3VG9wlxYi',
 '0EL7OGZKozbiNCVP61gk',
 '0sLBvJeYl5UkiRzV1atI',
 '0sM2atqTo48GgnFwhQXu',
 '1A7UqoIMrxHgVuW3FS9c',
 '1cdtwSyhmiekLoH7OUEs',
 '1d79vVgNkt4fMjQELaw3',
 '1IjdZ2HuarlMYEBFbTCK',
 '1i2SThtkCGEplHeoJxqI',
 '1Skcwz5YxWihMqIyt30X',
 '1R0pmNFb5znDi6aTVeZt',
 '1UfOQcbmP40I298aeCpJ',
 '2bdEFfrZnB04XGIDig3t',
 '2Bd4uWfoseKRLOF7ngE3',
 '3wNfTntvMkpl8LuY0WPm',
 '3VdhPHBJMc1nzEyqARWj',
 '45bI6DlJUGh8KF01ExTW',
 '4FQo2tfqCepl1ArsyMN8',
 '4Kr1hQyLdEMq9AmYIlzR',
 '4kuxDJlwWHMi5KX3SVQq',
 '6yRO7gDZAtm1ebazJIWq',
 '6XBvnPoFpYk8S3lbCjwW',
 '70f4QazO1NlpXMST2LcE',
 '7LWYyj9uaCoVqJiEck62',
 '7sNR0UFCQniz8vbhVpIo',
 '7schXvl2FHSujB3aVK9e',
 '7rvpSAIkeV8TKQDi0lw5',
 '82L6irNBWpMQZ9DRlXKw',
 '7zIMcQ35pnFfgE9qr6vs',
 'Br5kJPaoUAEHjGcINVtS',
 'bSeMuYvEqX4a9J02VWl8',
 'BThVXxrfyLW6dQGZS7Um',
 'bUtG1D8cgh4SzifWksaH',
 'bxdM9LUoRTGO4lvaAmPt',
 'bYC6r58y9BWvjScdVaFg',
 'c8Q7CVawB6vyUIoN0PA2',
 'clDmndwYzEaRtyrQusUJ',
 'D3P5wso2J0hYbKRIWaCL',
 'd7XHlwrYtyQk8CNIa0AU',
 'D5gTOIGAk81460VUqlye',
 'dHsIFJomVwubBQYCp0K4']

In [192]:
for doc in reduced.find():
    print(doc['id'] + ', ' + doc['class'])


a9oIzfw03ED4lTBCt52Y, 1
58kxhXouHzFd4g3rmInB, 1
LEnDGzVIjHaqmrPMJyR0, 4
32r6nRhN1UCwtFTfy8ZG, 4
30htxi8FRcmfUkInewlS, 4
DALwrgpdQcolMWVBzb1t, 4
BFmbPXnMtl9oVyI1SYvT, 6
3ekVow2ajZHbTnBcsDfX, 1
3X2nY7iQaPBIWDrAZqJe, 1
6tfw0xSL2FNHOCJBdlaA, 1
IidxQvXrlBkWPZAfcqKT, 1
d0iHC6ANYGon7myPFzBe, 1
fRLS3aKkijp4GH0Ds6Pv, 1
da3XhOZzQEbKVtLgMYWv, 1
60vgAOVtBRdxTMisJw5X, 4
CkzJnxomRNpMBfeK8TDg, 4
jERVLnaTwhHFrZbvNfCy, 4
jzf91HDNIbFMPaKEmZvL, 4
KfQ58FVTkB9sb1i4u7zH, 4
0sM6DQlxcP3oAfd9ZVBT, 6
7QNAUrmZEql3XR1I42jz, 6
AHErPLd2pTyWusBgXZOx, 6
DQ6WlKRIEGM4BF9se1zg, 6
e10PUY97MwByIDjA8LEF, 6
6Ep5PZ3J09edDNo4ahx8, 6
exGy3iaKJmRprdHcB0NO, 6
0Cq4wfhLrKBJiut1lYAZ, 6
0MmZ8j5pn2R3VG9wlxYi, 6
0EL7OGZKozbiNCVP61gk, 6
4QlfSpex83Nsh2oBcWXv, 1
63RfPeChL1tsJwy2lVHo, 1
6d0uJ9rYK1FcjRimvVNt, 4
6GRw1oqBOEjgbYI093UA, 4
ljFT1KeZmEiHxhuRbrcd, 4
7LCKOhHDaXRbcFwxdnz5, 4
ErlsfYCZaD0tuLjSo8GM, 4
HKgJMPYDlcUmF74G0pis, 4
1S9ui2XqltCJAOGUPw7v, 4
1yC7BzWHgtI2FibhQ0km, 4
0sLBvJeYl5UkiRzV1atI, 6
0sM2atqTo48GgnFwhQXu, 6
1A7UqoIMrxHgVuW3FS9c, 6
1cdtwSyhmiekLoH7OUEs, 6
1d79vVgNkt4fMjQELaw3, 6
1IjdZ2HuarlMYEBFbTCK, 6
1i2SThtkCGEplHeoJxqI, 6
1Skcwz5YxWihMqIyt30X, 6
1R0pmNFb5znDi6aTVeZt, 6
1UfOQcbmP40I298aeCpJ, 6
2bdEFfrZnB04XGIDig3t, 6
2Bd4uWfoseKRLOF7ngE3, 6
3wNfTntvMkpl8LuY0WPm, 6
3VdhPHBJMc1nzEyqARWj, 6
45bI6DlJUGh8KF01ExTW, 6
4FQo2tfqCepl1ArsyMN8, 6
4Kr1hQyLdEMq9AmYIlzR, 6
4kuxDJlwWHMi5KX3SVQq, 6
6yRO7gDZAtm1ebazJIWq, 6
6XBvnPoFpYk8S3lbCjwW, 6
70f4QazO1NlpXMST2LcE, 6
7LWYyj9uaCoVqJiEck62, 6
7sNR0UFCQniz8vbhVpIo, 6
7schXvl2FHSujB3aVK9e, 6
7rvpSAIkeV8TKQDi0lw5, 6
82L6irNBWpMQZ9DRlXKw, 6
IGE0k5g4oyYmOKiAZ1ut, 7
2HesW5JxQNOGSIUubRzX, 1
2DBKbxPnVCyiLzqAHU9c, 1
2F6ZfVCQRi3vrwcj4zxL, 1
3fgX5GuthkIcJaTvP9UN, 1
3v7fe0Diw9cOu6dAzsaE, 1
49buyig6pa25Z1IAqtSr, 1
4lCg5d8GD17tpwQso6RZ, 1
2A6SgLfdzGEopT7XcQhD, 1
0ZTEyLXaWReMK3rYVCjv, 1
1DNWriJEg56S03yvjTBu, 1
1her6tuVWjBkqZPUf8KR, 1
1OHzGWXBTlpyVsIPK3kj, 1
2M9jHWhCOGBtY4Jbsvcy, 1
0gkj92oIleU4SYiCWpaM, 1
19zYbuW3XONcEedv7xUl, 1
1kfhXMSUYKtn7uciR6HV, 3
1L8Wt9XTV3pInwaqiobG, 3
1loLEn48jqmNfpudgGXI, 3
h9amYlEipKOJtCN25uvB, 4
4AGv29WBlocjZiPIKqsp, 4
5aqZV2vfkm63IeXYPlWg, 4
6u25JYNBy8wzSHUjqxvh, 4
7zIMcQ35pnFfgE9qr6vs, 6
Br5kJPaoUAEHjGcINVtS, 6
bSeMuYvEqX4a9J02VWl8, 6
BThVXxrfyLW6dQGZS7Um, 6
bUtG1D8cgh4SzifWksaH, 6
bxdM9LUoRTGO4lvaAmPt, 6
bYC6r58y9BWvjScdVaFg, 6
c8Q7CVawB6vyUIoN0PA2, 6
clDmndwYzEaRtyrQusUJ, 6
D3P5wso2J0hYbKRIWaCL, 6
d7XHlwrYtyQk8CNIa0AU, 6
D5gTOIGAk81460VUqlye, 6
dHsIFJomVwubBQYCp0K4, 6
EWjvFU5GXeSaHzxBh0f9, 7
KCxI1ZA3oiEqc8Xe4MkO, 7
2eqnSQrGfCXupwkDtIJ6, 1
2oUq3FLziRydvHSXM7na, 1
3fxHsVZeOikwBYzM5KCJ, 1
3jRZMsoI9VmuhTGnUrQi, 1
3ojXTJ7uAkvVnimG5rpL, 1
3SHYJ561ndFZAqWT4sND, 1
41wOoxNbCqTKQImafyDJ, 1
4KSqD2j0yXBPGgskLCFa, 1
4pQWju6nLDqSwUIiZhs3, 1
6zYSlNx9tBbUJkT0aGhr, 1
0DNVFKwYlcjO7bTfJ5p1, 1
0WdoYq78xDkFMcIwRpmJ, 1
0ZiQmgtxzHe9v5O8Lf2k, 1
1h0FDrPmV7gfCzs9Id43, 1
1l5V2AwZS9ixLMmecCkp, 1
1LE6uK9BdgNXIcSTnrMJ, 1
1Rr0hWX8Qz6nm3IgYLuF, 1
70xElcmWgS1fChNpniIG, 2
JyBvdherH16jbUktYL79, 3
IpNGJ84WE1kguZ0tKAPn, 3
iRDBIyTSNfnr93j864dc, 3
iSaFcugoKwRrmPTDZOyJ, 3
iXOK1q4SN38EUDbnAWlG, 3
j6zenOwUWpPruio3Xk4b, 3
JcaYWHPMxb4jSBLiXZNC, 3
2VhINMP4oKmsnjZkY38U, 4
LH5pzdDSPOtgIaBC1jWo, 4
2ZY5e4PHsEAvkziQGFR6, 4
cMDuGY0R5UbhJNtkHnq9, 4
fqtA64LyDpER8nvrTslF, 4
6JIAcqDxegQoLzdNM91w, 4
8LFCXHZ2TMdNhc7aUAGf, 4
1xGM8wcYTV4pQFhJBd2f, 1
2oyTpmYF4tSuBH9kXQc7, 1
2PFLzYNrp1d96DAVQlIk, 1
2bcyFiRa8zKBolCGZSvO, 1
2F5DOMVLNGfQKePHdwru, 1
4l7o0adxGiDvStmFVnzb, 1
4SBiOYxhVHqXWgKwtdLG, 1
7gbpotqxaYGmMzEs2Uyc, 2
7JjsOZzyXR9ScewIoMTp, 2
7e0vxYy6b24g9O3UKLqC, 2
7cUi8xtw35ESNMKJsXhD, 2
7dYo5p6fSV0rkgqLvzbT, 2
73VYXPJgsRlKIMiOLwG4, 2
7BNLHFXEAJRS1bwZxziq, 2
2UcBWPt6mf8dohDsYxOR, 2
2tb9hEeGUao8nTrIplZJ, 2
2SMiaYAztwhCREWfQlkq, 2
2wCPm7giSGuRejqFU0BN, 2
6Y7KC0nLeOUjH9mpEtFh, 2
i4f81CyIkZtEprWaOVRS, 2
edyzYvJXM7oLhGwHE61D, 2
5eKrwLvObGWDydgPmXai, 2
5FwqQXsTrni4xlKDOdZ0, 2
5FXu1yi9MvKsJIGPC72c, 2
5G6DcQgmuSxOaAEhb4UL, 2
5gl4sqLftFB2KVNvGrJZ, 2
5nSZXBYgVob1qf4avrDP, 2
5t6pDKTJ2Pw0fxQiEjbo, 2
5UVMFqt2RCoIGWxsj6NP, 2
5Zrij7tnYzFRaxI9edvf, 2
67RMbxiLz1OgnVYSElPH, 2
6LmWsZA2e5dbfhR1SB4P, 2
6ml5DH9TvnIiKReLuP2d, 2
6mUHQtCBjzWA0fGIEnP7, 2
6q2Y5BOx8RoluXjA10se, 2
6U7aMPmel4gBDkRCnt3E, 2
6VTkgzifPd5Z0BXF3cuY, 2
6Wd9umcsexZNDP7IfJnq, 2
73LO4S6We2aGQvM0qCXo, 2
7iQNYb2mIPzy354RMLX6, 2
7LbrmXchDZJled5HnAfM, 2
1OTMQ5zCn6hZGPdBcEje, 2
1Pcq2sTQEV7r3gBSwFyU, 2
1orTCt8q4V5B02mzRN9p, 2
1ZfKbU4HjtQIpoElzDTC, 2
21OaoipR7tqxzF4HQ9f0, 2
2ctFLBsIi5wEPAuXKxZv, 2
2fsOxnz0hDMp6Qt17PoN, 2
2fJyCR49hpGrWIZ7F6tH, 2
2lUahz5WrDpB6ejTcYtL, 2
1pCDZ6lLytxzQOKYXqe3, 2
2bf65CdkZTUGXjzq9KNJ, 2
2qJP9B5QlxeDECIVzLo4, 2
0Gu4misTcKynQD2Ol1Jx, 2
1BDCxE2AMQN9yH6sWdIo, 2
1CbXMgAIJtiNLR0So9ul, 2
1lWSeRJfOo0X9xqC67si, 3
1PGWZqdnurksDEYgbB9X, 3
0xUc2vRzyYtqFa4VhLnG, 3
1PNSFdXH4QYaky5T0n8Z, 3
1QHbFGR3qcur6aWZw0Jk, 3
1RZ8lSpEKcjaUru5k6Jb, 3
1u3qTGiRvckQZW7dBY58, 3
1xiLno6Ef3PVlCT0kUde, 3
1xSW8f3MoVAKsjTCmE79, 3
21K8k3FDRwv5qbEWPaQl, 3
24YOX6UIvdmf0CD9PS1H, 3
2jf7mkt8YCUM3AriKVas, 3
2mxzRY0nGXtkb45lDrqC, 3
2nTuJ7exWyqDzEIc0SY8, 3
1YkSNHcbq4y0ofMpR93Z, 3
6nCl3JdMAkKjXOgoN8U4, 3
6NIybqOdJ4sMVfGkhAH5, 3
6OhHNGdUpoi7tv0k9qwM, 3
6OtViL80ylEW5ogaUH7m, 3
6q9PF1AdMcOIBjhi58CY, 3
6Qp2b3wMFnzhlKjxLJrk, 3
aE6dSw0lysO8Qn4cf2FM, 3
aI5RJnpEKd7Or3VYB4Zc, 3
aIB1DcJ893RjpdgMELfy, 3
aIn3UKNJyCvFkD4dGljR, 3
aJAUOx5gLdFSYMTGiP4Q, 3
AJryF7o4hU2LdfDXMaR9, 3
aKQHksThE3fdVCizYpGI, 3
AKZi3yaUj6nkoDm1HeGB, 3
AlfXLOuvBphS0FPJZgGD, 3
ALuyE2Q9mjGdOkq8HWoz, 3
aN8PetkbW4RfxABMh2TE, 3
ANwkGK2YbhD3VjsvHrLQ, 3
Ao2BOaDcrdZ8xLJt4Xes, 3
aOmQG03HATWje26Fznbl, 3
AoOFhxKMScWdRvXNTYGy, 3
aq4SxQF59fpbDI3wdojH, 3
AqKpGPEdBXWCia6DNMrm, 3
ArckypD4u81qtOnU5i96, 3
ARI0JysNSXle1EOQCMtf, 3
5HbKUA1oyJDeLwhM26Fq, 4
7DSdcK2ZLkFmhlXUpzJw, 4
9M6r8hsleUKTmvfB7Yn2, 4
9ybCjYPNtAS3pBU7IfdR, 4
AiTGE0dPvU4JH92LYoXD, 4
DSWecTduKPtZNQoVR8qh, 4
ErbRWGIYgktQ1mf5Hw2u, 4
GgHz1ZSPklNmUJao9xht, 4
idDSEvTsVqZhI7MlKnGu, 4
j9xpRuC3HiYyoDlQ6JFO, 4
jV8cSxM45pXgW7JPo6qt, 4
jOZMUJF5waGhqLQbWsYz, 4
L5MO1uWhDZ2yplTkoRUd, 4
32rcXoa5QI1EmxBFS9he, 4
3OX0rbks7LQmeDcKx8C4, 4
3WhAuJ8OlUsk2XT0ImiK, 4
4j5ZXQI3ghezluq1snEp, 4
50fz3EgBdYQKMjvinR9w, 4
5dqijn0zcEPgONTLFRoI, 4
4ZBJzEqnW52fFUw0PG3v, 4
5JLHiDhkzyYPdTeuMqXb, 4
5YMeDkHjclrCPd8OuymR, 4
b59dlNEacoYUAyeR4Ipq, 5
bGPHZFpAL3N957064wzj, 5
epLciPaHrTu1gl4CW7Uf, 5
BKXtxeYlLsprabEWIQhn, 5
5tMCNKDogQ2x7zwUbpcZ, 5
gB105vrJPbcGCtuj7p4a, 5
FsirpP0oDwXeAzC2KndG, 5
DEyOUKzN2g8suv96eYoC, 5
asKPnzUXjShMc0Tl6Wge, 5
GbTUXFYkMoafQSe1zN94, 5
GkMBlvTidr5oyYOVJCwc, 5
F6nOrb9ipd0SskWuJjUw, 5
3zZpqyclD9B2v5Qas18m, 5
i8WIr0dtVHSGMmwFlcUY, 5
40KRbGeQZ8PwcUgt5joa, 5
1KB3Z7gd5aN4Xmx8W0sf, 5
2qAtoGOuMQZdmH3y7bEY, 5
AeCXyFRcU2SsgvVTpMzi, 5
Dj3FqX9IAHco2tG1UOZS, 5
Fnda3PuqJT6Ep5vjOWCk, 5
hZiVAw5nREjHU1qodatK, 5
idjesBrKybwkzPXMalUL, 5
7cyYPthi5KmvXEs2NkRj, 5
B6dzEY3kKIn8me9McfGg, 5
3m8kb5ILPrHcMC1o9Nht, 5
dKcE7Q1zuFViSaDRUtxY, 5
e2r6IncxE1LQOKFGgphj, 5
CRrAOvzk452LVZIGMWsq, 5
EB0d2fJGLHam1tQCUDWu, 5
ei0C84XpoqcAS6v5ZExI, 5
fyH8oWql4rg7tEJSLpIB, 5
2pwjzv6eGEb8QmHPfxSc, 5
cIojVJGQOtrL0S1ApeDY, 5
cYb4XuNSqOA9IFLHseG1, 5
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-192-132d7788baae> in <module>()
----> 1 for doc in reduced.find():
      2     print(doc['id'] + ', ' + doc['class'])

/usr/local/lib/python3.4/site-packages/pymongo/cursor.py in __next__(self)
   1074             raise StopIteration
   1075         db = self.__collection.database
-> 1076         if len(self.__data) or self._refresh():
   1077             if self.__manipulate:
   1078                 return db._fix_outgoing(self.__data.popleft(),

/usr/local/lib/python3.4/site-packages/pymongo/cursor.py in _refresh(self)
   1035                 self.__send_message(
   1036                     message.get_more(self.__collection.full_name,
-> 1037                                      limit, self.__id))
   1038 
   1039         else:  # Cursor id is zero nothing else to return

/usr/local/lib/python3.4/site-packages/pymongo/cursor.py in __send_message(self, message)
    931 
    932             try:
--> 933                 res = client._send_message_with_response(message, **kwargs)
    934                 self.__connection_id, (response, sock, pool) = res
    935                 if self.__exhaust:

/usr/local/lib/python3.4/site-packages/pymongo/mongo_client.py in _send_message_with_response(self, message, _must_use_master, **kwargs)
   1203                 sock_info.sock.settimeout(kwargs["network_timeout"])
   1204 
-> 1205             response = self.__send_and_receive(message, sock_info)
   1206 
   1207             if not exhaust:

/usr/local/lib/python3.4/site-packages/pymongo/mongo_client.py in __send_and_receive(self, message, sock_info)
   1180         try:
   1181             sock_info.sock.sendall(data)
-> 1182             return self.__receive_message_on_socket(1, request_id, sock_info)
   1183         except:
   1184             sock_info.close()

/usr/local/lib/python3.4/site-packages/pymongo/mongo_client.py in __receive_message_on_socket(self, operation, rqst_id, sock_info)
   1172         assert operation == struct.unpack("<i", header[12:])[0]
   1173 
-> 1174         return self.__receive_data_on_socket(length - 16, sock_info)
   1175 
   1176     def __send_and_receive(self, message, sock_info):

/usr/local/lib/python3.4/site-packages/pymongo/mongo_client.py in __receive_data_on_socket(self, length, sock_info)
   1155                 raise ConnectionFailure("connection closed")
   1156             length -= len(chunk)
-> 1157             message += chunk
   1158         return message
   1159 

KeyboardInterrupt: 

In [ ]: