In [1]:
import db
from IPython.core.display import display, HTML
import logic
import matplotlib.pyplot as plt
import pandas as pd
from numpy.random import normal
from wordcloud import WordCloud
import nltk
from pprint import pprint
%matplotlib nbagg


---------------------------------------------------------------------------
ConnectionRefusedError                    Traceback (most recent call last)
/usr/local/lib/python3.4/dist-packages/pymysql/connections.py in connect(self, sock)
    873                             sock = socket.create_connection(
--> 874                                 (self.host, self.port), self.connect_timeout)
    875                             break

/usr/lib/python3.4/socket.py in create_connection(address, timeout, source_address)
    511     if err is not None:
--> 512         raise err
    513     else:

/usr/lib/python3.4/socket.py in create_connection(address, timeout, source_address)
    502                 sock.bind(source_address)
--> 503             sock.connect(sa)
    504             return sock

ConnectionRefusedError: [Errno 111] Connection refused

During handling of the above exception, another exception occurred:

OperationalError                          Traceback (most recent call last)
<ipython-input-1-5913f8cf2c8e> in <module>()
----> 1 import db
      2 from IPython.core.display import display, HTML
      3 import logic
      4 import matplotlib.pyplot as plt
      5 import pandas as pd

/notebooks/db.py in <module>()
      6 import pymysql
      7 
----> 8 import logic
      9 import settings
     10 from db_patches import db_patches_list

/notebooks/logic.py in <module>()
----> 1 import settings
      2 import requests
      3 
      4 VALID_UNITS = {
      5     'g': ['g','gram', 'gr', 'grams', 'grs']

/notebooks/settings.py in <module>()
     19 
     20 
---> 21 connection = pymysql.connect(**DATABASE)
     22 connection.commit()
     23 cursor = connection.cursor()

/usr/local/lib/python3.4/dist-packages/pymysql/__init__.py in Connect(*args, **kwargs)
     86     """
     87     from .connections import Connection
---> 88     return Connection(*args, **kwargs)
     89 
     90 from pymysql import connections as _orig_conn

/usr/local/lib/python3.4/dist-packages/pymysql/connections.py in __init__(self, host, user, password, database, port, unix_socket, charset, sql_mode, read_default_file, conv, use_unicode, client_flag, cursorclass, init_command, connect_timeout, ssl, read_default_group, compress, named_pipe, no_delay, autocommit, db, passwd, local_infile, max_allowed_packet, defer_connect, auth_plugin_map)
    676             self.socket = None
    677         else:
--> 678             self.connect()
    679 
    680     def _create_ssl_ctx(self, sslp):

/usr/local/lib/python3.4/dist-packages/pymysql/connections.py in connect(self, sock)
    919                 exc.traceback = traceback.format_exc()
    920                 if DEBUG: print(exc.traceback)
--> 921                 raise exc
    922 
    923             # If e is neither DatabaseError or IOError, It's a bug.

OperationalError: (2003, "Can't connect to MySQL server on 'localhost' ([Errno 111] Connection refused)")

Drugs and Fake IDs: Data Mining The Dark Web


This Jupyter Notebook belongs to a data mining project takign place at UEL. The aim of this research is to monitor a selection of dark web resources to gain insights regarding the type of products and services that are sold, and to possibly identify any significant trends.

On the summer of 2015, a web crawling script was run over a dark web hosted e-commerce platform, collecting records of products being sold on the site. The purpose of this notebook is to analyse the data collected.

1. Introduction

Database schema:

Figures below describe the schema of the given database


In [2]:
db.pandas_query('SHOW tables;')


Out[2]:
Tables_in_dark_web
0 tblCategory
1 tblProduct
2 tblSubCategory
3 tblVendor
4 tblWebsite


Data definition can be obtained by the following query. Replace the table name for any of the above

In [3]:
db.pandas_query('DESCRIBE tblProduct
                ;')


Out[3]:
Field Type Null Key Default Extra
0 vendor_id int(10) NO PRI None auto_increment
1 vendor_name varchar(50) NO MUL None
2 vendor_description varchar(500) YES None
3 website_id varchar(50) NO MUL None

2. Market Activity Analysis

Perhaps the first thing that comes to mind when analysing a market place is to measure its size in terms of units traded, product taxonomy, trade value, overall value etc.

These data set has been collected during the following date range:


In [4]:
db.pandas_query('''
    SELECT min(time_stamp) as 'Start Date', max(time_stamp) as 'End Date' FROM tblProduct;
    ''')


Out[4]:
Start Date End Date
0 2015-07-22 2015-09-03

Within this time, the following number of records have been collected:


In [5]:
db.pandas_query('''
    SELECT count(*) as 'Total Product Records' FROM tblProduct;
    ''')


Out[5]:
Total Product Records
0 30679

2.1 Product Categories

The site divides the product listing into a number of categories and subcategories. We will now have a look at those categories and see which products are the most popular in this particular site.


In [33]:
dataframe = db.pandas_query('''
        SELECT B.subcategory_name, count(A.product_name) as 'Total Products' 
        FROM tblProduct A
        LEFT JOIN tblSubCategory B
            ON A.subCategory_id = B.subCategory_id
        GROUP BY B.subcategory_name
        ORDER BY -count(A.product_name)
        limit 30;
        ''',
        index_col='subcategory_name')
dataframe.plot(kind='barh', title='Most Popular Categories')


Out[33]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dae948c50>

From the above query we find that 20 out the 30 most common subcategories are drug related. The other main area of research we are interested on is the illegal market of passports and ids. While there seems not to be any trace of those sub categories, we yet have to analyse the "Other" subcategory.

Let's find out what percentage of these listings do actually fall in the drugs category.


In [7]:
comparison_df = db.pandas_query('''
    SELECT count(*) as 'drug_listings' FROM tblProduct
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Weight loss')
        ); 
    ''')

comparison_df['other'] = dataframe.sum()[0] - comparison_df.iloc[0][0]
print (comparison_df)

# Create a figure with a single subplot
f, ax = plt.subplots(1, figsize=(10,5))
bar_width = 1

# positions of the left bar-boundaries
bar_l = [i for i in range(len(comparison_df['drug_listings']))] 

# positions of the x-axis ticks (center of the bars as bar labels)
tick_pos = [i+(bar_width/2) for i in bar_l] 

# Create the total
totals = [i+j for i,j in zip(comparison_df['drug_listings'], comparison_df['other'])]

# Create the percentage
pre_rel = [i / j * 100 for  i,j in zip(comparison_df['drug_listings'], totals)]
mid_rel = [i / j * 100 for  i,j in zip(comparison_df['other'], totals)]

# Create a bar chart in position bar_1
ax.bar(bar_l, 
       pre_rel, 
       label='Drug Listings', 
       alpha=0.9, 
       color='#019600',
       width=bar_width,
       edgecolor='white'
       )
ax.bar(bar_l, 
       mid_rel, 
       bottom=pre_rel,  
       label='Other', 
       alpha=0.9, 
       color='#3C5F5A', 
       width=bar_width,
       edgecolor='white'
       )



# Set the ticks to be first names
ax.set_ylabel("Percentage")
ax.set_xlabel("")
ax.get_xaxis().set_ticks([])
ax.legend()

# Let the borders of the graphic
plt.xlim([min(tick_pos)-bar_width, max(tick_pos)+bar_width])
plt.ylim(-10, 110)

# shot plot
plt.show()


   drug_listings  other
0          20849   4440

2.2 Geografical Activity Distribution

2.2.1 Total Products listed by country of origin


In [8]:
countries_df = db.pandas_query('''
    SELECT count(*) as 'total_products', product_origin FROM tblProduct
    WHERE product_origin != ''
    GROUP BY product_origin
    ORDER BY total_products DESC;
    ''', index_col='product_origin')

countries_df.plot(kind='barh', title='Total Productes lsted by country of origin')


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dbcd554e0>

2.2.2 Total drug listings by product country of origin.


In [9]:
countries_df = db.pandas_query('''
    SELECT count(*) as 'total_drug_listings', product_origin FROM tblProduct
    WHERE product_origin != ''
    and subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Weight loss')
        )
    GROUP BY product_origin
    ORDER BY total_drug_listings DESC
    LIMIT 10;
    ''', index_col='product_origin')

countries_ax = countries_df.plot(kind='barh')
for p in countries_ax.patches:
    countries_ax.annotate(
        "%.2f" % p.get_width(), (p.get_x() + p.get_width(), 
        p.get_y()), xytext=(5, 10), textcoords='offset points')


HIGHLIGHTS:

  • USA has the leading position in total products sold as well as in the drugs market
  • For Most countries, drugs are by far the most common type of product
  • Interestingly, in the case of China, only 24% of their listed products belong to drug related subcategories

Given the chinese scenario, lets dive a little bit deeper and see what kind of products are attracting more attention in the Chinese Market.


In [37]:
china_df = db.pandas_query('''
    SELECT B.subcategory_name, count(A.product_name) as 'Total Products' 
    FROM tblProduct A
    LEFT JOIN tblSubCategory B
        ON A.subCategory_id = B.subCategory_id
    WHERE product_origin = 'China'
    GROUP BY B.subcategory_name
    ORDER BY -count(A.product_name)
    limit 30;
    ''',
    index_col='subcategory_name')
china_df.plot(kind='barh', title='Most Popular Categories')


Out[37]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dae70e390>

2.2.3 Main Drug Providers

As we have previously seen, we know that some of the most popular drugs on offer in this market place are Cocaine, weed, MDMA, LSD or Heroin. We will try to find the top three providing countries for each of these drugs


In [11]:
base_drugs_query = '''
    SELECT count(*) as 'total_listed', product_origin FROM tblProduct
    WHERE subCategory_id in (SELECT subCategory_id FROM tblSubCategory
        WHERE subCategory_name = '{}')
    AND product_origin != ''
    GROUP BY product_origin
    ORDER BY total_listed DESC
    LIMIT 3;
    '''
drugs_list = ['Cocaine', 'Weed', 'MDMA', 'LSD', 'Heroin', 'Steroids', 'Hash']
drug_data = {}
ax = None
for drug in drugs_list:
    df = db.pandas_query(base_drugs_query.format(drug), index_col='product_origin')
    df.plot.barh(title='{} top providing countries'.format(drug))


2.3 Time series Analysis

This section will try to understand the evolution over time of Agora's supply to find out growth rates in terms of total offerings as well as in the specifics of drugs.

2.3.1 Day-on-day supply analysis


In [12]:
dayonday_df = db.pandas_query('''
    SELECT count(*) as total_products, time_stamp FROM tblProduct
    GROUP BY time_stamp;
    ''', index_col='time_stamp')
dayonday_drugs_df = db.pandas_query('''
    SELECT count(*) as drugs, time_stamp FROM tblProduct
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Weight loss')
        )
    GROUP BY time_stamp;
    ''', index_col='time_stamp')
dayonday_df2 = pd.concat([dayonday_df, dayonday_drugs_df], axis='time_stamp', join='inner')
dayonday_df2.plot()


Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3db68c5128>


Unfortunately, we can't analyse this market growth over time, as there are not enough data points. The growth of the drugs market share over time seems to be also unbearable for the same reason.

3. Market quantitative analysis

The currency used in this dark web hosted commerce platform is the Bitcoin. The dataset does contain the bitcoin price at which each listing has been placed, however, Bitcoin exchange rates are very volatile. Therefore, it would be meaningless to analyse the dataset in terms of its bitcoin value. Instead, each of the Bitcoin values have been converted to both GBP and USD as of the market exchange rate as of the day the product was listed on the website.

3.1 Natural Language processing: Data Mining Listing quantities and measurement units

Our listings product quantites are not suitable for data analysis, as they are cotained within title and description fields which are just composed of unstructured text blocks. In order to extract those quantities we can either manipulate the string fields with regular expressions or use Natural Language proccessing to extract the required values using semantic and grammar machine learning algorithms. There is a powerfull NLP library called NLTK that we will use in our attempt to extract those values. Let's look at the folliwng two title examples extracted from the 'Cocaine' subcategory dataset:

"6 Grams Pure Cocaine (SAFE,ZERO CUTS & PURE+FREE COKE DAILY)"

"SALE!! 28G Cocaine - 92% Purity -Platinum Quality - No Cuts FE LISTING"
We start by tokenizing the sentence into words and analyze the word types grammatically


In [16]:
string = "6 Grams Pure Cocaine (SAFE,ZERO CUTS & PURE+FREE COKE DAILY)"
tokenized = nltk.word_tokenize(string)
tagged = nltk.pos_tag(tokenized)
print (tagged)


[('6', 'CD'), ('Grams', 'NNP'), ('Pure', 'NNP'), ('Cocaine', 'NNP'), ('(', '('), ('SAFE', 'NNP'), (',', ','), ('ZERO', 'NNP'), ('CUTS', 'NNP'), ('&', 'CC'), ('PURE+FREE', 'NNP'), ('COKE', 'NNP'), ('DAILY', 'NNP'), (')', ')')]

In [15]:
named_entities = nltk.ne_chunk(tagged)
named_entities
#named_entities.draw()


Out[15]:

As expected, the unsupervised learning algorithm wasn't very accurate and has classified the words 'Grams Pure Cocaine' as a persons name, given that is composed of 3 capitalized words.

Let's now move into the second example.


In [17]:
string = "SALE!! 28G Cocaine - 92% Purity -Platinum Quality - No Cuts FE LISTING"
tokenized = nltk.word_tokenize(string)
tagged2 = nltk.pos_tag(tokenized)
tagged2


Out[17]:
[('SALE', 'NN'),
 ('!', '.'),
 ('!', '.'),
 ('28G', 'CD'),
 ('Cocaine', 'NNP'),
 ('-', ':'),
 ('92', 'CD'),
 ('%', 'NN'),
 ('Purity', 'NNP'),
 ('-Platinum', 'NNP'),
 ('Quality', 'NNP'),
 ('-', ':'),
 ('No', 'DT'),
 ('Cuts', 'NNP'),
 ('FE', 'NNP'),
 ('LISTING', 'NNP')]

In this particular case our grammar learning has diverted from the previous example as the word tokenizing algorithm has returned the word 28G rather than "28" and "G" as separate words. As the problem has happened at the word tokenization process, let's add a regular expression check to make sure CD and unit are returned separately:


In [20]:
tokenizer_regex = r'(?u)\d+(?:\.\,\d+)?|\w+|%'
tokenized = nltk.regexp_tokenize(string, tokenizer_regex)
tagged3 = nltk.pos_tag(tokenized)
tagged3


Out[20]:
[('SALE', 'NNP'),
 ('28', 'CD'),
 ('G', 'NNP'),
 ('Cocaine', 'NNP'),
 ('92', 'CD'),
 ('%', 'NN'),
 ('Purity', 'NNP'),
 ('Platinum', 'NNP'),
 ('Quality', 'NNP'),
 ('No', 'NNP'),
 ('Cuts', 'NNP'),
 ('FE', 'NNP'),
 ('LISTING', 'NNP')]

The regular expression based tokenizer has worked better, as it does now effectively split the unit from it's numerical value. The unit though is being classified as a proper noun (NNP) instead of a nount (NN) as the word is capitalized. Finally, we can "chunk" the POS tags using a POS regex match to get those pairs


In [21]:
chunk_exp = r'''amount: {<CD><NN|NNP|NNS|NNPS>}'''
chunk_parser = nltk.RegexpParser(chunk_exp)
chunked = chunk_parser.parse(tagged3)
chunked
for i in chunked.subtrees():
    leaves = i.leaves()
    if leaves[1][0] == '%':
        i._label = 'purity'
chunked


Out[21]:

In [10]:
chunked.draw()

3.1.1 Applying NLP to obtain Cocaine prices per gram

We want now to tune and refine the algorithm to fit our dataset. This code must be available to be used whenever we need to data mine values with a similar structure to the cocaine subset.

The following pair of functions will work as generators that an be applied to a series object:


In [22]:
def amount_series_generator(chunk, unit):
    """
    function that yields measurment units from a given chunk
    """
    valid_units = logic.VALID_UNITS.get(unit)
    for subtree in chunk.subtrees(filter=lambda t: t.label() == 'amount'):
        leaves = subtree.leaves()
        if leaves[1][0].lower() in valid_units:
            return float(leaves[0][0])
        #added this line as I found a few listings selling KILOS of cocaine!
        elif leaves[1][0].lower() in ['kg', 'kilo', 'kilogram', 'kgram']:
            return float(leaves[0][0]) * 1000
        #another hack for oz
        elif leaves[1][0].lower() in ['oz', 'ounce', 'ounces']:
            if logic.NUMBER_STRING.get(leaves[0][0].lower()):
                return logic.NUMBER_STRING.get(leaves[0][0].lower()) * 28.35
            return float(leaves[0][0]) * 28.35

    return None

def unit_series_generator(chunk, unit):
    """
    function that yields measurement units from a given chunk
    """
    valid_units = logic.VALID_UNITS.get(unit)
    if unit == 'g':
        valid_units + ['kg', 'kilo', 'kilogram', 'kgram', 'oz', 'ounce', 'ounces']
    for subtree in chunk.subtrees(filter=lambda t: t.label() == 'amount'):
        leaves = subtree.leaves()
        if leaves[1][0].lower() in valid_units:
            return unit
    return None

The data we want to extract is contained within the product_name field. Then, we need to process the query and then apply the previously described algorithms subsequently


In [24]:
tokenizer_regex = r'(?u)\d+(?:\.\d+)?|\w+|%'
chunk_exp = r'''amount: {<CD><NN|NNP|NNS|NNPS|JJ>}'''
chunk_parser = nltk.RegexpParser(chunk_exp)
cocaine_listings = db.get_products_by_subcategory('Cocaine')
cocaine_listings_names = cocaine_listings['product_name']
cocaine_words = cocaine_listings_names.apply(nltk.regexp_tokenize, args=(tokenizer_regex,))
cocaine_tagged = cocaine_words.apply(nltk.pos_tag)
cocaine_chunked = cocaine_tagged.apply(chunk_parser.parse)

cocaine_quants = cocaine_chunked.apply(amount_series_generator, args='g')
cocaine_units = cocaine_chunked.apply(unit_series_generator, args='g')
cocaine_listings['cocaine_amount'] = cocaine_quants
cocaine_listings['cocaine_units'] = cocaine_units
cocaine_listings[['product_id', 'product_name','GBP', 'cocaine_amount', 'cocaine_units']]


Out[24]:
product_id product_name GBP cocaine_amount cocaine_units
0 26564 5 gram Pure Flakes free shipping 188.34 5.0 g
1 26565 1G Cocaine - 70% Purity - Silver Quality - ESCROW 162.81 1.0 g
2 26566 5g HIGH HEAT COCAINE 268.79 5.0 g
3 26567 Synthetic Cocaine 10g 182.60 10.0 g
4 26568 28 Grams of UKWHITE&#39;S Sociable Cocaine [FL... 715.07 28.0 g
5 26569 FE - High Quality Cocaine 0.5 Grams 22.35 0.5 g
6 26570 3.5g Cocaine **PROMOTION** 166.64 3.5 g
7 26571 9 gram Pure free shipping 339.02 9.0 g
8 26572 Ching 1g Free Shipping! Buy 3 get 1 Free! 14.68 1.0 g
9 26573 56g Highest Purity Cocaine - Direct From Colombia 2002.20 56.0 g
10 26574 6 Grams Pure Cocaine (SAFE,ZERO CUTS &amp; PUR... 276.87 6.0 g
11 26575 COCAINE, 1g, A+, High 61.29 1.0 g
12 26576 Half Gram 95%+ Lab Tested Cocaine Straight off... 95.77 NaN None
13 26577 CUSTOM 3 Grams of Premium FISHSCALE Cocaine HQ 197.92 3.0 g
14 26578 SALE!! 28G Cocaine - 92% Purity -Platinum Qual... 3917.57 28.0 g
15 26579 1.5 Gram Cocaine High Quality &#70;&#76;EX 135... 97.68 1.5 g
16 26580 2G Cocaine - 60% Purity - Party Quality - ESCROW 274.54 2.0 g
17 26581 COCAINE &quot;A GRADE!!!&quot; 2 GRAMS $100 pe... 153.23 2.0 g
18 26582 Custom Cocaine Acetone Washed- 510.13 NaN None
19 26583 112g Highest Purity Cocaine - Direct From Colo... 3789.88 112.0 g
20 26584 High Heat Colombian Cocaine - 10 GRAMS UNTOUC... 517.15 10.0 g
21 26585 3.5g HIGH QUALITY COCAINE 504.38 3.5 g
22 26586 1 gram pure fish scale cocaine 217.68 1.0 g
23 26587 3.5 GRAMS HIGH HEAT COCAINE - SCOTT AND PURITY... 268.15 3.5 g
24 26588 1g Good Quality Cocaine 29.75 1.0 g
25 26589 Custom for george!isa 177.29 NaN None
26 26590 **MONTHLY SPECIAL**Cocaine .5g A+ Grade cola. ... 83.00 5.0 g
27 26591 14G Cocaine AusMate (A Grade) 1206.68 14.0 g
28 26592 SALE!! 1G Cocaine - 92% Purity -Platinum Quali... 172.38 1.0 g
29 26593 28G Cocaine AusMate (A Grade) 2049.45 28.0 g
... ... ... ... ... ...
1292 32423 0,5gr - Crack Cocaine (FREE SHIPPING) 47.81 5.0 g
1293 32424 Uncut FS Cocaine 3.5g (8-ball) 191.23 3.5 g
1294 32425 Uncut FS Cocaine 7g (quarter) 356.95 7.0 g
1295 32426 1G Peruvian Flake 40-50% 44.99 1.0 g
1296 32427 Uncut FS Cocaine 2g 114.74 2.0 g
1297 32428 Uncut FS Cocaine 0.5g 34.42 0.5 g
1298 32429 250g A+ &quot;Very High Cocaine Content&quot; ... 4844.37 250.0 g
1299 32430 FE - High Quality Cocaine 20.0 Grams 701.16 20.0 g
1300 32431 kinshela HQ Flake/Crystal Brick Cocaine (84%) 541.80 NaN None
1301 32432 .25 Great Cocaine 11.47 NaN None
1302 32433 2.3g Highest Purity Cocaine - Direct From Colo... 420.70 2.3 g
1303 32434 1G Peruvian Flake 90%+ 161.96 1.0 g
1304 32435 0.5G Peruvian Flake 90%+ 89.98 0.5 g
1305 32436 2G Peruvian Flake 90%+ 323.92 2.0 g
1306 32437 7G Peruvian Flake 40-50% 287.93 7.0 g
1307 32438 0.5G Peruvian Flake 40-50% 23.39 0.5 g
1308 32439 3.5G Peruvian Flake 40-50% 143.96 3.5 g
1309 32440 2G Peruvian Flake 40-50% 89.98 2.0 g
1310 32441 3.5g Cocaine Good Quailty Pressed &pound;115 (... 126.85 3.5 g
1311 32442 14g Cocaine Good Quailty Pressed FE &pound;370... 362.06 14.0 g
1312 32443 7g Cocaine Good Quailty Pressed FE &pound;200 ... 195.71 7.0 g
1313 32444 28g Cocaine Good Quailty Pressed FE &pound;700... 686.53 28.0 g
1314 32445 Custom fizalis 6055.47 NaN None
1315 32446 1x EZ-TEST Cocaine Purity 13.39 NaN None
1316 32447 3.5g Crack Cocaine - FE 223.10 3.5 g
1317 32448 3.5G Peruvian Flake 90%+ 413.90 3.5 g
1318 32449 1g Cocaine Good Quailty Pressed &pound;35 (UK... 23.20 1.0 g
1319 32450 1g Crack Cocaine - FE 70.12 1.0 g
1320 32451 Social Cocaine *1 g* 70.12 1.0 g
1321 32452 7G Peruvian Flake 90%+ 809.80 7.0 g

1322 rows × 5 columns

The overview shows that the algorithms have performed as epected, but we still see some empty values still, mainly due to the lack of information on the product name.

In order to understand the success rate of the algorithms we can do:


In [25]:
failed = cocaine_units.isnull().sum()
print ('{} out of {} units obtained obtained'.format(len(cocaine_units) - failed, len(cocaine_units)))
print ('{}% success rate'.format(100-(failed/len(cocaine_units)*100)))


1169 out of 1322 units obtained obtained
88.42662632375189% success rate

As we had just a 12% failure, we can handle the Null values normalizing the distribution with the help of the standard deviation.


In [26]:
gbp_grams_df = cocaine_listings[['GBP', 'cocaine_amount']]
#remove the NaN rows to calculate mean
gbp_grams_df = gbp_grams_df[pd.notnull(gbp_grams_df['cocaine_amount'])]
gbp_grams_df['price_per_gram'] = gbp_grams_df['GBP'] / gbp_grams_df['cocaine_amount']
mean = cocaine_mean_price = gbp_grams_df['price_per_gram'].mean()
mean


Out[26]:
95.231296329954333

And now we can use the value to fill the missing cells in our main dataframe:


In [27]:
cocaine_listings['cocaine_amount'] = cocaine_listings.cocaine_amount.fillna(cocaine_listings['GBP'] / mean)
cocaine_listings['cocaine_amount'] = cocaine_listings['cocaine_amount'].round(2)
cocaine_listings['cocaine_units'] = cocaine_listings.cocaine_units.fillna('g')
cocaine_listings['price_per_gram'] = cocaine_listings['GBP'] / cocaine_listings['cocaine_amount']
cocaine_listings


Out[27]:
product_id product_model product_name product_description product_origin product_postage product_price vendor_id subCategory_id time_stamp product_picture USD GBP cocaine_amount cocaine_units price_per_gram
0 26564 http://agorahooawayyfoe.onion/p/AX1h9ECpFV 5 gram Pure Flakes free shipping 5 gram uncut cocaine.<br/>100% pure! (92% puri... Netherlands 1.05068205 BTC 729 62 2015-08-01 AX1h9ECpFV.jpg 294.23 188.34 5.00 g 37.668000
1 26565 http://agorahooawayyfoe.onion/p/iL6euvZTdG 1G Cocaine - 70% Purity - Silver Quality - ESCROW ESCROW LISTING - 10% Cheaper on my FE Listings... Australia Australia 0.90821668 BTC 771 62 2015-08-01 iL6euvZTdG.jpg 254.33 162.81 1.00 g 162.810000
2 26566 http://agorahooawayyfoe.onion/p/YZm4bG64KF 5g HIGH HEAT COCAINE 5g High Heat Cocaine<br/>This listing is for H... Canada World 1.49944794 BTC 725 62 2015-08-01 YZm4bG64KF.jpg 419.90 268.79 5.00 g 53.758000
3 26567 http://agorahooawayyfoe.onion/p/SdVLjJymBX Synthetic Cocaine 10g This is new drugs - synthetic cocaine (Revers ... Czech Republic 1.01862734 BTC 893 62 2015-08-01 SdVLjJymBX.jpg 285.25 182.60 10.00 g 18.260000
4 26568 http://agorahooawayyfoe.onion/p/yESjSZr5Sg 28 Grams of UKWHITE&#39;S Sociable Cocaine [FL... 28 Grams of UKWHITE&#39;S Sociable Cocaine ---... UK WORLDWIDE 3.98903016 BTC 1058 62 2015-08-01 None 1117.08 715.07 28.00 g 25.538214
5 26569 http://agorahooawayyfoe.onion/p/nZX91dU7fv FE - High Quality Cocaine 0.5 Grams Welcome to CocoMarley&acute;s Nose Candy Shop!... Dominican Republic Worldwide 0.12465719 BTC 1059 62 2015-08-01 nZX91dU7fv.jpg 34.91 22.35 0.50 g 44.700000
6 26570 http://agorahooawayyfoe.onion/p/FA9xZ18wzi 3.5g Cocaine **PROMOTION** ***** ESCROW FOR ALL ORDERS *****<br/><br/>We ... Germany WORLDWIDE 0.92958649 BTC 747 62 2015-08-01 None 260.32 166.64 3.50 g 47.611429
7 26571 http://agorahooawayyfoe.onion/p/mlc5wawkEs 9 gram Pure free shipping 9 gram uncut cocaine.<br/>100% pure! (92% puri... Netherlands 1.89122769 BTC 729 62 2015-08-01 mlc5wawkEs.jpg 529.61 339.02 9.00 g 37.668889
8 26572 http://agorahooawayyfoe.onion/p/JDgeVUxtlm Ching 1g Free Shipping! Buy 3 get 1 Free! Best selling and now banned in the UK! 1g pac... UK UK, EUROPE, USA 0.08191758 BTC 600 62 2015-08-01 JDgeVUxtlm.jpg 22.94 14.68 1.00 g 14.680000
9 26573 http://agorahooawayyfoe.onion/p/V0X34WR6ox 56g Highest Purity Cocaine - Direct From Colombia Mission Statement: To provide the best quality... USA Worldwide 11.16928446 BTC 1060 62 2015-08-01 V0X34WR6ox.jpg 3127.81 2002.20 56.00 g 35.753571
10 26574 http://agorahooawayyfoe.onion/p/i0SBK9EWlm 6 Grams Pure Cocaine (SAFE,ZERO CUTS &amp; PUR... FREE COKE EVERDAY | SAFE,ZERO CUTS &amp; PURE ... USA USA 1.54450261 BTC 1061 62 2015-08-01 i0SBK9EWlm.jpg 432.52 276.87 6.00 g 46.145000
11 26575 http://agorahooawayyfoe.onion/p/LbZLrqPxsC COCAINE, 1g, A+, High This batch of cocaine is less energetic, but g... USA USA 0.34191687 BTC 451 62 2015-08-01 LbZLrqPxsC.jpg 95.75 61.29 1.00 g 61.290000
12 26576 http://agorahooawayyfoe.onion/p/gWEUbaXXVX Half Gram 95%+ Lab Tested Cocaine Straight off... Mother of Pearl Cocaine Straight off the brick... Australia 0.53424511 BTC 1062 62 2015-08-01 gWEUbaXXVX.jpg 149.61 95.77 1.01 g 94.821782
13 26577 http://agorahooawayyfoe.onion/p/xrcdJlE0jx CUSTOM 3 Grams of Premium FISHSCALE Cocaine HQ ------ PRODUCT ------<br/>Hi all,<br/><br/>Fir... UK WORLDWIDE 1.10410656 BTC 1058 62 2015-08-01 None 309.19 197.92 3.00 g 65.973333
14 26578 http://agorahooawayyfoe.onion/p/hh5CEM3FpZ SALE!! 28G Cocaine - 92% Purity -Platinum Qual... NOW THAT EVOLUTION HAS GONE UNDER WE ARE RUNNI... Australia 21.85418670 BTC 771 62 2015-08-01 hh5CEM3FpZ.jpg 6119.98 3917.57 28.00 g 139.913214
15 26579 http://agorahooawayyfoe.onion/p/BndPjx07L3 1.5 Gram Cocaine High Quality &#70;&#76;EX 135... High Quality &#70;&#76;EX Cocaine 1.5 Gram 135... Netherlands Europe 0.54493001 BTC 824 62 2015-08-01 BndPjx07L3.jpg 152.60 97.68 1.50 g 65.120000
16 26580 http://agorahooawayyfoe.onion/p/GTmjL74XZ4 2G Cocaine - 60% Purity - Party Quality - ESCROW ESCROW LISTING - 10% Cheaper on my FE Listings... Australia Australia 1.53150265 BTC 771 62 2015-08-01 GTmjL74XZ4.jpg 428.88 274.54 2.00 g 137.270000
17 26581 http://agorahooawayyfoe.onion/p/M1rPiA3VBq COCAINE &quot;A GRADE!!!&quot; 2 GRAMS $100 pe... This is the best Coke we have come across. No ... USA USA 0.85479217 BTC 308 62 2015-08-01 M1rPiA3VBq.jpg 239.37 153.23 2.00 g 76.615000
18 26582 http://agorahooawayyfoe.onion/p/iiBFF0Uglt Custom Cocaine Acetone Washed- Ready for you 1/2 ounce - 14+ grams pure cocai... USA 2.84574562 BTC 908 62 2015-08-01 None 796.91 510.13 5.36 g 95.173507
19 26583 http://agorahooawayyfoe.onion/p/ZXnqwEFT5a 112g Highest Purity Cocaine - Direct From Colo... Mission Statement: To provide the best quality... USA Worldwide 21.14185988 BTC 1060 62 2015-08-01 ZXnqwEFT5a.jpg 5920.51 3789.88 112.00 g 33.838214
20 26584 http://agorahooawayyfoe.onion/p/FZmtubMi8M High Heat Colombian Cocaine - 10 GRAMS UNTOUC... High Heat Pure Colombian Cocaine<br/><br/>BIG... Canada 2.88492360 BTC 217 62 2015-08-01 FZmtubMi8M.jpg 807.89 517.15 10.00 g 51.715000
21 26585 http://agorahooawayyfoe.onion/p/B2bPbxgryg 3.5g HIGH QUALITY COCAINE High quality cocaine - NEW BATCH -<br/><br/>I&... Australia Australia 2.81369092 BTC 802 62 2015-08-01 B2bPbxgryg.jpg 787.94 504.38 3.50 g 144.108571
22 26586 http://agorahooawayyfoe.onion/p/srBYCGWgYE 1 gram pure fish scale cocaine 1 gram pure fish scale cocaine<br/><br/><br/><... Australia Australia 1.21433913 BTC 491 62 2015-08-01 srBYCGWgYE.jpg 340.06 217.68 1.00 g 217.680000
23 26587 http://agorahooawayyfoe.onion/p/q0gUhRiP4p 3.5 GRAMS HIGH HEAT COCAINE - SCOTT AND PURITY... This Listing Is For: HIGH HEAT COCAINE - SCOTT... Australia 1.49588631 BTC 737 62 2015-08-01 q0gUhRiP4p.jpg 418.90 268.15 3.50 g 76.614286
24 26588 http://agorahooawayyfoe.onion/p/wuC556Nckg 1g Good Quality Cocaine 1g coke 30gbp<br/><br/>Purity around 35-40%<br... UK Worldwide 0.16597214 BTC 255 62 2015-08-01 wuC556Nckg.jpg 46.48 29.75 1.00 g 29.750000
25 26589 http://agorahooawayyfoe.onion/p/njT5aD1FWK Custom for george!isa as discussed\r\n <br/>\r\n Brought to yo... 0.98900000 BTC 771 62 2015-08-01 None 276.96 177.29 1.86 g 95.317204
26 26590 http://agorahooawayyfoe.onion/p/NDwX12p18h **MONTHLY SPECIAL**Cocaine .5g A+ Grade cola. ... 0.5g of high quality cocaine. Nice clean high.... Australia 0.46301243 BTC 1063 62 2015-08-01 NDwX12p18h.jpg 129.66 83.00 5.00 g 16.600000
27 26591 http://agorahooawayyfoe.onion/p/XRx1l2p97D 14G Cocaine AusMate (A Grade) * This is the weakest level of Cocaine I have,... Australia Australia 6.73148840 BTC 280 62 2015-08-01 XRx1l2p97D.jpg 1885.07 1206.68 14.00 g 86.191429
28 26592 http://agorahooawayyfoe.onion/p/PP6rwRwXcw SALE!! 1G Cocaine - 92% Purity -Platinum Quali... ESCROW LISTING - 10% Cheaper on my FE Listings... Australia 0.96164120 BTC 771 62 2015-08-01 PP6rwRwXcw.jpg 269.30 172.38 1.00 g 172.380000
29 26593 http://agorahooawayyfoe.onion/p/k6iA5aoShw 28G Cocaine AusMate (A Grade) * This is the weakest level of Cocaine I have,... Australia Australia 11.43284538 BTC 280 62 2015-08-01 k6iA5aoShw.jpg 3201.62 2049.45 28.00 g 73.194643
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1292 32423 http://agorahooawayyfoe.onion/p/shtj2BajEw 0,5gr - Crack Cocaine (FREE SHIPPING) 0,5gr - High Quality unmixed Crack Cocaine<br/... Netherlands World (expt: USA, AUS, NZ, Scandinavia and Rus... 0.26565599 BTC 121 62 2015-08-03 shtj2BajEw.jpg 74.51 47.81 5.00 g 9.562000
1293 32424 http://agorahooawayyfoe.onion/p/bT0xrAK55V Uncut FS Cocaine 3.5g (8-ball) This listing is for my Uncut Fishscale Cocaine... USA USA 1.06262397 BTC 934 62 2015-08-03 bT0xrAK55V.jpg 298.04 191.23 3.50 g 54.637143
1294 32425 http://agorahooawayyfoe.onion/p/YqYsLepABD Uncut FS Cocaine 7g (quarter) This listing is for my Uncut Fishscale Cocaine... USA USA 1.98356474 BTC 934 62 2015-08-03 None 556.35 356.95 7.00 g 50.992857
1295 32426 http://agorahooawayyfoe.onion/p/UjLEoXRMoL 1G Peruvian Flake 40-50% Very strong Made with 90% + straight from peru... UK UK 0.25000000 BTC 1210 62 2015-08-03 UjLEoXRMoL.jpg 70.12 44.99 1.00 g 44.990000
1296 32427 http://agorahooawayyfoe.onion/p/va8FNlBySY Uncut FS Cocaine 2g This listing is for my Uncut Fishscale Cocaine... USA USA 0.63757438 BTC 934 62 2015-08-03 va8FNlBySY.jpg 178.83 114.74 2.00 g 57.370000
1297 32428 http://agorahooawayyfoe.onion/p/AJ7aVNqUe7 Uncut FS Cocaine 0.5g This listing is for my Uncut Fishscale Cocaine... USA 0.19127231 BTC 934 62 2015-08-03 AJ7aVNqUe7.jpg 53.65 34.42 0.50 g 68.840000
1298 32429 http://agorahooawayyfoe.onion/p/zXqL124xwX 250g A+ &quot;Very High Cocaine Content&quot; ... This product is only adulterated once with a n... USA 26.91980731 BTC 594 62 2015-08-03 zXqL124xwX.jpg 7550.46 4844.37 250.00 g 19.377480
1299 32430 http://agorahooawayyfoe.onion/p/NTvhYrVwlL FE - High Quality Cocaine 20.0 Grams Welcome to CocoMarley&acute;s Nose Candy Shop!... Dominican Republic Worldwide 3.89628790 BTC 1059 62 2015-08-03 NTvhYrVwlL.jpg 1092.83 701.16 20.00 g 35.058000
1300 32431 http://agorahooawayyfoe.onion/p/x33tFLvEy1 kinshela HQ Flake/Crystal Brick Cocaine (84%) We only provide cocaine of the highest quality... Netherlands 3.01076792 BTC 796 62 2015-08-03 None 844.46 541.80 5.69 g 95.219684
1301 32432 http://agorahooawayyfoe.onion/p/UJEffP0021 .25 Great Cocaine this listing is for testing!!\r\n <br/>\r\n... USA 0.06375743 BTC 1088 62 2015-08-03 None 17.88 11.47 0.12 g 95.583333
1302 32433 http://agorahooawayyfoe.onion/p/ybvZSPq6ps 2.3g Highest Purity Cocaine - Direct From Colo... ***************************************<br/>**... Australia 2.33777274 BTC 1072 62 2015-08-03 None 655.70 420.70 2.30 g 182.913043
1303 32434 http://agorahooawayyfoe.onion/p/s9BiTf6sLb 1G Peruvian Flake 90%+ Very strong 90% + straight from peru!<br/><br/... UK UK 0.90000000 BTC 1210 62 2015-08-03 s9BiTf6sLb.jpg 252.43 161.96 1.00 g 161.960000
1304 32435 http://agorahooawayyfoe.onion/p/vAub2UPnpY 0.5G Peruvian Flake 90%+ Very strong 90% + straight from peru!<br/><br/... UK UK 0.50000000 BTC 1210 62 2015-08-03 vAub2UPnpY.jpg 140.24 89.98 0.50 g 179.960000
1305 32436 http://agorahooawayyfoe.onion/p/gLZCdkJTk1 2G Peruvian Flake 90%+ Very strong 90% + straight from peru!<br/><br/... UK UK 1.80000000 BTC 1210 62 2015-08-03 gLZCdkJTk1.jpg 504.86 323.92 2.00 g 161.960000
1306 32437 http://agorahooawayyfoe.onion/p/eJeg1TcYfV 7G Peruvian Flake 40-50% Very strong Made with 90% + straight from peru... UK UK 1.60000000 BTC 1210 62 2015-08-03 eJeg1TcYfV.jpg 448.77 287.93 7.00 g 41.132857
1307 32438 http://agorahooawayyfoe.onion/p/Tmkxbje2jA 0.5G Peruvian Flake 40-50% Very strong Made with 90% + straight from peru... UK 0.13000000 BTC 1210 62 2015-08-03 Tmkxbje2jA.jpg 36.46 23.39 0.50 g 46.780000
1308 32439 http://agorahooawayyfoe.onion/p/aMtgWDaayn 3.5G Peruvian Flake 40-50% Very strong Made with 90% + straight from peru... UK UK 0.80000000 BTC 1210 62 2015-08-03 aMtgWDaayn.jpg 224.38 143.96 3.50 g 41.131429
1309 32440 http://agorahooawayyfoe.onion/p/ZDpKSmvNpK 2G Peruvian Flake 40-50% Very strong Made with 90% + straight from peru... UK UK 0.50000000 BTC 1210 62 2015-08-03 ZDpKSmvNpK.jpg 140.24 89.98 2.00 g 44.990000
1310 32441 http://agorahooawayyfoe.onion/p/CSAn7kK6Uv 3.5g Cocaine Good Quailty Pressed &pound;115 (... For sale we have Good Quality Pressed Colombia... UK UK ONLY 0.70489600 BTC 1112 62 2015-08-03 CSAn7kK6Uv.jpg 197.71 126.85 3.50 g 36.242857
1311 32442 http://agorahooawayyfoe.onion/p/ekkwUSxK7V 14g Cocaine Good Quailty Pressed FE &pound;370... For sale we have Good Quality Pressed Colombia... UK UK ONLY 2.01196300 BTC 1112 62 2015-08-03 ekkwUSxK7V.jpg 564.31 362.06 14.00 g 25.861429
1312 32443 http://agorahooawayyfoe.onion/p/vP9mRYnhj9 7g Cocaine Good Quailty Pressed FE &pound;200 ... For sale we have Good Quality Pressed Colombia... UK UK ONLY 1.08754800 BTC 1112 62 2015-08-03 vP9mRYnhj9.jpg 305.04 195.71 7.00 g 27.958571
1313 32444 http://agorahooawayyfoe.onion/p/Yltc0cGgyV 28g Cocaine Good Quailty Pressed FE &pound;700... For sale we have Good Quality Pressed Colombia... UK UK ONLY 3.81501000 BTC 1112 62 2015-08-03 Yltc0cGgyV.jpg 1070.03 686.53 28.00 g 24.518929
1314 32445 http://agorahooawayyfoe.onion/p/Szpctz2XwG Custom fizalis .<br/><br/><br/>*This bulk listing is for 500 ... Germany Europe 33.64975913 BTC 74 62 2015-08-03 Szpctz2XwG.jpg 9438.07 6055.47 63.59 g 95.226765
1315 32446 http://agorahooawayyfoe.onion/p/r1SRqgNEdn 1x EZ-TEST Cocaine Purity This test provides a quick determination of th... Australia Australia 0.07438367 BTC 20 62 2015-08-03 r1SRqgNEdn.jpg 20.86 13.39 0.14 g 95.642857
1316 32447 http://agorahooawayyfoe.onion/p/AKL3LysCWb 3.5g Crack Cocaine - FE 3.5g crack cocaine. Nothing but oils!<br/><br/... USA USA 1.23972796 BTC 931 62 2015-08-03 AKL3LysCWb.jpg 347.72 223.10 3.50 g 63.742857
1317 32448 http://agorahooawayyfoe.onion/p/jWs7dbWp2k 3.5G Peruvian Flake 90%+ Very strong 90% + straight from peru!<br/><br/... UK UK 2.30000000 BTC 1210 62 2015-08-03 jWs7dbWp2k.jpg 645.10 413.90 3.50 g 118.257143
1318 32449 http://agorahooawayyfoe.onion/p/W13VhmZLe7 1g Cocaine Good Quailty Pressed &pound;35 (UK... For sale we have Good Quality Pressed Colombia... UK UK ONLY 0.12890000 BTC 1112 62 2015-08-03 W13VhmZLe7.jpg 36.15 23.20 1.00 g 23.200000
1319 32450 http://agorahooawayyfoe.onion/p/RVjPauTt9b 1g Crack Cocaine - FE 1g crack cocaine. Nothing but oils!<br/><br/>I... USA USA 0.38962879 BTC 931 62 2015-08-03 RVjPauTt9b.jpg 109.28 70.12 1.00 g 70.120000
1320 32451 http://agorahooawayyfoe.onion/p/hh0b2Ko9dC Social Cocaine *1 g* Herzlich Willkommen,<br/><br/>hier ist unser S... Germany Europe 0.38962879 BTC 1201 62 2015-08-03 hh0b2Ko9dC.jpg 109.28 70.12 1.00 g 70.120000
1321 32452 http://agorahooawayyfoe.onion/p/e8o1oSMMyr 7G Peruvian Flake 90%+ Very strong 90% + straight from peru!<br/><br/... UK UK 4.50000000 BTC 1210 62 2015-08-03 e8o1oSMMyr.jpg 1262.16 809.80 7.00 g 115.685714

1322 rows × 16 columns

After all this data wranglig we can effectively calculate the amount of cocaine being sold on the site as well as further analyse the data set by price and grams.


In [40]:
print ('{} grams on sale for {} GBP'.format(
        cocaine_listings['cocaine_amount'].sum(), 
        round(cocaine_listings['GBP'].sum(), 2)))
print ('Average price: {} GBP per gram'.format(cocaine_listings['price_per_gram'].mean()))
print('Median price: {} GBP per gram'.format(cocaine_listings['price_per_gram'].median()))
#cocaine_listings['price_per_gram'].hist()
cocaine_plot_df = cocaine_listings[cocaine_listings['price_per_gram'] <1000 ]
#cocaine_plot_df['price_per_gram'].hist(x)

import matplotlib.pyplot as plt
plt.hist(cocaine_plot_df['price_per_gram'], bins=10)
plt.title("Price per gram of cocaine Histogram")
plt.xlabel("Price in GBP")
plt.ylabel("listed")
plt.show()


80382.39 grams on sale for 2069279.89 GBP
Average price: 95.26773128488627 GBP per gram
Median price: 57.816428571428574 GBP per gram

And now we can group our results by country of origin:


In [43]:
cocaine_plot_df.groupby('product_origin')['price_per_gram'].mean().sort_values().plot(
    kind='barh', 
    title='Average price of a gram of cocaine by country of origin')


Out[43]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dae4bdef0>

In [44]:
cocaine_plot_df.groupby('product_origin')['cocaine_amount'].sum().sort_values().plot(
    kind='barh', 
    title='grams of cocaine in sale by country of origin')


Out[44]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dae3e8da0>

While dealing with the gaps some outliers where generated, with prices per gram orders of magnitude higher than the mean. After analyzing that sample, we discovered that some of the listings where offereing KILOGRAMS of cocaine! After tunning the NLP algorithm to also understand kilograms (and ounces) the distribution was normalized again. The following code exposes a subset of some of those suprising listings (offering more than 500g):


In [45]:
cocaine_listings[[
        'product_id', 'product_name', 'vendor_id' ,
        'product_origin', 'GBP', 
        'cocaine_amount', 'cocaine_units']
        ][cocaine_listings['cocaine_amount'] > 500]


Out[45]:
product_id product_name vendor_id product_origin GBP cocaine_amount cocaine_units
150 26714 1kg Lidocaine 99.6% Pure 1038 UK 748.43 1000 g
193 26757 1 KILO AAA Fishscale Cocaine 459 USA 9632.72 1000 g
227 26791 1kg Highest Purity Cocaine - Direct From Colombia 1060 USA 28387.47 1000 g
314 26878 1kg Lidocaine 99.6% Pure **direct from source** 1038 353.35 1000 g
441 27005 *** Lidocaine HCL 99% PURE ** 1 KG ** 118 China 161.11 1000 g
589 27153 2kg Highest Purity Cocaine - Direct From Colombia 1060 USA 54410.45 2000 g
605 27169 1 KEY 1000g A+ &quot;Very High Cocaine Content... 594 USA 22675.91 1000 g
642 27206 *** Benzocaine HCL 99% PURE ** 1 KG ** 118 China 165.70 1000 g
749 27313 1 KILO COCAINE UNCUT FISHSCALE 92% FREE SHIPPING! 825 Netherlands 27882.29 1000 g
756 27320 5000g / 5kg 85%+ Cocaine, pure and UNCUT. Bulk... 597 Netherlands 114257.43 5000 g
779 27343 1kg Benzocaine 99.8% Pure **direct from source** 1038 304.48 1000 g
797 27361 900 GRAM COCAINE UNCUT FISHSCALE 92% FREE SHIP... 825 Netherlands 25413.08 900 g
873 27437 Cocaine / 1 KILO *Bulk Listing* 74 Germany 24320.97 1000 g
884 27448 10kg Highest Purity Cocaine - Direct From Colo... 1060 USA 246452.49 10000 g
914 27478 700 GRAM COCAINE UNCUT FISHSCALE 92% FREE SHIP... 825 Netherlands 20024.06 700 g
920 27484 1000 Gramm / 1kg Cocaine 90%+ FE 1081 Germany 25277.44 1000 g
953 27517 600 GRAM COCAINE UNCUT FISHSCALE 92% FREE SHIP... 825 Netherlands 17635.96 600 g
1041 27605 One brick. 1000gr. Flakecoke 729 Netherlands 25923.99 1000 g
1056 27620 ★☆★1 KG 9+ COKE ★☆☠839 Germany 28063.43 1000 g
1116 27680 Cocaine HQ ~87% - 1000G / 1KG - ***OPEN OFFER*... 724 Germany 19443.48 1000 g
1155 27719 800 GRAM COCAINE UNCUT FISHSCALE 92% FREE SHIP... 825 Netherlands 22367.11 800 g
1158 27722 5kg Highest Purity Cocaine - Direct From Colombia 1060 USA 129623.22 5000 g
1171 27735 1kg Benzocaine 99.8% Pure 1038 UK 709.69 1000 g
1258 30555 1000g / 1kg 85%+ Cocaine, pure and UNCUT. Bulk... 597 Netherlands 28230.83 1000 g
1265 30562 Cocaine HQ ~87% - 10KG - ***OPEN OFFER*** (FE... 724 Peru 80201.22 10000 g
1283 30580 Cocaine HQ ~87% - 5KG - ***OPEN OFFER*** (FE-... 724 Peru 48120.73 5000 g
1285 30582 Cocaine HQ ~87% - 1KG - ***OPEN OFFER*** (FE-... 724 Peru 12832.20 1000 g

HIGHLIGHTS

  • One listing offers 10kg of cocaine
  • We can see that some vendors offer cocaine shipping from different areas of the world (possibly there could be criminal organizations behind this sales

3.2 Drugs market totals

With the bitcoin values added to both GBP and USD, we can rank each of the drugs in total value, which gives a better picture of the size of the market than just the activity measured by the number of products listed in the site.


In [46]:
print ('Total Market Value: {} USD'.format(db.query('''SELECT SUM(USD) from tblProduct''').fetchone()[0]))


Total Market Value: 38982291.89 USD

In [47]:
all_products_df = db.pandas_query('''
    SELECT product_id, GBP, B.subCategory_name FROM tblProduct 
    JOIN tblSubCategory B
    ON B.subCategory_id = tblProduct.subCategory_id;
''')

all_products_df.groupby('subCategory_name')['GBP'].sum()\
    .sort_values(ascending=False)\
    .head(15)\
    .plot(kind='barh', title = 'Top 15 categories market value in GBP',
         x='Total GBP', y='Category')


Out[47]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3dadd6a6a0>

3.3 Illegal Documents

Up until now we have looked at the category as one single group in order to build a general picture of the market in terms of its size, its value, grographic distributions and time series.

In addition, we have also dug deeper onto the drug markets in a number of ways and described in much more detail one of the most popular areas of this particular market: The cocaine market

In this section we are going to focus instead on the different fake and illegal documents that are available for sale in this website. More recently, this topic has been covered on the media, relating to how different interantional terrorist groups have gained access to illegal documents that allow them to travel through ultiple countries using this fake documents.

3.3.1 Passports

We have seen records of passports on the database but we are not sure under which categories are they being labeled. Let's have a look:


In [48]:
passport_categories = db.pandas_query('''
    SELECT count(A.subCategory_id) as total_passports, B.subCategory_name FROM tblProduct A
    JOIN tblSubCategory B ON B.subCategory_id = A.subcategory_id
    WHERE A.product_name LIKE '%passport%'
    GROUP BY B.subcategory_name
    ORDER BY total_passports DESC;
    ''')
passport_categories


Out[48]:
total_passports subCategory_name
0 84 Scans/Photos
1 12 Physical documents
2 5 Other
3 2 Pirated
4 1 Money

Unfortunately there isn't a large dataset of passport items but still worth making an exploratory analysis.

After manually revising the databse entries I have learned that those passports under the only two categories containning fake passports are the following:

  • Physical documents: Fake passports are on offer, the vendors claim that they will be accepted by the authorities as real for being exact copies with all the safety features being replicated
  • Scans/photos: AS the title says, just scanned copies of real passports. Some vendors will add your personal details in.


In [49]:
passport_dataframe = db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE A.product_name LIKE '%passport%'
    AND B.subCategory_name = 'Physical documents'
    ORDER BY GBP DESC; 
    ''')
passport_dataframe


Out[49]:
product_id product_name GBP subCategory_name vendor_id
0 12599 EU, Asian, &amp; African Government Issued Pas... 2904.78 Physical documents 307
1 12493 Cloned UK/FRENCH/GERMAN PASSPORTS. 1:1 QUALITY! 2632.32 Physical documents 180
2 12533 International Passports for Different Countries 1937.19 Physical documents 307
3 12593 Genuine EU passports 1936.52 Physical documents 275
4 12496 Real Swiss Passport Customized to your needs 1880.23 Physical documents 180
5 12490 Cloned UK/French Passport. Private listing. 1316.16 Physical documents 180
6 12544 Lithuanian Passport+DL(ID) Premium quality 1162.31 Physical documents 205
7 12514 Lithuanian Passport 1:1 Superb quality 936.31 Physical documents 253
8 12554 Lithuanian Passport+DL(ID) Premium quality 904.02 Physical documents 205
9 12537 Lithuanian Passport 1:1 Good quality 839.45 Physical documents 253
10 12491 Cloned UK Passport. Private listing. 752.09 Physical documents 180
11 12596 US PASSPORT TAMPLATE 1.29 Physical documents 175

In [50]:
passport_dataframe = db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE A.product_name LIKE '%passport%'
    AND B.subCategory_name = 'Scans/photos'
    ORDER BY GBP DESC; 
    ''')
passport_dataframe


Out[50]:
product_id product_name GBP subCategory_name vendor_id
0 12780 Hacked files, passports, UK documents, CVs, ma... 1880.23 Scans/Photos 298
1 12774 Hacked files, passports, italian hacked docume... 1128.14 Scans/Photos 298
2 12746 Complete AU Virtual Identity VID, Passport Sca... 141.19 Scans/Photos 207
3 12750 Complete UK Virtual Identity VID, Passport Sca... 128.30 Scans/Photos 207
4 12754 Complete US Virtual Identity VID, Passport Sca... 128.30 Scans/Photos 207
5 12669 PSHQ UK Custom Passport &amp; Utility Bill Sca... 94.86 Scans/Photos 207
6 12667 PSHQ US Custom Passport &amp; Utility Bill Sca... 94.86 Scans/Photos 207
7 12643 PSHQ DE Custom Passport &amp; Utility Bill Sca... 94.86 Scans/Photos 207
8 12691 PSHQ POLISH Custom Passport &amp; Utility Bill... 94.86 Scans/Photos 207
9 12787 ★ ★ ★Customized ID, Passport and Utility... 74.11 Scans/Photos 151
10 12758 Netherlands passport and utility bill for daryum 64.47 Scans/Photos 188
11 12641 PSHQ CAN Custom Passport &amp; Utility Bill Sc... 63.89 Scans/Photos 207
12 12703 Custom HQ Passport Scan 56.79 Scans/Photos 199
13 12629 10 Passport Scan of the U.K. 54.85 Scans/Photos 357
14 12710 10 Passport Scan Italy 45.17 Scans/Photos 357
15 12624 10 Passport scan of the Netherlands 45.17 Scans/Photos 357
16 12785 ★ ★ ★ PASSPORT or DL SCANS , MANY COUNTR... 41.37 Scans/Photos 151
17 12618 10 Passport Scan of the Russian Federation 38.72 Scans/Photos 357
18 12671 10 Passport Scans of Venezuela 38.72 Scans/Photos 357
19 12760 FRANCE Passport+ID+DL scan copy 38.68 Scans/Photos 205
20 12706 10 Passport Scan France 35.49 Scans/Photos 357
21 12700 10 Passport Scan Colombia 32.27 Scans/Photos 357
22 12649 10 Passport Scan Germany 32.27 Scans/Photos 357
23 12725 Custom UK Passport ID Scanned Verification Doc... 32.27 Scans/Photos 201
24 12734 Custom USA Post 2007 Passport ID Scanned Verif... 32.27 Scans/Photos 201
25 12728 Custom Russia Passport ID Scanned Verification... 32.27 Scans/Photos 201
26 12744 Custom Australia Passport ID Scanned Verificat... 32.24 Scans/Photos 201
27 12762 Custom Malaysia Passport ID Scanned Verificati... 32.24 Scans/Photos 201
28 12741 Australia Passport+DL scan copy-real and valid 32.24 Scans/Photos 205
29 12755 Custom India Passport ID Scanned Verification ... 32.24 Scans/Photos 201
... ... ... ... ... ...
54 12678 1 Passport Scan France 6.45 Scans/Photos 357
55 12650 1 Passport Scan Italy 6.45 Scans/Photos 357
56 12681 1 Passport Scan Spain 6.45 Scans/Photos 357
57 12646 Passport Scan Sweden 5.16 Scans/Photos 347
58 12621 1 Passport Scan Poland 5.16 Scans/Photos 347
59 12674 1 Passport Scan Netherland 5.16 Scans/Photos 347
60 12658 1 Passport Scan Japan 5.16 Scans/Photos 347
61 12663 1 Passport Scan Austria 5.16 Scans/Photos 347
62 12688 1 Passport Scan Switzerland 5.16 Scans/Photos 347
63 12644 1 Passport Scan Ukraine 5.16 Scans/Photos 347
64 12648 1 Passport Scan Ireland 5.16 Scans/Photos 347
65 12705 1 Passport Scan Belgium 5.16 Scans/Photos 347
66 12651 Passport Scan Germany 5.16 Scans/Photos 347
67 12630 1 Passport Scan Egypt 5.16 Scans/Photos 347
68 12657 1 Passport Scan Finland 5.16 Scans/Photos 347
69 12682 1 Passport Scan South Africa 5.16 Scans/Photos 347
70 12723 1 Passport Scan Spain 5.16 Scans/Photos 347
71 12661 1 Passport Scan China 5.16 Scans/Photos 347
72 12687 1 Passport Scan Cyprus 5.16 Scans/Photos 347
73 12666 1 Passport Scan France 5.16 Scans/Photos 347
74 12617 1 Passport Scan Indea 5.16 Scans/Photos 347
75 12647 1 Passport Scan Israel 5.16 Scans/Photos 347
76 12670 1 Passport Scan Slovak 5.16 Scans/Photos 347
77 12707 1 Passport Scan Italy 5.16 Scans/Photos 347
78 12656 1 Passport Scan Mexico 5.16 Scans/Photos 347
79 12718 1 Passport Scan Argentina 5.16 Scans/Photos 347
80 12637 1 Passport Scan United States 5.16 Scans/Photos 347
81 12659 1 Passport Scan Russia 5.16 Scans/Photos 347
82 12684 1 Passport Scan England 5.16 Scans/Photos 347
83 12727 US PASSPORT TAMPLATE 1.29 Scans/Photos 175

84 rows × 5 columns

HIGHLIGHTS

  • A UK passport can be bought for as cheap as £752
  • If we are after a scanned passport we can get them from £5 or even buy them in bulks, with a wide range of countries to choose from
  • more interenstingly, the vendors for this type of documents seem to be very concentrated, which will prove usefull when we do our analysis on the vendors dataset

    ### 3.2.2 Fake IDs

When manually inspecting the 'Physical documents' dataset I noticed a pattern to classify fake ids:

- Fake IDs: They refer to fake US ID cards, offers include different states within the US

In [51]:
db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE A.product_name LIKE '%fake id%'
    AND B.subCategory_name = 'Physical documents'
    ORDER BY GBP DESC; 
    ''')


Out[51]:
product_id product_name GBP subCategory_name vendor_id
0 12470 Australian Fake ID 100 Points 450.88 Physical documents 352
1 12506 ♔ New York fake ID ♔ the one &amp; only!! ... 387.44 Physical documents 208
2 12489 Virginia Fake ID Template 322.86 Physical documents 350
3 12497 New York Fake ID Template 322.86 Physical documents 350
4 12602 REAL Australian Fake ID - Victoria w/ Correct ... 225.93 Physical documents 352
5 12477 ♔ New York fake ID ♔ the one &amp; only!! 128.82 Physical documents 208
6 12429 NY Fake ID 103.06 Physical documents 350
7 12418 HQ Massachusetts Fake ID. All Security Feature... 96.62 Physical documents 342
8 12424 South Carolina Fake ID *FULL ESCROW* (UV/2D+1D... 96.62 Physical documents 346
9 12382 (Police Passable) New Jersey Fake ID Passes Po... 96.62 Physical documents 343
10 12395 Florida Fake ID *FULL ESCROW* (UV/2D+1D/HOLOGR... 96.62 Physical documents 346
11 12505 [Fake ID] Indiana: Passes all security tests; ... 90.40 Physical documents 349
12 12471 [Fake ID] Massachusetts: Passes all security t... 90.18 Physical documents 349
13 12442 HQ Washington Fake ID. All Security Features. ... 83.73 Physical documents 342
14 12432 HQ Maine Fake ID. All Security Features. Great... 83.73 Physical documents 342
15 12390 HQ South Carolina Fake ID. All Security Featur... 83.73 Physical documents 342
16 12478 HQ Delaware Fake ID. All Security Features. Gr... 83.73 Physical documents 342
17 12422 HQ New Jersey Fake ID. All Security Features.... 83.73 Physical documents 342
18 12381 HQ Florida Fake ID. All Security Features. Gre... 80.51 Physical documents 342
19 12400 HQ Pennsylvania Fake ID. All Security Features... 80.51 Physical documents 342
20 12414 HQ Ohio Fake ID. All Security Features. Great ... 80.51 Physical documents 342
21 12430 HQ Arizona Fake ID. All Security Features. Gre... 80.51 Physical documents 342
22 12392 HQ Maryland Fake ID. All Security Features. Gr... 77.29 Physical documents 342
23 12455 HQ North Carolina Fake ID. All Security Featur... 77.29 Physical documents 342
24 12398 HQ Rhode Island Fake ID. All Security Features... 77.29 Physical documents 342
25 12482 HQ Connecticut Fake ID. All Security Features.... 77.29 Physical documents 342
26 12494 [Fake ID] Ohio: Passes all security tests; UV/... 71.03 Physical documents 349
27 12492 [Fake ID] New Jersey: Passes all security test... 71.03 Physical documents 349
28 12501 [Fake ID] South Carolina: Passes all security ... 71.03 Physical documents 349
29 12503 [Fake ID] Washington: Passes all security test... 71.03 Physical documents 349
30 12507 [Fake ID] Maryland: Passes all security tests;... 71.03 Physical documents 349
31 12486 [Fake ID] Maine: Passes all security tests; UV... 71.03 Physical documents 349
32 12428 [Fake ID] Delaware: Passes all security tests;... 70.85 Physical documents 349
33 12483 [Fake ID] Rhode Island: Passes all security te... 70.85 Physical documents 349
34 12465 [Fake ID] Illinois: Passes all security tests;... 70.85 Physical documents 349
35 12411 HQ Illinois Fake ID. All Security Features. Gr... 70.85 Physical documents 342
36 12604 HQ Maine Fake ID. UV + HOLO + SCANNABLE. 64.55 Physical documents 353
37 12611 HQ Deleware Fake ID. UV + HOLO + SCANNABLE. 64.53 Physical documents 353
38 12457 (Fake Drivers License) BEST Fake IDs on Agora ... 64.41 Physical documents 343
39 12481 Mississippi Driver&#39;s License (Fake ID) 64.41 Physical documents 354
40 12607 HQ Tennessee Fake ID. UV + HOLO + SCANNABLE. 48.41 Physical documents 353
41 12443 Fake ID: South Carolina (SC) Driver&#39;s License 45.09 Physical documents 351
42 12612 HQ New Jersey Fake ID. UV + HOLO + SCANNABLE. 41.95 Physical documents 353
43 12603 HQ Ohio Fake ID. UV + HOLO + SCANNABLE. 38.73 Physical documents 353
44 12502 NY Fake ID (Old &#86;ersion) 25.83 Physical documents 350
45 12484 Laser Engraving Service For Fake IDs 9.66 Physical documents 350
  • National IDs: A wide variety of nationallity IDs are on offer but mostly european.

In [52]:
db.pandas_query('''    
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE A.product_name LIKE '%national id%'
    AND B.subCategory_name = 'Physical documents'
    ORDER BY GBP DESC;
    ''')


Out[52]:
product_id product_name GBP subCategory_name vendor_id
0 12529 Fake German National ID Card (Personalausweis)... 442.32 Physical documents 355
1 12540 Fake French National ID - 1:1 Bank Quality Rep... 442.32 Physical documents 355
2 12511 Fake Italian National ID - 1:1 Bank Quality Re... 248.61 Physical documents 355
3 12528 Fake Netherlands (Dutch) National ID Card - Hi... 248.61 Physical documents 355
4 12569 Fake Romanian National ID - 1:1 Bank Quality R... 248.61 Physical documents 355
5 12526 Spain National ID card 193.72 Physical documents 185
6 12562 Singapore National ID Card 142.06 Physical documents 185
7 12560 Austrian National ID Card 142.06 Physical documents 185
8 12591 French National ID Card 142.01 Physical documents 185
9 12589 German National ID Card 142.01 Physical documents 185

HIGHLIGHTS

  • Once again there seems to be a limited number of vendors for this type of product, this will prove useful later when we carry out our study on the vendors datasets.
  • Fake ids can be bought for as cheap as £142 but for an european ID even cheaper for US state ID cards with prices ranging between £25 and £92
  • It is relevant to mention that any EU ID card would allow the potential buyer to travel through any country in the EU, open bank accounts or even register for a National Insurrance number.
  • The total number of listings for fake Ids and passports is 65 but acording to some of the descriptions, these vendors claim to be able to produce any number of them. They can personalize any detail fo the fake document

In [53]:
id_description = db.query('''    
    SELECT A.product_description
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE A.product_name LIKE '%National id%'
    AND B.subCategory_name = 'Physical documents'
    ORDER BY GBP DESC;
    LIMIT 1
    ''').fetchone()[0]

display(HTML(id_description))


German Fake National ID Card (Personalausweis).

Description:

We are especially proud of our German National ID Card Replica, as we have gone to painstaking efforts to replicate its many advanced security features. This ID was issued until Mid-2010, and is in current circulation, and valid until 2020. This ID includes: Watermarked paper which is visible when held to the light, Green Holographic of the Imperial Eagle and features across the photo face, Holograhpic MRZ text, and an Identigram (Holographic photo of the actual ID holders face). It also includes UV features on the front and rear of the ID that appear under black light. This IDs MRZ area (Machine Readable Zone) will also scan. This ID is bank quality, and will pass visual inspection anywhere you use it.

Price:

$685 US Dollars

Shipping:

Included in the price. IDs ship via 1st class Air Mail. Shipping Times are: 3-5 Working Days within Europe, 5-7 Working Days to North America, 10-14 Days all other locations. Tracked and courier shipping is not available.

Terms and Conditions: Due to Bitcoin value fluctuation, and Agora market stability, Pre-Payment (FE) in full is required to order from us. Orders must be finalized at the time of purchase, or they will be canceled. We will produce and ship your ID within 1 working day (We work Monday to Friday).

***All prices terms and conditions are strictly non-negotiable***

To Order:

Please prepare a Passport style photograph, and signature sample in .jpg format and upload them to anonfiles.com or sendspace.com and send us the link along with the following order data:

Shipping Name (First and Last)
Shipping Address
Type of ID
ID Last Name (Name)
ID First Name (Vorname)
ID Middle Name(s) (Mittlename)
Date of Birth
City of Birth
ID Address
Height in Centimeters
Eye Colour
ID Issue Date (latest mid 2010)
ID Expiry Date (Latest December 2020)
Issuing Authority

Any data you do not provide will be randomly generated.

***Please Note: we are NOT responsible for your spelling or formatting mistakes, so please check your order details VERY carefully before submitting, as we will not do free re-prints if the mistake was in your order submission, and you will not have a chance to change it once submitted as we work very quickly to get your ID printed and shipped to you***

Any Questions? Please contact us - we are here to help!

Best Regards,

Flawless Fake IDs
Brought to you by:

3.2.3 Driving licenses

From the initial exploration, another relevant family of fake document was found in the form of driving Licenses:


In [54]:
db.pandas_query('''    
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE B.subCategory_name = 'Physical documents'
    AND (A.product_name LIKE '%drivers license%'
        OR A.product_name LIKE '%driving licence%'
        OR A.product_name LIKE '%drivers licence%')
    
    ORDER BY GBP DESC;
    ''')


Out[54]:
product_id product_name GBP subCategory_name vendor_id
0 12541 EU Drivers License (Genuine and System Registe... 3228.64 Physical documents 307
1 12548 Custom Listing 12 X VIC Drivers License 1937.19 Physical documents 352
2 12583 EU Drivers License (Physical - Not Scans) 419.58 Physical documents 307
3 12469 (NEW) New York Drivers License 322.06 Physical documents 343
4 12592 Fake UK Driving Licence and Paper Counterpart ... 284.02 Physical documents 355
5 12523 Fake Italian Drivers Licence - 1:1 Bank Quality 248.61 Physical documents 355
6 12547 Fake UK Driving Licence - 1:1 Bank Quality Rep... 248.61 Physical documents 355
7 12539 Fake Netherlands (Dutch) Driving Licence - Hig... 248.61 Physical documents 355
8 12578 Fake Irish Driving Licence - 1:1 Bank Quality ... 248.52 Physical documents 355
9 12590 Fake French Driving Licence - UV and OVI secur... 248.52 Physical documents 355
10 12588 Romanian Driving Licence - 1:1 Bank Quality Re... 248.52 Physical documents 355
11 12555 US Pennsylvania Drivers License 226.00 Physical documents 185
12 12538 US South Carolina Drivers License 226.00 Physical documents 185
13 12466 US Delaware Drivers License 225.44 Physical documents 185
14 12416 US Rhode Island Drivers License 225.44 Physical documents 185
15 12474 US Arizona Drivers License 225.44 Physical documents 185
16 12597 French Drivers Licence 200.11 Physical documents 185
17 12462 New South Wales Drivers Licence 161.03 Physical documents 344
18 12565 Latvia Drivers Licence 142.06 Physical documents 185
19 12571 AUS VIC Drivers Licence 142.06 Physical documents 185
20 12534 Denmark Drivers Licence 142.06 Physical documents 185
21 12520 Singapore Drivers Licence 142.06 Physical documents 185
22 12570 New Zealand Drivers Licence 142.06 Physical documents 185
23 12532 Italian Drivers Licence 142.06 Physical documents 185
24 12545 Israel Drivers Licence 142.06 Physical documents 185
25 12549 Swedish Drivers Licence 142.06 Physical documents 185
26 12519 Spain Drivers Licence 142.06 Physical documents 185
27 12572 Poland Drivers Licence 142.06 Physical documents 185
28 12543 Czech Drivers Licence 142.06 Physical documents 185
29 12601 Hungary Drivers Licence 142.01 Physical documents 185
... ... ... ... ... ...
37 12407 Alberta Drivers License 96.62 Physical documents 344
38 12423 Ohio Drivers License 96.62 Physical documents 344
39 12439 Tennessee Drivers License 96.62 Physical documents 344
40 12451 Ontario Drivers License 96.62 Physical documents 344
41 12388 Florida Drivers License 96.62 Physical documents 344
42 12401 South Carolina Drivers License 96.62 Physical documents 344
43 12472 New Jersey Drivers License 96.62 Physical documents 344
44 12476 Quebec Drivers License 96.62 Physical documents 344
45 12387 Michigan Drivers License 96.62 Physical documents 344
46 12454 Connecticut Drivers License 96.62 Physical documents 344
47 12463 Colorado Drivers License 96.62 Physical documents 344
48 12425 Rhode Island Drivers License 96.62 Physical documents 344
49 12445 1pcs real physical Dutch drivers license 93.40 Physical documents 348
50 12527 NZ Drivers License [Escrow] 64.57 Physical documents 345
51 12391 California Drivers License [Escrow] 64.41 Physical documents 345
52 12457 (Fake Drivers License) BEST Fake IDs on Agora ... 64.41 Physical documents 343
53 12449 NSW Drivers License [Escrow] 64.41 Physical documents 345
54 12399 New York Drivers License [Escrow] 64.41 Physical documents 345
55 12610 Washington Drivers License. UV + HOLO + SCANNA... 61.32 Physical documents 353
56 12568 NZ Drivers License [FE] 51.66 Physical documents 345
57 12473 New York Drivers License [FE] 51.53 Physical documents 345
58 12431 California Drivers License [FE] 51.53 Physical documents 345
59 12415 NSW Drivers License [FE] 51.53 Physical documents 345
60 12608 Old New York Drivers License. UV + HOLO + SCAN... 32.28 Physical documents 353
61 12504 Pennsylvania Drivers License. SCAN + HOLO. !... 19.37 Physical documents 353
62 12485 South Carolina Drivers License. SCAN + HOLO. ... 19.32 Physical documents 353
63 12467 South Carolina Drivers License. SCAN + HOLO. ... 12.88 Physical documents 353
64 12487 North Carolina Drivers License. Scannable. Fre... 9.69 Physical documents 353
65 12495 Ohio Drivers License. Scannable. Free Duplicate. 9.69 Physical documents 353
66 12488 Florida Drivers License. Scannable. Free Dupli... 9.69 Physical documents 353

67 rows × 5 columns

HIGHLIGHTS

  • Again, the vendors are very concentrated and the IDs seem to be similar from what we have seen in the past
  • The vast majority of them are US valid for a number of states. Prices for those licenses range between £51 pounds up to £300. For an European driving license we could find one for £419 but more immpresively, in one of the listings, the vendor claims the licenses come directly from the traffic office:

In [56]:
display(HTML(db.query('''
     SELECT product_description FROM tblProduct WHERE product_id = 12541;''').fetchone()[0]))


EU Driving License (Genuine and System Registered) for EU nationals and residents only.
This listing is different from our fake DL listing. This is directly from the traffic office and the officials need 30-40 days to process it.

Your data will be registered in the traffic office, and the DL issued without you going for any driving test or living in the country.
In addition to other information you will provide for issuing the license, you must send a scanned copy of your passport or ID, passport photograph and signature scan. Other required documents: local address/residency proof and medical forms will be provided by our partners.

Please don't buy this main listing; this service must be paid in 2 parts as stated below:
First Custom listing: $3,000 (Paid to start the process).
Second Custom Listing: $2,000 (Paid once we show you the pictures/video and you confirm it before shipping).

Please don't write us asking for the country, we won't tell you. You will get that information after buying the first custom listing.
Brought to you by:


After reading the description we can see that the vendor is attempting to drag the user outside of the website to pay a much higher price: £3000 up front + £2000 once the license is ready to be shipped. Readers may find more disturbing the fact is that the vendor claims having connections with a traffic registration office in order to validate the license into legal course. This liting being real, would have implications beyond the problem of fake documents, as this license would be at all effects perfectly legal.

3.2.4 Other Documents

We have covered the main fake docuemtns we were interested about for this research, but what other documents could possibly be listed in this market that we might be overlooking?


In [4]:
count_of_documents = db.query('''
    SELECT count(*)
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id  
    WHERE B.subCategory_name = 'Physical documents';
    ''').fetchone()[0]
print ('{} total document listings'.format(count_of_documents))


234 total document listings

For the purpose of analysing the reminder of the document listings, and with the idea of applying this to other categories, the following code builds on the algorithms previously build to do the Natural Language processing, this time focusing on extracting the nouns:


In [5]:
def nlp_noun_extractor(series):
    '''Given a Series object of strings, applies a tagging 
    algorithm to obtain the POS value of each word in the 
    Series, returning a new series with the broken down strings'''
    tagged_series = series_tagger(series)
    noun_series = tagged_series.apply(noun_tag_extractor)
    words = [word for l in noun_series for word in l if word.isalpha()]
    words_freqs = {}
    
    for word in words:
        if words_freqs.get(word):
            words_freqs[word] += 1
        else :
            words_freqs[word] = 1
            
    return words_freqs


def noun_tag_extractor(tag_list):
    '''Given a list of words and POS tags, returns a list
    containing just the NN tags'''
    return [word for word,pos in tag_list if pos.startswith('NN')]
    
    
def series_tagger(series):
    '''Given a Series object S of type strings, returns a Series of
    all the tokenized strings from S'''
    tokenized_series = series.apply(nltk.word_tokenize)
    tagged_series = tokenized_series.apply(nltk.pos_tag)
    
    return tagged_series

In [6]:
fake_docs = db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id
    WHERE B.subCategory_name = 'Physical documents'
    AND (A.product_name LIKE '%drivers license%'
    OR A.product_name LIKE '%driving licence%'
    OR A.product_name LIKE '%drivers licence%'
    OR A.product_name LIKE '%passport%'
    OR A.product_name LIKE '%fake id%'
    OR A.product_name LIKE '%resident id%'
    OR A.product_name LIKE '%passport%')
    ORDER BY GBP DESC;
    ''')

other_docs = db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, A.product_description, B.subCategory_name, A.vendor_id 
    FROM dark_web.tblProduct A 
    JOIN tblSubCategory B ON B.subCategory_id = A.subCategory_id
    WHERE B.subCategory_name = 'Physical documents'
    AND product_id not in {};
    '''.format(tuple(i for i in fake_docs['product_id'])))

Now we can build a nice word cloud from all those words


In [7]:
words_freqs = nlp_noun_extractor(other_docs['product_name'])
wordcloud = WordCloud().generate_from_frequencies(words_freqs.items())
plt.imshow(wordcloud)
plt.axis("off");



In [61]:
all_product_names = db.pandas_query('''
    SELECT product_name from tblProduct;
    ''')
wf = nlp_noun_extractor(all_product_names['product_name'])

In [62]:
wc = WordCloud().generate_from_frequencies(wf.items())
plt.imshow(wc)
plt.axis('off')


Out[62]:
(-0.5, 399.5, 199.5, -0.5)

3.3 Other Categories

All along this section we have analysed the value of the market in different dimensions, but all this analysis has been done on the domain of drugs andfake ids. For pure completeness, we can apply a similar analysis to better understand what lies beyond


In [5]:
drugs_ids = db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.vendor_id FROM tblProduct A
    JOIN tblSubCategory B
    ON B.subCategory_id = A.subCategory_id
    WHERE A.subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Scans/photos', 'Physical documents')
        );
    ''')

In [6]:
other_categories = db.pandas_query('''
    SELECT A.product_id, A.product_name, A.GBP, B.subCategory_name, A.product_origin, A.vendor_id FROM tblProduct A
    JOIN tblSubCategory B
    ON B.subCategory_id = A.subCategory_id
    WHERE A.subCategory_id NOT IN
    (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Scans/photos', 'Physical documents')
        );
    '''.format(tuple(set(cat for cat in drugs_ids['subCategory_name']))))

In [7]:
other_categories_df = other_categories[['subCategory_name', 'GBP']]
other_categories\
    .groupby('subCategory_name')['GBP']\
    .sum()\
    .sort_values(ascending=False)\
    .plot(kind='barh', title='Other markets by GBP value')


Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0d5736bb00>

In [8]:
other_categories\
    .groupby('subCategory_name')['GBP']\
    .mean()\
    .sort_values(ascending=False)\
    .plot(kind='barh', title='Other markets average GBP value')


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0d57235940>

In [9]:
db.pandas_query('''
        SELECT * FROM dark_web.tblProduct
        WHERE subCategory_id in
            (select subCategory_id from tblSubCategory
                where subCategory_name = 'Software')
        ORDER BY GBP;
    ''')


Out[9]:
product_id product_model product_name product_description product_origin product_postage product_price vendor_id subCategory_id time_stamp product_picture USD GBP
0 32583 http://agorahooawayyfoe.onion/p/XbemC1cpD4 HotSpot Shield Elite VPN v.4.15.3 HotSpot Shield VPN v.4.15.3<br/><br/><br/>Bene... USA 0.00348603 BTC 654 77 2015-08-03 XbemC1cpD4.jpg 0.98 0.63
1 12169 http://agorahooawayyfoe.onion/p/zNE6jDuyNG &#87;indows XP SP3 Lite Netbook Edition &#87;indows XP SP3 Lite Netbook Edition of &#1... Everywhere 0.00338600 BTC 214 77 2015-07-26 zNE6jDuyNG.jpg 0.99 0.64
2 12170 http://agorahooawayyfoe.onion/p/ThAdNhUhzq Microsoft Office 2007 Enterprise Blue Edition What exactly is Microsoft Office 2007 Enterpri... Everywhere 0.00338600 BTC 214 77 2015-07-26 ThAdNhUhzq.jpg 0.99 0.64
3 12183 http://agorahooawayyfoe.onion/p/DbrUk4BGgh Adobe Photoshop CS2 - With KeyGen Mount/burn the iso image, install Photoshop as... Everywhere 0.00338600 BTC 214 77 2015-07-26 DbrUk4BGgh.jpg 0.99 0.64
4 12185 http://agorahooawayyfoe.onion/p/Mzzwdit0jr Microsoft Genuine Tools v1.0 (All Activators) A collection of all the activators for the OS ... Everywhere 0.00338600 BTC 214 77 2015-07-26 Mzzwdit0jr.jpg 0.99 0.64
5 12189 http://agorahooawayyfoe.onion/p/ssWb9ERKwZ iWork &#39;09 (no key needed) iWork, Apple&#39;s productivity suite, is the ... Everywhere 0.00338600 BTC 214 77 2015-07-26 ssWb9ERKwZ.jpg 0.99 0.64
6 12191 http://agorahooawayyfoe.onion/p/EMBrPVbfBq WinRAR v5.11 Final (x86/x64) - PreActivated WinRAR is a powerful archive manager. It can b... Everywhere 0.00338600 BTC 214 77 2015-07-26 EMBrPVbfBq.jpg 0.99 0.64
7 12194 http://agorahooawayyfoe.onion/p/UPKYeY3zdB Your Uninstaller PRO v7.5.2014.03 Description: <br/>You&rsquo;ve probably encoun... Everywhere 0.00338600 BTC 214 77 2015-07-26 UPKYeY3zdB.jpg 0.99 0.64
8 12204 http://agorahooawayyfoe.onion/p/Sd4B3MVlje Adobe Photoshop CS6 Portable Adobe Photoshop CS6 Extended Portable<br/><br/... Everywhere 0.00338207 BTC 214 77 2015-07-26 Sd4B3MVlje.jpg 0.99 0.64
9 12206 http://agorahooawayyfoe.onion/p/gRKL66TAnt Pocket Tanks Deluxe v1.3 - ALL Weapon Packs This download is an installer with ALL the wea... Everywhere 0.00338207 BTC 214 77 2015-07-26 gRKL66TAnt.jpg 0.99 0.64
10 12209 http://agorahooawayyfoe.onion/p/UE4ReLT9Pe &#87;indows XP Pro SP3 32-bit - Black Edition &#87;indows XP Professional SP3 Black Edition ... Everywhere 0.00338207 BTC 214 77 2015-07-26 UE4ReLT9Pe.jpg 0.99 0.64
11 12214 http://agorahooawayyfoe.onion/p/eFdAKGFwrb Microsoft Office 2008 for Mac Microsoft Office 2008 for Mac is a &#118;ersio... Everywhere 0.00338207 BTC 214 77 2015-07-26 eFdAKGFwrb.jpg 0.99 0.64
12 12218 http://agorahooawayyfoe.onion/p/XBw5raEEPg Sega Megadrive/Genesis Emulator + 843 Games This was originally made by someone else, I di... Everywhere 0.00338207 BTC 214 77 2015-07-26 XBw5raEEPg.jpg 0.99 0.64
13 12219 http://agorahooawayyfoe.onion/p/K2JKTXlmhs Antidetect FF Browser: Ver 5.21.1.50 Best For ... make your carding 95% valid rate its the best ... USA 0.00338207 BTC 289 77 2015-07-26 None 0.99 0.64
14 12220 http://agorahooawayyfoe.onion/p/ckPo7wj4DL List of Android Hacking apps A list of all the tools you can use to hack an... 0.00338207 BTC 17 77 2015-07-26 ckPo7wj4DL.jpg 0.99 0.64
15 12223 http://agorahooawayyfoe.onion/p/o9AkLmcnF6 &#87;indows 7 (SP1) 32/64 bit (ACTIVATED) This is the most complete release which includ... Everywhere 0.00338207 BTC 214 77 2015-07-26 o9AkLmcnF6.jpg 0.99 0.64
16 12224 http://agorahooawayyfoe.onion/p/JKfeCaomhB Microsoft Visual &#66;asic 6.0 Enterprise Edition Visual &#66;asic (VB) is the third-generation ... Everywhere 0.00338207 BTC 214 77 2015-07-26 JKfeCaomhB.jpg 0.99 0.64
17 12231 http://agorahooawayyfoe.onion/p/uan47eSwxC Office 2011 for Mac (retail &#118;ersion) Make the Most of Every Minute <br/>Create docu... Everywhere 0.00338207 BTC 214 77 2015-07-26 uan47eSwxC.jpg 0.99 0.64
18 12238 http://agorahooawayyfoe.onion/p/YrBjq5fg3t Navicat Premium Enterprise v9.1.8 INCLUDES SERIAL AND KEYGEN!<br/><br/>All-In-On... Everywhere 0.00338207 BTC 214 77 2015-07-26 YrBjq5fg3t.jpg 0.99 0.64
19 12244 http://agorahooawayyfoe.onion/p/bB3Mm7nk8n 99MB &#87;indows XP - MicroXP v0.82 With a &#87;INDOWS folder that is only 199Mb a... Everywhere 0.00338207 BTC 214 77 2015-07-26 bB3Mm7nk8n.jpg 0.99 0.64
20 12245 http://agorahooawayyfoe.onion/p/b8ihR3G9Sn Roller Coaster Tycoon 2 DESCRIPTION: <br/><br/>Now you can be a roller... Everywhere 0.00338207 BTC 214 77 2015-07-26 b8ihR3G9Sn.jpg 0.99 0.64
21 12247 http://agorahooawayyfoe.onion/p/d4mzBylbsx Microsoft Office Word 2007 (Portable) NO INSTALLATION NEEDED! JUST EXTRACT AND CLICK... Everywhere 0.00338207 BTC 214 77 2015-07-26 d4mzBylbsx.jpg 0.99 0.64
22 12248 http://agorahooawayyfoe.onion/p/NZSiftSUc7 Command &amp; Conquer Tiberian Sun + Firestorm... INSTALLATION:<br/>Extract with winrar or 7-zip... Everywhere 0.00338207 BTC 214 77 2015-07-26 NZSiftSUc7.jpg 0.99 0.64
23 12260 http://agorahooawayyfoe.onion/p/FSUq0JfmTU Adobe Photoshop CS3 Portable Instructions: <br/>Just simply downlaod the ra... Everywhere 0.00338415 BTC 214 77 2015-07-26 FSUq0JfmTU.jpg 0.99 0.64
24 12263 http://agorahooawayyfoe.onion/p/yBC8g9jCaN RemoveWAT 2.2.6: Activator for All &#87;indows... RemoveWAT can crack &#119;indows 7 ultimate, e... Everywhere 0.00338415 BTC 214 77 2015-07-26 yBC8g9jCaN.jpg 0.99 0.64
25 12269 http://agorahooawayyfoe.onion/p/VP3dcYWNgu &#87;indows XP Pro SP3 Activated + passes WGA This is &#87;indows XP Pro with SP3 (English).... Everywhere 0.00338415 BTC 214 77 2015-07-26 VP3dcYWNgu.jpg 0.99 0.64
26 12301 http://agorahooawayyfoe.onion/p/lmUT0Tfj7v &#87;indows 8 eLearning Kit For Dummies &#87;indows 8 eLearning Kit For Dummies\r\n ... 0.00341833 BTC 11 77 2015-07-26 lmUT0Tfj7v.jpg 1.00 0.64
27 12307 http://agorahooawayyfoe.onion/p/p1Fbjr6i1D Beginning HTML5 and CSS3 For Dummies Paperback<br/>384 pages<br/>Price @ http://www... 0.00341833 BTC 11 77 2015-07-26 p1Fbjr6i1D.jpg 1.00 0.64
28 12321 http://agorahooawayyfoe.onion/p/dd308awGP3 Excel 2013 For Dummies Excel 2013 For Dummies\r\n <br/>\r\n Bro... 0.00341530 BTC 11 77 2015-07-26 dd308awGP3.jpg 1.00 0.64
29 12299 http://agorahooawayyfoe.onion/p/C3g8eGbXCf Microsoft Access 2013 Step by Step Experience learning made easy&mdash;and quickl... 0.00444383 BTC 11 77 2015-07-26 C3g8eGbXCf.jpg 1.30 0.84
... ... ... ... ... ... ... ... ... ... ... ... ... ...
154 12282 http://agorahooawayyfoe.onion/p/yJrBLChkpk Email List Management Tool This software is the swiss-army knife for mana... 0.17091679 BTC 307 77 2015-07-26 yJrBLChkpk.jpg 49.85 32.14
155 12179 http://agorahooawayyfoe.onion/p/ZuvJPDNX17 (Must Have For ALL Tor Users) Anonymous VPN So... ARE YOU USING TOR WITHOUT A VPN? IF YOUR ANSWE... 0.17101032 BTC 188 77 2015-07-26 ZuvJPDNX17.jpg 49.87 32.15
156 12217 http://agorahooawayyfoe.onion/p/foqazBvlcE Microsoft &#87;indows &#83;erver 2008 with Ser... 32-bit &#118;ersion request by customer.<br/>L... 0.18789286 BTC 332 77 2015-07-26 foqazBvlcE.jpg 54.80 35.33
157 12215 http://agorahooawayyfoe.onion/p/a9Jl8LAZLz 250 SOCKS5 PROXIES ANY COUNTRY NOOB FRIENDLY I&#39;LL GIVE YOU AN EASY TO USE SOFTWARE THAT... 0.19814156 BTC 22 77 2015-07-26 a9Jl8LAZLz.jpg 57.79 37.26
158 12181 http://agorahooawayyfoe.onion/p/lTGSeLBcUk 1 MONTH 250 SOCKS PROXIES + SOFTWARE EASY TO USE I&#39;LL GIVE YOU AN EASY TO USE SOFTWARE THAT... 0.20517819 BTC 188 77 2015-07-26 lTGSeLBcUk.jpg 59.84 38.58
159 32585 http://agorahooawayyfoe.onion/p/NbVz7lELYE Verifone SDK Skimmer / Skimming Software Everything you need to crack a verifone pos an... 0.26409380 BTC 269 77 2015-08-03 None 74.07 47.53
160 12332 http://agorahooawayyfoe.onion/p/lTijPcKrbd Custom listing Microsoft Visio Standard 2013 :... Custom listing use combination disc image SW_D... 0.25614754 BTC 332 77 2015-07-26 None 74.70 48.16
161 12235 http://agorahooawayyfoe.onion/p/rAYjFyy33M Microsoft Office Professional Plus 2013 : legi... Legitimate and fully online activate-able.<br/... 0.25621754 BTC 332 77 2015-07-26 rAYjFyy33M.jpg 74.72 48.17
162 12246 http://agorahooawayyfoe.onion/p/rqC7f42zvY Over 400 SEO Programs &amp; Plugins Including ... 1clickcashbot<br/>A<br/>AAC Trackback Submitte... 0.25621754 BTC 187 77 2015-07-26 rqC7f42zvY.jpg 74.72 48.17
163 12222 http://agorahooawayyfoe.onion/p/MtEMi9UJnm 1000 Real Facebook Likes (Fan/Business Page) Better marketing of your brand &ndash; This ca... 0.27329871 BTC 32 77 2015-07-26 MtEMi9UJnm.jpg 79.70 51.39
164 12177 http://agorahooawayyfoe.onion/p/U3YRCZ3WhR Microsoft &#87;indows &#83;erver 2012 R2 Stand... Legitimate and fully online activate-able.<br/... 0.27361652 BTC 332 77 2015-07-26 U3YRCZ3WhR.jpg 79.80 51.45
165 12257 http://agorahooawayyfoe.onion/p/cf3z6gECKm Everyone should buy this! -Tails Encryted USB ... Tails Encryted USB Sick 100% Annonymity - 32GB... UK world 0.30000000 BTC 186 77 2015-07-26 cf3z6gECKm.jpg 87.49 56.41
166 12232 http://agorahooawayyfoe.onion/p/SxVm77dpux (Must Have For ALL Tor Users) Anonymous VPN So... ARE YOU USING TOR WITHOUT A VPN? IF YOUR ANSWE... 0.34162339 BTC 188 77 2015-07-26 SxVm77dpux.jpg 99.63 64.23
167 12175 http://agorahooawayyfoe.onion/p/lLcAaavx0z Microsoft Visio Professional 2013 : legitimate... Legitimate and fully online activate-able.<br/... 0.34202065 BTC 332 77 2015-07-26 lLcAaavx0z.jpg 99.75 64.31
168 12168 http://agorahooawayyfoe.onion/p/vUJ0T76nAF Everyone should buy this! - Tails Encryted USB... Tails Encryted USB Sick 100% Annonymity - 128G... UK world 0.35000000 BTC 186 77 2015-07-26 vUJ0T76nAF.jpg 102.07 65.81
169 12258 http://agorahooawayyfoe.onion/p/fcwqr9ZCcB Microsoft &#87;indows &#83;erver 2012 R2 Datac... Legitimate and fully online activate-able.<br/... 0.41020031 BTC 332 77 2015-07-26 fcwqr9ZCcB.jpg 119.63 77.13
170 12180 http://agorahooawayyfoe.onion/p/r56Rpvt5j4 Microsoft Project Professional 2013 : legitima... Legitimate and fully online activate-able.<br/... 0.41042478 BTC 332 77 2015-07-26 r56Rpvt5j4.jpg 119.70 77.17
171 12208 http://agorahooawayyfoe.onion/p/PAqYWLTvCE Smtp Scanner-Cracker Unlimited license for one very good SMTP Scann... 0.47827275 BTC 165 77 2015-07-26 PAqYWLTvCE.jpg 139.48 89.93
172 12346 http://agorahooawayyfoe.onion/p/GMCnbdlAbL 1 Million Belgium Email Database 1 Million Belgium Email Database. Email Addres... 0.51229508 BTC 250 77 2015-07-26 GMCnbdlAbL.jpg 149.40 96.32
173 12205 http://agorahooawayyfoe.onion/p/vmoupZGfpJ Microsoft &#87;indows 8.1 Enterprise : legitim... Legitimate and fully online activate-able.<br/... 0.51243509 BTC 332 77 2015-07-26 vmoupZGfpJ.jpg 149.45 96.35
174 12254 http://agorahooawayyfoe.onion/p/jLMfd0g5hr SEO PowerSuite Enterprise - $699 Value! Turn your website into an unshakable search en... 0.51243509 BTC 187 77 2015-07-26 jLMfd0g5hr.jpg 149.45 96.35
175 12271 http://agorahooawayyfoe.onion/p/elwyyEr5Ck ♔ ID templates ♔ the one &amp; only !! The only templates out there you will ever nee... USA 0.68366719 BTC 208 77 2015-07-26 elwyyEr5Ck.jpg 199.38 128.55
176 12275 http://agorahooawayyfoe.onion/p/CZRkM0qq7K ♔ cc templates ♔ the one &amp; only !! The only templates out there you will ever nee... USA 0.68366719 BTC 208 77 2015-07-26 CZRkM0qq7K.jpg 199.38 128.55
177 12338 http://agorahooawayyfoe.onion/p/jcrkUl6T2A 2 Million Canadian Email Database 2 Million Canadian Email Database. Email Addre... 0.85382513 BTC 250 77 2015-07-26 jcrkUl6T2A.jpg 249.01 160.54
178 12243 http://agorahooawayyfoe.onion/p/ajkxuCamvR Facebook hacking service CUSTOM Website hacking - Get passwords, e-mail... 1.70811697 BTC 32 77 2015-07-26 ajkxuCamvR.jpg 498.15 321.17
179 12250 http://agorahooawayyfoe.onion/p/eeAwwqewMD Facebook hacking service CUSTOM Website hacking - Get passwords, e-mail... 1.70811697 BTC 32 77 2015-07-26 eeAwwqewMD.jpg 498.15 321.17
180 12310 http://agorahooawayyfoe.onion/p/YxKwaTRN7W Cryptocurrency Exchange Market Arbitrage Script Cryptocurrency Exchange Market Arbitrage Scrip... 1.70916797 BTC 250 77 2015-07-26 YxKwaTRN7W.jpg 498.46 321.36
181 12272 http://agorahooawayyfoe.onion/p/Ki8goEhd1g ♔ decoding audio software ♔ the one &amp; ... This is our universal Decoding Audio Software ... USA 3.41833595 BTC 208 77 2015-07-26 Ki8goEhd1g.jpg 996.91 642.73
182 12270 http://agorahooawayyfoe.onion/p/SeJfXYsWPk ★ EMAIL DATABASE ★ Most Comp&#108;ex on th... MESSAGE US WITH WHAT YOU ARE INTERESTED IN !<b... 25.00000000 BTC 32 77 2015-07-26 SeJfXYsWPk.jpg 7290.93 4700.58
183 12345 http://agorahooawayyfoe.onion/p/u10PGGfzey Most Powerful Android Hacking Tool - Log Reque... No Longer For Sale\r\n <br/>\r\n Brought... 341.53005464 BTC 250 77 2015-07-26 u10PGGfzey.jpg 99602.87 64215.57

184 rows × 13 columns


In [71]:
software_listings_df = db.get_products_by_subcategory('Software')
freqs = nlp_noun_extractor(software_listings_df['product_name'])
wc = WordCloud().generate_from_frequencies(freqs.items())
plt.imshow(wc)
plt.axis("off");


And finally we can use the same algorithm to better understand the category labeled as other th happens to be the mode on the total listings distribution by category


In [72]:
other_listings_df = db.get_products_by_subcategory('Other')
freqs = nlp_noun_extractor(other_listings_df['product_name'])
wc = WordCloud().generate_from_frequencies(freqs.items())
plt.imshow(wc)
plt.axis("off");


4 Vendor Cluster Analysis

This final section covers objective 4 of this project

To gain insight on how agents operate in these markets with appropiate classification and association algorithms

The web scraped datasets contain information about the sup one single website from just the supply side. Therefore our records contain details on the vendors only, and no data has been obtained from the possible buyers.

From the given data, we aim to gain insight on the size of the vendors operating on the site. Example questions include: "How concentrated is the supply within the different vendors? ARe there any existing patterns that would manifest the supply is operated by well-coordinated organizations instead of individuals? How do this patterns manifest within the different categories (focusing on drugs and fake documents distribution)? Do these groups of vendors operate locally or are they rather internationally coordinated, providing products from different locations?

The vendors dataset can be obtained by the following query:


In [10]:
vendors_df = db.pandas_query('''
    SELECT * FROM tblVendor;
    ''')
vendors_df


Out[10]:
vendor_id vendor_name vendor_description website_id
0 1 optiman NO DESCRIPTION agorahooawayyfoe.onion
1 2 ROCKETCHEM NO DESCRIPTION agorahooawayyfoe.onion
2 3 etxmx12 NO DESCRIPTION agorahooawayyfoe.onion
3 4 theRoadLessToxic NO DESCRIPTION agorahooawayyfoe.onion
4 5 SolidSupply NO DESCRIPTION agorahooawayyfoe.onion
5 6 CanadaGold-MDMA NO DESCRIPTION agorahooawayyfoe.onion
6 7 precursor-sultan NO DESCRIPTION agorahooawayyfoe.onion
7 8 DRUGDOME NO DESCRIPTION agorahooawayyfoe.onion
8 9 Troubadour NO DESCRIPTION agorahooawayyfoe.onion
9 10 thomascheer NO DESCRIPTION agorahooawayyfoe.onion
10 11 Optumis NO DESCRIPTION agorahooawayyfoe.onion
11 12 psycho_gear NO DESCRIPTION agorahooawayyfoe.onion
12 13 BreezeTheGnome NO DESCRIPTION agorahooawayyfoe.onion
13 14 bulkikas2424 NO DESCRIPTION agorahooawayyfoe.onion
14 15 PlutoPete NO DESCRIPTION agorahooawayyfoe.onion
15 16 stiffstyles NO DESCRIPTION agorahooawayyfoe.onion
16 17 etimbuk NO DESCRIPTION agorahooawayyfoe.onion
17 18 vicious86 NO DESCRIPTION agorahooawayyfoe.onion
18 19 soulpusher NO DESCRIPTION agorahooawayyfoe.onion
19 20 OzAlpha NO DESCRIPTION agorahooawayyfoe.onion
20 21 GuMbA NO DESCRIPTION agorahooawayyfoe.onion
21 22 fake NO DESCRIPTION agorahooawayyfoe.onion
22 23 DrugsINC NO DESCRIPTION agorahooawayyfoe.onion
23 24 Dutchmen NO DESCRIPTION agorahooawayyfoe.onion
24 25 ShootingGallery NO DESCRIPTION agorahooawayyfoe.onion
25 26 AUsTORe NO DESCRIPTION agorahooawayyfoe.onion
26 27 dgslabz NO DESCRIPTION agorahooawayyfoe.onion
27 28 TheADC NO DESCRIPTION agorahooawayyfoe.onion
28 29 SupplyGuy NO DESCRIPTION agorahooawayyfoe.onion
29 30 Straightbiz NO DESCRIPTION agorahooawayyfoe.onion
... ... ... ... ...
1210 1211 pantherpower NO DESCRIPTION agorahooawayyfoe.onion
1211 1212 SleekTheElite NO DESCRIPTION agorahooawayyfoe.onion
1212 1213 lovetoxlr8 NO DESCRIPTION agorahooawayyfoe.onion
1213 1214 AusXTC NO DESCRIPTION agorahooawayyfoe.onion
1214 1215 Kaskade NO DESCRIPTION agorahooawayyfoe.onion
1215 1216 scafewest NO DESCRIPTION agorahooawayyfoe.onion
1216 1217 Saralea NO DESCRIPTION agorahooawayyfoe.onion
1217 1218 btheway2 NO DESCRIPTION agorahooawayyfoe.onion
1218 1219 Drugstore_UK NO DESCRIPTION agorahooawayyfoe.onion
1219 1220 Kiefernzapfen NO DESCRIPTION agorahooawayyfoe.onion
1220 1221 buyersclub NO DESCRIPTION agorahooawayyfoe.onion
1221 1222 BlueViking NO DESCRIPTION agorahooawayyfoe.onion
1222 1223 Pilzhof NO DESCRIPTION agorahooawayyfoe.onion
1223 1224 spartanlabsoz NO DESCRIPTION agorahooawayyfoe.onion
1224 1225 CaliT NO DESCRIPTION agorahooawayyfoe.onion
1225 1226 nzt48givesyouwings NO DESCRIPTION agorahooawayyfoe.onion
1226 1227 Dutchpirate NO DESCRIPTION agorahooawayyfoe.onion
1227 1228 counterfeitcorner NO DESCRIPTION agorahooawayyfoe.onion
1228 1229 swissgermanquality NO DESCRIPTION agorahooawayyfoe.onion
1229 1230 Satire NO DESCRIPTION agorahooawayyfoe.onion
1230 1231 TheOnlineSource NO DESCRIPTION agorahooawayyfoe.onion
1231 1232 john_bro NO DESCRIPTION agorahooawayyfoe.onion
1232 1233 GetPercocet NO DESCRIPTION agorahooawayyfoe.onion
1233 1234 fromustou NO DESCRIPTION agorahooawayyfoe.onion
1234 1235 Refill-101 NO DESCRIPTION agorahooawayyfoe.onion
1235 1236 DoctorNick NO DESCRIPTION agorahooawayyfoe.onion
1236 1237 MissTripzz NO DESCRIPTION agorahooawayyfoe.onion
1237 1238 galactica NO DESCRIPTION agorahooawayyfoe.onion
1238 1239 Spacelab NO DESCRIPTION agorahooawayyfoe.onion
1239 1240 NorthEast NO DESCRIPTION agorahooawayyfoe.onion

1240 rows × 4 columns

4.1 Classifying vendors by market value

The first pattern classification will be to extract the total value of the listings for each of the vendors. With this information we hope to understand how concentrated the supply is. For this purpose, it is required to add an additional column to our dataframe containing the total GBP value of each vendors listings:

In [11]:
vendors_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')
vendors_df['GBP_value'].sort_values(ascending=False).head(25).plot(kind='barh')


Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0d56eef358>


The plot renders just the largest 25 vendors, and we can see that these portion of the data is not even close to be normally distributed. We can produce a histogram to confirm this hypothesis with the following code:


In [12]:
import matplotlib.pyplot as plt
plt.hist(vendors_df['GBP_value'], bins=20)
plt.title("Vendors value Histogram")
plt.xlabel("Vendor Value in GBP")
plt.ylabel("Frequency")
plt.show()


The Histogram proves a great disparity between the vendors. The distribution is completely left-skewed, which could potentially mean that while the majoriy of the vendors total products value is below 4 figures, there is a smaller number of vendors whose total value is orders of magnitude higher, with some of them responsible for listings valued over 6 figures.

In order to prove our hypthesis it is required to break the distribution into percentiles and compare the new distribution with the median and the mean


In [84]:
quant_df = pd.DataFrame({'mean': vendors_df.mean(), 'median': vendors_df.median(),
                   '25%': vendors_df.quantile(0.25), '50%': vendors_df.quantile(0.5),
                   '75%': vendors_df.quantile(0.90)})
quant_df.plot(kind='bar')


Out[84]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f3da4d307b8>

In [85]:
vendors_df.quantile(.9)


Out[85]:
GBP_value    17408.912
dtype: float64

In [87]:
vendors_df.std()


Out[87]:
GBP_value    203969.227601
dtype: float64

As we can see the 75tth percentile is 5 times higher than the 50th, while the mean is 10 times higher than the median, confirmed by a standard deviation of 203.969. These numbers denote how the highest percentiles are responsible to a much larger percentage of the market than all the other vendors combined. Now we will try to understand to what extent, as well as finding what is the sitatuitiont within the main categories object of this analysis: Drugs and Fake documents:


In [88]:
drug_vendors_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Weight loss')
        )
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')

coke_vendors_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('Cocaine')
        )
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')

docs_vendors_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE B.subCategory_id in 
        (SELECT subCategory_id FROM tblSubCategory where subCategory_name = 'Physical documents')
    AND (B.product_name LIKE '%drivers license%'
    OR B.product_name LIKE '%driving licence%'
    OR B.product_name LIKE '%drivers licence%'
    OR B.product_name LIKE '%passport%'
    OR B.product_name LIKE '%fake id%'
    OR B.product_name LIKE '%resident id%'
    OR B.product_name LIKE '%passport%')
    GROUP BY B.vendor_id
    ''', index_col='vendor_id')

In [89]:
quantiles = [(x*10, x/10) for x in range(0,10)]
#quantiles = [(1,.25),(2,.75)]



quantiles_sum = pd.DataFrame()
quantiles_sum['total_gbp'] = pd.Series(
    [drug_vendors_df[drug_vendors_df.GBP_value > drug_vendors_df.GBP_value.quantile(q[1])]['GBP_value'].sum() for q in quantiles],
    [q[0] for q in quantiles]
)
sorted_vendors = vendors_df.sort_values('GBP_value')
sorted_drug_vendors = drug_vendors_df.sort_values('GBP_value')
sorted_docs_vendors = docs_vendors_df.sort_values('GBP_value')
sorted_coke_vendors = coke_vendors_df.sort_values('GBP_value')

In [132]:
largest_value = sorted_vendors.tail((len(sorted_vendors))*10//100)['GBP_value'].sum()
total_value = sorted_vendors['GBP_value'].sum()

print ('10% largest vendors market value {}'.format(largest_value))
print ('Market total value: {} GBP'.format(total_value))
print ('###############################')

largest_drug_value = sorted_drug_vendors.tail((len(sorted_drug_vendors))*10//100)['GBP_value'].sum()
total_drug_value = sorted_drug_vendors['GBP_value'].sum()

print ('10% largest drug vendors market value {}'.format(largest_drug_value))
print ('Drugs Market total value: {} GBP'.format(total_drug_value))
print ('##############################')

largest_docs_value = sorted_docs_vendors.tail((len(sorted_docs_vendors))*10//100)['GBP_value'].sum()
total_docs_value = sorted_docs_vendors['GBP_value'].sum()

print ('10% largest fake documents vendors market value {}'.format(largest_docs_value))
print ('Fake Documents Market total value: {} GBP'.format(total_docs_value))
print ('##############################')

largest_cocaine_value = sorted_coke_vendors.tail((len(sorted_coke_vendors))*10//100)['GBP_value'].sum()
total_cocaine_value = sorted_coke_vendors['GBP_value'].sum()
                                                                   
print ('10% largest cocaine vendors market value {}'.format(largest_cocaine_value))
print ('Cocaine Market total value: {} GBP'.format(total_cocaine_value))

import matplotlib.pyplot as plt


labels = 'Largest Vendors (90th percentile)', 'Rest of the market'
sizes = [largest_value, total_value - largest_value]
colors = ['gold', 'lightskyblue']
explode = (0.1, 0)
plt.axis('equal')
plt.title('Largest vendors total value compared with the rest of the market')
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct='%1.1f%%', shadow=True, startangle=90)


10% largest vendors market value 22252702.589999996
Market total value: 25040844.63 GBP
###############################
10% largest drug vendors market value 13280777.99
Drugs Market total value: 15816306.129999999 GBP
##############################
10% largest fake documents vendors market value 15070.990000000002
Fake Documents Market total value: 36308.85 GBP
##############################
10% largest cocaine vendors market value 1644374.6099999999
Cocaine Market total value: 2069279.89 GBP
Out[132]:
([<matplotlib.patches.Wedge at 0x7f3dad794940>,
  <matplotlib.patches.Wedge at 0x7f3dad79ac88>],
 [<matplotlib.text.Text at 0x7f3dad79a9e8>,
  <matplotlib.text.Text at 0x7f3dad79d1d0>],
 [<matplotlib.text.Text at 0x7f3dad79a4e0>,
  <matplotlib.text.Text at 0x7f3dad7a0828>])

The Hypothesis is now confirmed, the largest vendors concentrate the majority of the market value. This becomes even more prevalent within the drugs marke (which remeber, copes 85% of the total market) Having such concentration of the value within a small vendors is a factor that points us to the fact that the agents behind these markets are not small independant vendors but rather larger criminal organizations.

However, while this hypothesis is supported by the evidence, it doesn't fully prove to what extent this criminal organizations may be operating behind these sites.

4.2 Product Country of origin variety per vendor

In our attempt to unveil the size and behaviour of the agents operating in the site, it would be helpfull to undrestand the range of countris from which these vendors are provided. the hypothesis is that the vendors not only operate locally, but rather represented by larger criminal organizations that operate at an international scope.

4.2.1 Total market geographical variety


In [13]:
vendors_geo_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.product_origin) as countries FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    GROUP BY B.vendor_id
    ORDER BY GBP_value;
    ''', index_col='vendor_id')
vendors_geo_df['countries'].mean()


Out[13]:
1.2611879576891782

In [14]:
vendors_geo_df[(vendors_geo_df['GBP_value'] > vendors_geo_df['GBP_value'].quantile(.9))]['countries'].mean()


Out[14]:
1.7154471544715446

In [15]:
drug_vendors_geo_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.product_origin) as countries FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Weight loss')
        )
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')

coke_vendors_geo_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.product_origin) as countries FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('Cocaine')
        )
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')

docs_vendors_geo_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.product_origin) as countries FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE B.subCategory_id in 
        (SELECT subCategory_id FROM tblSubCategory where subCategory_name = 'Physical documents')
    AND (B.product_name LIKE '%drivers license%'
    OR B.product_name LIKE '%driving licence%'
    OR B.product_name LIKE '%drivers licence%'
    OR B.product_name LIKE '%passport%'
    OR B.product_name LIKE '%fake id%'
    OR B.product_name LIKE '%resident id%'
    OR B.product_name LIKE '%passport%')
    GROUP BY B.vendor_id
    ''', index_col='vendor_id')

4.2.2 Drugs and fake ids provider geolocation per vendor


In [16]:
sorted_vendors = vendors_df.sort_values('GBP_value')
sorted_drug_geo_vendors = drug_vendors_geo_df.sort_values('GBP_value')
sorted_docs_geo_vendors = docs_vendors_geo_df.sort_values('GBP_value')
sorted_coke_geo_vendors = coke_vendors_geo_df.sort_values('GBP_value')

In [17]:
print ('10% largest drug vendors countries avg {}'.format(
        sorted_drug_geo_vendors.tail((len(sorted_drug_geo_vendors))*10//100)['countries'].mean())
)
print ('10% largest fake documents vendors countries avg {}'.format(
        sorted_docs_geo_vendors.tail((len(sorted_docs_geo_vendors))*10//100)['countries'].mean())
)
print ('10% largest cocaine vendors countries avg {}'.format(
        sorted_coke_geo_vendors.tail((len(sorted_coke_geo_vendors))*10//100)['countries'].mean())
)


10% largest drug vendors countries avg 1.654867256637168
10% largest fake documents vendors countries avg 1.0
10% largest cocaine vendors countries avg 1.368421052631579

In [ ]:


In [28]:
print ('10 largest drug vendors countries avg {}'.format(
        sorted_drug_geo_vendors.tail(10)['countries'].mean())
)
print ('10 largest fake documents vendors countries avg {}'.format(
        sorted_docs_geo_vendors.tail(10)['countries'].mean())
)
print ('10 largest cocaine vendors countries avg {}'.format(
        sorted_coke_geo_vendors.tail(10)['countries'].mean())
)


10 largest drug vendors countries avg 2.5
10 largest fake documents vendors countries avg 1.1
10 largest cocaine vendors countries avg 1.3

4.3 Vendors Catalog variety

Finally we can study the variety in the vendors catalog according to their value


In [103]:
vendors_cat_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.subCategory_id) as categories FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    GROUP BY B.vendor_id
    ORDER BY GBP_value;
    ''', index_col='vendor_id')

drug_vendors_cat_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.subCategory_id) as categories FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('2C', '5-MeO', 'Barbiturates', 'Benzos', 'Buprenorphine', 'Cannabis', 'Chemicals', 'Cocaine', 
            'Codeine', 'Concentrates', 'Containers', 'Dihydrocodeine', 'Dissociatives', 'DMT', 'Doomsday', 
            'Drugs', 'Ecstasy', 'Edibles', 'Fentanyl', 'Filters', 'Fireworks', 'GBL', 'GHB', 'Grinders', 
            'Hash', 'Heroin', 'Hydrocodone', 'Hydromorphone', 'Injecting equipment', 'Ketamine', 'LSD', 'MDA', 
            'MDMA', 'Mephedrone', 'Mescaline', 'Meth', 'Morphine', 'Mushrooms', 'MXE', 'NB', 'Needles', 'Opioids', 
            'Opium', 'Oxycodone', 'Paper', 'Paraphernalia', 'PCP', 'Pills', 'Pipes', 'Prescription', 'Psychedelics', 
            'RCs', 'Salvia', 'Scales', 'Science', 'Seeds', 'Shake/trim', 'Smoked', 'Speed', 'Spores', 'Stashes', 
            'Steroids', 'Stimulants', 'Synthetics', 'Syringes', 'Weed', 'Weight loss')
        )
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')

coke_vendors_cat_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.subCategory_id) as categories FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE subCategory_id in
        (SELECT subCategory_id from tblSubCategory where subCategory_name IN
            ('Cocaine')
        )
    GROUP BY B.vendor_id;
    ''', index_col='vendor_id')

docs_vendors_cat_df = db.pandas_query('''
    SELECT A.vendor_id, SUM(B.GBP) as GBP_value, COUNT(DISTINCT B.subCategory_id) as categories FROM tblVendor A
    JOIN tblProduct B 
        ON B.vendor_id = A.vendor_id
    WHERE B.subCategory_id in 
        (SELECT subCategory_id FROM tblSubCategory where subCategory_name = 'Physical documents')
    AND (B.product_name LIKE '%drivers license%'
    OR B.product_name LIKE '%driving licence%'
    OR B.product_name LIKE '%drivers licence%'
    OR B.product_name LIKE '%passport%'
    OR B.product_name LIKE '%fake id%'
    OR B.product_name LIKE '%resident id%'
    OR B.product_name LIKE '%passport%')
    GROUP BY B.vendor_id
    ''', index_col='vendor_id')

In [104]:
sorted_cat_vendors = vendors_cat_df.sort_values('GBP_value')
sorted_drug_cat_vendors = drug_vendors_cat_df.sort_values('GBP_value')
sorted_docs_cat_vendors = docs_vendors_cat_df.sort_values('GBP_value')
sorted_coke_cat_vendors = coke_vendors_cat_df.sort_values('GBP_value')

In [109]:
print ('10% largest vendors categories avg {}'.format(
        sorted_cat_vendors.tail((len(sorted_cat_vendors))*10//100)['categories'].mean())
)
print ('10% largest drug vendors categories avg {}'.format(
        sorted_drug_cat_vendors.tail((len(sorted_drug_cat_vendors))*10//100)['categories'].mean())
)
print ('10% largest fake documents vendors categories avg {}'.format(
        sorted_docs_cat_vendors.tail((len(sorted_docs_cat_vendors))*10//100)['categories'].mean())
)


10% largest vendors categories avg 5.491803278688525
10% largest drug vendors categories avg 4.097345132743363
10% largest fake documents vendors categories avg 1.0

In [108]:
print ('10 largest drug vendors categories avg {}'.format(
        sorted_drug_cat_vendors.tail(10)['categories'].mean())
)
print ('10 largest fake documents vendors categories avg {}'.format(
        sorted_docs_cat_vendors.tail(10)['categories'].mean())
)


10 largest drug vendors categories avg 4.4
10 largest fake documents vendors categories avg 1.0

In [ ]: