Instagram Image Analysis

Author: Kat Chuang @katychuang on twitter

This is a research project that strives to understand how instagram users identify colors in images and tag them.

Research Questions

  1. Color reference: Does "blue" during the day refer to sky blue whereas "blue" at night refer to midnight blue?
  2. Color min/max: What percentage of the image has the tagged color? Is it an accent or dominant color?
  3. Color specificity: "bleu" vs "blue" "crimson" vs "red"

Packages used:

  • SciKit-Image
  • Python-Instagram

Tip: Install the Instagram API wrapper

$ pip install python-instagram

then you can import it in your python file

from instagram.client import InstagramAPI


In [1]:
%pylab inline 

from instagram.client import InstagramAPI
import skimage
from skimage import data, filter, segmentation, color


Welcome to pylab, a matplotlib-based Python environment [backend: module://IPython.zmq.pylab.backend_inline].
For more information, type 'help(pylab)'.

In [2]:
# A LOT MORE IMPORTS
from skimage import io
from skimage.morphology import watershed, is_local_maximum
from scipy import ndimage

# We will be looking at the following techniques to begin with
from skimage.morphology import erosion, dilation, opening, closing, white_tophat, black_tophat, skeletonize, convex_hull, convex_hull_image

# Importing 'square' and 'disk' modules for creating BINARY structuring elements
from skimage.morphology import square as sq
from skimage.morphology import disk

# Skimage supports NumPy data types and takes in images as type 'ndarray'. matplotlib.pyplot is a python library for providing MATLAB-like functionality, hence the same function names. E.g: imshow
import matplotlib.pyplot as plt
import numpy as np

# Importing the '_io' module for reading, writing and showing images. Note thatin skimage, all files having the same name as the folder have been renamed with an '_'. Hence '_io'
import skimage.io._io as io
from skimage.data import load
from skimage.util import img_as_ubyte
from pylab import *
import urllib
import urllib2
import cStringIO
from PIL import Image

Examples of SciKit-Image


In [3]:
from skimage.data import camera

im_orig = camera()

print 'image shape:', im_orig.shape
print 'image (min,max):', im_orig.min(), im_orig.max()

gray()          # set default colormap to gray scale
imshow(im_orig)
#show()


image shape: (512, 512)
image (min,max): 0 255
Out[3]:
<matplotlib.image.AxesImage at 0x106b1bb50>

In [4]:
from skimage import data, segmentation, filter, color
import matplotlib.pyplot as plt

coins = data.coins()
mask = coins > filter.threshold_otsu(coins)
clean_border = segmentation.clear_border(mask)

coins_edges = segmentation.visualize_boundaries(color.gray2rgb(coins),
                            clean_border)

plt.figure(figsize=(8, 3.5))
plt.subplot(121)
plt.imshow(clean_border, cmap='gray')
plt.axis('off')
plt.subplot(122)
plt.imshow(coins_edges)
plt.axis('off')

plt.tight_layout()
plt.show()


#u = 'http://distilleryimage1.s3.amazonaws.com/7780073610cb11e3b23122000a1f98cf_7.jpg'
#im = Image.open(cStringIO.StringIO(urllib.urlopen(p).read()))
#e = segmentation.mark_boundaries(im, clean_border)
#imshow(e,origin='lower',cmap=plt.cm.gray)
#show()


Instagram API example

Now let's try connecting to Instagram and editing those images.

You can install wrapper with the following command in your commandline

$ pip install python-instagram


In [4]:
INSTAGRAM_CLIENT_ID = 'd60340d55d864859a9d3a34f50a6d816'
INSTAGRAM_CLIENT_SECRET = '89d0d579e9714a9780c4352aa3872968'

api = InstagramAPI(client_id=INSTAGRAM_CLIENT_ID,
                   client_secret=INSTAGRAM_CLIENT_SECRET)

#get popular images feed
popular_media = api.media_popular(count=20)

#extract urls of popular images to a list
photolist = []
for media in popular_media:
    photolist.append(media.images['standard_resolution'].url)

print 'Top photos from Instagram'
html = ''

#show the original image thumbnail
for p in photolist:
    html = html + '<img src=' + p + ' width="150" />'
from IPython.core.display import HTML
HTML(html)


Top photos from Instagram
Out[4]:

In [5]:
from skimage import exposure

# process images and show a more exposed version of photo
for p in photolist:
    im = Image.open(cStringIO.StringIO(urllib.urlopen(p).read()))
    i = img_as_ubyte(im)  # To convert to uint8 data type
    
    image = exposure.rescale_intensity(i, in_range=(0, 2**7 - 1))
    
    axis('off')
    imshow(image,origin='lower',cmap=plt.cm.gray)
    show()



In [8]:
p = 'http://distilleryimage1.s3.amazonaws.com/7780073610cb11e3b23122000a1f98cf_7.jpg'
im = Image.open(cStringIO.StringIO(urllib.urlopen(p).read()))
i = img_as_ubyte(im)

from skimage import exposure
from skimage.transform import resize, rotate, rescale

image10 = exposure.rescale_intensity(i, in_range=(0, 2**10 - 1))
axis('off')
imshow(image10,origin='lower',cmap=plt.cm.gray)
show()

image8 = exposure.rescale_intensity(i, in_range=(0, 2**8 - 1))
axis('off')
imshow(image8,origin='lower',cmap=plt.cm.gray)
show()

# High Exposure and Rotated
image5 = exposure.rescale_intensity(i, in_range=(0, 2**5 - 1))
im_r = rotate(image5.astype('float')/255, angle=15, order=2)
axis('off')
imshow(im_r,origin='lower',cmap=plt.cm.gray)
show()


Let's flip the image right side up and change the threshold


In [9]:
from skimage.filter import threshold_otsu

u = 'http://distilleryimage9.s3.amazonaws.com/a1392b5c10d511e39edf22000ae916b0_7.jpg'
im = Image.open(cStringIO.StringIO(urllib.urlopen(u).read()))
iimg = img_as_ubyte(im)

im_r = rotate(iimg.astype('float')/255, angle=15, order=2)
thres = threshold_otsu(im_r)
plt.figure(figsize=(8,4))

plt.subplot(121)
#plt.imshow(im_r)
plt.imshow(im_r > thres)
plt.title("80's image")
plt.axis('off')


Out[9]:
(-0.5, 611.5, 611.5, -0.5)

In [10]:
import matplotlib.pyplot as plt
from skimage.filter import threshold_otsu

for p in photolist:
    im = Image.open(cStringIO.StringIO(urllib.urlopen(p).read()))
    iimg = img_as_ubyte(im)
    im_r = rotate(iimg.astype('float')/255, angle=1, order=2)
    thres = threshold_otsu(im_r)
    plt.figure(figsize=(8,4))
    #plt.subplot(121)
    
    #imshow(e,origin='lower',cmap=plt.cm.gray)
    plt.axis('off')
    plt.title("80's Style Image")
    plt.imshow(im_r > thres)
    show()



In [13]:
import skimage
from skimage import data, io, filter

image = data.coins() # or any NumPy array!
edges = filter.sobel(image)
axis('off')
imshow(edges)

u = 'http://distilleryimage9.s3.amazonaws.com/a1392b5c10d511e39edf22000ae916b0_7.jpg'
im = Image.open(cStringIO.StringIO(urllib.urlopen(u).read()))
iimg = img_as_ubyte(im)
edge1 = filter.sobel(iimg)
axis('off')
imshow(edge1)


---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-13-4b5151c09a7d> in <module>()
     10 im = Image.open(cStringIO.StringIO(urllib.urlopen(u).read()))
     11 iimg = img_as_ubyte(im)
---> 12 edge1 = filter.sobel(iimg)
     13 axis('off')
     14 imshow(edge1)

/Library/anaconda/lib/python2.7/site-packages/skimage/filter/edges.pyc in sobel(image, mask)
     61     has to be further processed to perform edge detection.
     62     """
---> 63     return np.sqrt(hsobel(image, mask)**2 + vsobel(image, mask)**2)
     64 
     65 

/Library/anaconda/lib/python2.7/site-packages/skimage/filter/edges.pyc in hsobel(image, mask)
     95                              np.array([[ 1, 2, 1],
     96                                        [ 0, 0, 0],
---> 97                                        [-1,-2,-1]]).astype(float) / 4.0))
     98     return _mask_filter_result(result, mask)
     99 

/Library/anaconda/lib/python2.7/site-packages/scipy/ndimage/filters.pyc in convolve(input, weights, output, mode, cval, origin)
    677     """
    678     return _correlate_or_convolve(input, weights, output, mode, cval,
--> 679                                   origin, True)
    680 
    681 

/Library/anaconda/lib/python2.7/site-packages/scipy/ndimage/filters.pyc in _correlate_or_convolve(input, weights, output, mode, cval, origin, convolution)
    511     wshape = [ii for ii in weights.shape if ii > 0]
    512     if len(wshape) != input.ndim:
--> 513         raise RuntimeError('filter weights array has incorrect shape.')
    514     if convolution:
    515         weights = weights[tuple([slice(None, None, -1)] * weights.ndim)]

RuntimeError: filter weights array has incorrect shape.

In [ ]:
import numpy as np
import matplotlib.pyplot as plt

from skimage import data
from skimage.filter.rank import entropy
from skimage.morphology import disk
from skimage.util import img_as_ubyte


# defining a 8- and a 16-bit test images
a8 = img_as_ubyte(data.camera())
a16 = a8.astype(np.uint16) * 4

ent8 = entropy(a8, disk(5)) # pixel value contain 10x the local entropy
ent16 = entropy(a16, disk(5)) # pixel value contain 1000x the local entropy

# display results
plt.figure(figsize=(10, 10))

plt.subplot(2,2,1)
plt.imshow(a8, cmap=plt.cm.gray)
plt.xlabel('8-bit image')
plt.colorbar()

plt.subplot(2,2,2)
plt.imshow(ent8, cmap=plt.cm.jet)
plt.xlabel('entropy*10')
plt.colorbar()

plt.subplot(2,2,3)
plt.imshow(a16, cmap=plt.cm.gray)
plt.xlabel('16-bit image')
plt.colorbar()

plt.subplot(2,2,4)
plt.imshow(ent16, cmap=plt.cm.jet)
plt.xlabel('entropy*1000')
plt.colorbar()
plt.show()

In [1]:
#example from http://scikit-image.org/docs/dev/auto_examples/applications/plot_rank_filters.html#example-applications-plot-rank-filters-py

import numpy as np
import matplotlib.pyplot as plt

from skimage import data

ima = data.camera()
hist = np.histogram(ima, bins=np.arange(0, 256))

plt.figure(figsize=(8, 3))
plt.subplot(1, 2, 1)
plt.imshow(ima, cmap=plt.cm.gray, interpolation='nearest')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.plot(hist[1][:-1], hist[0], lw=2)
plt.title('histogram of grey values')

#noise removal
noise = np.random.random(ima.shape)
nima = data.camera()
nima[noise > 0.99] = 255
nima[noise < 0.01] = 0

from skimage.filter.rank import median
from skimage.morphology import disk

fig = plt.figure(figsize=[10, 7])

lo = median(nima, disk(1))
hi = median(nima, disk(5))
ext = median(nima, disk(20))
plt.subplot(2, 2, 1)
plt.imshow(nima, cmap=plt.cm.gray, vmin=0, vmax=255)
plt.xlabel('noised image')
plt.subplot(2, 2, 2)
plt.imshow(lo, cmap=plt.cm.gray, vmin=0, vmax=255)
plt.xlabel('median $r=1$')
plt.subplot(2, 2, 3)
plt.imshow(hi, cmap=plt.cm.gray, vmin=0, vmax=255)
plt.xlabel('median $r=5$')
plt.subplot(2, 2, 4)
plt.imshow(ext, cmap=plt.cm.gray, vmin=0, vmax=255)
plt.xlabel('median $r=20$')

# image smoothing 
from skimage.filter.rank import mean

fig = plt.figure(figsize=[10, 7])

loc_mean = mean(nima, disk(10))
plt.subplot(1, 2, 1)
plt.imshow(ima, cmap=plt.cm.gray, vmin=0, vmax=255)
plt.xlabel('original')
plt.subplot(1, 2, 2)
plt.imshow(loc_mean, cmap=plt.cm.gray, vmin=0, vmax=255)
plt.xlabel('local mean $r=10$')


Out[1]:
<matplotlib.text.Text at 0x1082d28d0>

In [ ]: