Parse Twitter Data

  1. Import retrieved JSON files (from S3)
  2. Read in individual tweets
  3. Geolocate
  4. Add gender
  5. Create CSV file (and drop unwanted data)

Get Data and Enrich It


In [1]:
import sys,re,json,os,csv,glob
import graphlab as gl


---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-1-0351ad49e384> in <module>()
      1 import sys,re,json,os,csv,glob
----> 2 import graphlab as gl

ImportError: No module named graphlab

In [3]:
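# Create the data folder that the S3 sync cell below downloads into.
# Left commented out; uncomment if ../data does not exist yet.
# !mkdir -p ../data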

In [4]:
# Download the raw DataSift JSON files for 2014-08 from S3 into ../data/.
# Slow: per the recorded log, the sync transfers 295 files, which land
# under ../data/2014-08/.
# NOTE(review): the %%capture line below is left disabled; as a cell magic it
# would need to be the first line of the cell to take effect -- confirm before
# re-enabling it to keep the long transfer log out of the notebook.
# %%capture capt # should work in IPython 3
!s3cmd sync s3://PLNY_Brasil/2014-08 ../data/


s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328220.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328220.json  [1 of 295]
 4373 of 4373   100% in    0s    96.40 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328256.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328256.json  [2 of 295]
 12821 of 12821   100% in    0s   141.28 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328262.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328262.json  [3 of 295]
 4434 of 4434   100% in    0s   108.35 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328273.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328273.json  [4 of 295]
 1990 of 1990   100% in    0s    34.41 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328332.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328332.json  [5 of 295]
 9958 of 9958   100% in    0s   254.65 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328339.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328339.json  [6 of 295]
 1980 of 1980   100% in    0s    33.75 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328374.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328374.json  [7 of 295]
 12578 of 12578   100% in    0s   312.09 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328445.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328445.json  [8 of 295]
 15760 of 15760   100% in    0s   367.33 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328484.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328484.json  [9 of 295]
 4073 of 4073   100% in    0s    84.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328544.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328544.json  [10 of 295]
 8430 of 8430   100% in    0s    91.18 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328569.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328569.json  [11 of 295]
 4266 of 4266   100% in    0s    87.84 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328687.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328687.json  [12 of 295]
 18457 of 18457   100% in    0s   227.25 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328706.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328706.json  [13 of 295]
 6901 of 6901   100% in    0s   139.51 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328743.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328743.json  [14 of 295]
 19578 of 19578   100% in    0s   472.97 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328813.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328813.json  [15 of 295]
 18932 of 18932   100% in    0s   462.17 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328849.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328849.json  [16 of 295]
 14474 of 14474   100% in    0s   327.24 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328917.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328917.json  [17 of 295]
 29220 of 29220   100% in    0s   612.53 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328930.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328930.json  [18 of 295]
 2236 of 2236   100% in    0s    47.12 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328967.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328967.json  [19 of 295]
 13001 of 13001   100% in    0s   273.10 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329038.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329038.json  [20 of 295]
 32883 of 32883   100% in    0s   564.28 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329048.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329048.json  [21 of 295]
 11268 of 11268   100% in    0s   204.08 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329086.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329086.json  [22 of 295]
 4019 of 4019   100% in    0s   100.33 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329091.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329091.json  [23 of 295]
 6319 of 6319   100% in    0s   127.81 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329096.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329096.json  [24 of 295]
 2024 of 2024   100% in    0s    35.56 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329103.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329103.json  [25 of 295]
 1920 of 1920   100% in    0s    52.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329165.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329165.json  [26 of 295]
 27527 of 27527   100% in    0s   507.56 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329177.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329177.json  [27 of 295]
 2135 of 2135   100% in    0s    42.51 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329216.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329216.json  [28 of 295]
 31669 of 31669   100% in    0s   751.98 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329288.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329288.json  [29 of 295]
 33188 of 33188   100% in    0s   714.81 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329294.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329294.json  [30 of 295]
 2381 of 2381   100% in    0s    51.84 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329298.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329298.json  [31 of 295]
 1994 of 1994   100% in    0s    39.61 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329304.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329304.json  [32 of 295]
 1962 of 1962   100% in    0s    40.09 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329309.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329309.json  [33 of 295]
 2229 of 2229   100% in    0s    32.48 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329346.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329346.json  [34 of 295]
 16865 of 16865   100% in    0s   386.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329424.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329424.json  [35 of 295]
 32442 of 32442   100% in    0s   751.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329430.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329430.json  [36 of 295]
 2458 of 2458   100% in    0s    56.55 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329468.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329468.json  [37 of 295]
 21301 of 21301   100% in    0s   467.56 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329478.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329478.json  [38 of 295]
 4063 of 4063   100% in    0s    84.59 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329501.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329501.json  [39 of 295]
 4002 of 4002   100% in    0s    75.67 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329529.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329529.json  [40 of 295]
 14534 of 14534   100% in    0s   223.39 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329567.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329567.json  [41 of 295]
 8196 of 8196   100% in    0s   162.23 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329572.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329572.json  [42 of 295]
 15964 of 15964   100% in    0s   239.72 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329576.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329576.json  [43 of 295]
 2299 of 2299   100% in    0s    45.26 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329643.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329643.json  [44 of 295]
 26737 of 26737   100% in    0s   549.09 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329652.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329652.json  [45 of 295]
 10760 of 10760   100% in    0s   196.76 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329657.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329657.json  [46 of 295]
 2208 of 2208   100% in    0s    45.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329695.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329695.json  [47 of 295]
 12829 of 12829   100% in    0s   345.36 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329775.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329775.json  [48 of 295]
 29742 of 29742   100% in    0s   542.81 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329782.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329782.json  [49 of 295]
 4281 of 4281   100% in    0s   109.63 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329786.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329786.json  [50 of 295]
 2174 of 2174   100% in    0s    44.12 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329822.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329822.json  [51 of 295]
 25008 of 25008   100% in    0s   509.23 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329894.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329894.json  [52 of 295]
 42053 of 42053   100% in    0s   838.16 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329901.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329901.json  [53 of 295]
 1981 of 1981   100% in    0s    36.52 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329940.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413329940.json  [54 of 295]
 29259 of 29259   100% in    0s   782.36 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330006.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330006.json  [55 of 295]
 19643 of 19643   100% in    0s   366.91 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330043.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330043.json  [56 of 295]
 6136 of 6136   100% in    0s   150.47 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330049.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330049.json  [57 of 295]
 8231 of 8231   100% in    0s   181.93 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330054.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330054.json  [58 of 295]
 2031 of 2031   100% in    0s    51.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330126.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330126.json  [59 of 295]
 51168 of 51168   100% in    0s   865.54 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330132.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330132.json  [60 of 295]
 14430 of 14430   100% in    0s   225.00 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330140.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330140.json  [61 of 295]
 2102 of 2102   100% in    0s    60.98 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330144.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330144.json  [62 of 295]
 4615 of 4615   100% in    0s   107.48 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330181.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330181.json  [63 of 295]
 12721 of 12721   100% in    0s   267.54 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330248.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330248.json  [64 of 295]
 20774 of 20774   100% in    0s   546.76 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330254.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330254.json  [65 of 295]
 2133 of 2133   100% in    0s    40.19 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330290.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330290.json  [66 of 295]
 28584 of 28584   100% in    0s   633.82 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330296.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330296.json  [67 of 295]
 2018 of 2018   100% in    0s    37.03 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330366.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330366.json  [68 of 295]
 35344 of 35344   100% in    0s   707.16 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330406.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330406.json  [69 of 295]
 8497 of 8497   100% in    0s   167.85 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330412.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330412.json  [70 of 295]
 1990 of 1990   100% in    0s    38.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330490.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330490.json  [71 of 295]
 32611 of 32611   100% in    0s   676.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330496.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330496.json  [72 of 295]
 4537 of 4537   100% in    0s    79.71 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330532.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330532.json  [73 of 295]
 29970 of 29970   100% in    0s   558.14 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330537.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330537.json  [74 of 295]
 4415 of 4415   100% in    0s   106.01 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330541.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330541.json  [75 of 295]
 2041 of 2041   100% in    0s    36.77 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330547.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330547.json  [76 of 295]
 6680 of 6680   100% in    0s   124.35 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330611.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330611.json  [77 of 295]
 39089 of 39089   100% in    0s   757.38 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330616.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330616.json  [78 of 295]
 21614 of 21614   100% in    0s   370.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330621.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330621.json  [79 of 295]
 1953 of 1953   100% in    0s    46.41 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330626.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330626.json  [80 of 295]
 4568 of 4568   100% in    0s   120.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330663.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330663.json  [81 of 295]
 21287 of 21287   100% in    0s   474.62 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330669.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330669.json  [82 of 295]
 2323 of 2323   100% in    0s    43.60 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330733.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330733.json  [83 of 295]
 52471 of 52471   100% in    0s  1198.77 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330738.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330738.json  [84 of 295]
 4519 of 4519   100% in    0s   105.96 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330743.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330743.json  [85 of 295]
 10153 of 10153   100% in    0s   351.52 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330779.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330779.json  [86 of 295]
 8530 of 8530   100% in    0s   205.94 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330849.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330849.json  [87 of 295]
 43961 of 43961   100% in    0s  1018.35 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330860.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330860.json  [88 of 295]
 6847 of 6847   100% in    0s   115.12 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330865.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330865.json  [89 of 295]
 2320 of 2320   100% in    0s    47.45 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330870.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330870.json  [90 of 295]
 2070 of 2070   100% in    0s    50.80 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330874.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330874.json  [91 of 295]
 19275 of 19275   100% in    0s   348.60 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330880.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330880.json  [92 of 295]
 4565 of 4565   100% in    0s   108.40 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330943.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330943.json  [93 of 295]
 16826 of 16826   100% in    0s   335.96 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330976.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413330976.json  [94 of 295]
 14901 of 14901   100% in    0s   247.73 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331014.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331014.json  [95 of 295]
 19607 of 19607   100% in    0s   438.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331019.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331019.json  [96 of 295]
 6471 of 6471   100% in    0s   159.79 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331024.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331024.json  [97 of 295]
 4417 of 4417   100% in    0s    48.33 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331035.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331035.json  [98 of 295]
 4567 of 4567   100% in    0s   111.54 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331060.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331060.json  [99 of 295]
 14709 of 14709   100% in    0s   302.59 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331087.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331087.json  [100 of 295]
 4143 of 4143   100% in    0s    93.70 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331129.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331129.json  [101 of 295]
 16764 of 16764   100% in    0s   456.96 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331213.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331213.json  [102 of 295]
 40160 of 40160   100% in    0s   935.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331233.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331233.json  [103 of 295]
 8395 of 8395   100% in    0s   168.60 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331270.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331270.json  [104 of 295]
 50636 of 50636   100% in    0s   999.68 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331278.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331278.json  [105 of 295]
 1975 of 1975   100% in    0s    51.19 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331314.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331314.json  [106 of 295]
 19496 of 19496   100% in    0s   561.69 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331331.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331331.json  [107 of 295]
 1934 of 1934   100% in    0s    45.61 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331336.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331336.json  [108 of 295]
 10100 of 10100   100% in    0s   229.60 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331348.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331348.json  [109 of 295]
 4413 of 4413   100% in    0s    85.59 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331350.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331350.json  [110 of 295]
 7088 of 7088   100% in    0s   263.80 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331356.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331356.json  [111 of 295]
 2050 of 2050   100% in    0s    57.68 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331409.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331409.json  [112 of 295]
 21912 of 21912   100% in    0s   421.34 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331456.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331456.json  [113 of 295]
 28961 of 28961   100% in    0s   569.68 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331496.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331496.json  [114 of 295]
 11915 of 11915   100% in    0s   277.13 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331501.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331501.json  [115 of 295]
 2330 of 2330   100% in    0s    46.26 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331566.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331566.json  [116 of 295]
 19027 of 19027   100% in    0s   351.32 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331573.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331573.json  [117 of 295]
 12610 of 12610   100% in    0s   309.09 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331666.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331666.json  [118 of 295]
 28576 of 28576   100% in    0s   631.15 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331688.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331688.json  [119 of 295]
 10993 of 10993   100% in    0s   243.97 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331726.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331726.json  [120 of 295]
 12682 of 12682   100% in    0s   243.91 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331812.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331812.json  [121 of 295]
 47364 of 47364   100% in    0s   314.97 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331848.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331848.json  [122 of 295]
 22685 of 22685   100% in    0s   494.86 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331941.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331941.json  [123 of 295]
 59831 of 59831   100% in    0s  1442.61 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331979.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331979.json  [124 of 295]
 11422 of 11422   100% in    0s   230.14 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331985.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413331985.json  [125 of 295]
 2067 of 2067   100% in    0s    37.07 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332023.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332023.json  [126 of 295]
 27306 of 27306   100% in    0s   514.92 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332052.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332052.json  [127 of 295]
 15608 of 15608   100% in    0s   437.42 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332093.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332093.json  [128 of 295]
 10955 of 10955   100% in    0s   285.84 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332171.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332171.json  [129 of 295]
 52303 of 52303   100% in    0s  1057.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332207.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332207.json  [130 of 295]
 4081 of 4081   100% in    0s   105.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332213.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332213.json  [131 of 295]
 12891 of 12891   100% in    0s   207.57 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332218.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332218.json  [132 of 295]
 2019 of 2019   100% in    0s    48.01 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332223.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332223.json  [133 of 295]
 2277 of 2277   100% in    0s    53.28 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332289.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332289.json  [134 of 295]
 20857 of 20857   100% in    0s   485.20 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332294.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332294.json  [135 of 295]
 12662 of 12662   100% in    0s   262.69 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332300.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332300.json  [136 of 295]
 2331 of 2331   100% in    0s    61.75 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332304.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332304.json  [137 of 295]
 4024 of 4024   100% in    0s    91.41 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332341.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332341.json  [138 of 295]
 15702 of 15702   100% in    0s   342.79 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332346.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332346.json  [139 of 295]
 11217 of 11217   100% in    0s   304.16 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332363.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332363.json  [140 of 295]
 4460 of 4460   100% in    0s    71.55 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332405.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332405.json  [141 of 295]
 12687 of 12687   100% in    0s   290.75 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332412.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332412.json  [142 of 295]
 4141 of 4141   100% in    0s    98.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332418.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332418.json  [143 of 295]
 2518 of 2518   100% in    0s    59.39 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332454.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332454.json  [144 of 295]
 20739 of 20739   100% in    0s   409.18 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332459.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332459.json  [145 of 295]
 1950 of 1950   100% in    0s    45.70 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332531.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332531.json  [146 of 295]
 23068 of 23068   100% in    0s   703.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332576.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332576.json  [147 of 295]
 6381 of 6381   100% in    0s   141.09 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332641.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332641.json  [148 of 295]
 8388 of 8388   100% in    0s   166.54 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332652.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332652.json  [149 of 295]
 2198 of 2198   100% in    0s    48.43 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332658.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332658.json  [150 of 295]
 2070 of 2070   100% in    0s    44.52 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332665.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332665.json  [151 of 295]
 1998 of 1998   100% in    0s    49.68 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332703.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332703.json  [152 of 295]
 3899 of 3899   100% in    0s    78.98 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332765.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332765.json  [153 of 295]
 12935 of 12935   100% in    0s   273.61 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332804.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332804.json  [154 of 295]
 13776 of 13776   100% in    0s   272.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332866.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332866.json  [155 of 295]
 31805 of 31805   100% in    0s   783.66 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332888.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332888.json  [156 of 295]
 6859 of 6859   100% in    0s   159.30 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332895.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332895.json  [157 of 295]
 6498 of 6498   100% in    0s   156.21 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332935.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332935.json  [158 of 295]
 19081 of 19081   100% in    0s   364.61 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332994.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413332994.json  [159 of 295]
 12199 of 12199   100% in    0s   325.58 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333022.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333022.json  [160 of 295]
 2278 of 2278   100% in    0s    52.12 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333033.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333033.json  [161 of 295]
 2129 of 2129   100% in    0s    34.86 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333067.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333067.json  [162 of 295]
 10623 of 10623   100% in    0s   179.53 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333076.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333076.json  [163 of 295]
 1991 of 1991   100% in    0s    33.31 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333138.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333138.json  [164 of 295]
 16537 of 16537   100% in    0s   335.80 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333179.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333179.json  [165 of 295]
 6317 of 6317   100% in    0s   171.80 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333246.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333246.json  [166 of 295]
 2300 of 2300   100% in    0s    60.28 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333254.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333254.json  [167 of 295]
 2173 of 2173   100% in    0s    48.00 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333291.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333291.json  [168 of 295]
 2096 of 2096   100% in    0s    54.70 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333353.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333353.json  [169 of 295]
 17157 of 17157   100% in    0s   396.08 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333371.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333371.json  [170 of 295]
 4206 of 4206   100% in    0s    82.22 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333407.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333407.json  [171 of 295]
 8901 of 8901   100% in    0s   166.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333485.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333485.json  [172 of 295]
 8014 of 8014   100% in    0s   232.85 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333524.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333524.json  [173 of 295]
 4092 of 4092   100% in    0s    83.07 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333529.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333529.json  [174 of 295]
 1940 of 1940   100% in    0s    34.41 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333606.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333606.json  [175 of 295]
 8580 of 8580   100% in    0s   198.16 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333615.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333615.json  [176 of 295]
 1913 of 1913   100% in    0s    43.20 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333623.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333623.json  [177 of 295]
 2130 of 2130   100% in    0s    46.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333661.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333661.json  [178 of 295]
 6516 of 6516   100% in    0s   120.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333669.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333669.json  [179 of 295]
 2311 of 2311   100% in    0s    45.77 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333731.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333731.json  [180 of 295]
 4675 of 4675   100% in    0s   111.89 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333741.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333741.json  [181 of 295]
 9548 of 9548   100% in    0s   252.42 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333779.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333779.json  [182 of 295]
 4305 of 4305   100% in    0s    93.70 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333860.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333860.json  [183 of 295]
 35640 of 35640   100% in    0s   619.95 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333863.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333863.json  [184 of 295]
 2069 of 2069   100% in    0s    27.25 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333901.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333901.json  [185 of 295]
 6264 of 6264   100% in    0s   134.58 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333968.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413333968.json  [186 of 295]
 37788 of 37788   100% in    0s   735.65 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334008.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334008.json  [187 of 295]
 8117 of 8117   100% in    0s   181.93 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334064.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334064.json  [188 of 295]
 18399 of 18399   100% in    0s   340.54 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334091.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334091.json  [189 of 295]
 9260 of 9260   100% in    0s   252.21 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334097.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334097.json  [190 of 295]
 6259 of 6259   100% in    0s   168.25 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334103.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334103.json  [191 of 295]
 1981 of 1981   100% in    0s    48.70 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334108.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334108.json  [192 of 295]
 2022 of 2022   100% in    0s    22.13 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334146.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334146.json  [193 of 295]
 19197 of 19197   100% in    0s   256.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334207.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334207.json  [194 of 295]
 16566 of 16566   100% in    0s   397.90 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334220.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334220.json  [195 of 295]
 4767 of 4767   100% in    0s    98.54 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334226.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334226.json  [196 of 295]
 4308 of 4308   100% in    0s   131.79 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334231.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334231.json  [197 of 295]
 8677 of 8677   100% in    0s   222.21 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334278.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334278.json  [198 of 295]
 8569 of 8569   100% in    0s   250.55 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334332.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334332.json  [199 of 295]
 12858 of 12858   100% in    0s   219.41 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334450.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334450.json  [200 of 295]
 14007 of 14007   100% in    0s   359.95 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334458.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334458.json  [201 of 295]
 4479 of 4479   100% in    0s   114.03 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334493.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334493.json  [202 of 295]
 6171 of 6171   100% in    0s   158.92 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334499.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334499.json  [203 of 295]
 3823 of 3823   100% in    0s    80.90 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334565.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334565.json  [204 of 295]
 4438 of 4438   100% in    0s   112.88 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334573.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334573.json  [205 of 295]
 1944 of 1944   100% in    0s    33.74 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334609.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334609.json  [206 of 295]
 14221 of 14221   100% in    0s   315.44 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334614.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334614.json  [207 of 295]
 2124 of 2124   100% in    0s    36.13 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334619.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334619.json  [208 of 295]
 1977 of 1977   100% in    0s    48.94 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334680.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334680.json  [209 of 295]
 36103 of 36103   100% in    0s   708.38 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334720.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334720.json  [210 of 295]
 6529 of 6529   100% in    0s   129.50 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334726.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334726.json  [211 of 295]
 10044 of 10044   100% in    0s   198.24 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334822.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334822.json  [212 of 295]
 38789 of 38789   100% in    0s   780.82 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334825.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334825.json  [213 of 295]
 4051 of 4051   100% in    0s    87.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334861.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334861.json  [214 of 295]
 23586 of 23586   100% in    0s   610.44 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334930.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334930.json  [215 of 295]
 34223 of 34223   100% in    0s   613.77 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334969.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413334969.json  [216 of 295]
 15007 of 15007   100% in    0s   363.61 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335049.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335049.json  [217 of 295]
 33912 of 33912   100% in    0s   899.31 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335054.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335054.json  [218 of 295]
 1981 of 1981   100% in    0s    40.27 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335091.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335091.json  [219 of 295]
 10862 of 10862   100% in    0s   247.35 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335171.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335171.json  [220 of 295]
 21670 of 21670   100% in    0s   434.73 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335176.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335176.json  [221 of 295]
 2032 of 2032   100% in    0s    32.64 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335181.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335181.json  [222 of 295]
 1990 of 1990   100% in    0s    75.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335219.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335219.json  [223 of 295]
 7639 of 7639   100% in    0s   198.75 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335281.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335281.json  [224 of 295]
 1826 of 1826   100% in    0s    35.53 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335325.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335325.json  [225 of 295]
 9839 of 9839   100% in    0s   284.32 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335409.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335409.json  [226 of 295]
 32663 of 32663   100% in    0s   929.98 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335447.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335447.json  [227 of 295]
 20782 of 20782   100% in    0s   480.31 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335535.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335535.json  [228 of 295]
 26629 of 26629   100% in    0s   671.02 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335571.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335571.json  [229 of 295]
 10670 of 10670   100% in    0s   268.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335652.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335652.json  [230 of 295]
 44583 of 44583   100% in    0s  1085.87 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335690.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335690.json  [231 of 295]
 3911 of 3911   100% in    0s    89.20 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335695.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335695.json  [232 of 295]
 4848 of 4848   100% in    0s    90.98 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335777.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335777.json  [233 of 295]
 18351 of 18351   100% in    0s   556.45 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335783.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335783.json  [234 of 295]
 2286 of 2286   100% in    0s    67.26 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335787.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335787.json  [235 of 295]
 1972 of 1972   100% in    0s    32.49 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335824.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335824.json  [236 of 295]
 6288 of 6288   100% in    0s   107.75 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335842.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335842.json  [237 of 295]
 17677 of 17677   100% in    0s   368.50 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335885.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335885.json  [238 of 295]
 10170 of 10170   100% in    0s   199.93 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335930.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335930.json  [239 of 295]
 9358 of 9358   100% in    0s   250.74 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335987.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413335987.json  [240 of 295]
 21030 of 21030   100% in    0s   578.28 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336042.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336042.json  [241 of 295]
 18955 of 18955   100% in    0s   331.24 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336101.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336101.json  [242 of 295]
 27102 of 27102   100% in    0s   730.97 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336131.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336131.json  [243 of 295]
 6521 of 6521   100% in    0s   169.50 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336139.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336139.json  [244 of 295]
 4042 of 4042   100% in    0s    77.94 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336176.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336176.json  [245 of 295]
 9026 of 9026   100% in    0s   231.53 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336253.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336253.json  [246 of 295]
 35984 of 35984   100% in    0s   761.21 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336288.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336288.json  [247 of 295]
 10479 of 10479   100% in    0s   179.69 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336380.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336380.json  [248 of 295]
 54270 of 54270   100% in    0s  1333.08 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336385.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336385.json  [249 of 295]
 2008 of 2008   100% in    0s    53.86 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336390.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336390.json  [250 of 295]
 1928 of 1928   100% in    0s    47.07 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336426.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336426.json  [251 of 295]
 33753 of 33753   100% in    0s   856.93 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336443.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336443.json  [252 of 295]
 8556 of 8556   100% in    0s   194.60 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336487.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336487.json  [253 of 295]
 56651 of 56651   100% in    0s  1108.06 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336526.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336526.json  [254 of 295]
 22141 of 22141   100% in    0s   658.95 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336531.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336531.json  [255 of 295]
 3972 of 3972   100% in    0s    91.35 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336618.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336618.json  [256 of 295]
 33238 of 33238   100% in    0s   720.13 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336654.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336654.json  [257 of 295]
 17142 of 17142   100% in    0s   458.25 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336733.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336733.json  [258 of 295]
 10553 of 10553   100% in    0s   271.11 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336741.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336741.json  [259 of 295]
 2288 of 2288   100% in    0s    67.84 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336752.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336752.json  [260 of 295]
 18549 of 18549   100% in    0s   439.73 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336795.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336795.json  [261 of 295]
 9040 of 9040   100% in    0s   196.86 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336847.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336847.json  [262 of 295]
 38229 of 38229   100% in    0s   921.30 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336885.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336885.json  [263 of 295]
 20913 of 20913   100% in    0s   428.99 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336891.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336891.json  [264 of 295]
 4387 of 4387   100% in    0s    74.99 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336898.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336898.json  [265 of 295]
 6753 of 6753   100% in    0s   107.08 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336972.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336972.json  [266 of 295]
 20503 of 20503   100% in    0s   490.70 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336980.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336980.json  [267 of 295]
 8306 of 8306   100% in    0s   113.68 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336985.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336985.json  [268 of 295]
 1906 of 1906   100% in    0s    46.94 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336990.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413336990.json  [269 of 295]
 2200 of 2200   100% in    0s    55.93 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337026.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337026.json  [270 of 295]
 12656 of 12656   100% in    0s   302.94 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337093.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337093.json  [271 of 295]
 12637 of 12637   100% in    0s   286.78 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337097.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337097.json  [272 of 295]
 1891 of 1891   100% in    0s    33.21 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337103.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337103.json  [273 of 295]
 2552 of 2552   100% in    0s    64.86 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337108.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337108.json  [274 of 295]
 11685 of 11685   100% in    0s   308.30 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337144.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337144.json  [275 of 295]
 7942 of 7942   100% in    0s   172.69 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337153.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337153.json  [276 of 295]
 19833 of 19833   100% in    0s   492.97 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337188.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337188.json  [277 of 295]
 6581 of 6581   100% in    0s   114.35 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337212.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337212.json  [278 of 295]
 17009 of 17009   100% in    0s   369.74 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337248.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337248.json  [279 of 295]
 4416 of 4416   100% in    0s   110.50 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337253.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337253.json  [280 of 295]
 4077 of 4077   100% in    0s    82.45 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337331.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337331.json  [281 of 295]
 15385 of 15385   100% in    0s   435.24 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337338.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337338.json  [282 of 295]
 12705 of 12705   100% in    0s   279.85 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337386.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337386.json  [283 of 295]
 12524 of 12524   100% in    0s   309.04 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337440.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337440.json  [284 of 295]
 11477 of 11477   100% in    0s   292.44 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337459.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337459.json  [285 of 295]
 2439 of 2439   100% in    0s    50.49 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337490.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337490.json  [286 of 295]
 15310 of 15310   100% in    0s   410.83 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337498.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337498.json  [287 of 295]
 4234 of 4234   100% in    0s   123.09 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337559.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337559.json  [288 of 295]
 13340 of 13340   100% in    0s   249.77 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337616.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337616.json  [289 of 295]
 11185 of 11185   100% in    0s   224.24 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337626.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337626.json  [290 of 295]
 4392 of 4392   100% in    0s   148.03 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337658.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337658.json  [291 of 295]
 4195 of 4195   100% in    0s    76.51 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337680.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337680.json  [292 of 295]
 16872 of 16872   100% in    0s   377.58 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337731.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337731.json  [293 of 295]
 13363 of 13363   100% in    0s   214.13 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337794.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337794.json  [294 of 295]
 6634 of 6634   100% in    0s   156.94 kB/s  done
s3://PLNY_Brasil/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337857.json -> ../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413337857.json  [295 of 295]
 2243 of 2243   100% in    0s    51.07 kB/s  done
Done. Downloaded 3768192 bytes in 13.9 seconds, 265.31 kB/s

In [5]:
# Collect the synced JSON files in sorted order and show the first path
files=sorted(glob.glob('../data/2014-08/*json'))
print(files[0])


../data/2014-08/DataSift-0caa7377dc9c1434844fcd67cb83f477-1413328220.json

In [6]:
# Report how many JSON files the glob matched
nFiles=len(files)
print('We have a total of %d files' % nFiles)


We have a total of 295 files

Number of tweets


In [7]:
# Load every tweet from every JSON file into one flat list.
tweets=[]
for path in files:
    # Read the file as one long byte string and convert it to unicode;
    # the with-block closes the handle as soon as the read is done
    with open(path,'r') as handle:
        fileString=handle.read().decode('utf-8')
    # Each non-blank line is parsed as its own JSON document; blank lines
    # (for example the one produced by a trailing newline) are skipped
    fileTweets=[json.loads(line) for line in fileString.split('\n') if line.strip()]
    # Add the list of tweets from this file to the global list
    tweets.extend(fileTweets)
print('We have %d tweets' % len(tweets))


We have 1748 tweets

Get Rid of Line Breaks in Tweets


In [8]:
intLinebreakError=0
tweetLinebreakError=0

def _flattenText(container, key):
    """Replace '\\n' with a space and drop '\\r' in container[key], in place.

    Returns True when the value was cleaned. When the key is missing or the
    value is not a string, container[key] is set to 'NaN' and False is
    returned so the caller can count the failure.
    """
    try:
        container[key] = container[key].replace('\n', ' ').replace('\r', '')
        return True
    except (KeyError, AttributeError, TypeError):
        container[key] = 'NaN'
        return False

for tweet in tweets:
  # setdefault creates the parent dict when it is absent, so writing the
  # 'NaN' placeholder cannot itself fail with a KeyError
  if not _flattenText(tweet.setdefault('interaction', {}), 'content'):
    intLinebreakError+=1
  if not _flattenText(tweet.setdefault('twitter', {}), 'text'):
    tweetLinebreakError+=1

print('Failed removing line breaks in %d interaction content' % intLinebreakError)
print('Failed removing line breaks in %d tweets' % tweetLinebreakError)


Failed removing line breaks in 0 interaction content
Failed removing line breaks in 0 tweets

Geolocate From User Location


In [9]:
# Add tweet['geolocated'] from the free-text user location.
# NOTE(review): `geo` is not imported in any cell shown in this notebook
# (execution count 2 is missing) - confirm where it comes from; on a fresh
# kernel every lookup below would fail with NameError and be counted as an error.
geoError=0
for tweet in tweets:
  try:
    # First choice: the location of the user under twitter.retweet.
    # [0][3] takes the fourth field of the first match; it appears to be a
    # country code (the sample check prints u'BR') - TODO confirm.
    tweet['geolocated']=geo.geoLocate(tweet['twitter']['retweet']['user']['location'])[0][3]
  except Exception:
    # `except Exception` keeps the best-effort behaviour for data problems
    # but lets Ctrl-C / kernel interrupts through
    try:
        # Fallback: the location of the user under twitter.user
        tweet['geolocated']=geo.geoLocate(tweet['twitter']['user']['location'])[0][3]
    except Exception:
        geoError+=1
        tweet['geolocated']=None
# Guard the ratio so an empty tweet list reports 0 instead of dividing by zero
geoShare=100.0*(1.0-(float(geoError)/len(tweets))) if tweets else 0.0
print('Couldn\'t geolocate %d tweets' % geoError)
print('Geolocated %d tweets' % (len(tweets) - (geoError)))
print('Managed to geolocate %d percent' % geoShare)


Couldn't geolocate 893 tweets
Geolocated 855 tweets
Managed to geolocate 48 percent

In [10]:
# Spot-check the geolocation result stored on one tweet
sampleTweet=tweets[3]
sampleTweet['geolocated']


Out[10]:
u'BR'

Insert Gender


In [12]:
import gender
g=gender.Gender()
# Smoke test: look up the author name of the first tweet
firstAuthorName=tweets[0]['interaction']['author']['name']
g.gender(firstAuthorName)


Out[12]:
{u'JR': {'gender': 'm',
  'probability': 1.0,
  'volume_female': 0.0,
  'volume_male': 2816.0}}

In [13]:
# Gender of tweeter or retweeter: store the lookup result for the
# interaction author's name on each tweet as tweet['gender'].
# NOTE(review): only raised exceptions are counted here. The TSV export
# further down reports many gender errors although this cell reported none,
# so the lookup presumably returns an empty result for unknown names - confirm.
genderError=0
for tweet in tweets:
  try:
    tweet['gender']=g.gender(tweet['interaction']['author']['name'])
  except Exception:
    # Best effort for data problems; Ctrl-C / kernel interrupts still propagate
    genderError+=1
    tweet['gender']=None
# Guard the ratio so an empty tweet list reports 0 instead of dividing by zero
genderShare=100.0*(1.0-(float(genderError)/len(tweets))) if tweets else 0.0
print('Couldn\'t add gender probability for %d tweets' % genderError)
print('Managed to add gender to %d p.c.' % genderShare)


Couldn't add gender probability for 0 tweets
Managed to add gender to 100 p.c.

In [15]:
# Spot-check: gender label of the first entry in the first tweet's lookup result
firstGenderEntry=list(tweets[0]['gender'].values())[0]
firstGenderEntry['gender']


Out[15]:
'm'

Define the Topics and Output Columns


In [16]:
# Topic labels; the export writes one 0/1 indicator column per topic
topics=['Campaign','Discrimination','Prevention','Testing']
# All-zero indicator row, used when a tweet's topics cannot be read
emptyTopics=[0]*len(topics)
# Column order of the TSV: fixed fields, then the topic indicators, then
# the list-valued and user fields
header=(
    ['id','time','content','type','datasift_lang','twitter_lang','twitter_location','UNGP_location',
     'datasift_gender','UNGP_gender','gender_prob','followers','friends','topic','subtopic']
    + topics
    + ['interaction_hashtags','twitter_mentions','normalised_links','links_domain','user_description',
       'user_screen_name']
)

Save Data to Disk

Save as JSON


In [17]:
# Dump the enriched tweets as a single JSON array
with open('../data/2014-08.json','wb') as jsonOut:
    serialized=json.dumps(tweets)
    jsonOut.write(serialized)

Save as TSV


In [18]:
# Keep a named handle to the TSV file so it can be flushed and closed later;
# outFile is the tab-delimited csv writer wrapped around it.
tsvHandle=open('../data/2014-08.tsv','wb')
outFile=csv.writer(tsvHandle,delimiter='\t')
outFile.writerow(header)
# Push the header to disk right away instead of leaving it in the buffer
tsvHandle.flush()

In [19]:
# Write one TSV row per tweet through `outFile` and collect the same cell
# values, flattened across all tweets, in `documents`. Every field is
# extracted on a best-effort basis: when extraction fails the field's
# fallback cells are written instead and the failure is counted per field.

def _utf8(value):
    """Encode a text value as UTF-8; raises for None/numbers, which counts as a field error."""
    return value.encode('utf-8')

def _firstOf(items):
    """Return the first element of a list or dict view; raises IndexError when it is empty."""
    return list(items)[0]

def _joined(values, lower=False):
    """Comma-join a list of strings, optionally lower-cased, and encode the result as UTF-8."""
    if lower:
        values=[v.lower() for v in values]
    return ','.join(values).encode('utf-8')

def _topicFlags(tweet):
    """Return one 0/1 flag per entry of `topics`, set for each topic in the tweet's tag_tree.

    Raises when the tag_tree is missing or names a topic that is not in `topics`.
    """
    flags=[0 for _ in topics]
    for name in tweet['interaction']['tag_tree']['topic']:
        flags[topics.index(name)]=1
    return flags

NAN_CELL=['NaN']

# (label used in the error summary, extractor returning a list of cells,
#  fallback cells). The order matches the column order of the TSV header.
fieldSpecs=[
    ('ID',                      lambda t: [t['interaction']['id']], NAN_CELL),
    ('Date',                    lambda t: [t['interaction']['created_at']], NAN_CELL),
    ('Content',                 lambda t: [_utf8(t['interaction']['content'].replace('\n',' '))], NAN_CELL),
    ('Type',                    lambda t: [_utf8(t['interaction']['type'])], NAN_CELL),
    ('DataSift language',       lambda t: [_utf8(t['language']['tag'])], NAN_CELL),
    ('Twitter language',        lambda t: [_utf8(t['twitter']['lang'])], NAN_CELL),
    ('Twitter Location',        lambda t: [_utf8(t['twitter']['user']['location'])], NAN_CELL),
    ('UNGP Location',           lambda t: [_utf8(t['geolocated'])], NAN_CELL),
    ('Gender',                  lambda t: [_utf8(t['demographic']['gender'])], NAN_CELL),
    ('UNGP gender',             lambda t: [_utf8(_firstOf(t['gender'].values())['gender'])], NAN_CELL),
    ('UNGP gender probability', lambda t: [_firstOf(t['gender'].values())['probability']], NAN_CELL),
    ('Follower',                lambda t: [t['twitter']['user']['followers_count']], NAN_CELL),
    ('Friends',                 lambda t: [t['twitter']['user']['friends_count']], NAN_CELL),
    ('Topic Key',               lambda t: [_utf8(_firstOf(t['interaction']['tag_tree']['topic'].keys()))], NAN_CELL),
    ('Topic Value',             lambda t: [_utf8(_firstOf(t['interaction']['tag_tree']['topic'].values())[0])], NAN_CELL),
    # One indicator cell per topic; falls back to an all-zero row
    ('Topic',                   _topicFlags, emptyTopics),
    # The list-valued fields are encoded (not decoded) like every other text
    # field, so non-ASCII hashtags, mentions and URLs are kept
    ('Interaction hashtag',     lambda t: [_joined(t['interaction']['hashtags'], lower=True)], NAN_CELL),
    ('Interaction mention',     lambda t: [_joined(t['twitter']['mentions'], lower=True)], NAN_CELL),
    ('Interaction link',        lambda t: [_joined(t['links']['normalized_url'])], NAN_CELL),
    # NOTE(review): in the recorded run this lookup failed for every tweet -
    # confirm that 'domain' is the right key under tweet['links']
    ('Domain',                  lambda t: [_joined(t['links']['domain'])], NAN_CELL),
    ('Description',             lambda t: [_utf8(t['twitter']['user']['description'])], NAN_CELL),
    ('Screen name',             lambda t: [_utf8(t['twitter']['user']['screen_name'])], NAN_CELL),
]

errorCounts=dict((label, 0) for label, _extract, _fallback in fieldSpecs)
documents=[]

for tweet in tweets:
  outList=[]
  for label, extract, fallback in fieldSpecs:
    try:
      cells=extract(tweet)
    except Exception:
      # Missing keys, empty lists, None values and encoding problems all end
      # up here; Ctrl-C / kernel interrupts are not swallowed
      cells=fallback
      errorCounts[label]+=1
    # The row and the flat document list always receive the same cells,
    # including the fallback ones
    outList.extend(cells)
    documents.extend(cells)
  # outFile is the csv writer created in an earlier cell; this cell does not
  # flush or close the file underneath it
  outFile.writerow(outList)

for label, _extract, _fallback in fieldSpecs:
  print('%d %s errors.' % (errorCounts[label], label))


0 ID errors.
0 Date errors.
0 Content errors.
0 Type errors.
87 DataSift language errors.
0 Twitter language errors.
569 Twitter Location errors.
893 UNGP Location errors.
837 Gender errors.
719 UNGP gender errors.
719 UNGP gender probability errors.
0 Follower errors.
0 Friends errors.
0 Topic Key errors.
0 Topic Value errors.
0 Topic errors.
0 Topic lengtherrors.
1668 Interaction hashtag errors.
1245 Interaction mention errors.
1725 Interaction link errors.
1748 Domain errors.
189 Description errors.
0 Screen name errors.

In [1]:
from IPython.core.display import HTML
# Read ../css/custom.css and render its contents as HTML in the cell output;
# the with-block closes the file once it has been read
with open("../css/custom.css", "r") as cssFile:
    styles = cssFile.read()
HTML(styles)


Out[1]:

In [ ]: