In [3]:
import pandas as pd

In [17]:
df1 = pd.read_csv("Youtube Subscribed vs Not Subscribed.csv")
df1.set_index("date",inplace=True)
df2 = pd.read_csv("Youtube Net Subscribers.csv")
df2.set_index("date",inplace=True)
df2.head()


Out[17]:
views averageViewDuration averageViewPercentage comments likes dislikes estimatedMinutesWatched subscribersGained subscribersLost netSubscribers totalWatchTime
date
2015-12-01 2927 135 64.175947 3 28 0 6599 43 1 42 395145
2015-11-30 4872 142 71.099189 3 32 2 11550 45 0 45 691824
2015-11-29 10281 142 71.707858 4 23 1 24452 89 5 84 1459902
2015-11-28 9906 150 77.324290 21 44 0 24886 155 4 151 1485900
2015-11-27 14725 149 79.935564 15 76 3 36808 211 5 206 2194025

In [18]:
df2.head()
views = df1["views"] > 0
subscribers = df1["subscribedStatus"]=="SUBSCRIBED"
df2[views & subscribers]


/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/frame.py:1942: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  "DataFrame index.", UserWarning)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-18-5bec1b15ba57> in <module>()
      2 views = df1["views"] > 0
      3 subscribers = df1["subscribedStatus"]=="SUBSCRIBED"
----> 4 df2[views & subscribers]

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in __getitem__(self, key)
   1906         if isinstance(key, (Series, np.ndarray, Index, list)):
   1907             # either boolean or fancy integer index
-> 1908             return self._getitem_array(key)
   1909         elif isinstance(key, DataFrame):
   1910             return self._getitem_frame(key)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _getitem_array(self, key)
   1946             # check_bool_indexer will throw exception if Series key cannot
   1947             # be reindexed to match DataFrame rows
-> 1948             key = check_bool_indexer(self.index, key)
   1949             indexer = key.nonzero()[0]
   1950             return self.take(indexer, axis=0, convert=False)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/indexing.pyc in check_bool_indexer(ax, key)
   1664     result = key
   1665     if isinstance(key, ABCSeries) and not key.index.equals(ax):
-> 1666         result = result.reindex(ax)
   1667         mask = com.isnull(result._values)
   1668         if mask.any():

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in reindex(self, index, **kwargs)
   2257     @Appender(generic._shared_docs['reindex'] % _shared_doc_kwargs)
   2258     def reindex(self, index=None, **kwargs):
-> 2259         return super(Series, self).reindex(index=index, **kwargs)
   2260 
   2261     @Appender(generic._shared_docs['fillna'] % _shared_doc_kwargs)

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in reindex(self, *args, **kwargs)
   1846         # perform the reindex on the axes
   1847         return self._reindex_axes(axes, level, limit, tolerance,
-> 1848                                   method, fill_value, copy).__finalize__(self)
   1849 
   1850     def _reindex_axes(self, axes, level, limit, tolerance, method,

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
   1865             obj = obj._reindex_with_indexers(
   1866                 {axis: [new_index, indexer]},
-> 1867                 fill_value=fill_value, copy=copy, allow_dups=False)
   1868 
   1869         return obj

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/generic.pyc in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
   1957                                                 fill_value=fill_value,
   1958                                                 allow_dups=allow_dups,
-> 1959                                                 copy=copy)
   1960 
   1961         if copy and new_data is self._data:

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/internals.pyc in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
   3390         # some axes don't allow reindexing with dups
   3391         if not allow_dups:
-> 3392             self.axes[axis]._can_reindex(indexer)
   3393 
   3394         if axis >= self.ndim:

/Users/Mike/anaconda/lib/python2.7/site-packages/pandas/core/index.pyc in _can_reindex(self, indexer)
   2017         # trying to reindex on an axis with duplicates
   2018         if not self.is_unique and len(indexer):
-> 2019             raise ValueError("cannot reindex from a duplicate axis")
   2020 
   2021     def reindex(self, target, method=None, level=None, limit=None,

ValueError: cannot reindex from a duplicate axis

In [24]:
subscribers = df1["subscribedStatus"] == "SUBSCRIBED"
df1[subscribers]
df2.loc[:,"subscriberViews"] = df1[subscribers]["views"]

In [25]:
df2.head()


Out[25]:
views averageViewDuration averageViewPercentage comments likes dislikes estimatedMinutesWatched subscribersGained subscribersLost netSubscribers totalWatchTime subscriberViews
date
2015-12-01 2927 135 64.175947 3 28 0 6599 43 1 42 395145 442
2015-11-30 4872 142 71.099189 3 32 2 11550 45 0 45 691824 586
2015-11-29 10281 142 71.707858 4 23 1 24452 89 5 84 1459902 907
2015-11-28 9906 150 77.324290 21 44 0 24886 155 4 151 1485900 1124
2015-11-27 14725 149 79.935564 15 76 3 36808 211 5 206 2194025 1337

In [ ]: