Player types

Understanding the types of players requires 3 different tables: player, extra (stores the carousel) and bid (the playlist, including the type of playlist).

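1) load playlist (bid) data (hive)

2) load extras with their extra type (mongo)

3) load players that use a carousel extra (mongo) and merge them with the playlist and extra data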


In [1]:
import pandas as pd

1) load playlist (Bid) data (hive)

-- Materialise the non-deleted rows of vidible_dim_bid as '|'-delimited text,
-- one row per line, sorted by id in descending order.
CREATE TABLE temp_e_bid
  ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n'
AS
SELECT
  id,
  company_id,
  semantic,
  dynamic,
  static_first,
  name
FROM vidible_dim_bid
WHERE deleted = 0
ORDER BY id DESC

In [57]:
# Load the Hive export of the bid (playlist) table; fields are '|'-delimited.
# skiprows=1 discards the first line of the file and `names` supplies the labels.
# NOTE(review): the Hive query also selects `name`, which is not listed in `names`;
# with index_col=False pandas keeps the first column as data and drops the
# surplus trailing field instead -- confirm that dropping `name` is intended.
bid_df = pd.read_csv(
    './bid_2017_07_30.csv',
    sep='|',
    index_col=False,
    names=['id', 'cid', 'semantic', 'dynamic', 'static_first'],
    skiprows=1,
)
# Single-argument print() emits the same text on Python 2 and Python 3.
print('bid_df size:  %s' % bid_df.shape[0])
bid_df.head(2)


bid_df size:  166031
Out[57]:
id cid semantic dynamic static_first
0 597d7d81c214e31eb31bd1e6 545db032e4b0af1a81424b48 False False True
1 597d1bd91de5a15b594e9ff8 57d325e276a6057ba4a05a00 False False True

2) Extras

fields: extraId,subType,uiPosition (subType is currently always CAROUSEL, because the query below filters on it)

// Print every non-deleted CAROUSEL extra as one CSV row, preceded by a header line.
print('extraId,subType,uiPosition');
db.extra.find({deleted: false, subType: "CAROUSEL"}).forEach(function (extraDoc) {
   // Plain string concatenation on purpose: a missing uiPosition is written
   // out as the text 'undefined'.
   print(extraDoc._id.valueOf() + ',' + extraDoc.subType + ',' + extraDoc.uiPosition);
});

In [58]:
# Load the carousel extras exported from mongo (extraId, subType, uiPosition).
extras_df = pd.read_csv('./extras_with_carousel_2017_07_30.csv')
# some are undefined but java fallback to default
# Assign the result back to the column: calling replace(..., inplace=True) on
# extras_df['uiPosition'] is a chained in-place update, which newer pandas
# warns about and which does not modify extras_df under Copy-on-Write.
extras_df['uiPosition'] = extras_df['uiPosition'].replace('undefined', 'DEFAULT')
# Single-argument print() behaves the same on Python 2 and Python 3.
print(extras_df.uiPosition.unique())
extras_df[extras_df['uiPosition'] != 'DEFAULT'].head(20)


['DEFAULT' 'BOTTOM']
Out[58]:
extraId subType uiPosition
28 551d1c1ee4b0297fa1beebcd CAROUSEL BOTTOM
32 55388b54e4b099bcc1225caa CAROUSEL BOTTOM
38 559f6a87e4b0c0b7288dadb6 CAROUSEL BOTTOM

3) players

mongo query for players with extra + additional info

fields: pid,parent_cid,cid,extraIds,templateId,staticVideos,bidId,playlistId

// Export every non-deleted player that references a CAROUSEL extra, one CSV row
// per player: pid,parent_cid,cid,extraIds,templateId,staticVideos,bidId,playlistId

// Collect the carousel extras twice: the raw _id values feed the $in query,
// and their valueOf() form is what the per-player matching below compares against.
var extraIds = [], extraValueOfIds = [];
db.extra.find({deleted: false, subType: "CAROUSEL"}).forEach(function (extraDoc) {
   extraIds.push(extraDoc._id);
   extraValueOfIds.push(extraDoc._id.valueOf());
});

// A player qualifies when a carousel extra appears either in its own
// extraConfigs or in templateConfiguration.extraConfigs.
var playersWithCarousel = db.player.find({
  deleted: false,
  $or: [
    {extraConfigs: {$exists: true, $elemMatch: {"extraId": {"$in": extraIds}}}},
    {"templateConfiguration.extraConfigs": {$exists: true, $elemMatch: {"extraId": {"$in": extraIds}}}}
  ] },
  {_id: 1, parentCompanyId: 1, companyId: 1, templateId: 1, templateConfiguration: 1, extraConfigs: 1, bidId: 1, playlistId: 1, videoIds: 1});

var playerIds = [];
print('pid,parent_cid,cid,extraIds,templateId,staticVideos,bidId,playlistId'); //header
while (playersWithCarousel.hasNext()) {
   var player = playersWithCarousel.next();

   // templateConfiguration.extraConfigs takes precedence over the player's own
   // extraConfigs unless it is absent or holds the 'NULL_OVERRIDE' marker.
   var templateConfigs = player.templateConfiguration && player.templateConfiguration.extraConfigs;
   var eConfig = (templateConfigs && templateConfigs != 'NULL_OVERRIDE') ? templateConfigs : player.extraConfigs;

   var eId = "";
   if (eConfig && typeof eConfig == 'object' && eConfig[0] && eConfig[0].extraId && eConfig.length > 0) {
      // Indexed loop with declared locals: the original for-in leaked `i` and
      // `extra` as globals and would also visit non-index properties.
      for (var i = 0; i < eConfig.length; i++) {
         var extra = eConfig[i];
         if (extra && extra.extraId && extraValueOfIds.indexOf(extra.extraId.valueOf()) != -1) {
            // NOTE(review): the original ended this branch with a no-op `continue`,
            // so the LAST matching carousel extra wins. That behaviour is kept;
            // add a `break` here if the first match was intended.
            eId = extra.extraId.valueOf();
         }
      }
   }

   var parentCompanyId = player.parentCompanyId ? player.parentCompanyId.valueOf() : '';
   var companyId = player.companyId ? player.companyId.valueOf() : ''; // guarded like the other optional fields
   var templateId = player.templateId ? player.templateId.valueOf() : '';
   var staticVideos = player.videoIds ? player.videoIds.length : 0; //number of static videos
   var bidId = player.bidId ? player.bidId.valueOf() : '';
   var playlistId = player.playlistId ? player.playlistId.valueOf() : '';

   print(player._id.valueOf() + ',' + parentCompanyId + ',' + companyId + ',' + eId + ',' + templateId + ',' + staticVideos + ',' + bidId + ',' + playlistId);

   // Record the player's id; the original pushed the whole document
   // (temp.valueOf()) instead of temp._id.valueOf().
   playerIds.push(player._id.valueOf());
}

After reading the players from the file, we merge them with the Extra and Bid data in order to enrich each player row.


In [66]:
# Load the players that use a carousel extra (exported from mongo) and enrich
# each row with its extra and bid (playlist) attributes. The row count is
# printed after every left merge, which makes any row fan-out from duplicate
# join keys visible. Single-argument print() emits the same text on Python 2
# and Python 3.
players_df = pd.read_csv('./players_with_carousel_2017_07_30.csv')
print('number of players with carousel:  %s' % players_df.shape[0])

## merge with extra
players_df = players_df.merge(extras_df, left_on='extraIds', right_on='extraId', how='left')
# 'extraIds' duplicates the merged-in 'extraId' column, so only one is kept.
players_df = players_df.drop('extraIds', axis=1)
print('number of players with carousel:  %s' % players_df.shape[0])

## merge with bid
# Both frames carry a 'cid' column; the suffixes tell the player's apart from the bid's.
players_df = players_df.merge(
    bid_df,
    left_on='bidId',
    right_on='id',
    how='left',
    suffixes=['_x-pl', '_x-bid'],
)
print('number of players with carousel:  %s' % players_df.shape[0])
players_df.head()


number of players with carousel:  9485
number of players with carousel:  9485
number of players with carousel:  9485
Out[66]:
pid parent_cid cid_x-pl templateId staticVideos bidId playlistId extraId subType uiPosition id cid_x-bid semantic dynamic static_first
0 57223186e4b0af35cc801d63 50d595ec0364e95588c77bd2 50d595ec0364e95588c77bd2 5668ad07e4b0f82be105f939 0 572192e8e4b0e7aba837de62 NaN 55388b54e4b099bcc1225caa CAROUSEL BOTTOM 572192e8e4b0e7aba837de62 551c163d1146fb1188cea145 False False True
1 5723a735e4b0e6cbf9c142e0 5314327fe4b015a18eb92bab 5314327fe4b015a18eb92bab 559ba4619fa0c117ee299ea0 0 57111cd1e4b007de6512a839 NaN 5539dff8e4b09639b4f30375 CAROUSEL DEFAULT 57111cd1e4b007de6512a839 5314327fe4b015a18eb92bab False True True
2 5728585fe4b0679e80b68293 56bdff2c67b6233ed4c2d3c5 56be00124076e70ee4171d93 561cd753e4b08c7ae81c8244 0 593971bc955a316f1c4f56d1 NaN 559f6a87e4b0c0b7288dadb6 CAROUSEL BOTTOM 593971bc955a316f1c4f56d1 564dfd34bbe5c146d4b33c86 False False True
3 5729c273e4b08bc56f3824bc 50d595ec0364e95588c77bd2 50d595ec0364e95588c77bd2 56bca647e4b0a5976ac0b95d 0 53ff82e5e4b0dba7506c3e7b NaN 559f6a87e4b0c0b7288dadb6 CAROUSEL BOTTOM 53ff82e5e4b0dba7506c3e7b 50d595ec0364e95588c77bd2 False False True
4 57304a2de4b0cda13709266d 53180f5de4b066208a63279a 53180f5de4b066208a63279a 565ec775e4b092ebc9685cdf 0 565dc414e4b043f1b9be85a8 NaN 5410474ae4b0904615b980cb CAROUSEL DEFAULT 565dc414e4b043f1b9be85a8 53180f5de4b066208a63279a False False True

In [ ]: