In [1]:
import pandas as pd
In [57]:
# Load the bid export. The file is pipe-delimited; its first line is skipped
# (presumably the file's own header row -- TODO confirm) and the explicit
# column names below are used instead.
bid_df = pd.read_csv(
    './bid_2017_07_30.csv',
    sep='|',
    index_col=False,
    names=['id', 'cid', 'semantic', 'dynamic', 'static_first'],
    skiprows=1,
)
# A single-argument print() call behaves the same on Python 2 and Python 3.
# The two spaces after the colon reproduce the output of the former
# `print 'bid_df size: ', n` statement exactly.
print('bid_df size:  %d' % bid_df.shape[0])
bid_df.head(2)
Out[57]:
Mongo query for extras. Fields: extraId,subType,uiPosition (currently subType is always CAROUSEL).
// Mongo shell script: print one CSV row per non-deleted CAROUSEL extra.
var extrasWithCarousel = db.extra.find({deleted: false, subType: "CAROUSEL"});
print('extraId,subType,uiPosition'); // header
extrasWithCarousel.forEach(function (extraDoc) {
    // String concatenation (not Array.join) is deliberate: a missing
    // uiPosition is emitted as the literal text "undefined".
    var row = extraDoc._id.valueOf() + ',' + extraDoc.subType + ',' + extraDoc.uiPosition;
    print(row);
});
In [58]:
extras_df = pd.read_csv('./extras_with_carousel_2017_07_30.csv')
# Some extras have uiPosition 'undefined'; per the original author's note the
# Java side falls back to the default in that case, so normalize to 'DEFAULT'.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the selected Series: that chained in-place
# call mutates a temporary and does not update extras_df under pandas
# Copy-on-Write.
extras_df['uiPosition'] = extras_df['uiPosition'].replace('undefined', 'DEFAULT')
# Single-argument print() works identically on Python 2 and Python 3.
print(extras_df['uiPosition'].unique())
extras_df[extras_df['uiPosition'] != 'DEFAULT'].head(20)
Out[58]:
Mongo query for players that use a carousel extra, plus additional player info.
Fields: pid,parent_cid,cid,extraIds,templateId,staticVideos,bidId,playlistId
// Mongo shell script: print one CSV row per non-deleted player that references
// a CAROUSEL extra, either in its own extraConfigs or in
// templateConfiguration.extraConfigs.
// Columns: pid,parent_cid,cid,extraIds,templateId,staticVideos,bidId,playlistId

// Collect the _id of every non-deleted CAROUSEL extra twice: the raw _id
// values feed the $in query, their valueOf() form feeds the indexOf lookup.
var extrasWithCarousel = db.extra.find({deleted: false, subType: "CAROUSEL"});
var temp, extraIds = [], extraValueOfIds = [];
while (extrasWithCarousel.hasNext()) {
    temp = extrasWithCarousel.next()._id;
    extraIds.push(temp);
    extraValueOfIds.push(temp.valueOf());
}

var playersWithCarousel = db.player.find({
    deleted: false,
    $or: [
        {extraConfigs: {$exists: true, $elemMatch: {"extraId": {"$in": extraIds}}}},
        {"templateConfiguration.extraConfigs": {$exists: true, $elemMatch: {"extraId": {"$in": extraIds}}}}
    ]
}, {_id: 1, parentCompanyId: 1, companyId: 1, templateId: 1, templateConfiguration: 1, extraConfigs: 1, bidId: 1, playlistId: 1, videoIds: 1});

// All loop variables are declared here so none of them leaks as an implicit global.
var parentCompanyId, companyId, eConfig, eId, extra, i, templateId, staticVideos, bidId, playlistId, playerIds = [];
print('pid,parent_cid,cid,extraIds,templateId,staticVideos,bidId,playlistId'); //header
while (playersWithCarousel.hasNext()) {
    temp = playersWithCarousel.next();
    eId = "";
    // The template-level extraConfigs take precedence unless they are absent
    // or set to the 'NULL_OVERRIDE' marker; otherwise use the player's own.
    eConfig = (temp.templateConfiguration && temp.templateConfiguration.extraConfigs && temp.templateConfiguration.extraConfigs != 'NULL_OVERRIDE') ? temp.templateConfiguration.extraConfigs : temp.extraConfigs;
    // Scan every entry. Entries without an extraId are skipped individually,
    // so a carousel extra that is not at index 0 is still found.
    if (Array.isArray(eConfig)) {
        for (i = 0; i < eConfig.length; i++) {
            extra = eConfig[i];
            if (extra && extra.extraId && extraValueOfIds.indexOf(extra.extraId.valueOf()) != -1) {
                // NOTE(review): when several entries match, the last one wins.
                // Add a `break` here if the first match is the one wanted.
                eId = extra.extraId.valueOf();
            }
        }
    }
    parentCompanyId = temp.parentCompanyId ? temp.parentCompanyId.valueOf() : '';
    // Guarded like the other optional ids so one player without a companyId
    // does not abort the whole export.
    companyId = temp.companyId ? temp.companyId.valueOf() : '';
    templateId = temp.templateId ? temp.templateId.valueOf() : '';
    staticVideos = temp.videoIds ? temp.videoIds.length : 0; //number of static videos
    bidId = temp.bidId ? temp.bidId.valueOf() : '';
    playlistId = temp.playlistId ? temp.playlistId.valueOf() : '';
    print(temp._id.valueOf() + ',' + parentCompanyId + ',' + companyId + ',' + eId + ',' + templateId + ',' + staticVideos + ',' + bidId + ',' + playlistId);
    // Record the player's id, not the whole document.
    playerIds.push(temp._id.valueOf());
}
In [66]:
players_df = pd.read_csv('./players_with_carousel_2017_07_30.csv')

# The row count is printed after loading and again after each merge. A left
# merge keeps every left-hand row, so the count only changes when a key is
# duplicated on the right-hand side.
# Single-argument print() behaves the same on Python 2 and Python 3; the two
# spaces after the colon reproduce the output of the former print statement.
row_count_msg = 'number of players with carousel:  %d'
print(row_count_msg % players_df.shape[0])

## merge with extra
players_df = players_df.merge(extras_df, left_on='extraIds', right_on='extraId', how='left')
# 'extraIds' duplicates the 'extraId' column brought in by the merge.
players_df = players_df.drop('extraIds', axis=1)
print(row_count_msg % players_df.shape[0])

## merge with bid
players_df = players_df.merge(
    bid_df,
    left_on='bidId',
    right_on='id',
    how='left',
    suffixes=['_x-pl', '_x-bid'],
)
print(row_count_msg % players_df.shape[0])
players_df.head()
Out[66]:
In [ ]: