In [1]:
    
import ga_utils as g
import pandas as pd
import auth as auth
import matplotlib
from ggplot import *
import config as config
%matplotlib inline
    
In [2]:
    
# Python 2 only: make UTF-8 the interpreter's default string encoding. Per the
# original note, ggplot needs this when product names contain special characters.
# `reload` is not a builtin and `sys.setdefaultencoding` does not exist on
# Python 3 (whose default encoding is already UTF-8), so the hack is skipped there
# instead of raising NameError.
import sys
if sys.version_info[0] == 2:
    reload(sys)  # restores sys.setdefaultencoding, which site.py removes at startup
    sys.setdefaultencoding('utf-8')
    
In [3]:
    
# Launch the auth process through the project-local `auth` module.
# NOTE(review): presumably this obtains the credentials used by the `g.*` calls
# below — confirm in auth.py.
auth.main()
    
    Out[3]:
In [4]:
    
# List accounts via the project-local ga_utils helper; the return value is shown
# as the cell output.
g.list_accounts()
    
    Out[4]:
In [5]:
    
# List properties for the `config.property` value taken from the project-local
# config module; the return value is shown as the cell output.
g.list_properties(config.property)
    
    Out[5]:
In [6]:
    
# List profiles for the configured `config.property` / `config.ua` pair; the
# return value is shown as the cell output.
g.list_profiles(config.property,config.ua)
    
    Out[6]:
In [7]:
    
# Fetch the transaction rows of the configured profile for the analysis window.
date_range = {'start_date': '2016-01-01', 'end_date': '2016-05-20'}
df = g.get_transactions(config.profile_id, **date_range)
    
In [8]:
    
# Every row counts as one occurrence; the column is summed after grouping.
df['count'] = 1

# Self-join on the transaction id: each row of the result pairs two rows of `df`
# that share a transaction (columns get the `_l` / `_r` suffixes).
left = df.set_index('ga:transactionId')
right = left
df_product_comb = left.join(right, lsuffix='_l', rsuffix='_r')

# Keep only rows whose left name sorts strictly before the right name: this drops
# self-pairs and keeps a single orientation of each pair.
name_l = df_product_comb['ga:productName_l']
name_r = df_product_comb['ga:productName_r']
df_product_comb_filtered = df_product_comb[name_l < name_r]
    
In [9]:
    
# Collapse the pair rows into one row per (left product, right product), summing
# the counts. `count_r` is dropped and the pair keys are turned back into
# ordinary columns.
pair_keys = ['ga:productName_l', 'ga:productName_r']
product_cluster = (
    df_product_comb_filtered
    .groupby(pair_keys)
    .sum()
    .drop('count_r', axis=1)
    .reset_index()
)
    
In [10]:
    
# Sanity check: grouping must neither lose nor duplicate product pairs, so the
# pair count of the *filtered* join has to equal the total of the grouped counts.
# (The unfiltered `df_product_comb` still contains self-pairs and both orderings
# of every pair, so its total cannot match the grouped one.)
pairs_before_grouping = df_product_comb_filtered['count_l'].sum()
pairs_after_grouping = product_cluster['count_l'].sum()
assert pairs_before_grouping == pairs_after_grouping, (
    'grouping changed the number of product pairs: %s != %s'
    % (pairs_before_grouping, pairs_after_grouping))
pairs_after_grouping
    
    Out[10]:
In [11]:
    
# Number of transaction rows per product, as a one-column frame indexed by
# product name.
# Only `count` is aggregated: summing every column and then renaming the result
# to a single 'transactions' column raises a length mismatch as soon as `df`
# carries any other summable column.
transactions_by_product = (
    df.groupby('ga:productName')['count']
    .sum()
    .to_frame('transactions')
)
transactions_by_product
    
    Out[11]:
In [12]:
    
# Number of distinct transactions, used as the denominator of every probability
# in the lift computation.
# `df` holds several rows per transaction (that is what the self-join on
# 'ga:transactionId' pairs up), so summing the per-row `count` would count a
# multi-product transaction once per product and inflate the lift values.
total_transactions = df['ga:transactionId'].nunique()
total_transactions
    
    Out[12]:
In [13]:
    
# Attach the per-product totals to both sides of each pair: the first join adds
# 'transactions' for the left product, the second adds 'transactions_r' for the
# right product.
per_product = transactions_by_product['transactions']
pc = (
    product_cluster
    .join(per_product, how='left', on='ga:productName_l', rsuffix='_tot')
    .join(per_product, how='left', on='ga:productName_r', rsuffix='_r')
)
    
In [14]:
    
# Lift of each product pair: the observed share of the pair divided by the share
# expected if both products were bought independently.
prob_l = pc['transactions'] / total_transactions
prob_r = pc['transactions_r'] / total_transactions
prob_expected = prob_l * prob_r
prob_observed = pc['count_l'] / total_transactions

# Columns are added in the same order as before so the displayed table keeps its layout.
pc['p_achat_l'] = prob_l
pc['p_achat_r'] = prob_r
pc['p_r_x_l'] = prob_expected
pc['prob_observed'] = prob_observed
pc['lift'] = prob_observed / prob_expected
    
In [15]:
    
# Display the product pairs ordered by descending lift. The sorted frame is only
# shown, not assigned, so `pc` keeps its original row order.
pc.sort_values(by='lift', ascending=False)
    
    Out[15]:
In [19]:
    
# Scatter plot of lift (x, also point size) against the right-hand product's
# transaction total (y), coloured by the left-hand product.
lift_plot = ggplot(
    aes(x='lift', y='transactions_r', colour='ga:productName_l', size='lift'),
    data=pc,
)
lift_points = geom_point()
lift_theme = theme_matplotlib(
    rc={"figure.figsize": "11,15", "legend.loc": "lower center"},
    matplotlib_defaults=False,
)
lift_plot + lift_points + lift_theme
    
    
    Out[19]:
In [20]:
    
from IPython.display import Javascript

# Serialise `pc` to JSON and publish it to the browser as `window.pc`, where the
# %%javascript cells below read it. The `{}` in the template is the str.format
# placeholder that receives the JSON text.
pc_json = pc.to_json()
js_template = """
           window.pc={};
           """
Javascript(js_template.format(pc_json))
    
    Out[20]:
In [21]:
    
%%javascript
/*
 * pandas' to_json() output is column-oriented: each top-level key is a
 * column name and its value maps a row key to that row's cell.
 * D3 wants one object per row, so this pivots the structure into an array
 * of row objects, each carrying every column.
 * (The same reshaping could be done on the Python side instead.)
 */
function convertData(data){
  var columns = Object.keys(data);
  // Every column holds the same row keys, so the first one drives the iteration.
  var firstColumn = data[columns[0]];
  var rows = [];
  for(var rowKey in firstColumn){
    var row = {};
    columns.forEach(function(column){
      row[column] = data[column][rowKey];
    });
    rows.push(row);
  }
  return rows;
}
// Publish the helper on `window` so that a later %%javascript cell can call it
// (presumably each cell is evaluated in its own scope — confirm).
window.convertData = convertData;
    
    
In [22]:
    
%%javascript
// Register the RequireJS module id 'd3', served from the cdnjs CDN (D3 3.4.8).
// The path is protocol-relative ('//...'), so it uses the same scheme as the
// notebook page, and omits the '.js' extension, which RequireJS appends itself.
require.config({
  paths: {
      d3: '//cdnjs.cloudflare.com/ajax/libs/d3/3.4.8/d3.min'
  }
});
    
    
In [122]:
    
%%javascript
// Draw the product-pair network as a D3 (v3) force-directed graph.
// Relies on `window.pc` and `convertData` set up by earlier cells, and on the
// notebook-provided `element`, jQuery (`$`) and RequireJS.
require(['d3'], function(d3){
  // One object per product pair.
  var graph = convertData(window.pc);

  // Remove any chart left by a previous run of this cell before creating a new one.
  $("#chart1").remove();
  element.append("<div id='chart1'></div>");
  $("#chart1").width("960px");
  $("#chart1").height("600px");

  var margin = {top: 20, right: 20, bottom: 30, left: 40};
  var width = 880 - margin.left - margin.right;
  var height = 500 - margin.top - margin.bottom;
  var svg = d3.select("#chart1").append("svg")
    .style("position", "relative")
    .style("max-width", "960px")
    .attr("width", width + "px")
    .attr("height", (height + 50) + "px");

  // Nodes (one per distinct product) and links (one per product pair) are built
  // in a single pass. The map gives constant-time lookup of a product's node
  // index instead of rescanning the node list for every link.
  var data = {nodes: [], links: []};
  var indexByProduct = d3.map();

  // Return the node index of a product, creating the node on first sight.
  // A node's "group" is its position in data.nodes.
  function nodeIndex(productName){
    if(!indexByProduct.has(productName)){
      var index = data.nodes.length;
      indexByProduct.set(productName, index);
      data.nodes.push({"group": index, "product": productName});
    }
    return indexByProduct.get(productName);
  }

  graph.forEach(function(pair){
    // Left product first, then right, so nodes keep first-appearance order.
    var source = nodeIndex(pair['ga:productName_l']);
    var target = nodeIndex(pair['ga:productName_r']);
    // The force layout resolves numeric source/target as indices into data.nodes.
    data.links.push({"source": source, "target": target, "value": pair['lift']});
  });

  var color = d3.scale.category20();
  var force = d3.layout.force()
    .charge(-120)
    .linkDistance(30)
    .size([width, height]);

  force
      .nodes(data.nodes)
      .links(data.links)
      .start();

  // Link thickness grows with the square root of the pair's lift.
  var link = svg.selectAll(".link")
      .data(data.links)
    .enter().append("line")
      .attr("class", "link")
      .style("stroke-width", function(d) { return Math.sqrt(d.value) *2 });

  var node = svg.selectAll(".node")
      .data(data.nodes)
    .enter().append("circle")
      .attr("class", "node")
      .attr("r", 5)
      .style("fill", function(d) { return color(d.product); })
      .call(force.drag);

  // A <title> child carrying the product name as its text.
  node.append("title")
      .attr("dx", 12)
      .attr("dy", ".35em")
      .attr('color', "black")
      .text(function(d) { return d.product; });

  // On every simulation tick, move lines and circles to the layout's positions.
  force.on("tick", function() {
    link.attr("x1", function(d) { return d.source.x; })
        .attr("y1", function(d) { return d.source.y; })
        .attr("x2", function(d) { return d.target.x; })
        .attr("y2", function(d) { return d.target.y; });
    node.attr("cx", function(d) { return d.x; })
        .attr("cy", function(d) { return d.y; });
  });

  $('.node').css('stroke', "#fff")
  $('.node').css('stroke-width','1.5')
  $('.link').css('stroke', '#999')
  $('.link').css('stroke-opacity', '0.6')
});
    
    
In [ ]: