In [1]:
using DataFrames

In [2]:
type NPedNode
    ind::Int64
    sire::Int64
    dam::Int64
    f::Float64 
end

In [3]:
type NPedigree
    currentID::Int64
    idMap::Dict
    nped::Array{NPedNode,1}
    aij::SparseMatrixCSC{Float64,Int64}
end

In [4]:
type PedNode
    seqID::Int64
    sire::UTF8String
    dam::UTF8String
end

In [5]:
function code!(ped::NPedigree,id::UTF8String)
    if ped.idMap[id].seqID!=0
        return
    end
    sireID = ped.idMap[id].sire
    damID  = ped.idMap[id].dam
    if sireID!="0" && ped.idMap[sireID].seqID==0
        code!(ped,sireID)
    end 
    if damID!="0" && ped.idMap[damID].seqID==0
        code!(ped,damID)
    end
    ped.idMap[id].seqID = ped.currentID
    ped.currentID += 1
end


Out[5]:
code! (generic function with 1 method)

In [6]:
function fillMap!(ped::NPedigree,df)
    n = size(df,1)
    for i in df[:,2]
        if i!="0"
            ped.idMap[i]=PedNode(0,"0","0")
        end
    end
    for i in df[:,3]
        if i!="0"
            ped.idMap[i]=PedNode(0,"0","0")
        end
    end 
    j=1
    for i in df[:,1]
        ped.idMap[i]=PedNode(0,df[j,2],df[j,3])
        j+=1
    end 
end


Out[6]:
fillMap! (generic function with 1 method)

In [29]:
function fillNPed!(ped::NPedigree,df)
    resize!(ped.nped,ped.currentID-1)
    for val in values(ped.idMap)
        npedi = NPedNode(0,0,0,0.0)
        ped.nped[val.seqID] = npedi
        npedi.ind = val.seqID
        if val.sire!="0"
            npedi.sire = ped.idMap[val.sire].seqID
        else
            npedi.sire = 0
        end
        if val.dam!="0"
            npedi.dam  = ped.idMap[val.dam].seqID
        else
           npedi.dam  = 0 
        end
        npedi.f    = -1.0
    end
end


Out[29]:
fillNPed! (generic function with 1 method)

In [152]:
function calcAddRel1234!(ped::NPedigree,id1::Int64,id2::Int64)
    #@printf "calcRel between %s and %s \n" id1 id2
    if id1==0 || id2==0           # zero
        return 0.0
    end
    old,yng = id1<id2 ? (id1,id2):(id2,id1)
    if ped.aij[old,yng]>0.0      # already done
        #println("returning saved value")
        return ped.aij[yng,old]
    end
    sireOfYng = ped.nped[yng].sire
    damOfYng  = ped.nped[yng].dam 
    if old==yng                       # aii
        #aii = 1.0 + calcInbreeding!(ped,old)
        aii = 1 + 0.5*calcAddRel1234!(ped,damOfYng,sireOfYng) 
        ped.aij[old,old] = aii
        return (aii)   
    end
    aOldDamYoung  = (old==0 || damOfYng ==0)? 0.0:calcAddRel1234!(ped,old,damOfYng)
    aOldSireYoung = (old==0 || sireOfYng==0)? 0.0:calcAddRel1234!(ped,old,sireOfYng)
    aij = 0.5*(aOldSireYoung + aOldDamYoung)
    #aij = 0.5*(calcAddRel!(ped,old,sireOfYng) + calcAddRel!(ped,old,damOfYng))
    ped.aij[yng,old] = aij
    ped.aij[old,yng] = 1.0
    return(aij)
end


Out[152]:
calcAddRel1234! (generic function with 1 method)

In [153]:
function calcInbreeding!(ped::NPedigree,id::Int64)
    #@printf "calcInbreeding for: %s \n" id
    if ped.nped[id].f > -1.0
        return ped.nped[id].f
    end
    sireID = ped.nped[id].sire
    damID  = ped.nped[id].dam
    if (sireID==0 || damID==0 )
        ped.nped[id].f = 0.0
    else
        ped.nped[id].f = 0.5*calcAddRel1234!(ped,sireID,damID)
    end
end


Out[153]:
calcInbreeding! (generic function with 1 method)

In [162]:
pedFile = "LICPedigree.txt"
df = readtable(pedFile, separator = ' ',header=false)


Out[162]:
x1x2x3
1IDSireIDDamID
2622567256804854627
3675975252222664690
4678983645432431169
5729177149440402490792
6750075060007324141
7771100490854418801
8771299190614916673
9774913890615922933
10791395890403192245
11794506089812563472
12812127290245902744
13815664844033815508
1483539153588143323591
15874143145437982643
16879605388334947017192
17879609258482717172
18894448829857167995245
19898700945706724086
20899874829857336971274
21903355290652980443
2290353383590645871662
2391017693590644800105
24915685645704917638
25916769129856984295172
26925643152632655466938
27927980371465114485836
28928699564983995480172
29939330790658417036
30942308150145345833161
&vellip&vellip&vellip&vellip

In [163]:
idMap = Dict()
aij = spzeros(1,1)
nped = NPedNode[]
ped = NPedigree(1,idMap,nped,aij)
fillMap!(ped,df)
for id in keys(ped.idMap)
    code!(ped,id)
end
n = ped.currentID - 1
ped.aij = spzeros(n,n)


Out[163]:
1148677x1148677 sparse matrix with 0 Float64 entries:

In [165]:
fillNPed!(ped,df)

In [166]:
pedFile = open("LICPedigree.seq", "w")
@printf(pedFile,"ind sire dam\n")
n = ped.currentID - 1
for i=1:n
    @printf(pedFile,"%4d %4d %4d \n",ped.nped[i].ind, ped.nped[i].sire, ped.nped[i].dam)
end 
close(pedFile)

In [132]:
Profile.clear()
Profile.init(delay=0.001)

In [159]:
@time for ind in 1:1000
    calcInbreeding!(ped,ind)
end


  2.586495 seconds (1.51 k allocations: 87.891 KB)

In [160]:
typeof(ped.aij)


Out[160]:
SparseMatrixCSC{Float64,Int64}

In [ ]:
Profile.print()

In [126]:
function  mkPed(pedFile::AbstractString) 
    df = readtable(pedFile,eltypes=[UTF8String,UTF8String,UTF8String],separator = ' ',header=false)  
    idMap = Dict()
    aij = spzeros(1,1)
    nped = NPedNode[]
    ped = NPedigree(1,idMap,nped,aij)
    fillMap!(ped,df)
    for id in keys(ped.idMap)
        code!(ped,id)
    end
    n = ped.currentID - 1
    ped.aij = spzeros(n,n)
    fillNPed!(ped,df)
    for i in 1:n
        calcInbreeding!(ped,i)
        if i%1000 == 0
            println(i)
        end
    end 
    return (ped)
end


Out[126]:
mkPed (generic function with 1 method)

In [60]:
Profile.clear()

In [61]:
Profile.init(delay=0.01)

In [127]:
ped = mkPed("ped.txt");

In [128]:
ped.nped


Out[128]:
9-element Array{NPedNode,1}:
 NPedNode(1,0,0,0.0)  
 NPedNode(2,0,0,0.0)  
 NPedNode(3,1,2,0.0)  
 NPedNode(4,1,3,0.25) 
 NPedNode(5,4,0,0.0)  
 NPedNode(6,0,0,0.0)  
 NPedNode(7,0,0,0.0)  
 NPedNode(8,6,7,0.0)  
 NPedNode(9,3,4,0.375)