Commit 97394d35 authored by Guillaume Gautier's avatar Guillaume Gautier

Refactor /src, need more docstrings, argument types, explicit variable names

parent f1dd1f84
module RandomForests
using LightGraphs,LinearAlgebra,SparseArrays,SimpleWeightedGraphs
import StatsBase.denserank,Statistics.mean,Base.show,Base.sum,
StatsBase.counts
import LightGraphs.SimpleDiGraph,LightGraphs.nv,LightGraphs.ne,LightGraphs.degree,LightGraphs.outneighbors
import SimpleWeightedGraphs.AbstractSimpleWeightedGraph
import LightGraphs:
nv,ne,outneighbors,is_directed,inneighbors
include("./alias.jl")
using LinearAlgebra, SparseArrays
import Base:
show,
sum
import Statistics: mean
import StatsBase:
counts,
denserank
export random_forest,smooth,smooth_rf,smooth_rf_adapt,RandomForest,
SimpleDiGraph,nroots,next,Partition,PreprocessedWeightedGraph
export reduced_graph,smooth_ms
export self_roots
export random_successor
export random_spanning_tree
using LightGraphs
import LightGraphs:
degree,
inneighbors,
is_directed,
nv,
ne,
outneighbors,
SimpleDiGraph
using SimpleWeightedGraphs
import SimpleWeightedGraphs:
AbstractSimpleWeightedGraph
struct RandomForest
next :: Array{Int}
roots :: Set{Int}
nroots :: Int
root :: Array{Int,1}
end
include("./alias.jl")
# abstract type AbstractSimpleWeightedGraph{T<:Integer,U<:Real} <: AbstractGraph{T} end
struct PreprocessedWeightedGraph{T<:Integer, U<:Real} <: AbstractSimpleWeightedGraph{T, U}
weights::SparseMatrixCSC{U, T}
K :: SparseMatrixCSC{Int64, T}
P :: SparseMatrixCSC{Float64, T}
export
random_forest,
smooth,
smooth_rf,
smooth_rf_adapt,
RandomForest,
SimpleDiGraph,
nroots,
next,
Partition,
PreprocessedWeightedGraph,
reduced_graph,
smooth_ms,
self_roots,
random_successor,
random_spanning_tree
#=
TODO:
- need clean up using and import
- Too many functions/methods are exposed, with potential overlaps
Suggestion: use a Python inspired approach "import nnumpy as np"
import LightGraphs; const LG = LightGraphs
to then call LG.whatever_function
=#
# abstract type AbstractSimpleWeightedGraph{T<:Integer,U<:Real}<:AbstractGraph{T} end
struct PreprocessedWeightedGraph{T<:Integer,U<:Real}<:AbstractSimpleWeightedGraph{T,U}
weights::SparseMatrixCSC{U,T}
K::SparseMatrixCSC{Int64,T}
P::SparseMatrixCSC{Float64,T}
end
function PreprocessedWeightedGraph(adjmx::SparseMatrixCSC{U,T}) where T <: Integer where U <: Real
K,P= alias_preprocess(SimpleWeightedGraph(adjmx))
function PreprocessedWeightedGraph(adjmx::SparseMatrixCSC{U,T}) where {T<:Integer,U<:Real}
K, P = alias_preprocess(SimpleWeightedGraph(adjmx))
PreprocessedWeightedGraph{T, U}(adjmx,K,P)
end
PreprocessedWeightedGraph(g::LightGraphs.SimpleGraphs.SimpleGraph{T}, ::Type{U}=Float64) where T <: Integer where U <: Real =
## todo : not sure
PreprocessedWeightedGraph(g::LightGraphs.SimpleGraphs.SimpleGraph{T}, ::Type{U}=Float64) where {T<:Integer,U<:Real} =
PreprocessedWeightedGraph(adjacency_matrix(g, U))
PreprocessedWeightedGraph(g::SimpleWeightedGraph)= PreprocessedWeightedGraph(g.weights)
PreprocessedWeightedGraph(g::SimpleWeightedGraph) = PreprocessedWeightedGraph(g.weights)
inneighbors(g::PreprocessedWeightedGraph, v::Integer) = g.weights[v,:].nzind
is_directed(::Type{PreprocessedWeightedGraph}) = false
is_directed(::Type{PreprocessedWeightedGraph{T, U}}) where T where U = false
is_directed(::Type{PreprocessedWeightedGraph{T, U}}) where {T, U} = false
is_directed(g::PreprocessedWeightedGraph) = false
ne(g::PreprocessedWeightedGraph) = nnz(g.weights)/2
degree(g::PreprocessedWeightedGraph) = sum(g.weights,dims=1)
degree(g::SimpleWeightedGraph) = sum(g.weights,dims=1)
ne(g::PreprocessedWeightedGraph) = nnz(g.weights) / 2
degree(g::PreprocessedWeightedGraph) = sum(g.weights, dims=1)
degree(g::SimpleWeightedGraph) = sum(g.weights, dims=1)
struct RandomForest
next::Array{Int} # todo what size?
roots::Set{Int}
nroots::Int
root::Array{Int,1}
end
function show(io::IO, rf::RandomForest)
println(io, "Random forest. Size of original graph $(nv(rf)).")
println(io,"Number of trees $(nroots(rf))")
println(io, "Number of trees $(nroots(rf))")
end
function nv(rf::RandomForest)
length(rf.next)
end
function nroots(rf::RandomForest)
rf.nroots
end
function ne(rf::RandomForest)
nv(rf)-nroots(rf)
end
nv(rf::RandomForest) = length(rf.next)
nroots(rf::RandomForest) = rf.nroots
ne(rf::RandomForest) = nv(rf) - nroots(rf)
"""
next(rf::RandomForest)
......@@ -69,13 +98,9 @@ end
Return a vector of indices v, where v[i] = j means that node i points to node j
in the forest. If v[i] = 0 i is a root.
"""
function next(rf::RandomForest)
rf.next
end
next(rf::RandomForest) = rf.next
function outneighbors(rf::RandomForest,i)
rf.next[i] > 0 ? [rf.next[i]] : Array{Int64,1}()
end
outneighbors(rf::RandomForest,i) = rf.next[i] > 0 ? [rf.next[i]] : Array{Int64,1}()
"""
random_forest(G::AbstractGraph,q)
......@@ -96,30 +121,36 @@ nroots(rf)
next(rf) #who points to whom in the forest
````
TODO:
- Add a rng parameter for reproducibility
- Code duplication for random_forest(G::AbstractGraph, q::AbstractFloat/AbstractVector), maybe define a function computing the acceptance to sink node
"""
function random_forest(G::AbstractGraph,q::AbstractFloat)
roots = Set{Int64}()
root = zeros(Int64,nv(G))
nroots = Int(0)
d = degree(G)
function random_forest(G::AbstractGraph, q::AbstractFloat)
n = nv(G)
root = zeros(Int64, n)
roots, nroots = Set{Int64}(), 0
in_tree = falses(n)
next = zeros(Int64,n)
next = zeros(Int64, n)
d = degree(G)
@inbounds for i in 1:n
u = Int64(i)
u = i
while !in_tree[u]
if (((q+d[u]))*rand() < q)
if rand() * (q + d[u]) < q
in_tree[u] = true
push!(roots,u)
nroots+=1
nroots += 1
root[u] = u
next[u] = 0
else
next[u] = random_successor(G,u)
next[u] = random_successor(G, u)
u = next[u]
end
end
r = root[u]
#Retrace steps, erasing loops
u = i
while !in_tree[u]
......@@ -128,31 +159,31 @@ function random_forest(G::AbstractGraph,q::AbstractFloat)
u = next[u]
end
end
RandomForest(next,roots,nroots,root)
end
function random_forest(G::AbstractGraph,q::AbstractVector)
@assert length(q)==nv(G)
roots = Set{Int64}()
root = zeros(Int64,nv(G))
nroots = Int(0)
return RandomForest(next, roots, nroots, root)
end
function random_forest(G::AbstractGraph, q::AbstractVector)
n = nv(G)
@assert length(q) == n
root = zeros(Int64, n)
roots, nroots = Set{Int64}(), 0
in_tree = falses(n)
next = zeros(Int64,n)
@inbounds for i in 1:n
u = Int64(i)
next = zeros(Int64, n)
@inbounds for i in 1:n
u = i
while !in_tree[u]
if (rand() < q[u]/(q[u]+degree(G,u)))
if rand() * (q[u] + degree(G, u)) < q[u]
in_tree[u] = true
push!(roots,u)
nroots+=1
nroots += 1
root[u] = u
next[u] = 0
else
next[u] = random_successor(G,u)
next[u] = random_successor(G, u)
u = next[u]
end
end
......@@ -166,57 +197,45 @@ function random_forest(G::AbstractGraph,q::AbstractVector)
u = next[u]
end
end
RandomForest(next,roots,nroots,root)
RandomForest(next, roots, nroots, root)
end
function avg_rf(root :: Array{Int64,1},y :: Array{Float64,1})
xhat = zeros(Float64,length(y))
"""
TODO: sum_by function is commented thus not in scope...
"""
function avg_rf(root::Array{Int64,1}, y::Array{Float64,1})
# ysum = weighted_sum_by(y,deg,state.root)
ysum = sum_by(y,root)
for v in 1:length(xhat)
xhat[v] = ysum[root[v]]
end
xhat
ysum = sum_by(y, root)
return [ysum[r] for r in root]
end
function sure(y,xhat,nroots,s2)
function sure(y, xhat, nroots, s2)
err = sum((y .- xhat).^2)
@show err
-length(y)*s2+err+2*s2*nroots
return -length(y)*s2 + err + 2*s2*nroots
end
function random_successor(G::SimpleGraph{Int},i :: T) where T <: Int64
nbrs = neighbors(G, i)
rand(nbrs)
end
random_successor(g::SimpleGraph{Int}, i::T) where T<:Int64 = rand(neighbors(g, i))
function random_successor(g :: SimpleWeightedGraph,i :: T) where T <: Int64
function random_successor(g::SimpleWeightedGraph, i::T) where T<:Int64
W = weights(g)
rn = W.colptr[i]:(W.colptr[i+1]-1)
w = W.nzval[rn]
w /= sum(w)
w ./= sum(w)
u = rand()
j = 0
s = 0
while s < u && j < length(w)
s+= w[j+1]
j+=1
s += w[j+1]
j +=1
end
W.rowval[rn[1]+j-1]
end
function random_successor(g :: PreprocessedWeightedGraph,i :: T) where T <: Int64
sample = alias_draw(g,i)
sample
end
random_successor(g::PreprocessedWeightedGraph, i::T) where T<:Int64 = alias_draw(g, i)
"""
SimpleDiGraph(rf :: RandomForest)
SimpleDiGraph(rf::RandomForest)
Convert a RandomForest rf to a SimpleDiGraph.
......@@ -228,15 +247,14 @@ rf = random_forest(g,.4)
f = SimpleDiGraph(rf)
connected_components(f)
```
"""
function SimpleDiGraph(rf :: RandomForest)
function SimpleDiGraph(rf::RandomForest)
n = length(rf.next)
ff = SimpleDiGraph(n)
for i in 1:n
(rf.next[i] > 0) && add_edge!(ff,i,rf.next[i])
(rf.next[i] > 0) && add_edge!(ff, i, rf.next[i])
end
ff
return ff
end
include("random_spanning_tree.jl")
......@@ -244,7 +262,7 @@ include("smoothing.jl")
include("moments.jl")
include("multiscale.jl")
# function smooth_rf(G :: SimpleGraph{T},q,y :: Vector;nrep=10,variant=1) where T
# function smooth_rf(G::SimpleGraph{T},q,y::Vector;nrep=10,variant=1) where T
# xhat = zeros(Float64,length(y));
# nr = 0;
# for indr in Base.OneTo(nrep)
......@@ -259,9 +277,7 @@ include("multiscale.jl")
# (est=xhat ./ nrep,nroots=nr/nrep)
# end
# function sum_by(v :: Array{T,1}, g :: Array{Int64,1}) where T
# function sum_by(v::Array{T,1}, g::Array{Int64,1}) where T
# cc = spzeros(Int64,length(v))
# vv = spzeros(Float64,length(v))
# for i in 1:length(v)
......@@ -275,7 +291,7 @@ include("multiscale.jl")
# vv
# end
# function sum_by(v :: Array{T,2}, g :: Array{Int64,1}) where T
# function sum_by(v::Array{T,2}, g::Array{Int64,1}) where T
# cc = spzeros(Int64,length(v))
# vv = spzeros(Float64,size(v,1),size(v,2))
# for i in 1:size(v,1)
......@@ -290,7 +306,4 @@ include("multiscale.jl")
# vv
# end
end # module
using LightGraphs,LinearAlgebra,SparseArrays,SimpleWeightedGraphs
using LinearAlgebra, SparseArrays
using LightGraphs, SimpleWeightedGraphs
using RandomForests
export alias_preprocess
export alias_draw
function alias_preprocess(g :: SimpleWeightedGraph)
"""
This is the implementation of the preprocessing for the alias method.
Overall cost for a graph is O(N^2)
https://en.wikipedia.org/wiki/Alias_method
"""
function alias_preprocess(g::SimpleWeightedGraph)
W = weights(g)
n = size(g)[1]
D = (sum(W.>0,dims=1))[1,:]
K = spzeros(Int64,n,n)
U = spzeros(Float64,n,n)
K = spzeros(Int64, n, n)
U = spzeros(Float64, n, n)
for k = 1: n
for k in 1:n
nhbrs = neighbors(g,k)
nhbrssize = size(nhbrs,1)
nhbrs = neighbors(g, k)
nhbrssize = size(nhbrs, 1)
probs = zeros(Float64,nhbrssize)
probs = zeros(Float64, nhbrssize)
Kvec = zeros(Int64,nhbrssize)
Uvec = zeros(Float64,nhbrssize)
Kvec = zeros(Int64, nhbrssize)
Uvec = zeros(Float64, nhbrssize)
rn = W.colptr[k]:(W.colptr[k+1]-1)
probs = W.nzval[rn] ## <---- Lookup time for sparse array!!!
probs /= sum(probs)
probs = W.nzval[rn] # <---- Lookup time for sparse array!!!
probs /= sum(probs) # TODO ./= possible ?
# Overfull and Underfull stacks
ofull = Int64[]
ufull = Int64[]
# For keeping indices
# For keeping indices : TODO, what is this line made for ?
# For keeping probabilities
# Initialize indices and stacks with original probabilities
for (idx, i) in enumerate(probs)
Uvec[idx] = nhbrssize*i
if(Uvec[idx] > 1)
push!(ofull,idx)
elseif(Uvec[idx] < 1)
push!(ufull,idx)
else
Kvec[idx] = idx
for (i, prob) in enumerate(probs)
Uvec[i] = nhbrssize * prob
if Uvec[i] > 1
push!(ofull, i)
elseif Uvec[i] < 1
push!(ufull, i)
else # Uvec[i] == 1
Kvec[i] = i
end
end
tol = 1e-6 # Due to floating point errors (but not elegant)
# Loop until all bins are "equally full"
while !(isempty(ofull) && isempty(ufull))
i = pop!(ofull)
j = pop!(ufull)
Kvec[j] = i
# Recompute overfull bin
Uvec[i] = Uvec[i] + Uvec[j] - 1
Uvec[i] += Uvec[j] - 1
# Reassign the bin
if(Uvec[i] - 1.0 > 0.000001) # Due to floating point errors (but not elegant)
push!(ofull,i)
elseif((Uvec[i] - 1.0 < -0.000001))
push!(ufull,i)
if Uvec[i] - 1.0 > tol
push!(ofull, i)
elseif Uvec[i] - 1.0 < -tol
push!(ufull, i)
else
Kvec[i] = i
end
end
K[nhbrs,k] = nhbrs[Kvec]
U[nhbrs,k] = Uvec
K[nhbrs, k] = nhbrs[Kvec]
U[nhbrs, k] = Uvec
end
K,U
return K, U
end
function alias_draw(g,i)
"""
Drawing procedure of alias method after the preprocessing
Its cost is constant!
"""
"""
Drawing procedure of alias method after the preprocessing
Its cost is constant!
TODO : add types of arguments
"""
function alias_draw(g, i)
rn = g.P.colptr[i]:(g.P.colptr[i+1]-1)
v = rand(rn)
if(rand() < g.P.nzval[v])
return g.P.rowval[v]
else
return g.K.nzval[v]
end
return rand() < g.P.nzval[v] ? g.P.rowval[v] : g.K.nzval[v]
end
# Moment estimators
"""
TODO: doctstring neeeded
"""
#self-roots
function self_roots(rfs :: Vector{RandomForest})
k = length(rfs)
n = nv(rfs[1])
v = 1:n
for i in 2:k
function self_roots(rfs::Vector{RandomForest})
v = 1:nv(rfs[1])
for i in 2:length(rfs)
v = rfs[i].root[v]
end
sum(v .== rfs[1].root)
return count(v .== rfs[1].root)
end
export Hierarchy,graph,depth,HVector,smooth_subspace
function reduced_graph(g :: SimpleGraph,p :: Partition)
export
depth,
graph,
Hierarchy,
HVector,
smooth_subspace
#=
TODO: write docstrings
=#
function reduced_graph(g::SimpleGraph, p::Partition)
gp = SimpleWeightedGraph(p.nparts)
W = Dict{Tuple{Int,Int},Int}()
for i in vertices(g)
for j in neighbors(g,i)
if i < j && (p.part[i] != p.part[j]) #avoid double counting
pr = (p.part[i],p.part[j])
if haskey(W,pr)
for j in neighbors(g, i)
if i < j && (p.part[i] != p.part[j]) #avoid double counting
pr = (p.part[i], p.part[j])
if haskey(W, pr)
W[pr] += 1
else
W[pr] = 1
......@@ -14,80 +25,75 @@ function reduced_graph(g :: SimpleGraph,p :: Partition)
end
end
end
for (key,value) in W
add_edge!(gp,key[1],key[2],value)
for (key, value) in W
add_edge!(gp, key[1], key[2], value)
end
gp
end
function reduced_graph(g :: SimpleWeightedGraph,p :: Partition)
function reduced_graph(g::SimpleWeightedGraph, p::Partition)
gp = SimpleWeightedGraph(p.nparts)
W = Dict{Tuple{Int,Int},Int}()
for i in vertices(g)
for j in neighbors(g,i)
if i < j && (p.part[i] != p.part[j]) #avoid double counting
pr = (p.part[i],p.part[j])
if haskey(W,pr)
W[pr] += weights(g)[i,j]
if i < j && p.part[i] != p.part[j] #avoid double counting
pr = (p.part[i], p.part[j])
if haskey(W, pr)
W[pr] += weights(g)[i, j]
else
W[pr] = weights(g)[i,j]
W[pr] = weights(g)[i, j]
end
end
end
end
for (key,value) in W
add_edge!(gp,key[1],key[2],value)
for (key, value) in W
add_edge!(gp, key[1], key[2], value)
end
gp
end
function cfun(y,x,g,q)
function cfun(y, x, g, q)
(q/2)*sum((y-x).^2) + .5*x'*laplacian_matrix(g)*x
end
function smooth_ms_min(g :: SimpleGraph,p :: Partition,q,y)
Z = zeros(nv(p),p.nparts)
function smooth_ms_min(g::SimpleGraph, p::Partition, q, y)
Z = zeros(nv(p), p.nparts)
for i in 1:p.nparts
for j in 1:nv(p)
if (p.part[j] == i)
Z[j,i]=1