Estimate a multivariate distributionΒΆ

In this example we are going to estimate a joint distribution from a multivariate sample by fitting marginals and finding a set of copulas.

While the estimation of marginals is quite straightforward, the estimation of the dependency structure takes several steps:

  • find the dependent components

  • estimate a copula on each dependent bloc

  • assemble the estimated copulas

[13]:
from __future__ import print_function
import openturns as ot
import math as m
[14]:
# generate some multivariate data to estimate, with correlation
cop1 = ot.AliMikhailHaqCopula(0.6)
cop2 = ot.ClaytonCopula(2.5)
copula = ot.ComposedCopula([cop1, cop2])
marginals = [ot.Uniform(5.0, 6.0), ot.Arcsine(), ot.Normal(-40.0, 3.0), ot.Triangular(100.0, 150.0, 300.0)]
distribution = ot.ComposedDistribution(marginals, copula)
sample = distribution.getSample(10000).getMarginal([0, 2, 3, 1])
[15]:
# estimate marginals
dimension = sample.getDimension()
marginalFactories = []
for factory in ot.DistributionFactory.GetContinuousUniVariateFactories():
    if str(factory).startswith('Histogram'):
        # ~ non-parametric
        continue
    marginalFactories.append(factory)
estimated_marginals = [ot.FittingTest.BestModelBIC(sample.getMarginal(i), marginalFactories)[0] for i in range(dimension)]
estimated_marginals
[15]:
[class=Uniform name=Uniform dimension=1 a=5 b=6.00009,
 class=Normal name=Normal dimension=1 mean=class=Point name=Unnamed dimension=1 values=[-39.9881] sigma=class=Point name=Unnamed dimension=1 values=[2.96909] correlationMatrix=class=CorrelationMatrix dimension=1 implementation=class=MatrixImplementation name=Unnamed rows=1 columns=1 values=[1],
 class=Trapezoidal name=Trapezoidal dimension=1 a=99.9351 b=147.064 c=161.719 d=298.851 h=0.00936454,
 class=Beta name=Beta dimension=1 r=0.493967 t=0.996548 a=-1.0002 b=1.0002]
[16]:
# find connected components of a graph defined from its adjacency matrix

def find_neighbours(head, covariance, to_visit, visited):
    N = covariance.getDimension()
    visited[head] = 1
    to_visit.remove(head)
    current_component = [head]
    for i in to_visit:
        # If i is connected to head and has not yet been visited
        if covariance[head, i] > 0:
            # Add i to the current component
            component = find_neighbours(i, covariance, to_visit, visited)
            current_component += component
    return current_component

def connected_components(covariance):
    N = covariance.getDimension()
    to_visit = list(range(N))
    visited = [0] * N
    all_components = []
    for head in range(N):
        if visited[head] == 0:
            component = find_neighbours(head, covariance, to_visit, visited)
            all_components.append(sorted(component))
    return all_components
[17]:
# estimate copula

# 1. find dependent components

# compute correlation
C = sample.computeSpearmanCorrelation()
print(C)
# consider only significantly non-zero correlations
epsilon = 1.0 / m.sqrt(sample.getSize())
for j in range(dimension):
    for i in range(j):
        C[i, j] = 1.0 if abs(C[i, j]) > epsilon else 0.0
print(C)
# find independent blocs
blocs = connected_components(C)
blocs
[[  1          -0.00178802 -0.00983874  0.237491   ]
 [ -0.00178802  1           0.740004    0.00226999 ]
 [ -0.00983874  0.740004    1           0.00119686 ]
 [  0.237491    0.00226999  0.00119686  1          ]]
[[ 1 0 0 1 ]
 [ 0 1 1 0 ]
 [ 0 1 1 0 ]
 [ 1 0 0 1 ]]
[17]:
[[0, 3], [1, 2]]
[18]:
# 2. estimate the copulas on each dependent blocs
copulaFactories = []
for factory in ot.DistributionFactory.GetContinuousMultiVariateFactories():
    if not factory.build().isCopula():
        continue
    if factory.getImplementation().getClassName()=='BernsteinCopulaFactory':
        continue
    copulaFactories.append(factory)
estimated_copulas = [ot.FittingTest.BestModelBIC(sample.getMarginal(bloc), copulaFactories)[0] for bloc in blocs]
estimated_copulas
[18]:
[class=AliMikhailHaqCopula name=AliMikhailHaqCopula dimension=2 theta=0.595867,
 class=ClaytonCopula name=ClaytonCopula dimension=2 theta=2.46804]
[19]:
# 3. assemble the copula
estimated_copula_perm = ot.ComposedCopula(estimated_copulas)
# take care of the order of each bloc vs the order of original components
permutation = []
for bloc in blocs:
    permutation.extend(bloc)
inverse_permutation = [-1] * dimension
for i in range(dimension):
    inverse_permutation[permutation[i]] = i
estimated_copula = estimated_copula_perm.getMarginal(inverse_permutation)
estimated_copula
[19]:

MarginalDistribution(distribution=ComposedCopula(AliMikhailHaqCopula(theta = 0.595867), ClaytonCopula(theta = 2.46804)), indices=[0,2,3,1])

[20]:
# Build joint distribution from marginal distributions and dependency structure
estimated_distribution = ot.ComposedDistribution(estimated_marginals, estimated_copula)
estimated_distribution
[20]:

ComposedDistribution(Uniform(a = 5, b = 6.00009), Normal(mu = -39.9881, sigma = 2.96909), Trapezoidal(a = 99.9351, b = 147.064, c = 161.719, d = 298.851), Beta(r = 0.493967, t = 0.996548, a = -1.0002, b = 1.0002), MarginalDistribution(distribution=ComposedCopula(AliMikhailHaqCopula(theta = 0.595867), ClaytonCopula(theta = 2.46804)), indices=[0,2,3,1]))