The following notebook consists of 4 primary steps:
import sys, os
import pygeostat as gs
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
%matplotlib inline
# --- Working locations -------------------------------------------------
# Directory that receives the files written by this notebook
outdir = 'Output'
gs.mkdir(outdir)

# Path to GSLIB executables
exe_dir = "../pygeostat/executable/"

# --- Mixture-fitting settings (substituted into the parameter file) ----
# number of variables, number of mixture components, EM iteration cap
varnum, n_components, max_num_iterations = 3, 10, 100

# --- Global pygeostat configuration ------------------------------------
gs.PlotStyle['font.size'] = 12
# Same value as the lower trimming limit written in the gmm_fit parameter
# file; presumably values below it are treated as missing -- confirm
# against the pygeostat Parameters documentation.
gs.Parameters['data.tmin'] = -998

# --- Example data -------------------------------------------------------
dfl = gs.ExampleData('point2d_mv')
dfl.head()
# Wrapper around the external `gmm_fit` program
gmm = gs.Program(program='gmm_fit')

# Parameter-file template; the {placeholders} are filled in just before the
# program is run. The text is passed to the program verbatim.
parstr = """ Parameters for GMM_EM
*********************
START OF PARAMETERS:
{file} - file with data
{varnum} 3 4 5 - Number of variables and columns
-998 1e21 - trimming limits
{output} - output file
{n_components} - number of components
0.0001 - regularization constant (treat instability)
{max_num_iterations} - maximum number of iterations for EM algorithm
14641 - seed number
0 - fit only homotopic data (1=yes; 0=no)
=================================================================
This program fit a Gaussian mixture to the data based on the EM (Expected maximum liklihood)
algorithm.
"""

# Values for the template placeholders, gathered in one place
gmm_pars = dict(
    file=dfl.flname,
    varnum=varnum,
    n_components=n_components,
    max_num_iterations=max_num_iterations,
    output=os.path.join(outdir, 'gmm_fit.out'),
)

# Fill in the template and run the program without streaming its output
gmm.run(parstr=parstr.format(**gmm_pars), liveoutput=False)
# Load the fitted mixture written by gmm_fit together with the data it was
# fitted to
fitted_gmm_path = os.path.join(outdir, 'gmm_fit.out')
gmm_util = gs.GmmUtility(
    gmm_file=fitted_gmm_path,
    data=dfl.data,
    variable_names=['Var1', 'Var2', 'Var3'],
)

# Bivariate plot for the variable pair given by var_index; fname='test'
# presumably produces the 'test.png' that the clean-up step removes.
gmm_util.bivariate_plot(
    var_index=[1, 2],
    cmap='viridis',
    title='Bivariate Plot',
    fname='test',
)

# Summary plot of the fitted mixture
gmm_util.summary_plot(pad=0.1)

# Conditional plot given values for the first two variables; None presumably
# marks the variable whose conditional distribution is shown -- confirm
# against the GmmUtility documentation.
gmm_util.univariate_conditional_plot(conditioning_data=[0, 0, None])
# Clean up: best-effort removal of the artifacts left behind by this notebook.
# Each target gets its own try block so that one failed removal does not skip
# the remaining ones, and failures are reported rather than silently ignored.
# `except Exception` (instead of a bare `except`) lets KeyboardInterrupt and
# SystemExit propagate.
cleanup_steps = (
    (gs.rmfile, 'test.png'),
    (gs.rmfile, 'temp'),
    (gs.rmdir, outdir),
)
for remove, target in cleanup_steps:
    try:
        remove(target)
    except Exception as err:
        # Stay best-effort: a leftover file must never abort the notebook.
        print("Clean up skipped for {!r}: {}".format(target, err))