The following notebook consists of seven primary steps.
import pygeostat as gs
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
Set the Matplotlib and pygeostat settings used throughout the notebook.
# Path to GSLIB executables
exe_dir="../pygeostat/executable/"
# Grid definition (per axis: number of cells, first cell center, cell size)
gs.Parameters['data.griddef'] = gs.GridDef('''
120 5.0 10.0
110 1205.0 10.0
1 0.5 1.0''')
gs.Parameters['data.catdict'] = {1: 'Inside', 0: 'Outside'}
# Data values
gs.Parameters['data.tmin'] = -998
gs.Parameters['data.null'] = -999
# Color map settings
gs.Parameters['plotting.cmap'] = 'bwr'
gs.Parameters['plotting.cmap_cat'] = 'bwr'
# Number of realizations
nreal = 100
gs.Parameters['data.nreal'] = nreal
# Parallel Processing threads
gs.Parameters['config.nprocess'] = 4
# Plot style settings
gs.PlotStyle['legend.fontsize'] = 12
gs.PlotStyle['font.size'] = 11
# Create the output directory
outdir = 'Output/'
gs.mkdir(outdir)
dat = gs.ExampleData('reservoir_boundary', cat='Domain Indicator')
dat.info
print(dat.describe())
dat.head()
gs.location_plot(dat)
The indicator variogram is calculated and modeled, since it is required input for calculating the Gaussian variogram model in the next section (used for distance function, $df$, modeling).
Variogram calculation, modeling, plotting, and checking are readily accomplished with the programs below; any parameters that are not provided are inferred.
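For intuition about what varcalc computes, the sketch below bins pairwise half squared differences by separation distance to form an omnidirectional experimental variogram. It is a hypothetical plain numpy/scipy helper for illustration only (the function name and binning scheme are ours); the workflow itself uses the GSLIB executables.

# Minimal omnidirectional experimental variogram, for intuition only.
# Hypothetical helper; not part of pygeostat or the workflow below.
from scipy.spatial.distance import pdist

def experimental_variogram(xy, values, lag_length, n_lag):
    d = pdist(xy)                                # pairwise separation distances
    g = pdist(values.reshape(-1, 1))**2 / 2      # pairwise half squared differences
    lags, gammas = [], []
    for i in range(int(n_lag)):
        mask = (d >= i * lag_length) & (d < (i + 1) * lag_length)
        if mask.any():
            lags.append(d[mask].mean())          # mean separation in the bin
            gammas.append(g[mask].mean())        # experimental semivariance
    return np.array(lags), np.array(gammas)

Once the lag parameters are computed below, calling `experimental_variogram(dat[[dat.x, dat.y]].values, dat['Domain Indicator'].values.astype(float), lag_length, n_lag)` should produce points broadly comparable to the varcalc output.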
# Get the proportion of inside data
proportion = sum(dat['Domain Indicator'])/len(dat)
print('Proportion of inside data: %.3f' % proportion)
# The variance of a binary indicator with proportion p is p*(1 - p)
variance = proportion - proportion**2
# Perform data spacing analysis
dat.spacing(n_nearest=1)
lag_length = dat['Data Spacing (m)'].values.mean()
print('average data spacing in XY plane: {:.3f} {}'.format(lag_length,
gs.Parameters['plotting.unit']))
# Average extent of the data in x and y
mean_range = (np.ptp(dat[dat.x].values) + np.ptp(dat[dat.y].values)) * 0.5
# The lags span roughly half the average extent
n_lag = int(np.ceil((mean_range * 0.5) / lag_length))
lag_tol = lag_length * 0.6
var_calc = gs.Program(program=exe_dir+'varcalc')
parstr = """ Parameters for VARCALC
**********************
START OF PARAMETERS:
{file} -file with data
2 3 0 - columns for X, Y, Z coordinates
1 4 - number of variables,column numbers (position used for tail,head variables below)
{t_min} 1.0e21 - trimming limits
{n_directions} -number of directions
0.0 90 1000 0.0 22.5 1000 0.0 -Dir 01: azm,azmtol,bandhorz,dip,diptol,bandvert,tilt
{n_lag} {lag_length} {lag_tol} - number of lags,lag distance,lag tolerance
{output} -file for experimental variogram points output.
0 -legacy output (0=no, 1=write out gamv2004 format)
1 -run checks for common errors
1 -standardize sills? (0=no, 1=yes)
1 -number of variogram types
1 1 10 1 {variance} -tail variable, head variable, variogram type (and cutoff/category), sill
"""
n_directions = 1
varcalc_outfl = os.path.join(outdir, 'varcalc.out')
var_calc.run(parstr=parstr.format(file=dat.flname,
n_directions = n_directions,
t_min = gs.Parameters['data.tmin'],
n_lag=n_lag,
lag_length = lag_length,
lag_tol = lag_tol,
variance = variance,
output=varcalc_outfl),
liveoutput=True)
varfl = gs.DataFile(varcalc_outfl)
varfl.head()
var_model = gs.Program(program=exe_dir+'varmodel')
parstr = """ Parameters for VARMODEL
***********************
START OF PARAMETERS:
{varmodel_outfl} -file for modeled variogram points output
1 -number of directions to model points along
0.0 0.0 100 6 - azm, dip, npoints, point separation
2 0.05 -nst, nugget effect
1 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
1 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
1 100000 -fit model (0=no, 1=yes), maximum iterations
1.0 - variogram sill (can be fit, but not recommended in most cases)
1 - number of experimental files to use
{varcalc_outfl} - experimental output file 1
1 1 - # of variograms (<=0 for all), variogram #s
1 0 10 - # pairs weighting, inverse distance weighting, min pairs
0 10.0 - fix Hmax/Vert anis. (0=no, 1=yes)
0 1.0 - fix Hmin/Hmax anis. (0=no, 1=yes)
{varmodelfit_outfl} - file to save fit variogram model
"""
varmodel_outfl = os.path.join(outdir, 'varmodel.out')
varmodelfit_outfl = os.path.join(outdir, 'varmodelfit.out')
var_model.run(parstr=parstr.format(varmodel_outfl= varmodel_outfl,
varmodelfit_outfl = varmodelfit_outfl,
varcalc_outfl = varcalc_outfl), liveoutput=False, quiet=True)
varmdl = gs.DataFile(varmodel_outfl)
varmdl.head()
ax = gs.variogram_plot(varfl, index=1, color='b', grid=True, label = 'Indicator Variogram (Experimental)')
gs.variogram_plot(varmdl, index=1, ax=ax, color='b', experimental=False, label = 'Indicator Variogram (Model)')
_ = ax.legend(fontsize=12)
The Gaussian variogram that yields the indicator variogram after truncation of a Gaussian random field is calculated. This Gaussian variogram is modeled and input to $df$ modeling.
The bigaus2 program applies the Gaussian integration method, given the indicator variogram model and the proportion of the indicator.
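For context, a standard result from truncated-Gaussian theory relates the indicator variogram $\gamma_I(\mathbf{h})$ for a proportion $p$ to the correlogram $\rho(\mathbf{h})$ of the underlying Gaussian field:

$$\gamma_I(\mathbf{h}) = p(1-p) - \frac{1}{2\pi}\int_{0}^{\rho(\mathbf{h})} \frac{1}{\sqrt{1-t^{2}}}\,\exp\!\left(-\frac{y_p^{2}}{1+t}\right)dt, \qquad y_p = \Phi^{-1}(p)$$

Run in mode 2 (Ind$\rightarrow$NS), bigaus2 numerically inverts this relation to recover the Gaussian variogram consistent with the fitted indicator model.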
bigaus2 = gs.Program(exe_dir+'bigaus2')
parstr = """ Parameters for BIGAUS2
**********************
START OF PARAMETERS:
1 -input mode (1) model or (2) variogram file
nofile.out -file for input variogram
{proportion} -threshold/proportion
2 -calculation mode (1) NS->Ind or (2) Ind->NS
{outfl} -file for output of variograms
1 -number of thresholds
{proportion} -threshold cdf values
1 {n_lag} -number of directions and lags
0 0.0 {lag_length} -azm(1), dip(1), lag(1)
{varstr}
"""
# Read the fitted variogram model string
with open(varmodelfit_outfl, 'r') as f:
    varstr = f.read()
pars = dict(proportion=proportion,
lag_length=lag_length,
n_lag=n_lag,
outfl= os.path.join(outdir, 'bigaus2.out'),
varstr=varstr)
bigaus2.run(parstr=parstr.format(**pars), nogetarg=True)
The bigaus2 program outputs a legacy variogram format, which must be translated to the standard variogram format.
# Read in the data to demonstrate its present form
expvargs = gs.readvarg(os.path.join(outdir, 'bigaus2.out'), 'all')
expvargs.head()
varcalc_gaussian = gs.DataFile(data=varfl.data[:-1].copy(),
                               flname=os.path.join(outdir, 'gaussian_exp_variogram.out'))
varcalc_gaussian['Lag Distance'] = expvargs['Distance']
varcalc_gaussian['Variogram Value'] = expvargs['Value']
varcalc_gaussian.write_file(varcalc_gaussian.flname)
varcalc_gaussian.head()
This model is input to distance function estimation.
parstr = """ Parameters for VARMODEL
***********************
START OF PARAMETERS:
{varmodel_outfl} -file for modeled variogram points output
1 -number of directions to model points along
0.0 0.0 100 6 - azm, dip, npoints, point separation
2 0.01 -nst, nugget effect
3 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
3 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
1 100000 -fit model (0=no, 1=yes), maximum iterations
1.0 - variogram sill (can be fit, but not recommended in most cases)
1 - number of experimental files to use
{varcalc_outfl} - experimental output file 1
1 1 - # of variograms (<=0 for all), variogram #s
1 0 10 - # pairs weighting, inverse distance weighting, min pairs
0 10.0 - fix Hmax/Vert anis. (0=no, 1=yes)
0 1.0 - fix Hmin/Hmax anis. (0=no, 1=yes)
{varmodelfit_outfl} - file to save fit variogram model
"""
varmodel_outfl_g = os.path.join(outdir, 'varmodel_g.out')
varmodelfit_outfl_g = os.path.join(outdir, 'varmodelfit_g.out')
var_model.run(parstr=parstr.format(varmodel_outfl=varmodel_outfl_g,
                                   varmodelfit_outfl=varmodelfit_outfl_g,
                                   varcalc_outfl=varcalc_gaussian.flname),
              liveoutput=True, quiet=False)
varmdl_g = gs.DataFile(varmodel_outfl_g)
varmdl_g.head()
fig, axes = plt.subplots(1, 2, figsize= (15,4))
ax = axes[0]
ax = gs.variogram_plot(varfl, index=1, ax=ax, color='b', grid=True, label = 'Indicator Variogram (Experimental)')
gs.variogram_plot(varmdl, index=1, ax=ax, color='b', experimental=False, label = 'Indicator Variogram (Model)')
_ = ax.legend(fontsize=12)
ax = axes[1]
gs.variogram_plot(varcalc_gaussian, index=1, ax=ax, color='g', grid=True, label='Gaussian Variogram (Experimental)')
gs.variogram_plot(varmdl_g, index=1, ax=ax, color='g', experimental=False, label = 'Gaussian Variogram (Model)')
_ = ax.legend(fontsize=12)
The $df$ is calculated at the data locations before being estimated at the grid locations. The $c$ parameter is applied in the $df$ calculation, defining the bandwidth of uncertainty that will be simulated in the next section.
Normally the optimal $c$ would be determined with a jackknife study, but it is simply provided here.
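For reference, a common convention (e.g., Wilde and Deutsch) defines the distance function at a datum as the distance to the nearest datum of the opposite indicator, signed negative inside and positive outside, then shifted outward by $c$ so that inside data fall at or below $-c$ and outside data at or above $+c$. The following is a minimal hypothetical numpy/scipy sketch of that convention; dfcalc is the authoritative implementation and additionally supports anisotropy and drillhole exclusion.

# Hypothetical sketch of a signed, c-shifted distance function at the data
# locations (assumed convention; dfcalc is the authoritative implementation).
from scipy.spatial import cKDTree

def distance_function(xy, ind, c):
    inside, outside = xy[ind == 1], xy[ind == 0]
    d_out = cKDTree(outside).query(xy)[0]   # distance to nearest outside datum
    d_in = cKDTree(inside).query(xy)[0]     # distance to nearest inside datum
    # Inside data: negative distance shifted by -c; outside data: positive, +c
    return np.where(ind == 1, -(d_out + c), d_in + c)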
selected_c = 200
dfcalc = gs.Program(exe_dir+'dfcalc')
# Print the columns for populating the parameter file without variables
print(dat.columns)
parstr = """ Parameters for DFCalc
*********************
START OF PARAMETERS:
{datafl} -file with input data
1 2 3 0 4 -column for DH,X,Y,Z,Ind
1 -in code: indicator for inside domain
0.0 0.0 0.0 -angles for anisotropy ellipsoid
1.0 1.0 -first and second anisotropy ratios (typically <=1)
0 -proportion of drillholes to remove
696969 -random number seed
{c} -C
{outfl} -file for distance function output
'nofile.out' -file for excluded drillholes output
"""
pars = dict(datafl=dat.flname, c=selected_c,
outfl=os.path.join(outdir,'df_calc.out'))
dfcalc.run(parstr=parstr.format(**pars))
A standard name for the distance function variable is used for convenience throughout the workflow, motivating the renaming below.
# Load the data and note the abbreviated name of the distance function
dat_df = gs.DataFile(os.path.join(outdir,'df_calc.out'), notvariables='Ind', griddef=gs.Parameters['data.griddef'])
print('Initial distance Function variable name = ', dat_df.variables)
# Set a standard distance function name
dfvar = 'Distance Function'
dat_df.rename({dat_df.variables:dfvar})
print('Distance Function variable name = ', dat_df.variables)
# Set symmetric color limits for the distance function
df_vlim = (-350, 350)
gs.location_plot(dat_df, vlim=df_vlim, cbar_label='m')
Kriging is performed with a large number of data to provide a smooth and conditionally unbiased estimate. Global kriging would also be appropriate.
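For reference, ordinary kriging (the estimation option selected in the parameter file below) estimates the distance function at each grid node as a weighted linear combination of nearby data, with weights $\lambda_i$ obtained from the fitted Gaussian variogram model subject to the unbiasedness constraint:

$$df^{*}(\mathbf{u}) = \sum_{i=1}^{n} \lambda_i\, df(\mathbf{u}_i), \qquad \sum_{i=1}^{n} \lambda_i = 1$$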
kt3dn = gs.Program(exe_dir+'kt3dn')
parstr = """ Parameters for KT3DN
********************
START OF PARAMETERS:
{input_file} -file with data
1 2 3 0 6 0 - columns for DH,X,Y,Z,var,sec var
-998.0 1.0e21 - trimming limits
0 -option: 0=grid, 1=cross, 2=jackknife
xvk.dat -file with jackknife data
1 2 0 3 0 - columns for X,Y,Z,vr and sec var
nofile.out -data spacing analysis output file (see note)
0 15.0 - number to search (0 for no dataspacing analysis, rec. 10 or 20) and composite length
0 100 0 -debugging level: 0,3,5,10; max data for GSKV;output total weight of each data?(0=no,1=yes)
{out_sum} -file for debugging output (see note)
{out_grid} -file for kriged output (see GSB note)
{gridstr}
1 1 1 -x,y and z block discretization
1 100 100 1 -min, max data for kriging,upper max for ASO,ASO incr
0 0 -max per octant, max per drillhole (0-> not used)
700.0 700.0 500.0 -maximum search radii
0.0 0.0 0.0 -angles for search ellipsoid
1 -0=SK,1=OK,2=LVM(resid),3=LVM((1-w)*m(u))),4=colo,5=exdrift,6=ICCK
0.0 0.6 0.8 1.6 - mean (if 0,4,5,6), corr. (if 4 or 6), var. reduction factor (if 4)
0 0 0 0 0 0 0 0 0 -drift: x,y,z,xx,yy,zz,xy,xz,zy
0 -0, variable; 1, estimate trend
extdrift.out -gridded file with drift/mean
4 - column number in gridded file
keyout.out -gridded file with keyout (see note)
0 1 - column (0 if no keyout) and value to keep
{varmodelstr}
"""
# Read the fitted Gaussian variogram model string
with open(varmodelfit_outfl_g, 'r') as f:
    varstr = f.read()
pars = dict(input_file=os.path.join(outdir,'df_calc.out'),
out_grid=os.path.join(outdir,'kt3dn_df.out'),
out_sum=os.path.join(outdir,'kt3dn_sum.out'),
gridstr=gs.Parameters['data.griddef'], varmodelstr=varstr)
kt3dn.run(parstr=parstr.format(**pars))
slice_plot selects pointvar as the color of the overlain dat_df point data, since its name matches the column name of est_df.
est_df = gs.DataFile(os.path.join(outdir,'kt3dn_df.out'))
# Drop the variance since it isn't used, so the remaining
# column doesn't have to be specified explicitly
est_df.drop('EstimationVariance')
# Rename to the standard distance function name for convenience
est_df.rename({est_df.variables:dfvar})
est_df.describe()
# Generate a figure object
fig, axes = gs.subplots(1, 2, figsize=(10, 8),cbar_mode='each',
axes_pad=0.8, cbar_pad=0.1)
# Location map of indicator data for comparison
gs.location_plot(dat, ax=axes[0])
# Map of distance function data and estimate
gs.slice_plot(est_df, pointdata=dat_df,
pointkws={'edgecolors':'k', 's':25},
cbar_label='Distance Function (m)', vlim=df_vlim, ax=axes[1])
This section is subdivided into four sub-sections, which together simulate domain realizations from the distance function estimate.
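In each realization, locations within the bandwidth of uncertainty $-c < df^{*}(\mathbf{u}) < c$ are shifted by a single random deviate drawn uniformly from $[-c, c]$, and the simulated indicator is set by the sign of the result (locations outside the bandwidth keep their estimated value):

$$df_{\ell}(\mathbf{u}) = df^{*}(\mathbf{u}) + r_{\ell}, \quad r_{\ell} \sim \mathrm{U}(-c,\, c), \qquad i_{\ell}(\mathbf{u}) = \begin{cases} 1, & df_{\ell}(\mathbf{u}) \le 0 \\ 0, & \text{otherwise} \end{cases}$$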
# Required package for this calculation
from scipy.stats import norm
# Create a directory for the output
domaindir = os.path.join(outdir, 'Domains/')
gs.mkdir(domaindir)
for real in range(nreal):
    # Draw a Gaussian deviate and transform it to a probability
    sim = norm.cdf(np.random.randn())
    # Transform the probability to a distance function deviate in [-c, c]
    sim = 2 * selected_c * sim - selected_c
    # Initialize the realization as a copy of the distance function estimate
    df = est_df[dfvar].values.copy()
    # Only locations within the bandwidth of uncertainty are perturbed
    idx = np.logical_and(df > -selected_c, df < selected_c)
    # Add the distance function deviate to the distance function estimate,
    # yielding a distance function realization
    df[idx] = df[idx] + sim
    # If the distance function is less than or equal to 0, the simulated
    # indicator is 1 (inside); otherwise it is 0 (outside)
    sim = (df <= 0).astype(int)
    # Convert the numpy array to a pandas DataFrame, which is required
    # for initializing a DataFile (aside from the demonstrated flname
    # approach), then write the realization to disk
    sim = pd.DataFrame(data=sim, columns=[dat.cat])
    sim = gs.DataFile(data=sim)
    sim.write_file(domaindir + 'real{}.out'.format(real + 1))
fig, axes = gs.subplots(2, 3, figsize=(15, 8), cbar_mode='single')
for real, ax in enumerate(axes):
    sim = gs.DataFile(domaindir + 'real{}.out'.format(real + 1))
    gs.slice_plot(sim, title='Realization {}'.format(real + 1),
                  pointdata=dat,
                  pointkws={'edgecolors': 'k', 's': 25},
                  vlim=(0, 1), ax=ax)
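As a quick, hypothetical sanity check (not part of the original workflow), the simulated proportion of inside cells can be averaged across all realizations and compared against the data proportion computed earlier; the two should be broadly consistent.

# Hypothetical check: mean simulated proportion of 'inside' across realizations
props = []
for real in range(nreal):
    sim = gs.DataFile(domaindir + 'real{}.out'.format(real + 1))
    props.append(sim[dat.cat].values.mean())
print('Mean simulated proportion: {:.3f} (data proportion: {:.3f})'.format(
    np.mean(props), proportion))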
gs.Parameters.save('Parameters.json')
gs.rmdir(outdir)   # delete the generated output directory
gs.rmfile('temp')  # delete the temporary file