The following notebook comprises 9 primary steps:
import pygeostat as gs
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
Load the Matplotlib and Pygeostat project defaults.
gs.Parameters.load('Parameters.json')
gs.PlotStyle.load('Parameters.json')
Assign the number of realizations and the grid definition (griddef) to variables for convenience.
# a path to executables can be defined
exedir='../pygeostat/executable/'
# nreal = gs.Parameters['data.nreal']
nreal=100
gs.Parameters['data.nreal']=nreal
print('nreal = {}'.format(nreal))
griddef = gs.Parameters['data.griddef']
print(griddef)
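For reference, the GSLIB grid convention used by griddef places cell centres at xmn + ix*xsiz for ix = 0, ..., nx-1 (and likewise in y). A minimal NumPy sketch of recovering cell-centre coordinates, using illustrative values rather than the project grid:
import numpy as np

# Illustrative grid parameters (hypothetical, not the project griddef)
nx, xmn, xsiz = 120, 5.0, 10.0
ny, ymn, ysiz = 110, 1205.0, 10.0

# GSLIB convention: xmn is the centre of the first cell
x_centres = xmn + np.arange(nx) * xsiz
y_centres = ymn + np.arange(ny) * ysiz
print(x_centres[:3], y_centres[:3])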
The cmap was set to bwr in the previous section; reset it to the Matplotlib default.
gs.Parameters['plotting.cmap'] = 'viridis'
# create the output directory
outdir = 'Output/'
gs.mkdir(outdir)
dat = gs.ExampleData('reservoir_surface')
print('columns = {}'.format(dat.columns.values))
print('x column = {}, y column = {}'.format(dat.x, dat.y))
The DataFile describe method excludes special attributes.
dat.describe()
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
axes = axes.flatten()
for var, ax in zip(dat.variables, axes):
gs.location_plot(dat, var=var, cbar_label='m', title=var, ax=ax)
# Remove the unneeded fourth axis
axes[-1].remove()
Declustering defines the spatially representative distribution of the Top Elevation and Thickness. These distributions are then used for the normal score transformation, generating conditioning data that is used for calculating normal score variograms and conditioning sequential Gaussian simulation.
Note that variograms are often calculated on data that are normal score transformed without declustering weights. Given the controversy of the subject, the more convenient approach is used in this course: the declustering weights are applied, since they are absolutely required for the conditioning data in any case.
All variables are homotopically sampled, so only one variable need be considered for declustering.
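Before running the GSLIB program, it can help to see the idea in a few lines: cell declustering weights each datum by the inverse of the number of data sharing its cell. A minimal 2-D sketch under simplifying assumptions (single cell size, single origin; declus also scans cell sizes and averages over origin offsets):
import numpy as np

def cell_declustering_weights(x, y, cellsize):
    # Weight each datum by the inverse of the number of data sharing its cell
    # (single cell size and origin only)
    x, y = np.asarray(x, dtype=float), np.asarray(y, dtype=float)
    ix = np.floor((x - x.min()) / cellsize).astype(int)
    iy = np.floor((y - y.min()) / cellsize).astype(int)
    _, inv, counts = np.unique(ix + iy * (ix.max() + 1),
                               return_inverse=True, return_counts=True)
    w = 1.0 / counts[inv]
    return w * len(x) / w.sum()   # standardize so the weights sum to n

# e.g. w = cell_declustering_weights(dat[dat.x], dat[dat.y], cellsize=90)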
declus = gs.Program(exedir+'declus')
# Cell size based on the data spacing study in the intro notebook
cellsize = 90
parstr = """ Parameters for DECLUS
*********************
START OF PARAMETERS:
{datafl} -file with data
{xyzcol} {varcol} - columns for X, Y, Z, and variable
-1.0e21 1.0e21 - trimming limits
junk.out -file for summary output
{outfl} -file for output with data & weights
1.0 1.0 -Y and Z cell anisotropy (Ysize=size*Yanis)
0 -0=look for minimum declustered mean (1=max)
1 {cellsize} {cellsize} -number of cell sizes, min size, max size
5 -number of origin offsets
"""
pars = dict(datafl=dat.flname, xyzcol=dat.gscol(dat.xyz),
varcol=dat.gscol('Top Elevation'),
outfl=os.path.join(outdir, 'declus.out'), cellsize=cellsize)
declus.run(parstr=parstr.format(**pars))
gs.rmfile('junk.out')
In the context of the upcoming modeling steps, Base Elevation is not considered a variable. Use of the notvariables kwarg on initialization excludes it from the variables attribute.
dat = gs.DataFile('Output/declus.out', notvariables='Base Elevation')
print('declustering weight = ', dat.weights)
print('variables = ', dat.variables)
gs.location_plot(dat, var=dat.weights)
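A quick sanity check on the weights is to compare the naive and declustered means of a variable; a short sketch using the columns initialized above (assuming dat.weights resolves to the single weight column name):
import numpy as np

var = 'Top Elevation'
naive_mean = dat[var].mean()
declustered_mean = np.average(dat[var], weights=dat[dat.weights])
print('naive mean = {:.2f}, declustered mean = {:.2f}'.format(naive_mean, declustered_mean))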
unscore = gs.Program(exedir+'unscore')
parstr = """ Parameters for NSCORE
*********************
START OF PARAMETERS:
{datafl} -file with data
{nvar} {varcols} - number of variables and columns
{wtcol} - column for weight, 0 if none
0 - column for category, 0 if none
0 - number of records if known, 0 if unknown
-1.0e21 1.0e21 - trimming limits
0 -transform using a reference distribution, 1=yes
../histsmth/histsmth.out -file with reference distribution.
1 2 0 -columns for variable, weight, and category
1000 -maximum number of quantiles, 0 for all
{outfl} -file for output
{trnfl} -file for output transformation table
"""
pars = dict(datafl=dat.flname, nvar=dat.nvar,
varcols=dat.gscol(dat.variables),
wtcol=dat.gscol(dat.weights),
outfl = os.path.join(outdir, 'nscore.out'), trnfl = os.path.join(outdir,'nscore.trn'))
unscore.run(parstr=parstr.format(**pars))
Use of the notvariables kwarg isolates the normal score variables in the dat_ns.variables attribute. Since the DataFile passed to histogram_plot has initialized weights, weights may be set to True.
dat_ns = gs.DataFile(os.path.join(outdir, 'nscore.out'),
notvariables=['Base Elevation']+dat.variables)
print('variables = ', dat_ns.variables)
dat_ns.data
fig, axes = plt.subplots(2 , 2, figsize=(10, 10))
for var, ax in zip(dat.variables, axes[0]):
gs.histogram_plot(dat, var=var, weights=True, ax=ax, stat_blk='minimal', color='salmon')
for var, ax in zip(dat_ns.variables, axes[1]):
gs.histogram_plot(dat_ns, var, weights=True, ax=ax,
xlim=(-3, 3), stat_blk='minimal', color='lime')
fig.tight_layout()
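For reference, the normal score transform maps each datum's weighted cumulative probability to the corresponding standard normal quantile. A minimal single-variable sketch, assuming SciPy is available and ignoring the despiking and tail handling performed by the GSLIB program:
import numpy as np
from scipy.stats import norm

def nscore(values, weights=None):
    # Weighted quantile-to-quantile transform to a standard normal distribution
    values = np.asarray(values, dtype=float)
    w = np.ones_like(values) if weights is None else np.asarray(weights, dtype=float)
    order = np.argsort(values)
    cdf = (np.cumsum(w[order]) - 0.5 * w[order]) / w.sum()   # strictly inside (0, 1)
    ns = np.empty_like(values)
    ns[order] = norm.ppf(cdf)
    return ns

# e.g. y = nscore(dat['Top Elevation'], weights=dat[dat.weights])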
Normal score variograms are calculated and modeled, before being input to sequential Gaussian simulation in the next section.
var_calc = gs.Program(program=exedir+'varcalc')
parstr = """ Parameters for VARCALC
**********************
START OF PARAMETERS:
{file} -file with data
2 3 0 - columns for X, Y, Z coordinates
2 8 9 - number of variables,column numbers (position used for tail,head variables below)
{t_min} 1.0e21 - trimming limits
{n_directions} -number of directions
0.0 90 10000 0.0 22.5 1000 0.0 -Dir 01: azm,azmtol,bandhorz,dip,diptol,bandvert,tilt
{n_lag} {lag_length} {lag_tol} - number of lags,lag distance,lag tolerance
{output} -file for experimental variogram points output.
0 -legacy output (0=no, 1=write out gamv2004 format)
1 -run checks for common errors
1 -standardize sills? (0=no, 1=yes)
2 -number of variogram types
1 1 1 1 -tail variable, head variable, variogram type (and cutoff/category), sill
2 2 1 1 -tail variable, head variable, variogram type (and cutoff/category), sill
"""
n_directions = 1
varcalc_outfl = os.path.join(outdir, 'varcalc.out')
var_calc.run(parstr=parstr.format(file=dat_ns.flname,
n_directions = 1,
t_min = gs.Parameters['data.tmin'],
n_lag=10,
lag_length = 30.0,
lag_tol = 20.0,
output=varcalc_outfl),
liveoutput=True)
varfl_exp = gs.DataFile(varcalc_outfl)
varfl_exp.head()
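As a reminder of what varcalc computes, the experimental semivariogram at lag h is half the mean squared difference over pairs separated by approximately h. A brute-force omnidirectional sketch for small datasets (illustrative only; the program additionally handles azimuth, dip, and bandwidth tolerances):
import numpy as np

def experimental_variogram(x, y, v, lag, lag_tol, n_lag):
    # Omnidirectional semivariogram: gamma(h) = 0.5 * mean[(v_i - v_j)^2]
    x, y, v = (np.asarray(a, dtype=float) for a in (x, y, v))
    dist = np.hypot(x[:, None] - x[None, :], y[:, None] - y[None, :])
    dv2 = (v[:, None] - v[None, :]) ** 2
    lags, gammas, npairs = [], [], []
    for k in range(1, n_lag + 1):
        mask = (np.abs(dist - k * lag) <= lag_tol) & (dist > 0)
        lags.append(k * lag)
        gammas.append(0.5 * dv2[mask].mean() if mask.any() else np.nan)
        npairs.append(int(mask.sum()) // 2)   # each pair is counted twice
    return np.array(lags), np.array(gammas), np.array(npairs)

# e.g. experimental_variogram(dat_ns[dat_ns.x], dat_ns[dat_ns.y],
#                             dat_ns[dat_ns.variables[0]], 30.0, 20.0, 10)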
var_model = gs.Program(program=exedir+'varmodel')
parstr = """ Parameters for VARMODEL
***********************
START OF PARAMETERS:
{varmodel_outfl} -file for modeled variogram points output
1 -number of directions to model points along
0.0 0.0 150 3 - azm, dip, npoints, point separation
2 0.01 -nst, nugget effect
3 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
3 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
1 100000 -fit model (0=no, 1=yes), maximum iterations
1.0 - variogram sill (can be fit, but not recommended in most cases)
1 - number of experimental files to use
{varcalc_outfl} - experimental output file 1
1 {var_index} - # of variograms (<=0 for all), variogram #s
1 0 10 - # pairs weighting, inverse distance weighting, min pairs
0 10.0 - fix Hmax/Vert anis. (0=no, 1=yes)
1 1.0 - fix Hmin/Hmax anis. (0=no, 1=yes)
{varmodelfit_outfl} - file to save fit variogram model
"""
for i, variable in enumerate(dat_ns.variables):
varmodel_outfl = os.path.join(outdir, 'varmodel_{}.out'.format(variable.replace(' ', '')))
varmodelfit_outfl = os.path.join(outdir, 'varmodelfit_{}.out'.format(variable.replace(' ', '')))
var_model.run(parstr=parstr.format(varmodel_outfl= varmodel_outfl,
varmodelfit_outfl = varmodelfit_outfl,
var_index = i+1,
varcalc_outfl = varcalc_outfl), liveoutput=True, quiet=False)
fig, axes = plt.subplots(1 , 2, figsize=(14, 5))
for i, variable in enumerate(dat_ns.variables):
varmodel_outfl = os.path.join(outdir, 'varmodel_{}.out'.format(variable.replace(' ', '')))
varmdl = gs.DataFile(varmodel_outfl)
gs.variogram_plot(varfl_exp, index = i+1, color = 'b', grid=True,
label='Experimental Variogram', title=variable, ax = axes[i])
gs.variogram_plot(varmdl, index = 1, color = 'b',
label='Model Variogram', title=variable, ax = axes[i], experimental=False)
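The fitted model is a nugget effect plus nested structures, each defined by a contribution and ranges. A minimal sketch of evaluating an isotropic nugget-plus-two-structure model against lag distance (the structure types and parameter values here are illustrative, not the ones fit above):
import numpy as np

def spherical(h, a):
    # Spherical structure: rises from 0 to 1 at range a
    hr = np.minimum(np.asarray(h, dtype=float) / a, 1.0)
    return 1.5 * hr - 0.5 * hr ** 3

def gaussian(h, a):
    # Gaussian structure with practical range a
    h = np.asarray(h, dtype=float)
    return 1.0 - np.exp(-3.0 * (h / a) ** 2)

def variogram_model(h, nugget=0.01, cc=(0.60, 0.39), ranges=(120.0, 250.0),
                    structures=(gaussian, gaussian)):
    # Nugget plus nested structures; parameter values here are illustrative only
    gam = np.full_like(np.asarray(h, dtype=float), nugget)
    for c, a, struct in zip(cc, ranges, structures):
        gam += c * struct(h, a)
    return gam

# e.g. h = np.linspace(0, 300, 100); plt.plot(h, variogram_model(h))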
simdir = os.path.join(outdir,'USGSIM/')
gs.mkdir(simdir)
usgsim = gs.Program('usgsim')
# loading the variogram models
var_models = []
for i, variable in enumerate(dat_ns.variables):
varmodelfit_outfl = os.path.join(outdir, 'varmodelfit_{}.out'.format(variable.replace(' ', '')))
with open(varmodelfit_outfl, 'r') as f:
varmodel_ = f.readlines()
varstr = ''''''
for line in varmodel_:
varstr += line
var_models.append(varstr)
parstr = """ Parameters for USGSIM
*********************
START OF MAIN:
1 -number of realizations to generate, 0=kriging
2 -number of variables being simulated
0 -number of rock types to consider
{seed} -random number seed
{griddef}
{outfl} -file for simulation output
0 - output format: (0=reg, 1=coord, 2=binary)
impute.out -file for imputed values in case of heterotopic samples
0 -debugging level: 0,1,2,3
sgsim.dbg -file for debugging output
START OF SRCH:
25 -number of data to use per variable
300 300 10 -maximum search radii (hmax,hmin,vert)
0 0 0 -angles for search ellipsoid
1 -sort by distance (0) or covariance (1)
0 1 1 -if sorting by covariance, indicate variogram rock type, head, tail to use
START OF VARG:
2 -number of variograms
0 1 1 -rock type, variable 1, variable 2
{varmodel1}
0 2 2 -rock type, variable 1, variable 2
{varmodel2}
START OF DATA:
{datafl} -file with primary data
{xyzcols} 0 0 - columns for X,Y,Z,wt,rock type
{varcols} - columns for variables
1 - clip data to grid, 1=yes
1 - assign to the grid, 0=none, 1=nearest, 2=average
-8.0 1.0e21 - trimming limits
"""
pars = dict(griddef=griddef, varmodel1=var_models[0],
varmodel2=var_models[1], datafl=dat_ns.flname,
xyzcols=dat_ns.gscol(dat_ns.xyz),
varcols=dat_ns.gscol(dat_ns.variables))
callpars = []
seeds = gs.rseed_list(nreal, seed=23243)
for i, seed in enumerate(seeds):
pars['seed'] = seed
pars['outfl'] = simdir+'real{}.out'.format(i+1)
callpars.append(dict(parstr=parstr.format(**pars)))
gs.runparallel(usgsim, callpars, progressbar=True)
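Conceptually, at each visited grid node sequential Gaussian simulation builds a simple kriging mean and variance from nearby normal-score data (original plus previously simulated values), then draws from that conditional normal distribution. A heavily simplified single-node sketch with a hypothetical covariance function, not the usgsim implementation:
import numpy as np

def covariance(h, a=150.0, sill=1.0):
    # Hypothetical isotropic exponential covariance with practical range a
    return sill * np.exp(-3.0 * np.asarray(h, dtype=float) / a)

def sgs_node(node_xy, data_xy, data_ns, rng):
    # Simple kriging (zero mean in Gaussian units) followed by a Monte Carlo draw
    h_to_node = np.linalg.norm(data_xy - node_xy, axis=1)
    h_between = np.linalg.norm(data_xy[:, None, :] - data_xy[None, :, :], axis=2)
    lam = np.linalg.solve(covariance(h_between), covariance(h_to_node))
    sk_mean = lam @ data_ns
    sk_var = covariance(0.0) - lam @ covariance(h_to_node)
    return sk_mean + np.sqrt(max(sk_var, 0.0)) * rng.standard_normal()

# e.g. sgs_node(np.array([50.0, 50.0]),
#               np.array([[40.0, 60.0], [80.0, 30.0]]),
#               np.array([0.4, -1.1]),
#               np.random.default_rng(23243))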
This step is not strictly required, but is presented for demonstration.
# Read in the simulation to inspect
sim = gs.DataFile(os.path.join(simdir,'real1.out'))
# Rename the simulation columns
sim.data.columns=dat_ns.variables
sim.variables = dat_ns.variables
# Summary statistics
print('\nProperties of the realization:\n', sim.describe())
# The gs.subplots is useful when multiple panels are plotted that
# should use the same colorbar
fig, axes = gs.subplots(1, 2, figsize=(10, 10), cbar_mode='single')
for var, ax in zip(dat_ns.variables, axes):
gs.slice_plot(sim, var=var, vlim=(-3, 3), title=var+' Realization', ax=ax, grid=True)
Note that the ubacktr program only requires a prefix for the transformation tables; it infers the file names from the number of variables and categories.
backdir = os.path.join(outdir, 'UBACKTR/')
gs.mkdir(backdir)
ubacktr = gs.Program('ubacktr')
parstr = """ Parameters for UBACKTR
**********************
START OF PARAMETERS:
{datafl} -file with simulated Gaussian variables (see Note6)
-7.0 1.0e21 - trimming limits
2 - number of variables
1 2 - columns for variables
0 -number of rocktypes (NRT) (0 if none)
nofile.out - file for simulated RTs (see Note1 and Note6)
5 - column for RT
31 32 34 35 36 37 - RT category codes (see Note2)
{nx} {ny} 1 -nx, ny, nz (0=calculated)(see Note3)
1 -number of realizations
{trnfl} -prefix for trans tables (see Note4 and Note7)
{outfl} -output file (see Note6)
"""
pars = dict(nx=griddef.nx, ny=griddef.ny, trnfl=os.path.join(outdir,'nscore'))
callpars = []
for i in range(nreal):
pars['datafl'] = os.path.join(simdir,'real{}.out'.format(i+1))
pars['outfl'] = os.path.join(backdir,'real{}.out'.format(i+1))
callpars.append(dict(parstr=parstr.format(**pars)))
gs.runparallel(ubacktr, callpars, progressbar=True)
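For reference, the back transform is a table lookup that maps each simulated Gaussian value to the original-units quantile with the same cumulative probability. A minimal sketch using the transformation table written by the normal score run, assuming the table holds two columns (original value, normal score) sorted in ascending order and ignoring tail extrapolation; the actual file names may include a variable index, which is why ubacktr is given only the prefix:
import os
import numpy as np

# Hypothetical table path; check the file names actually written to the output directory
table = np.loadtxt(os.path.join(outdir, 'nscore.trn'))
orig_vals, ns_vals = table[:, 0], table[:, 1]

def back_transform(y):
    # Piecewise-linear interpolation; values beyond the table are clipped to its ends
    return np.interp(y, ns_vals, orig_vals)

# e.g. back_transform(np.array([-2.0, 0.0, 2.0]))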
Generate a figure for each variable, where a single color bar is used for multiple realizations.
for var in dat.variables:
fig, axes = gs.subplots(2, 2, figsize=(10, 8), cbar_mode='single')
# Color limits based on the data
vlim = (np.round(dat[var].min()), np.round(dat[var].max()))
for real, ax in enumerate(axes):
sim = gs.DataFile(os.path.join(backdir,'real{}.out').format(real+1))
# Renaming columns
sim.data.columns = dat.variables
sim.variables = dat.variables
gs.slice_plot(sim, var=var, title='Realization {}'.format(real+1),
pointdata=dat, pointkws={'edgecolors':'k', 's':25, 'lw': 1},
vlim=vlim, cbar_label='m', ax=ax, grid=True)
# Label the figure
fig.suptitle(var, **{'weight':'bold'})
Use histogram_plot iteratively to overlay the realizations and the declustered reference data.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
# Realizations
for real in range(nreal):
sim = gs.DataFile(os.path.join(backdir,'real{}.out').format(real+1))
# Renaming columns
sim.data.columns = dat.variables
sim.variables = dat.variables
for i, (var, ax) in enumerate(zip(dat.variables, axes)):
gs.histogram_plot(sim, var=var, ax=ax, icdf=True, color='k', grid =True, stat_blk=False, alpha=0.5, lw = 0.5)
# Reference data
for i, (var, ax) in enumerate(zip(dat.variables, axes)):
gs.histogram_plot(dat, var=var, ax=ax, icdf=True, color='r', grid =True, weights='Declustering Weight', lw = 1)
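Reproduction can also be summarized numerically: the declustered data mean should fall within the spread of realization means. A short sketch reusing the column renaming applied above:
import numpy as np

var = 'Top Elevation'
real_means = []
for real in range(nreal):
    sim = gs.DataFile(os.path.join(backdir, 'real{}.out'.format(real + 1)))
    sim.data.columns = dat.variables
    real_means.append(sim[var].mean())
real_means = np.array(real_means)
target = np.average(dat[var], weights=dat[dat.weights])
print('declustered data mean = {:.2f}'.format(target))
print('realization means = {:.2f} +/- {:.2f}'.format(real_means.mean(), real_means.std()))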
Use histogram_plot_simulation with a '*' wildcard, which leads the plotting function to assume that files 1,...,nreal are present.
fig, axes = plt.subplots(1, 2, figsize=(10, 5))
for i, (var, ax) in enumerate(zip(dat.variables, axes)):
gs.histogram_plot_simulation(os.path.join(backdir,'real*.out'), dat, reference_variable=var,
reference_weight=True, reference_color='C3', simulated_column=i+1,
title=var, ax=ax, grid=True)
To check variogram reproduction in original units, the data variograms must first be calculated and modeled in original units, since only the normal score variograms were calculated previously.
# Calculate the experimental variograms for data in original units
var_calc = gs.Program(program='varcalc')
parstr = """ Parameters for VARCALC
**********************
START OF PARAMETERS:
{file} -file with data
2 3 0 - columns for X, Y, Z coordinates
2 4 5 - number of variables,column numbers (position used for tail,head variables below)
{t_min} 1.0e21 - trimming limits
{n_directions} -number of directions
0.0 90 10000 0.0 22.5 1000 0.0 -Dir 01: azm,azmtol,bandhorz,dip,diptol,bandvert,tilt
{n_lag} {lag_length} {lag_tol} - number of lags,lag distance,lag tolerance
{output} -file for experimental variogram points output.
0 -legacy output (0=no, 1=write out gamv2004 format)
1 -run checks for common errors
1 -standardize sills? (0=no, 1=yes)
2 -number of variogram types
1 1 1 {sill1} -tail variable, head variable, variogram type (and cutoff/category), sill
2 2 1 {sill2} -tail variable, head variable, variogram type (and cutoff/category), sill
"""
ori_varcalc_outfl = os.path.join(outdir, 'ori_varcalc.out')
top_sill=dat['Top Elevation'].var()
thick_sill=dat['Thickness'].var()
var_calc.run(parstr=parstr.format(file=dat.flname,
n_directions = 1,
t_min = gs.Parameters['data.tmin'],
n_lag=10,
lag_length = 30.0,
lag_tol = 20.0,
sill1=top_sill,
sill2=thick_sill,
output=ori_varcalc_outfl),
liveoutput=True)
var_model = gs.Program(program='varmodel')
parstr = """ Parameters for VARMODEL
***********************
START OF PARAMETERS:
{varmodel_outfl} -file for modeled variogram points output
1 -number of directions to model points along
0.0 0.0 150 3 - azm, dip, npoints, point separation
2 0.01 -nst, nugget effect
3 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
3 ? 0.0 0.0 0.0 -it,cc,azm,dip,tilt (ang1,ang2,ang3)
? ? ? -a_hmax, a_hmin, a_vert (ranges)
1 100000 -fit model (0=no, 1=yes), maximum iterations
1.0 - variogram sill (can be fit, but not recommended in most cases)
1 - number of experimental files to use
{varcalc_outfl} - experimental output file 1
1 {var_index} - # of variograms (<=0 for all), variogram #s
1 0 10 - # pairs weighting, inverse distance weighting, min pairs
0 10.0 - fix Hmax/Vert anis. (0=no, 1=yes)
1 1.0 - fix Hmin/Hmax anis. (0=no, 1=yes)
{varmodelfit_outfl} - file to save fit variogram model
"""
vargstr1={}
vargmodel={}
for i, variable in enumerate(dat.variables):
varmodel_outfl = os.path.join(outdir, 'varmodel_{}.out'.format(variable.replace(' ', '')))
varmodelfit_outfl = os.path.join(outdir, 'varmodelfit_{}.out'.format(variable.replace(' ', '')))
var_model.run(parstr=parstr.format(varmodel_outfl= varmodel_outfl,
varmodelfit_outfl = varmodelfit_outfl,
var_index = i+1,
varcalc_outfl = ori_varcalc_outfl), liveoutput=False, quiet=True)
tempfile= open(varmodelfit_outfl,"r")
vargstr1[variable]=tempfile.read()
tempfile.close()
vargmodel[variable]=gs.DataFile(varmodel_outfl)
ori_varfl_exp = gs.DataFile(ori_varcalc_outfl)
ori_varfl_exp.head()
varsim=gs.Program(program=exedir+'varsim',getpar=True)
parstr = """ Parameters for VarSim
*********************
START OF PARAMETERS:
nodata.dat -file with lithology information
0 7 - lithology column (0=not used), code
{datafl} -file with data
1 {varcol} - number of variables, column numbers
-998 1.0e21 - trimming limits
{outfl} -output file for variograms of realizations
{outfl2} -output file for average variogram
{griddef}
1 -number of realizations
1 60 -number of directions, number of lags
1 0 0 -ixd(1),iyd(1),izd(1)
1 -standardize sill? (0=no, 1=yes)
1 -number of variograms
1 1 1 -tail variable, head variable, variogram type
"""
varsim_dir=outdir+'VARSIM/'
gs.mkdir(varsim_dir)
varsimdata={}
varsimdata[dat.variables[0]]=pd.DataFrame(index=range(60))
varsimdata[dat.variables[1]]=pd.DataFrame(index=range(60))
varcol_=[1,2]
for j,var in enumerate(dat.variables):
for i in range(nreal):
varsim.run(parstr=parstr.format(datafl=backdir+'real{}.out'.format(i+1),
outfl=varsim_dir+'varsim{f1}_{f2}.out'.format(f1=i+1,f2=var),
outfl2=varsim_dir+'varsim_avg.out',
griddef=griddef,varcol=varcol_[j]),liveoutput=False,quiet=True)
tempdata=gs.DataFile(varsim_dir+'varsim{f1}_{f2}.out'.format(f1=i+1,f2=var)).data
varsimdata[dat.variables[j]]=tempdata.iloc[:,3]
lagdist=tempdata.iloc[:,1]
# for each variable, read variograms of all realizations into one data file
varsimulated_data={}
for j,var in enumerate(dat.variables):
tempfile= gs.DataFile(varsim_dir+'varsim{f1}_{f2}.out'.format(f1=1,f2=var), readfl=True)
tempdata=pd.DataFrame(data=np.tile(np.transpose([range(nreal*len(tempfile))]),(1,8)),columns=tempfile.columns)
idst=0
for i in range(nreal):
tempfile= gs.DataFile(varsim_dir+'varsim{f1}_{f2}.out'.format(f1=i+1,f2=var), readfl=True)
tempfile['Variogram Index']=i+1
tempfile['Variogram Number']=1
tempdata.iloc[idst:idst+len(tempfile)]=tempfile.data.values
idst=idst+len(tempfile)
varsimulated_data[var] = gs.DataFile(data=tempdata)
varsimulated_data[var].data
reference_model={}
for var in dat.variables:
reference_model[var]=pd.DataFrame()
reference_model[var]['Distance']=vargmodel[var]['Lag Distance']
reference_model[var]['Variogram']=vargmodel[var]['Variogram Value']
# plot figures
fig,ax=plt.subplots(1,2,figsize=(14,7))
for i, var in enumerate(dat.variables):
_=gs.variogram_plot_simulation(simulated_data=varsimulated_data[var].data, simulated_var_id=1,
reference_data=reference_model[var],
xlim=(0, 300), ax=ax[i], variable=var, legend_label='experimental variogram')
_=ax[i].legend(fontsize=12)
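A numeric companion to the plot is to average the realization variograms lag by lag and compare them with the fitted model at the same distances. A rough sketch, following the column positions used when the varsim output was read earlier:
import numpy as np

for var in dat.variables:
    sims = []
    for i in range(nreal):
        tempdata = gs.DataFile(varsim_dir + 'varsim{f1}_{f2}.out'.format(f1=i + 1, f2=var)).data
        sims.append(tempdata.iloc[:, 3].values)   # column 3: variogram value (as above)
    lag = tempdata.iloc[:, 1].values              # column 1: lag distance (as above)
    avg = np.mean(sims, axis=0)
    model = np.interp(lag, reference_model[var]['Distance'], reference_model[var]['Variogram'])
    print('{}: mean |avg. realization - model| = {:.3f}'.format(var, np.nanmean(np.abs(avg - model))))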
Subtract the thickness from the top elevation to provide the base elevation realizations. Output all values to a single directory.
surfdir=os.path.join(outdir,'Surfaces')
gs.mkdir(surfdir)
for real in range(nreal):
sim = gs.DataFile(os.path.join(backdir,'real{}.out'.format(real+1)))
sim.data.columns=['Top Elevation', 'Thickness']
sim['Base Elevation'] = sim['Top Elevation'] - sim['Thickness']
sim.write_file(os.path.join(surfdir,'real{}.out'.format(real+1)))
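Since the base elevation is derived as top minus thickness, a quick check is that no realization contains negative thickness (i.e. the base never rises above the top):
# Check for negative thickness (base above top) across all realizations
n_violations = 0
for real in range(nreal):
    sim = gs.DataFile(os.path.join(surfdir, 'real{}.out'.format(real + 1)))
    n_violations += int((sim['Thickness'] < 0).sum())
print('cells with negative thickness across all realizations:', n_violations)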
var = 'Base Elevation'
vlim = (np.round(dat[var].min()), np.round(dat[var].max()))
fig, axes = gs.subplots(2, 2, figsize=(10, 8), cbar_mode='single')
for real, ax in enumerate(axes):
sim = gs.DataFile(os.path.join(surfdir,'real{}.out'.format(real+1)))
gs.slice_plot(sim, var=var, title='Realization {}'.format(real+1),
pointdata=dat, pointkws={'edgecolors':'k', 's':25, 'lw':1},
cbar_label='m', ax=ax, grid=True)
fig.suptitle(var, **{'weight':'bold'})
Output coordinates will be written in single precision ('float32') following the Parameters setting below (which may be problematic with large UTM coordinates). This is used here to conserve output file size.
When the DataFile is initialized, it is registered as a structured grid (dftype='sgrid') with specified z coordinates.
Use of a .vtk extension in write_file leads to VTK output, where the x and y coordinates are assumed to follow the regular grid, whereas z follows the irregular coordinates. Note that at least one coordinate must be irregular for the sgrid type to register as valid.
gs.Parameters['data.write_vtk.cdtype'] = 'float32'
for var in ['Base Elevation', 'Top Elevation']:
sim = gs.DataFile(os.path.join(surfdir,'real1.out'), z=var, dftype='sgrid', griddef=griddef)
sim.write_file(os.path.join(outdir,'{}_real1.vtk'.format(var)), variables=[var])
gs.rmdir(outdir)
gs.rmfile('temp')