import numpy as np
import pandas as pd
import pygimli as pg
from pygimli.physics import ert
import matplotlib.pyplot as plt
import math
# Load a unified-format ERT data file. Uncomment exactly one dataset below.
# data_raw = ert.load('data_unified/CER2017-08-24_u_i.txt')
# data_raw = ert.load('data_unified/craterlake_2019_feb6.txt') # crater lake, good data quality
# data_raw = ert.load('data_unified/craterlake_2019_may3.txt') # crater lake, has bad electrode
# data_raw = ert.load('data_unified/oldcrow_wen13_2013_oct14.txt') # old crow, line 13
# data_raw = ert.load('data_unified/oldcrow_wen1_2013_oct13.txt') # old crow, line 1
data_raw = ert.load('data_unified/SCH2018-08-29_u_i.txt') # schilthorn
# data_raw = ert.load('data_unified/2001_ALK_2019-10-03_0600_fr.ohm') # seward peninsula summer, forward and reciprocal data
# data_raw = ert.load('data_unified/2001_ALK_2020-02-01_1000_fr.ohm') # seward peninsula winter, forward and reciprocal data
# data_raw = ert.load('data_unified/beavercreek_wen1_2010_july22.txt')
# data_raw = ert.load('data_unified/mp825_2021_sept8_fr.txt')
# print a summary of the loaded data container
print(data_raw)
# Calculate the geometric factor and apparent resistivity if they are not
# already present in the data file.
if not data_raw.haveData('k'):
    # numerical=True includes topography in the geometric-factor calculation
    data_raw['k'] = ert.createGeometricFactors(data_raw, numerical=True)
if not data_raw.haveData('rhoa'):
    if not data_raw.haveData('r'):
        # transfer resistance from measured voltage and current
        data_raw['r'] = data_raw['u'] / data_raw['i']
    data_raw['rhoa'] = data_raw['r'] * data_raw['k']
# Put the data into a pandas DataFrame.
# dataMap() yields (token, values) pairs; transposing puts the token names in
# the first row and the per-token value arrays in the second.
df = pd.DataFrame(np.array(data_raw.dataMap()).T)
header = df.iloc[0]  # first row holds the token names ('a','b','m','n','r',...)
df = df[1:]          # remaining row holds the value arrays
df.columns = header
# explode the per-token value arrays so each measurement becomes one row
df = df.apply(pd.Series.explode).reset_index(drop=True)
df_raw = pd.DataFrame(df)
# remember each measurement's original row position for later bookkeeping
df_raw['start_index'] = df_raw.index
# Look for reciprocal (AB and MN swapped) or repeated (identical ABMN)
# measurements.
src = np.stack([df_raw['a'], df_raw['b']]).T  # current-electrode pairs
rec = np.stack([df_raw['m'], df_raw['n']]).T  # potential-electrode pairs
# Keep the accumulated first-occurrence arrays two-dimensional from the start
# so every comparison can use axis=1. (The original special-cased the state
# with len(rec_f)==2, which also matched the 2-row accumulated array and then
# compared along the wrong axis.)
src_f = src[0:1]
rec_f = rec[0:1]
ind_f = [0]    # row indices of first occurrences
ind_r = []     # row indices of repeats/reciprocals
reps = []      # for each repeat, index (into ind_f) of its partner
recips = []    # same, for each reciprocal
for i in range(1, len(src)):
    a = np.where(np.all(rec_f == src[i], axis=1))[0]  # reciprocal: stored MN matches this AB ...
    b = np.where(np.all(src_f == rec[i], axis=1))[0]  # ... and stored AB matches this MN
    c = np.where(np.all(src_f == src[i], axis=1))[0]  # repeated: stored AB matches this AB ...
    d = np.where(np.all(rec_f == rec[i], axis=1))[0]  # ... and stored MN matches this MN
    ind_recip = np.intersect1d(a, b)
    ind_rep = np.intersect1d(c, d)
    if len(ind_recip) > 0:
        ind_r.append(i)
        recips.append(ind_recip)
    elif len(ind_rep) > 0:
        ind_r.append(i)
        reps.append(ind_rep)
    else:
        # no partner seen yet: record as a first occurrence
        src_f = np.vstack((src_f, [src[i]]))
        rec_f = np.vstack((rec_f, [rec[i]]))
        ind_f.append(i)
# atleast_1d guards the single-match case: np.squeeze alone would return a
# 0-d array and len() below would raise
recips = np.atleast_1d(np.squeeze(recips))
reps = np.atleast_1d(np.squeeze(reps))
print('%.0f reciprocal measurements detected' % len(recips))
print('%.0f repeated measurements detected' % len(reps))
# Treat repeated and reciprocal measurements the same: each provides a second
# estimate of the same transfer resistance.
r_all = np.hstack([reps,recips]).astype(int)
# if present, calculate new apparent resistivities and error vals
df_f = df_raw.loc[ind_f]          # first occurrences
df_f = df_f.reset_index(drop=True)
df_r = df_raw.loc[ind_r]          # their repeat/reciprocal partners
df_r = df_r.reset_index(drop=True)
df_checked = df_f.copy()
df_checked['rep/recip'] = 0       # flag: 1 where a partner measurement existed
if len(ind_r)>0: #TODO this only handles recips now
    for i in range(len(ind_r)): # for all reciprocal measurements
        # mean resistance of the pair and their relative misfit in percent
        r_mean = (df_f.iloc[r_all[i]]['r'] + df_r.iloc[i]['r'])/2
        r_err = (np.abs(df_f.iloc[r_all[i]]['r'] - df_r.iloc[i]['r'])/abs(r_mean))*100
        # NOTE(review): r_all stacks reps before recips while df_r follows
        # detection order (ind_r); presumably these line up one-to-one when
        # only one kind is present -- verify for mixed datasets.
        # df_checked has a reset integer index, so .at[r_all[i], ...] addresses
        # the positional partner row.
        df_checked.at[r_all[i],'r'] = r_mean
        df_checked.at[r_all[i],'rhoa'] = r_mean*df_r.iloc[i]['k']
        df_checked.at[r_all[i],'err'] = r_err
        df_checked.at[r_all[i],'rep/recip'] = 1
# Rebuild a pygimli data container from the checked DataFrame so it can be
# plotted with the ERT tooling.
data_checked = pg.DataContainerERT()
# sensor geometry is unchanged
for sensor in data_raw.sensors():
    data_checked.createSensor(np.array(sensor))  # 2D, no topography
# copy over every column that actually carries data
for col in df_checked.columns:
    if max(df_checked[col]) > 0:
        data_checked[col] = np.array(df_checked[col])
mgr = ert.ERTManager(data_checked)
# Four-panel overview of the checked data: apparent resistivity, measurement
# error, voltage, and current (panels annotated instead when data are absent).
fig, ax = plt.subplots(2,2,figsize=[20,10])
mgr.showData(data_checked, vals=data_checked['rhoa'],ax=ax[0,0],label=r'Apparent resistivity ($\Omega$m)',cMap='turbo_r');
if data_checked.haveData('err'):
    mgr.showData(data_checked, vals=data_checked['err'],ax=ax[0,1],label='Measurement error (%)',cMap='plasma');
else:
    ax[0,1].text(0.25,0.5,'no measurement error data available',fontsize=16)
if data_checked.haveData('u'):
    mgr.showData(data_checked, vals=data_checked['u'],ax=ax[1,0],label='Voltage (V)',cMap='plasma');
else:
    ax[1,0].text(0.3,0.5,'no voltage data available',fontsize=16)
if data_checked.haveData('i'):
    mgr.showData(data_checked, vals=data_checked['i'],ax=ax[1,1],label='Current (A)',cMap='plasma');
else:
    ax[1,1].text(0.3,0.5,'no current data available',fontsize=16)
df_filt = df_checked.copy()
# Technical filter: drop nonphysical (rhoa <= 0), noisy (err > 10 %), and
# extreme-outlier (rhoa > 9 standard deviations) measurements.
bad = (
    (df_filt['rhoa'] <= 0)
    | (df_filt['err'] > 10)
    | (df_filt['rhoa'] > 9 * np.std(df_filt['rhoa']))
)
I_tf = np.where(bad)[0]   # positional indices of the rows removed here
df_filt = df_filt.drop(I_tf)
n_tf = len(I_tf)          # count removed by the technical filter
# To apply a moving median filter the data must be ordered by depth level and
# array midpoint.
# midpoint of each four-electrode array
mp = np.mean([df_filt['a'], df_filt['b'], df_filt['m'], df_filt['n']], axis=0)
# Sort by depth level and midpoint so a moving median filter can be applied.
# note: this works for 2D lines with topography and even electrode spacing
# Unique depth levels are identified by the relative electrode positions.
ab = df_filt['a'] - df_filt['b']
am = df_filt['a'] - df_filt['m']
an = df_filt['a'] - df_filt['n']
# pos is a relative-geometry signature for every measurement ...
pos = (np.array([ab, am, an]).T).astype(dtype=float)
pos_uniq = np.unique(pos, axis=0)
# ... and each row's index into pos_uniq is stored on the dataframe for
# convenience
pos_i = [np.where((row == pos_uniq).all(axis=1))[0][0] for row in pos]
df_filt['pos'] = pos_i
# Sort the data by depth level (pos) and, within each level, by array midpoint
# so the moving median filter can slide along each level from left to right.
# (The original computed np.argsort(pos) and immediately overwrote it — dead
# code removed — and used DataFrame.append, which was removed in pandas 2.0.)
sort_index = np.array([], dtype=int)
row_numbers = np.arange(len(pos))
for level in range(len(pos_uniq)):
    j = np.where((pos == pos_uniq[level]).all(axis=1))
    si = np.argsort(mp[j])  # order this depth level by midpoint
    sort_index = np.append(sort_index, row_numbers[j][si])
sort_index = sort_index.astype('int')
# build the sorted dataframe positionally instead of row-wise appends
df_sort = df_filt.iloc[sort_index].reset_index(drop=True)
df_filt = df_sort.copy()
th = 0.5   # relative threshold for the moving median filter
           # TODO: could make this higher for shallower data
n_mmf = 0  # count of data removed by this filter
ikeep = np.ones(df_filt.shape[0], dtype=bool)
for k in range(len(pos_uniq)):  # loop through each unique depth level
    I = np.where(df_filt['pos'] == k)[0]
    # np.float was removed in NumPy 1.24; the builtin float is equivalent
    r = np.array(df_filt['rhoa'], dtype=float)[I]
    # moving median with a 5-point window, clipped at the level's ends so the
    # end points use only their available neighbors (this single clipped-slice
    # form reproduces all the original per-index edge cases)
    mm = []
    for i in range(len(I)):  # left to right along the depth level
        mm.append(np.median(r[max(i - 2, 0):i + 3]))
    # points deviating from the local median by more than th are outliers
    ithrow = np.where(abs(r - mm) / mm > th)[0]
    n_mmf = n_mmf + len(ithrow)
    # get rid of outlier data
    ikeep[I[ithrow]] = False
I_mmf = (df_filt[~ikeep])['start_index']  # original-row ids of removed points
df_filt = df_filt[ikeep]
df_filt = df_filt.reset_index(drop=True)
# Collect the original-row ids of all points removed so far.
# I_mmf already holds 'start_index' values, but I_tf holds positional indices
# into df_checked — the two index spaces differ once repeats/reciprocals have
# been merged out, so map I_tf through df_checked's 'start_index' column
# before combining (the original hstacked them directly, mixing index spaces).
I_all = np.hstack([np.array(df_checked['start_index'].iloc[I_tf]), I_mmf])
# how many times was each electrode used in the full dataset?
[elec_all,count_all] = np.unique(np.hstack([df_raw['a'],df_raw['b'],df_raw['m'],df_raw['n']]), return_counts=True)
# how many times was each electrode used in the data that got removed?
df_removed = df_raw[df_raw['start_index'].isin(I_all)]
[elec_removed,count_removed] = np.unique(np.hstack([df_removed['a'],df_removed['b'],df_removed['m'],df_removed['n']]), return_counts=True)
# For every electrode, compute the percentage of its measurements that were
# filtered out.
perc_filt = []
for i, electrode in enumerate(elec_all):
    hits = np.where(elec_removed == electrode)[0]
    if len(hits) > 0:
        perc_filt.append(count_removed[hits] / count_all[i] * 100)
    else:
        perc_filt.append([0])
perc_filt = np.hstack(perc_filt)
# An electrode is flagged as bad when more than 25 % of the data involving it
# have been filtered.
e_bad = elec_all[perc_filt > 25]
# drop every remaining measurement that uses a bad electrode
I_ef = [
    any(df_filt[col].iloc[i] in e_bad for col in ('a', 'b', 'm', 'n'))
    for i in range(len(df_filt))
]
df_elec_filt = df_filt.drop(index=np.where(I_ef)[0])
n_ef = len(e_bad)
if len(e_bad) == len(data_raw.sensors()):
    # TODO: better flagging for poor quality datasets
    print('STOP: too many data points filtered')
# Rebuild a pygimli data container from the filtered DataFrame.
data_filt = pg.DataContainerERT()
# sensor geometry is unchanged
for sensor in data_raw.sensors():
    data_filt.createSensor(np.array(sensor))  # 2D, no topography
# copy over every filtered column that actually carries data
for col in df_elec_filt.columns:
    if max(df_elec_filt[col]) > 0:
        data_filt[col] = np.array(df_elec_filt[col])
# Side-by-side comparison of raw (checked) vs. filtered apparent resistivity.
fig, ax = plt.subplots(1,2,figsize=[20,5])
mgr = ert.ERTManager(data_checked)
mgr.showData(data_checked, vals=data_checked['rhoa'],ax=ax[0],label=r'$\rho_{app}$ ($\Omega$m)',cMap='turbo_r');
ax[0].set_title('Raw data',fontsize=24);
mgr = ert.ERTManager(data_filt)
mgr.showData(data_filt, vals=data_filt['rhoa'],ax=ax[1],label=r'$\rho_{app}$ ($\Omega$m)',cMap='turbo_r');
ax[1].set_title('Filtered data',fontsize=24);
# Summarize how many data were removed by each filtering stage.
print('data points removed by technical filter = %.0f' % n_tf)
print('data points removed by moving median filter = %.0f' % n_mmf)
print('number of bad electrodes = %.0f' % n_ef)
n_bef = len(np.where(I_ef)[0])  # points removed by the bad-electrode filter
print('data points removed by bad electrode filter = %.0f' % n_bef)
# Total removed sums the three per-point counts. (The original added n_ef,
# which counts bad electrodes, not the data points they removed.)
print('%.0f data points removed in total (%.1f%% of the data)' % (
    n_tf + n_mmf + n_bef,
    float((data_checked.size() - data_filt.size()) / data_checked.size() * 100)))
if len(e_bad) == len(data_raw.sensors()):
    # TODO: better flagging for poor quality datasets
    print('STOP: too many data points filtered')
# Build a linear error model err = p0*|R| + p1 from the repeat/reciprocal
# misfits; otherwise fall back to a generic estimate.
if len(r_all) > 0:
    I = np.where(df_filt['rep/recip'] == 1)
    x = np.abs(np.array(df_filt['r'].iloc[I], dtype=float))              # |R|
    y = np.abs(np.array(df_filt['err'].iloc[I], dtype=float)) * x / 100  # absolute error
    # Sort the (x, y) pairs by |R| so that binning groups similar resistances.
    # (The original sorted df_filt only AFTER extracting x and y, so the bins
    # were built from unsorted data and the sort had no effect.)
    order = np.argsort(x)
    x = x[order]
    y = y[order]
    nbins = 20
    ndata = len(x)
    npoints = math.ceil(ndata / nbins)
    xbin = []
    ybin = []
    for i in range(nbins):
        xs = x[i * npoints:(i + 1) * npoints]
        ys = y[i * npoints:(i + 1) * npoints]
        if len(xs) == 0:
            # with few data the trailing bins are empty; skipping them avoids
            # NaNs that would corrupt the polyfit below
            continue
        xbin.append(np.mean(xs))    # mean |R| of the bin
        ybin.append(np.std(ys))     # error spread of the bin
    p = np.polyfit(xbin, ybin, 1)
    if p[1] < 0:  # if absolute error is negative, just assign some small value
        p[1] = 1e-3
    p[0] = p[0] + 0.02  # increase relative error by 2% to account for other sources of error
    print('error model: err = %.4f*|R|+%.4f' % (p[0], p[1]))
    # automated (from error model)
    data_filt['err'] = ert.estimateError(
        data_filt,
        absoluteError=p[1],
        relativeError=p[0]  # % noise
    )
else:
    data_filt['err'] = ert.estimateError(
        data_filt,
        absoluteError=0.001,
        relativeError=0.04  # % noise
    )
# Configure the inversion: choose lambda automatically via the L-curve ...
mgr.inv.inv.setOptimizeLambda(True)
# ... and use a blocky (L1-type) model constraint
mgr.inv.inv.setBlockyModel(True)
# mgr.inv.inv.setRobustData(True)
# run inversion
mod = mgr.invert(
    data_filt,
    verbose=False,
)
# retrieve regularization strength, chi^2, and percent RMS data misfit
lam = mgr.inv.inv.getLambda()
chi2 = mgr.inv.inv.getChi2()
rms = np.sqrt(np.mean(((data_filt['rhoa']-mgr.inv.response)/data_filt['rhoa'])**2))*100
# Plot the inverted resistivity model, masking poorly covered cells and
# trimming the top/bottom 1 % of model values from the color scale.
fig, ax = plt.subplots(1,1,figsize=[15,5])
ax, cBar = mgr.showResult(
    mod,
    ax=ax,
    cMap='turbo_r',
    coverage=mgr.coverage(),
    cMin=np.sort(mgr.paraModel())[int(len(mgr.paraModel())/100)], # this part trims the color scale to make things easier to see
    cMax=np.sort(mgr.paraModel())[-int(len(mgr.paraModel())/100)],
);
cBar.set_label(r'$\rho$ [$\Omega$m]')
ax.set_xlabel('X (m)')
ax.set_ylabel('Z (m)')
ax.set_title(r'$\lambda_{opt}$ = %.2f, chi$^2$ = %.2f, rms error = %.2f%%'%(lam,chi2,rms),fontsize=20);
# Observed vs. predicted pseudosections and their percent misfit.
fig, ax = plt.subplots(1,3,figsize=[15,5])
mgr.showData(data_filt, vals=data_filt['rhoa'],ax=ax[0],label=r'$\rho_{app}$ ($\Omega$m)',cMap='turbo_r');
mgr.showData(data_filt, vals=mgr.inv.response,ax=ax[1],label=r'$\rho_{app}$ ($\Omega$m)',cMap='turbo_r',cMin=min(data_filt['rhoa']),cMax=max(data_filt['rhoa']));
mgr.showData(data_filt, vals=((mgr.inv.response-data_filt['rhoa'])/data_filt['rhoa'])*100,ax=ax[2],label='Error (%)',cMap='seismic',cMin=-20,cMax=20);
ax[0].set_title('Observed data (filtered)',fontsize=20)
ax[1].set_title('Predicted data',fontsize=20)
ax[2].set_title('Misfit',fontsize=20);