def wrapper_merge_segys(file_list, txt_suffix: str = 'merge', verbosity: int = 1):
"""
Merge SEG-Y files provided as list of input paths.
Potential gaps will be interpolated (linear) and duplicates removed while
keeping the last (aka newest) occurrence.
Parameters
----------
file_list : tuple,list
Iterable of files to be merged.
txt_suffix : str, optional
Filename suffix for merged file (default: 'merge')
verbosity : int, optional
Verbosity constant for stdout printing (default: 1).
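
    Examples
    --------
    Merge two (hypothetical) overlapping files; the result is written to
    'line_001_merge.sgy' next to the first input:

    >>> wrapper_merge_segys(['line_001.sgy', 'line_002.sgy'])  # doctest: +SKIP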
"""
first_file = file_list[0]
basepath, filename = os.path.split(first_file)
basename, suffix = os.path.splitext(filename)
out_name = f'{basename}_{txt_suffix}{suffix}'
out_file = os.path.join(basepath, out_name)
trace_headers_list = []
specs = []
header_bin = []
swdep_list = []
data_list = []
trace_header_to_check = {
'tracl': segyio.TraceField.TRACE_SEQUENCE_LINE,
'tracr': segyio.TraceField.TRACE_SEQUENCE_FILE,
'fldr': segyio.TraceField.FieldRecord,
'ns': segyio.TraceField.TRACE_SAMPLE_COUNT,
        'dt': segyio.TraceField.TRACE_SAMPLE_INTERVAL,
}
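    # empty per-word accumulators, filled while looping over the input files;
    # keys follow common SEG-Y trace header mnemonics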
    trace_header_dict = {key: np.array([], dtype='int') for key in trace_header_to_check}
trace_header_datetime = {
'year': segyio.TraceField.YearDataRecorded,
'day': segyio.TraceField.DayOfYear,
'hour': segyio.TraceField.HourOfDay,
'minute': segyio.TraceField.MinuteOfHour,
'sec': segyio.TraceField.SecondOfMinute,
}
    trace_header_datetime_dict = {key: np.array([]) for key in trace_header_datetime}
trace_header_coords = {
'sx': segyio.TraceField.SourceX,
'sy': segyio.TraceField.SourceY,
'gx': segyio.TraceField.GroupX,
'gy': segyio.TraceField.GroupY,
'cdpx': segyio.TraceField.CDP_X,
'cdpy': segyio.TraceField.CDP_Y,
}
    trace_header_coords_dict = {key: np.array([]) for key in trace_header_coords}
list_ntraces = []
for f in file_list:
xprint(f'Processing file < {os.path.split(f)[-1]} >', kind='info', verbosity=verbosity)
with segyio.open(f, 'r', strict=False, ignore_geometry=True) as file:
list_ntraces.append(file.tracecount)
specs.append(segyio.tools.metadata(file))
trace_headers_list.append(parse_trace_headers(file))
# load binary header
header_bin.append(file.bin)
# check binary header elements
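            # (files with e.g. different sample intervals, trace lengths or
            # data sample formats cannot simply be concatenated)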
            binary_equal = all(header_bin[0] == b for b in header_bin)
if not binary_equal:
raise IOError(
'Specified SEG-Y files have different binary headers. No easy merging possible, please check your data!'
)
# load trace header values used for gap checking
xprint('Check trace headers', kind='debug', verbosity=verbosity)
for key, field in trace_header_to_check.items():
# get already stored values (initially empty array)
values = trace_header_dict.get(key)
# combine previous and new values
tr_header_attr = np.sort(
np.concatenate((values, file.attributes(field)[:]), axis=None)
)
# update dictionary with combined values
trace_header_dict[key] = tr_header_attr
# load datetimes from trace headers
xprint('Load timestamps from trace headers', kind='debug', verbosity=verbosity)
for key, field in trace_header_datetime.items():
# get already stored values (initially empty array)
datetime_values = trace_header_datetime_dict.get(key)
# combine previous and new values
tr_header_datetime = np.concatenate(
(datetime_values, file.attributes(field)[:]), axis=None
)
# update dictionary with combined values
trace_header_datetime_dict[key] = tr_header_datetime
# load coordinates from trace headers
xprint('Load coordinates from headers', kind='debug', verbosity=verbosity)
for key, field in trace_header_coords.items():
# get already stored values (initially empty array)
coords = trace_header_coords_dict.get(key)
# combine previous and new values
tr_header_coords = np.concatenate((coords, file.attributes(field)[:]), axis=None)
# update dictionary with combined values
trace_header_coords_dict[key] = tr_header_coords
# SourceWaterDepth
xprint('Load SourceWaterDepth from headers', kind='debug', verbosity=verbosity)
swdep_list.extend(file.attributes(segyio.TraceField.SourceWaterDepth)[:])
# trace data (as 2D np.ndarray)
xprint('Load seismic section', kind='debug', verbosity=verbosity)
data_list.append(file.trace.raw[:])
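            # NOTE: file.trace.raw[:] eagerly loads the complete trace data of
            # each file into memory, so merging very large surveys may require
            # a lot of RAM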
xprint('Merge trace headers', kind='debug', verbosity=verbosity)
# concat trace headers
trace_headers = pd.concat(trace_headers_list, ignore_index=True)
    # index by TRACE_SEQUENCE_LINE so missing trace numbers (gaps) can be detected
    trace_headers.set_index('TRACE_SEQUENCE_LINE', drop=False, inplace=True)
    # flag duplicate traces from different files (only *exact* matches!), keeping the "newer" record
    mask_overlapping_duplicates = trace_headers.duplicated(keep='last')
    # flag duplicate traces within the same file, ignoring the per-file trace counter
    col_subset = list(trace_headers.columns)
    col_subset.remove('TRACE_SEQUENCE_FILE')
    mask_internal_duplicates = trace_headers.duplicated(subset=col_subset, keep='first')
    mask = mask_overlapping_duplicates | mask_internal_duplicates
# select non-duplicates
trace_headers = trace_headers[~mask]
nduplicates = np.count_nonzero(mask)
if nduplicates > 0:
xprint(f'Removed < {nduplicates} > duplicates', kind='debug', verbosity=verbosity)
# create gap records
trace_headers = trace_headers.reindex(
pd.RangeIndex(
trace_headers.iloc[0]['TRACE_SEQUENCE_LINE'],
trace_headers.iloc[-1]['TRACE_SEQUENCE_LINE'] + 1,
)
)
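    # e.g. remaining TRACE_SEQUENCE_LINE values [1, 2, 5] are reindexed to the
    # full range 1..5, creating all-NaN header rows for the missing traces 3 and 4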
xprint('Merge seismic data', kind='debug', verbosity=verbosity)
data = np.concatenate(data_list, axis=0)
# remove duplicates
data = data[~mask]
# get gap indices
    idx_gaps = pd.isnull(trace_headers).any(axis=1).to_numpy().nonzero()[0]
# if gaps are present
if len(idx_gaps) > 0:
# interpolate gaps in merged trace header
xprint('Interpolate gaps in merged trace header', kind='debug', verbosity=verbosity)
trace_headers_interp = trace_headers.interpolate(method='linear').astype('int32')
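        # note: linear interpolation runs on the integer header words (incl.
        # coordinates and timestamps); casting back to 'int32' truncates any
        # fractional values of the interpolated gap traces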
trace_headers_interp['TRACE_SEQUENCE_FILE'] = np.arange(
1, trace_headers_interp.shape[0] + 1, 1
)
trace_headers_interp.rename(columns=segyio.tracefield.keys, inplace=True)
xprint('Fill gaps with zero traces', kind='debug', verbosity=verbosity)
idx_gaps_first = np.nonzero(np.diff(idx_gaps) > 1)[0] + 1
if idx_gaps_first.size == 0:
idx = [idx_gaps[0]]
ntr = [idx_gaps.size]
else:
idx = idx_gaps[np.insert(idx_gaps_first, 0, 0)]
ntr = [a.size for a in np.split(idx_gaps, idx_gaps_first)]
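        # e.g. idx_gaps = [5, 6, 10] -> idx_gaps_first = [2], thus idx = [5, 10]
        # (first index of each gap run) and ntr = [2, 1] (run lengths);
        # inserting in ascending index order keeps the data rows aligned with
        # the reindexed trace header positions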
for i, n in zip(idx, ntr):
dummy_traces = np.zeros((n, data.shape[1]), dtype=data.dtype)
data = np.insert(data, i, dummy_traces, axis=0)
else:
trace_headers_interp = trace_headers
trace_headers_interp['TRACE_SEQUENCE_FILE'] = np.arange(
1, trace_headers_interp.shape[0] + 1, 1
)
trace_headers_interp = trace_headers_interp.rename(columns=segyio.tracefield.keys)
    # init output SEG-Y specs (samples/TWT are taken from the first file!)
    spec = specs[0]
    spec.tracecount = data.shape[0]  # number of traces from combined array
# save merged SEG-Y
xprint('Write merged SEG-Y to disk', kind='debug', verbosity=verbosity)
with segyio.open(first_file, 'r', ignore_geometry=True) as src:
with segyio.create(out_file, spec) as dst:
dst.text[0] = src.text[0]
dst.bin = src.bin
            for i, trheader in enumerate(dst.header):
trheader.update(trace_headers_interp.iloc[i].to_dict())
dst.trace = np.ascontiguousarray(data, dtype=data.dtype)
# update textual header
text = get_textual_header(out_file)
    ## add list of merged input files
    info = ','.join([os.path.splitext(os.path.basename(f))[0] for f in file_list])
text_updated = add_processing_info_header(text, info, prefix='MERGED')
write_textual_header(out_file, text_updated)
# save auxiliary file
    out_file_aux = os.path.splitext(out_file)[0] + '.parts'
with open(out_file_aux, 'w', newline='\n') as fout:
fout.write(f'The merged SEG-Y file < {out_name} > contains the following files:\n')
for f, ntr in zip(file_list, list_ntraces):
fout.write(f' - {os.path.split(f)[-1]} {ntr:>6d} trace(s)\n')
string = f'Trace duplicates (different files): {np.count_nonzero(mask_overlapping_duplicates):>3d}\n'
string += f'Trace duplicates (within single file): {np.count_nonzero(mask_internal_duplicates):>3d}\n'
fout.write(string)