Upload New File

484e1508 · Lars Yström · 484e1508
Commit 484e1508 authored 1 year ago by Lars Yström
--- a/NRG.py
+++ b/NRG.py
+# -*- coding: utf-8 -*-
+"""
+NRG - An artificial neural network solute geothermometer
+FUNCTION:
+    - Version 0.03: M. Vollmer (KIT), L. H. Ystroem (KIT) - June 2023
+An artificial neural network solute geothermometer trained by data from
+the measured reservoir temperatures worldwide. Using a feedforward 
+multilayer perceptron to solve the regression analysis of fluid chemistry
+and reservoir temperature.
+INPUT:
+    - csv-file: ['pH','Na','K','Ca','Mg','SiO2','Cl','T']
+OUTPUT:
+    - graphical output of predicted vs. measured data plus error diagrams
+    - array of errors and predictions
+PLEASE NOTE:
+    - The solute ANN geothermometer was programmed in Python 3.8 with
+      associated libraries: pandas, matplotlib, numpy, seaborn, tensorflow,
+      keras
+INSTRUCTION:
+- To use the geothermometer, Python (3.8) and associated libraries
+  must be installed
+- Training data and new unknown data must be in the recommended csv-file
+  template: ['pH','Na','K','Ca','Mg','SiO2','Cl','T']
+- csv-input-file names must be set within the code (the two pd.read_csv calls: "Training.csv" and "Transfer.csv")
+- Start the ANN via the Run-button
+- Results are visualised in "plots" and output text on the console
+- Further results can be picked from the variables
+"""
+# Libraries
+import pandas as pd
+import matplotlib.pyplot as plt
+import numpy as np
+import seaborn as sns
+from numpy.random import seed
+# Preprocessing
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+# Metrics
+from sklearn import metrics
+from sklearn.metrics import r2_score
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import mean_absolute_percentage_error
+#Tensorflow & Keras
+from tensorflow.keras import Sequential
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.callbacks import ModelCheckpoint
+import tensorflow as tf
+# Reading the csv-inputfile and delet all nan/0 from data
+data= pd.read_csv("Training.csv", delimiter=",")
+data= data.dropna()
+# Fixing up global and local seed
+seed(0)
+tf.random.set_seed(0)
+# Splitting the input data
+temp, test = train_test_split(data, test_size=0.2)
+train, val = train_test_split(temp, test_size=0.1)
+# Define input variables and output variable
+X_train = train[['pH','Na','K','Ca','Mg','SiO2','Cl']]
+y_train = train[['T']]
+X_val = val[['pH','Na','K','Ca','Mg','SiO2','Cl']]
+y_val = val[['T']]
+X_test = test[['pH','Na','K','Ca','Mg','SiO2','Cl']]
+y_test = test[['T']]
+# Scale and centre data 
+scaler_input, scaler_target = StandardScaler(), StandardScaler()
+scaler_input.fit(X_train)
+scaler_target.fit(y_train)
+X_train = scaler_input.transform(X_train)
+y_train = scaler_target.transform(y_train)
+X_test = scaler_input.transform(X_test)
+y_test = scaler_target.transform(y_test)
+X_val = scaler_input.transform(X_val)
+y_val = scaler_target.transform(y_val)
+# Determine the input features
+n_features = X_train.shape[1]
+# Set initializer with optimiser
+kernel_initializer = 'normal'
+opt = tf.keras.optimizers.Adam(learning_rate=0.001)
+# Implementing Early Stopping
+es = EarlyStopping(monitor='val_loss', mode='auto', verbose=1, patience=20, restore_best_weights=True)
+# Save the trained model
+checkpoint_filepath = './checkpoint.hdf5'
+checkpoint = ModelCheckpoint(filepath = checkpoint_filepath, verbose = 1, save_best_only = True,
+                                   monitor ='val_loss', save_weights_only = True, mode = "auto")
+# Define model architecture
+model = Sequential()
+model.add(Dense(80, activation='relu', kernel_initializer=kernel_initializer, input_shape=(n_features,)))
+model.add(Dense(1))
+# Compile the model
+model.compile(optimizer=opt, loss='mean_squared_error')
+# Hyperparameter optimisation
+history = model.fit(X_train, y_train, epochs=300, batch_size=16, verbose=2, validation_data=(X_val,y_val),
+                    callbacks=[es])
+# Prediction of the test set
+yhat = model.predict(X_test)
+X_train_p = model.predict(X_train)
+X_val = model.predict(X_val)
+# Plot learning curves
+plt.xlabel('Epochs')
+plt.ylabel('Mean square error')
+plt.plot(history.history['loss'], label='Training')
+plt.plot(history.history['val_loss'], label='Validation')
+plt.legend()
+plt.title('Learning curves')
+plt.savefig('loss.png')
+plt.show()
+# Inverse transform scaled and centred data
+y_test = scaler_target.inverse_transform(y_test)
+ypred = scaler_target.inverse_transform(yhat)
+X_train_p = scaler_target.inverse_transform(X_train_p)
+y_train = scaler_target.inverse_transform(y_train)
+X_val = scaler_target.inverse_transform(X_val)
+y_val = scaler_target.inverse_transform(y_val)
+X_train = scaler_input.inverse_transform(X_train)
+# Metric scores (change for other sets)
+mse = mean_squared_error(y_test, ypred)
+rmse = np.sqrt(metrics.mean_squared_error(y_test,ypred))
+mape =mean_absolute_percentage_error(y_test, ypred)
+r2 = r2_score(y_test, ypred)
+print('MAPE: %.3f' % mape)
+print('MSE: %.3f' % mse)
+print('RMSE: %.3f' % np.sqrt(mse))
+print('R_squared: %.3f' % r2)
+# Reading in csv-inputfile of new transferable data
+extra = pd.read_csv("Transfer.csv", delimiter=",")
+x_a = extra[['pH','Na','K','Ca','Mg','SiO2','Cl']]
+y_a = extra[['T']]
+# Scale, centre, and predict new transferable centre data 
+scaler_x = StandardScaler()
+scaler_y = StandardScaler()
+scaler_x.fit(x_a)
+scaler_y.fit(y_a)
+Xnew = scaler_x.transform(x_a)
+y_pred_a = model.predict(Xnew)
+y_pred_a = scaler_y.inverse_transform(y_pred_a) 
+y_true = y_a
+# Polt of predicted temperature vs measured temperature plus transferred data
+plt.figure()
+plt.plot(X_train_p, y_train,'.b', label='Training',markersize=8)
+#color='#808080', marker='.', markersize=8, label='ANN', linewidth=0
+plt.plot(X_val, y_val,'.b',markersize=8)
+plt.plot(y_test, ypred,'.r', label='Testing',markersize=8)
+plt.plot(y_true, y_pred_a,'.g', label='Transfer',markersize=8)
+plt.plot(y_test,y_test,'k', label ='Regression') #color='#606060'
+plt.xlabel('Measured bottom hole temperature [°C]')
+plt.ylabel('Predicted bottom hole temperature [°C]')
+plt.xlim(0,350)
+plt.ylim(0,350)
+plt.legend(loc='upper left')
+plt.title('R$^2$: %.3f' % r2)
+plt.savefig('regression.png')
+plt.show()
+# Error histogram of test set
+error = ypred - y_test
+plt.hist(error, bins=20)
+plt.xlabel('Predicted temperature difference [K]')
+plt.ylabel('Quantity')
+plt.xlim(-40,40)
+plt.ylim(0,6)
+plt.title('Error histogram of tested data')
+plt.savefig('histogram.png')
+plt.show()
+error_train = y_train - X_train_p
+error_val = y_val - X_val
+error_main = np.concatenate([error,error_val,error_train])
+# Violinplot of the error distribution
+sns.violinplot(error_main, cut=1)
+plt.title('Error distribution')
+plt.ylabel('Dataset')
+plt.xlabel('Predicted temperature difference [K]')
+plt.xlim(-40,40)
+plt.savefig('disribution.png')
+plt.show()
+# Plot of the outlier removal
+abs_error_main = np.absolute(error_main)
+measured = np.concatenate([y_test , X_train_p, X_val])
+predict = np.concatenate([ypred , y_train, y_val])
+rmse_error = np.sqrt(metrics.mean_squared_error(measured, predict))
+plt.stem(abs_error_main, linefmt=':')
+plt.axhline(y=2*rmse_error, c='black', ls=':')
+plt.title('Outlier detection')
+plt.ylabel('RMSE')
+plt.xlabel('Datapoint')
+plt.ylim(-5,75)
+plt.savefig('outlier removal.png')
+plt.show()