Completed GUI; refactored some files so that it works

23be1da1 · Vladyslav Lubkovskyi · f44b4f33 · 23be1da1 · 23be1da1 · 23be1da1
Commit 23be1da1 authored 3 weeks ago by Vladyslav Lubkovskyi
--- a/.idea/PSE_Code.iml
+++ b/.idea/PSE_Code.iml
@@ -6,7 +6,7 @@
      <sourceFolder url="file://$MODULE_DIR$/code/package_capture/test" isTestSource="true" />
      <excludeFolder url="file://$MODULE_DIR$/.venv" />
    </content>
-    <orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.12 (PSE)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
 </module>
\ No newline at end of file
--- a/code/machine_learning_models/decision_tree.py
+++ b/code/machine_learning_models/decision_tree.py
@@ -2,45 +2,42 @@ import numpy as np
 import pandas as pd
 import seaborn as sns
 import warnings
-import sys
 import os

-sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-
 from sklearn.metrics import classification_report, confusion_matrix
+
 from sklearn.preprocessing import StandardScaler, LabelEncoder
 from sklearn.tree import DecisionTreeClassifier
-import utilities as util
+
+from utilities import plot_counts
+from utilities import plot_features, ordinal_encode, normalize, plot_confusion_matrix, print_high_confidence_samples, import_data

 warnings.filterwarnings("ignore")

+
 # Constants
 y_data = 'class'
 y_columns = ['normal', 'anomaly']
-train_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'nsl-kdd-dataset', 'KDDTrain+.arff'))
-test_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'nsl-kdd-dataset', 'KDDTest+.arff'))
-
-df_train, df_test, model_name = util.import_data(
-    train_file_path=train_file_path,
-    test_file_path=test_file_path,
-    model_name="Decision Tree"
-)
+df_train, df_test, model_name = import_data(
+    train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
+    test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
+    model_name = "Decision Tree")
 sc = StandardScaler()
 enc = LabelEncoder()

 # Normalize data
-util.ordinal_encode(df=df_train, categories=y_columns, target=y_data)
-util.ordinal_encode(df=df_test, categories=y_columns, target=y_data)
+ordinal_encode(df = df_train, categories = y_columns, target = y_data)
+ordinal_encode(df = df_test, categories = y_columns, target = y_data)

-util.normalize(df_train, df_test, y_data, sc, enc)
+normalize(df_train, df_test, y_data, sc, enc)

 # Plot absolute quantities of class 0 and class 1
-sns.countplot(x=y_data, data=df_train)
-util.plot_counts(model_name=model_name)
+sns.countplot(x = y_data, data = df_train)
+plot_counts(model_name = model_name)

 # Separate X and y
-X_train = df_train.select_dtypes(include=[np.number]).drop(columns=[y_data])
-X_test = df_test.select_dtypes(include=[np.number]).drop(columns=[y_data])
+X_train = df_train.select_dtypes(include=[np.number]).drop(columns = [y_data])
+X_test = df_test.select_dtypes(include=[np.number]).drop(columns = [y_data])
 y_train = df_train[[y_data]]
 y_test = df_test[[y_data]]

@@ -75,20 +72,20 @@ dtc = DecisionTreeClassifier()
 dtc.fit(X_train, y_train)
 y_prediction = dtc.predict(X_test)
 print("Classification report: \n", classification_report(y_test, y_prediction))
-util.plot_confusion_matrix(confusion_matrix=confusion_matrix(y_test, y_prediction),
-                           accuracy=dtc.score(X_test, y_test),
-                           model_name=model_name)
+plot_confusion_matrix(confusion_matrix = confusion_matrix(y_test, y_prediction),
+                      accuracy = dtc.score(X_test, y_test),
+                      model_name=model_name)

 # Determine feature importance
 features = pd.DataFrame(dtc.feature_importances_,
-                        index=X_train.columns,
+                        index= X_train.columns,
                        columns=['Importance']).sort_values(by='Importance', ascending=False)
-util.plot_features(features, model_name=model_name)
+plot_features(features, model_name = model_name)

 def predict(prediction_input):
    if len(prediction_input) == 0:
        return
-    input_data = pd.DataFrame(prediction_input, columns=X_train.columns)
+    input_data = pd.DataFrame(prediction_input, columns = X_train.columns)
    return dtc.predict(input_data)

-util.print_high_confidence_samples(model=dtc, x=X_train)
\ No newline at end of file
+print_high_confidence_samples(model = dtc, x = X_train)
--- a/code/machine_learning_models/knn.py
+++ b/code/machine_learning_models/knn.py
@@ -2,6 +2,8 @@ import warnings
 import numpy as np
 import pandas as pd
 import seaborn as sns
+import os
+
 from sklearn.metrics import classification_report, confusion_matrix
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.preprocessing import StandardScaler, LabelEncoder
@@ -14,8 +16,8 @@ warnings.filterwarnings("ignore")
 y_data = 'class'
 y_values = ['normal', 'anomaly']
 df_train, df_test, model_name = import_data(
-    train_file_path="nsl-kdd-dataset/" + "KDDTrain+.arff",
-    test_file_path="nsl-kdd-dataset/" + "KDDTest+.arff",
+    train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
+    test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
    model_name="KNN")

 sc = StandardScaler()

--- a/code/machine_learning_models/logistic_regression.py
+++ b/code/machine_learning_models/logistic_regression.py
 import numpy as np
 import pandas as pd
 import seaborn as sns
+import os

 import warnings

@@ -20,8 +21,8 @@ from sklearn.metrics import classification_report, confusion_matrix
 y_data = 'class'
 y_values = ['normal', 'anomaly']
 df_train, df_test, model_name = import_data(
-    train_file_path = "nsl-kdd-dataset/" + "KDDTrain+.arff",
-    test_file_path =  "nsl-kdd-dataset/" + "KDDTest+.arff",
+    train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
+    test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
    model_name = "Logistic Regression")
 sc = StandardScaler()
 enc = LabelEncoder()

--- a/code/machine_learning_models/random_forest.py
+++ b/code/machine_learning_models/random_forest.py
+import sys
+
 import numpy as np
 import pandas as pd
+import os

 import warnings
 from sklearn.preprocessing import StandardScaler, LabelEncoder
@@ -10,15 +13,16 @@ from sklearn.metrics import classification_report, confusion_matrix, precision_r
 from utilities import plot_precision_recall_curve, plot_learning_curve, import_data, plot_roc_curve
 from utilities import ordinal_encode, heat_map, plot_features, plot_confusion_matrix, normalize, print_high_confidence_samples

+sys.stdout.reconfigure(line_buffering=True)
 warnings.filterwarnings("ignore")

 # Constants
 y_data = 'class'
 y_values = ['normal', 'anomaly']
 df_train, df_test, model_name = import_data(
-    train_file_path = "nsl-kdd-dataset/" + "KDDTrain+.arff",
-    test_file_path =  "nsl-kdd-dataset/" + "KDDTest+.arff",
-    model_name = "Random Forest")
+	train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
+	test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
+	model_name = "Random Forest")
 sc = StandardScaler()
 enc = LabelEncoder()

@@ -39,7 +43,6 @@ y_test = df_test[[y_data]]

 # Train Random Forest Model
 model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)
-
 # Prediction function
 def predict(prediction_input):
 	if len(prediction_input) == 0:
@@ -50,7 +53,6 @@ def predict(prediction_input):

 def train():
 	model.fit(X_train, y_train.values.ravel())
-	graphs()
 	print("Training complete.")

 def graphs():
@@ -93,7 +95,6 @@ def graphs():
 	# Plot ROC curve using the function from utilities
 	plot_roc_curve(y_test, y_score, model_name=model_name)

-if __name__ == "__main__":
-	train()
+	print("Graphs complete.")


--- a/code/machine_learning_models/resulting_figures/Decision Tree - Confusion Matrix.png
+++ b/code/machine_learning_models/resulting_figures/Decision Tree - Confusion Matrix.png
--- a/code/machine_learning_models/resulting_figures/Decision Tree - Counts.png
+++ b/code/machine_learning_models/resulting_figures/Decision Tree - Counts.png
--- a/code/machine_learning_models/resulting_figures/Decision Tree - Feature Importance.png
+++ b/code/machine_learning_models/resulting_figures/Decision Tree - Feature Importance.png
--- a/code/machine_learning_models/resulting_figures/KNN - Confusion Matrix.png
+++ b/code/machine_learning_models/resulting_figures/KNN - Confusion Matrix.png
--- a/code/machine_learning_models/resulting_figures/KNN - Counts.png
+++ b/code/machine_learning_models/resulting_figures/KNN - Counts.png
--- a/code/machine_learning_models/resulting_figures/KNN - ROC Curve.png
+++ b/code/machine_learning_models/resulting_figures/KNN - ROC Curve.png
--- a/code/machine_learning_models/resulting_figures/Logistic Regression - Confusion Matrix.png
+++ b/code/machine_learning_models/resulting_figures/Logistic Regression - Confusion Matrix.png
--- a/code/machine_learning_models/resulting_figures/Logistic Regression - Counts.png
+++ b/code/machine_learning_models/resulting_figures/Logistic Regression - Counts.png
--- a/code/machine_learning_models/resulting_figures/Logistic Regression - Data Correlations.png
+++ b/code/machine_learning_models/resulting_figures/Logistic Regression - Data Correlations.png
--- a/code/machine_learning_models/resulting_figures/Logistic Regression - Feature Importance.png
+++ b/code/machine_learning_models/resulting_figures/Logistic Regression - Feature Importance.png
--- a/code/machine_learning_models/resulting_figures/Logistic Regression - Learning Curve.png
+++ b/code/machine_learning_models/resulting_figures/Logistic Regression - Learning Curve.png
--- a/code/machine_learning_models/resulting_figures/Logistic Regression - ROC Curve.png
+++ b/code/machine_learning_models/resulting_figures/Logistic Regression - ROC Curve.png
--- a/code/machine_learning_models/resulting_figures/Random Forest - Confusion Matrix.png
+++ b/code/machine_learning_models/resulting_figures/Random Forest - Confusion Matrix.png
--- a/code/machine_learning_models/resulting_figures/Random Forest - Data Correlations.png
+++ b/code/machine_learning_models/resulting_figures/Random Forest - Data Correlations.png
--- a/code/machine_learning_models/resulting_figures/Random Forest - Feature Importance.png
+++ b/code/machine_learning_models/resulting_figures/Random Forest - Feature Importance.png