From 11caccf8ce58291b1124f55ebe1be11495027b66 Mon Sep 17 00:00:00 2001
From: VladLub <vladlubkovskiy1@gmail.com>
Date: Sun, 23 Mar 2025 17:12:28 +0100
Subject: [PATCH] simple push so I can continue on the PC

---
 code/machine_learning_models/decision_tree.py | 50 ++++++++++---------
 code/machine_learning_models/utilities.py     |  7 ++-
 code/{gui/gui.py => main.py}                  | 18 +++++++
 3 files changed, 48 insertions(+), 27 deletions(-)
 rename code/{gui/gui.py => main.py} (92%)

diff --git a/code/machine_learning_models/decision_tree.py b/code/machine_learning_models/decision_tree.py
index 8b45961..6d83d6e 100644
--- a/code/machine_learning_models/decision_tree.py
+++ b/code/machine_learning_models/decision_tree.py
@@ -2,41 +2,45 @@ import numpy as np
 import pandas as pd
 import seaborn as sns
 import warnings
+import sys
+import os
 
-from sklearn.metrics import classification_report, confusion_matrix
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
+from sklearn.metrics import classification_report, confusion_matrix
 from sklearn.preprocessing import StandardScaler, LabelEncoder
 from sklearn.tree import DecisionTreeClassifier
-
-from utilities import plot_counts
-from utilities import plot_features, ordinal_encode, normalize, plot_confusion_matrix, print_high_confidence_samples, import_data
+import utilities as util
 
 warnings.filterwarnings("ignore")
 
-
 # Constants
 y_data = 'class'
 y_columns = ['normal', 'anomaly']
-df_train, df_test, model_name = import_data(
-    train_file_path = "nsl-kdd-dataset/" + "KDDTrain+.arff",
-    test_file_path =  "nsl-kdd-dataset/" + "KDDTest+.arff",
-    model_name = "Decision Tree")
+train_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'nsl-kdd-dataset', 'KDDTrain+.arff'))
+test_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'nsl-kdd-dataset', 'KDDTest+.arff'))
+
+df_train, df_test, model_name = util.import_data(
+    train_file_path=train_file_path,
+    test_file_path=test_file_path,
+    model_name="Decision Tree"
+)
 sc = StandardScaler()
 enc = LabelEncoder()
 
 # Normalize data
-ordinal_encode(df = df_train, categories = y_columns, target = y_data)
-ordinal_encode(df = df_test, categories = y_columns, target = y_data)
+util.ordinal_encode(df=df_train, categories=y_columns, target=y_data)
+util.ordinal_encode(df=df_test, categories=y_columns, target=y_data)
 
-normalize(df_train, df_test, y_data, sc, enc)
+util.normalize(df_train, df_test, y_data, sc, enc)
 
 # Plot absolute quantities of class 0 and class 1
-sns.countplot(x = y_data, data = df_train)
-plot_counts(model_name = model_name)
+sns.countplot(x=y_data, data=df_train)
+util.plot_counts(model_name=model_name)
 
 # Separate X and y
-X_train = df_train.select_dtypes(include=[np.number]).drop(columns = [y_data])
-X_test = df_test.select_dtypes(include=[np.number]).drop(columns = [y_data])
+X_train = df_train.select_dtypes(include=[np.number]).drop(columns=[y_data])
+X_test = df_test.select_dtypes(include=[np.number]).drop(columns=[y_data])
 y_train = df_train[[y_data]]
 y_test = df_test[[y_data]]
 
@@ -71,20 +75,20 @@ dtc = DecisionTreeClassifier()
 dtc.fit(X_train, y_train)
 y_prediction = dtc.predict(X_test)
 print("Classification report: \n", classification_report(y_test, y_prediction))
-plot_confusion_matrix(confusion_matrix = confusion_matrix(y_test, y_prediction),
-                      accuracy = dtc.score(X_test, y_test),
-                      model_name=model_name)
+util.plot_confusion_matrix(confusion_matrix=confusion_matrix(y_test, y_prediction),
+                           accuracy=dtc.score(X_test, y_test),
+                           model_name=model_name)
 
 # Determine feature importance
 features = pd.DataFrame(dtc.feature_importances_,
-                        index= X_train.columns,
+                        index=X_train.columns,
                         columns=['Importance']).sort_values(by='Importance', ascending=False)
-plot_features(features, model_name = model_name)
+util.plot_features(features, model_name=model_name)
 
 def predict(prediction_input):
     if len(prediction_input) == 0:
         return
-    input_data = pd.DataFrame(prediction_input, columns = X_train.columns)
+    input_data = pd.DataFrame(prediction_input, columns=X_train.columns)
     return dtc.predict(input_data)
 
-print_high_confidence_samples(model = dtc, x = X_train)
+util.print_high_confidence_samples(model=dtc, x=X_train)
\ No newline at end of file
diff --git a/code/machine_learning_models/utilities.py b/code/machine_learning_models/utilities.py
index cfbca48..8c2f3a6 100644
--- a/code/machine_learning_models/utilities.py
+++ b/code/machine_learning_models/utilities.py
@@ -179,15 +179,14 @@ def save_plot(name):
 
 # Data processing
 
-def import_data(train_file_path: str, test_file_path: str, model_name: str):
+def import_data(train_file_path, test_file_path, model_name):
     data, meta = arff.loadarff(train_file_path)
     df_train = pd.DataFrame(data)
-    df_train = df_train.applymap(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
+    df_train = df_train.map(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
 
-    # Importing test data set
     data, meta = arff.loadarff(test_file_path)
     df_test = pd.DataFrame(data)
-    df_test = df_test.applymap(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
+    df_test = df_test.map(lambda x: x.decode('utf-8') if isinstance(x, bytes) else x)
 
     return df_train, df_test, model_name
 
diff --git a/code/gui/gui.py b/code/main.py
similarity index 92%
rename from code/gui/gui.py
rename to code/main.py
index e10add7..525e5e1 100644
--- a/code/gui/gui.py
+++ b/code/main.py
@@ -3,6 +3,24 @@ from tkinter import scrolledtext, ttk, Menu
 import subprocess
 import threading
 import os
+import sys
+
+sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), 'machine_learning_models')))
+
+from machine_learning_models import utilities as util
+
+train_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'machine_learning_models', 'nsl-kdd-dataset', 'KDDTrain+.arff'))
+test_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'machine_learning_models', 'nsl-kdd-dataset', 'KDDTest+.arff'))
+
+
+# Import data using the correct paths
+df_train, df_test, model_name = util.import_data(
+    train_file_path=train_file_path,
+    test_file_path=test_file_path,
+    model_name=None
+)
+
+from machine_learning_models import decision_tree, random_forest, knn, logistic_regression
 
 from matplotlib import pyplot as plt
 from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
-- 
GitLab