Skip to content
Snippets Groups Projects
Commit 23be1da1 authored by Vladyslav Lubkovskyi
Browse files

Completed gui, some refactoring of files so it works

parent f44b4f33
No related branches found
No related tags found
No related merge requests found
Showing
with 39 additions and 38 deletions
......@@ -6,7 +6,7 @@
<sourceFolder url="file://$MODULE_DIR$/code/package_capture/test" isTestSource="true" />
<excludeFolder url="file://$MODULE_DIR$/.venv" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.12 (PSE)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
......@@ -2,45 +2,42 @@ import numpy as np
import pandas as pd
import seaborn as sns
import warnings
import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.tree import DecisionTreeClassifier
import utilities as util
from utilities import plot_counts
from utilities import plot_features, ordinal_encode, normalize, plot_confusion_matrix, print_high_confidence_samples, import_data
warnings.filterwarnings("ignore")
# Constants
y_data = 'class'
y_columns = ['normal', 'anomaly']
train_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'nsl-kdd-dataset', 'KDDTrain+.arff'))
test_file_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'nsl-kdd-dataset', 'KDDTest+.arff'))
df_train, df_test, model_name = util.import_data(
train_file_path=train_file_path,
test_file_path=test_file_path,
model_name="Decision Tree"
)
df_train, df_test, model_name = import_data(
train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
model_name = "Decision Tree")
sc = StandardScaler()
enc = LabelEncoder()
# Normalize data
util.ordinal_encode(df=df_train, categories=y_columns, target=y_data)
util.ordinal_encode(df=df_test, categories=y_columns, target=y_data)
ordinal_encode(df = df_train, categories = y_columns, target = y_data)
ordinal_encode(df = df_test, categories = y_columns, target = y_data)
util.normalize(df_train, df_test, y_data, sc, enc)
normalize(df_train, df_test, y_data, sc, enc)
# Plot absolute quantities of class 0 and class 1
sns.countplot(x=y_data, data=df_train)
util.plot_counts(model_name=model_name)
sns.countplot(x = y_data, data = df_train)
plot_counts(model_name = model_name)
# Separate X and y
X_train = df_train.select_dtypes(include=[np.number]).drop(columns=[y_data])
X_test = df_test.select_dtypes(include=[np.number]).drop(columns=[y_data])
X_train = df_train.select_dtypes(include=[np.number]).drop(columns = [y_data])
X_test = df_test.select_dtypes(include=[np.number]).drop(columns = [y_data])
y_train = df_train[[y_data]]
y_test = df_test[[y_data]]
......@@ -75,20 +72,20 @@ dtc = DecisionTreeClassifier()
dtc.fit(X_train, y_train)
y_prediction = dtc.predict(X_test)
print("Classification report: \n", classification_report(y_test, y_prediction))
util.plot_confusion_matrix(confusion_matrix=confusion_matrix(y_test, y_prediction),
accuracy=dtc.score(X_test, y_test),
model_name=model_name)
plot_confusion_matrix(confusion_matrix = confusion_matrix(y_test, y_prediction),
accuracy = dtc.score(X_test, y_test),
model_name=model_name)
# Determine feature importance
features = pd.DataFrame(dtc.feature_importances_,
index=X_train.columns,
index= X_train.columns,
columns=['Importance']).sort_values(by='Importance', ascending=False)
util.plot_features(features, model_name=model_name)
plot_features(features, model_name = model_name)
def predict(prediction_input):
if len(prediction_input) == 0:
return
input_data = pd.DataFrame(prediction_input, columns=X_train.columns)
input_data = pd.DataFrame(prediction_input, columns = X_train.columns)
return dtc.predict(input_data)
util.print_high_confidence_samples(model=dtc, x=X_train)
\ No newline at end of file
print_high_confidence_samples(model = dtc, x = X_train)
......@@ -2,6 +2,8 @@ import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import os
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
......@@ -14,8 +16,8 @@ warnings.filterwarnings("ignore")
y_data = 'class'
y_values = ['normal', 'anomaly']
df_train, df_test, model_name = import_data(
train_file_path="nsl-kdd-dataset/" + "KDDTrain+.arff",
test_file_path="nsl-kdd-dataset/" + "KDDTest+.arff",
train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
model_name="KNN")
sc = StandardScaler()
......
import numpy as np
import pandas as pd
import seaborn as sns
import os
import warnings
......@@ -20,8 +21,8 @@ from sklearn.metrics import classification_report, confusion_matrix
y_data = 'class'
y_values = ['normal', 'anomaly']
df_train, df_test, model_name = import_data(
train_file_path = "nsl-kdd-dataset/" + "KDDTrain+.arff",
test_file_path = "nsl-kdd-dataset/" + "KDDTest+.arff",
train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
model_name = "Logistic Regression")
sc = StandardScaler()
enc = LabelEncoder()
......
import sys
import numpy as np
import pandas as pd
import os
import warnings
from sklearn.preprocessing import StandardScaler, LabelEncoder
......@@ -10,15 +13,16 @@ from sklearn.metrics import classification_report, confusion_matrix, precision_r
from utilities import plot_precision_recall_curve, plot_learning_curve, import_data, plot_roc_curve
from utilities import ordinal_encode, heat_map, plot_features, plot_confusion_matrix, normalize, print_high_confidence_samples
sys.stdout.reconfigure(line_buffering=True)
warnings.filterwarnings("ignore")
# Constants
y_data = 'class'
y_values = ['normal', 'anomaly']
df_train, df_test, model_name = import_data(
train_file_path = "nsl-kdd-dataset/" + "KDDTrain+.arff",
test_file_path = "nsl-kdd-dataset/" + "KDDTest+.arff",
model_name = "Random Forest")
train_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTrain+.arff"),
test_file_path = os.path.join(os.path.dirname(__file__), "nsl-kdd-dataset/" + "KDDTest+.arff"),
model_name = "Random Forest")
sc = StandardScaler()
enc = LabelEncoder()
......@@ -39,7 +43,6 @@ y_test = df_test[[y_data]]
# Train Random Forest Model
model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42)
# Prediction function
def predict(prediction_input):
if len(prediction_input) == 0:
......@@ -50,7 +53,6 @@ def predict(prediction_input):
def train():
model.fit(X_train, y_train.values.ravel())
graphs()
print("Training complete.")
def graphs():
......@@ -93,7 +95,6 @@ def graphs():
# Plot ROC curve using the function from utilities
plot_roc_curve(y_test, y_score, model_name=model_name)
if __name__ == "__main__":
train()
print("Graphs complete.")
code/machine_learning_models/resulting_figures/Decision Tree - Counts.png

40.1 KiB

code/machine_learning_models/resulting_figures/KNN - Confusion Matrix.png

69.4 KiB

code/machine_learning_models/resulting_figures/KNN - Counts.png

40.1 KiB

code/machine_learning_models/resulting_figures/KNN - ROC Curve.png

139 KiB

code/machine_learning_models/resulting_figures/Logistic Regression - Counts.png

521 KiB

0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment