Skip to content
Snippets Groups Projects
Commit 8271e9f2 authored by Vladyslav Lubkovskyi's avatar Vladyslav Lubkovskyi
Browse files

ml for intrusion detection

parent 0f73c6bf
No related branches found
No related tags found
No related merge requests found
File added
import matplotlib
matplotlib.use('TkAgg')
import time
import numpy as np
import pandas as pd
import pyarrow as pa
from math import *
import matplotlib.pyplot as plt
from PIL import Image
import seaborn as sns
import itertools
import io
import plotly.offline as py
from IPython import get_ipython
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.figure_factory as ff
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
# Detect whether we are running inside IPython/Jupyter; only then enable
# Plotly's inline notebook rendering.  Under plain `python`, get_ipython
# is undefined and raises NameError, and the setup is skipped.
try:
    get_ipython()
    py.init_notebook_mode(connected=True)
except NameError:
    pass
# Silence all library warnings (pandas/sklearn chatter during fitting).
warnings.filterwarnings('ignore')
# Load the intrusion-detection dataset from a local Parquet file.
# NOTE(review): assumes the file sits in the working directory — confirm.
Training = pd.read_parquet('intrusion_dataset.parquet')
####################################################################################
# Checking what is in the dataset
# def data_overview(df, message):
# print(f'{message}:\n')
# print("Rows: ", df.shape[0])
# print("\nNumber of features: ", df.shape[1])
# print("\nFeatures: ", df.columns.to_list())
# print("\nMissing values: ", df.isnull().sum().values.sum())
# print("\nUnique values: ", df.nunique())
#
#
# data_overview(Training, 'Intrusion Detection Dataset')
####################################################################################
####################################################################################
# This code should be used to find correlated features and drop them,
# but that clean-up has not been done yet — if anyone wants to take it
# on, go ahead.
#
# Training = Training.dropna(axis="columns")
# nTraining = Training[[col for col in Training.columns if Training[col].nunique() > 1 and
# pd.api.types.is_numeric_dtype(Training[col])]]
#
# corr = nTraining.corr()
# plt.figure(figsize=(15, 12))
# sns.heatmap(corr)
# plt.show()
####################################################################################
# Preparing of the data
# ------------------------------------------------------------------
# Preparing the data
# ------------------------------------------------------------------
# Unique attack categories, kept for reference/inspection.
attack_cat = Training.get('attack_cat').drop_duplicates()

# Drop the binary 'label' column; the task is multi-class prediction
# of 'attack_cat'.
Training = Training.drop(['label'], axis=1)

# Target vector: take the ORIGINAL (string) categories before any
# label-encoding mutates the frame.  sklearn's metrics and GaussianNB
# accept string class labels directly.
y = Training[["attack_cat"]]

# Label-encode every non-numeric (object-dtyped) column so the model
# can consume it.  One encoder per column is kept so the mapping can be
# inverted later.
non_numeric_columns = Training.select_dtypes(include=['object']).columns
label_encoders = {}
for col in non_numeric_columns:
    le = LabelEncoder()
    Training[col] = le.fit_transform(le.classes_ if False else Training[col])
    label_encoders[col] = le

# Feature matrix: all numeric columns EXCEPT the target.  'attack_cat'
# is object-dtyped and was just label-encoded to int64 above, so a bare
# select_dtypes would pull the encoded target into the features
# (target leakage) — drop it explicitly first.
# (A dead `sc = StandardScaler()` assignment, immediately overwritten
# by MinMaxScaler, was removed here.)
x = Training.drop(['attack_cat'], axis=1).select_dtypes(include=['float64', 'int64'])

# Scale every feature into [0, 1].
sc = MinMaxScaler()
x = sc.fit_transform(x)

# Fixed random_state keeps the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
# ------------------------------------------------------------------
# Naive Bayes baseline: fit, predict, and report timings + accuracy.
# ------------------------------------------------------------------
clfg = GaussianNB()

# Training phase, wall-clock timed.  ravel() flattens the single-column
# target frame into the 1-D array sklearn expects.
fit_start = time.time()
clfg.fit(x_train, y_train.values.ravel())
fit_end = time.time()
print(f"Training time: {fit_end - fit_start}")

# Prediction phase, timed separately from training.
pred_start = time.time()
y_pred = clfg.predict(x_test)
pred_end = time.time()
print(f"Prediction time: {pred_end - pred_start}")

# Fraction of correctly classified test samples.
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment