-
Notifications
You must be signed in to change notification settings - Fork 0
/
diabites.py
60 lines (37 loc) · 1.51 KB
/
diabites.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import numpy as np
import pandas as pd
from sklearn import ensemble
from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import pickle
# Load the raw dataset from the current working directory and echo it.
df = pd.read_csv('diabetes.csv')
print(df)

# SkinThickness and Insulin are not used as features. Drop them before any
# cleaning: zero-masking them first (as the script used to) had no effect on
# the result because the columns were removed before dropna ran.
df = df.drop(columns=['SkinThickness', 'Insulin'])

# In these columns the script treats 0 as a placeholder for a missing
# measurement; rows containing one are discarded by dropna below.
# NOTE(review): Pregnancies is intentionally NOT in this list. A count of 0
# is a legitimate value there, and masking it threw away every such row.
zero_as_missing = ['Glucose', 'BloodPressure', 'BMI']

# Assign the result back instead of calling
# df[col].replace(..., inplace=True): the in-place form operates on a
# temporary column selection, which warns on recent pandas and leaves df
# unchanged when Copy-on-Write is enabled.
df[zero_as_missing] = df[zero_as_missing].replace(0, np.nan)
df = df.dropna(axis=0)

# Target vector: the Outcome column.
y = df['Outcome']
print(y)

# Feature matrix: every remaining column except the target.
X = df.drop('Outcome', axis=1)
print(X)
# Hold out 20% of the rows for evaluation; random_state fixes the shuffle so
# the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)
print(f'Shape training set: X:{X_train.shape}, y:{y_train.shape}')
print(f'Shape test set: X:{X_test.shape}, y:{y_test.shape}')

# An ensemble.RandomForestClassifier was previously tried at this point
# (fit on the training split, scored with accuracy_score on the test split).
# The author's note on that experiment reads "same both" -- presumably both
# models reached the same accuracy; verify before relying on it.

# Fit a logistic regression with scikit-learn's default settings, predict the
# held-out rows, and report plain accuracy.
log_reg = LogisticRegression().fit(X_train, y_train)
y_predlog = log_reg.predict(X_test)
print(f'Accuracy : {accuracy_score(y_test, y_predlog)}')
# Persist the fitted classifier. The context manager guarantees the file is
# flushed and closed before it is read back (the bare open() calls used
# previously left both handles open until garbage collection).
with open('modeldiabetes.pkl', 'wb') as model_file:
    pickle.dump(log_reg, model_file)

# Reload the model from disk into `model`.
# NOTE: pickle.load can execute arbitrary code; this is acceptable here only
# because the file was written by this same script a moment ago. Do not point
# it at a pickle from an untrusted source.
with open('modeldiabetes.pkl', 'rb') as model_file:
    model = pickle.load(model_file)