Machine Learning for Beginners (scikit-learn module)

发布时间 2023-09-24 16:19:08作者: Egu0

Machine Learning Common Lifecycle

  1. Import the Data
  2. Clean the Data
  3. Split the Data into Training/Test Sets
  4. Create a Model
  5. Train the Model
  6. Make Predictions
  7. Evaluate and Improve
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from joblib import dump, load
from sklearn import tree

# prepare data
music_df = pd.read_csv('music.csv')  # csv.zip: https://bit.ly/3muqqta
X = music_df.drop(columns=['genre'])  # features: every column except the label
y = music_df['genre']  # label the model learns to predict
# Hold out 20% of the rows for testing. No random_state is given, so the split
# (and therefore the accuracy printed below) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

# learning
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

# evaluating
predictions2 = model.predict(X_test)  # predict on test set
# scikit-learn metrics take (y_true, y_pred) in that order; accuracy happens to
# be symmetric, but keeping the documented order avoids bugs with other metrics.
score = accuracy_score(y_test, predictions2)  # evaluate, return value range 0-1
print("accuracy score:", score)

# dump and load model
dump(model, 'music-recommender.joblib')
# load & use
# NOTE: joblib.load unpickles the file, so only load model files you trust.
model_loaded = load('music-recommender.joblib')
# The model was fitted on a DataFrame, so give the sample the same column
# names; a bare nested list relies on positional order and makes recent
# scikit-learn versions warn about missing feature names.
sample = pd.DataFrame([[21, 1]], columns=X.columns)
print(model_loaded.predict(sample))

# visualizing a Decision Tree
tree.export_graphviz(model,
                     out_file='music-recommender.dot',
                     # features of data, taken from the training columns
                     # instead of a hard-coded list
                     feature_names=list(X.columns),
                     # labels, in the order the fitted tree indexes them; the
                     # random split may leave a genre out of y_train, in which
                     # case sorted(y.unique()) would mislabel the nodes
                     class_names=[str(label) for label in model.classes_],
                     label='all',  # show informative labels at every node
                     rounded=True,
                     filled=True)
# Then use vscode extension (https://marketplace.visualstudio.com/items?itemName=joaompinto.vscode-graphviz)
# to open .dot file and preview the decision tree.