Sklearn
丹丹

简单范例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
import warnings
warnings.filterwarnings('ignore')  # silence library warnings so the demo output stays clean
from sklearn import neighbors, datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load iris and keep only the first two features (columns 0 and 1).
iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

# Default 75/25 split; fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

# Standardize using statistics computed on the training split only,
# then apply the same transform to the test split (no leakage).
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# 5-nearest-neighbors classifier.
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)

accuracy_score(y_test, y_pred)  # notebook-style bare expression; value is shown below
0.631578947368421

数据分割

1
# Shuffle and split into train/test (75/25 by default); seed fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

数据处理

Standardization

1
2
3
4
from sklearn.preprocessing import StandardScaler

# Fit per-feature mean/std on the training data, then transform it to
# zero mean and unit variance (output array shown below).
scaler = StandardScaler().fit(X_train)
scaler.transform(X_train)
array([[-0.91090798, -1.59775374],
       [-1.0271058 ,  0.08448757],
       [ 0.59966379, -1.59775374],
       [ 0.01867465, -0.96691325],
       [ 0.48346596, -0.33607276],
       [-1.25950146,  0.29476773],
       [-1.37569929,  0.71532806],
       [-0.79471015, -1.17719341],
       [-1.14330363,  0.71532806],
       [ 2.45882905,  1.55644871],
       [-0.79471015,  0.71532806],
       [-0.79471015,  1.34616854],
       [-0.21372101, -0.33607276],
       [ 0.83205945, -0.1257926 ],
       [-0.44611666,  1.76672887],
       [ 1.41304859,  0.29476773],
       [ 0.01867465, -0.54635292],
       [ 2.22643339, -0.96691325],
       [-0.32991883, -1.17719341],
       [ 0.13487248,  0.29476773],
       [-1.0271058 ,  1.13588838],
       [-1.49189712, -1.59775374],
       [ 0.59966379, -0.54635292],
       [-1.60809495, -0.33607276],
       [-0.91090798,  1.13588838],
       [ 1.64544425, -0.1257926 ],
       [ 0.25107031,  0.71532806],
       [ 0.48346596, -1.8080339 ],
       [ 1.8778399 , -0.54635292],
       [ 1.18065293, -0.1257926 ],
       [ 0.71586162, -0.54635292],
       [-0.09752318, -1.17719341],
       [-0.91090798,  0.92560822],
       [-0.79471015,  1.55644871],
       [ 1.18065293, -0.54635292],
       [-0.67851232, -0.75663309],
       [-0.79471015,  1.55644871],
       [-0.21372101, -1.17719341],
       [ 0.36726814, -0.1257926 ],
       [ 0.94825728, -0.33607276],
       [ 0.71586162, -0.54635292],
       [-1.72429277, -0.1257926 ],
       [ 1.64544425,  1.13588838],
       [-0.79471015,  0.92560822],
       [ 0.59966379, -1.17719341],
       [-1.60809495,  0.29476773],
       [ 2.11023556, -0.1257926 ],
       [ 0.71586162,  0.29476773],
       [-0.79471015,  1.55644871],
       [ 0.83205945,  0.29476773],
       [ 0.59966379, -0.75663309],
       [-0.91090798,  0.92560822],
       [-0.67851232,  0.71532806],
       [ 0.71586162, -0.75663309],
       [ 0.01867465,  1.97700903],
       [-0.09752318,  2.81812969],
       [-1.37569929,  0.29476773],
       [ 1.29685076,  0.08448757],
       [ 0.59966379, -0.33607276],
       [-0.32991883,  0.92560822],
       [-0.09752318, -0.96691325],
       [-0.91090798,  0.50504789],
       [ 0.25107031, -1.8080339 ],
       [-1.0271058 , -0.1257926 ],
       [-0.91090798, -2.22859423],
       [ 0.94825728, -0.1257926 ],
       [-0.09752318, -0.54635292],
       [-0.32991883, -0.96691325],
       [-0.32991883, -1.59775374],
       [-1.14330363,  0.08448757],
       [ 0.25107031, -0.33607276],
       [-0.91090798, -0.1257926 ],
       [ 1.29685076,  0.08448757],
       [ 1.06445511, -1.17719341],
       [-0.56231449,  1.34616854],
       [-0.67851232,  2.1872892 ],
       [-0.91090798,  0.71532806],
       [-1.37569929,  1.13588838],
       [ 2.22643339,  1.55644871],
       [ 1.76164208, -0.33607276],
       [-1.37569929,  0.08448757],
       [-0.32991883, -1.38747358],
       [ 0.01867465, -0.75663309],
       [ 1.06445511,  0.50504789],
       [ 0.01867465, -0.75663309],
       [-0.44611666,  1.34616854],
       [-0.91090798,  0.71532806],
       [ 0.25107031, -0.75663309],
       [-0.09752318, -0.54635292],
       [ 0.36726814, -0.54635292],
       [-0.79471015,  0.50504789],
       [-0.21372101, -0.1257926 ],
       [-0.44611666, -0.1257926 ],
       [-0.44611666,  1.76672887],
       [ 1.06445511,  0.50504789],
       [-1.0271058 , -1.17719341],
       [ 0.48346596,  0.71532806],
       [-0.32991883, -1.38747358],
       [ 2.22643339, -0.54635292],
       [-0.44611666,  0.71532806],
       [ 1.06445511, -0.1257926 ],
       [-0.32991883,  2.39756936],
       [-0.91090798,  0.29476773],
       [-1.14330363, -0.1257926 ],
       [ 0.01867465, -0.75663309],
       [ 0.13487248, -0.1257926 ],
       [ 1.52924642, -0.1257926 ],
       [-1.0271058 , -1.38747358],
       [ 0.59966379, -1.17719341],
       [-0.21372101, -0.1257926 ],
       [ 2.22643339, -0.1257926 ],
       [-0.44611666,  0.71532806]])

Normalization

1
2
3
4
from sklearn.preprocessing import Normalizer

# Normalizer rescales each SAMPLE (row) to unit norm (L2 by default),
# unlike StandardScaler which works per feature (column).
scaler = Normalizer().fit(X_train)
scaler.transform(X_train)
array([[0.90849045, 0.41790561],
       [0.84507884, 0.53464171],
       [0.93935732, 0.34293997],
       [0.91250932, 0.4090559 ],
       [0.90580954, 0.42368511],
       [0.82659925, 0.56279098],
       [0.80417614, 0.59439106],
       [0.89792072, 0.44015722],
       [0.81602448, 0.57801734],
       [0.90116674, 0.43347261],
       [0.83205029, 0.5547002 ],
       [0.80942185, 0.58722762],
       [0.88799441, 0.45985425],
       [0.90795938, 0.41905818],
       [0.81067923, 0.58549055],
       [0.90947448, 0.41575976],
       [0.90055164, 0.43474907],
       [0.94744567, 0.31991672],
       [0.91036648, 0.41380294],
       [0.87903186, 0.47676304],
       [0.80588181, 0.59207643],
       [0.89043468, 0.45511106],
       [0.91381155, 0.40613847],
       [0.8349582 , 0.55031336],
       [0.81153434, 0.58430473],
       [0.92307692, 0.38461538],
       [0.87002219, 0.49301257],
       [0.94242775, 0.33440985],
       [0.93528626, 0.3538921 ],
       [0.9149178 , 0.40364021],
       [0.91615733, 0.40081883],
       [0.91578821, 0.4016615 ],
       [0.81923192, 0.57346234],
       [0.80188283, 0.59748132],
       [0.9246781 , 0.38074981],
       [0.88749608, 0.46081527],
       [0.80188283, 0.59748132],
       [0.91313788, 0.40765084],
       [0.89734997, 0.44131966],
       [0.91551945, 0.4022737 ],
       [0.91615733, 0.40081883],
       [0.82012695, 0.5721816 ],
       [0.89442719, 0.4472136 ],
       [0.82451335, 0.5658425 ],
       [0.92949071, 0.36884552],
       [0.80873608, 0.5881717 ],
       [0.93015522, 0.36716653],
       [0.89442719, 0.4472136 ],
       [0.80188283, 0.59748132],
       [0.89717068, 0.44168403],
       [0.91914503, 0.3939193 ],
       [0.81923192, 0.57346234],
       [0.83696961, 0.54724936],
       [0.92136416, 0.38870051],
       [0.82321279, 0.56773296],
       [0.79159032, 0.61105218],
       [0.8209052 , 0.57106449],
       [0.9121687 , 0.40981492],
       [0.90838094, 0.41814361],
       [0.84366149, 0.53687549],
       [0.90981905, 0.41500518],
       [0.83460941, 0.55084221],
       [0.93887632, 0.34425465],
       [0.8528513 , 0.52215386],
       [0.92847669, 0.37139068],
       [0.91036648, 0.41380294],
       [0.89755433, 0.44090388],
       [0.90407227, 0.42737962],
       [0.92257988, 0.38580613],
       [0.84003938, 0.54252543],
       [0.90034895, 0.43516866],
       [0.85749293, 0.51449576],
       [0.9121687 , 0.40981492],
       [0.93690259, 0.34959052],
       [0.81995808, 0.57242357],
       [0.78526917, 0.61915453],
       [0.8269265 , 0.56231002],
       [0.787505  , 0.61630826],
       [0.89674427, 0.44254912],
       [0.92935209, 0.36919466],
       [0.82926643, 0.55885346],
       [0.91653938, 0.39994446],
       [0.90658206, 0.42202958],
       [0.89708903, 0.44184982],
       [0.90658206, 0.42202958],
       [0.82493237, 0.56523144],
       [0.8269265 , 0.56231002],
       [0.91192151, 0.41036468],
       [0.89755433, 0.44090388],
       [0.90882955, 0.41716766],
       [0.83957016, 0.54325128],
       [0.88147997, 0.47222141],
       [0.87415728, 0.48564293],
       [0.81067923, 0.58549055],
       [0.89708903, 0.44184982],
       [0.89076187, 0.45447034],
       [0.87681241, 0.48083261],
       [0.91653938, 0.39994446],
       [0.93979342, 0.34174306],
       [0.84623284, 0.53281327],
       [0.91268458, 0.40866474],
       [0.79476781, 0.6069136 ],
       [0.8422714 , 0.5390537 ],
       [0.8479983 , 0.52999894],
       [0.90658206, 0.42202958],
       [0.89138513, 0.45324668],
       [0.92114622, 0.38921671],
       [0.89806271, 0.43986745],
       [0.92949071, 0.36884552],
       [0.88147997, 0.47222141],
       [0.93177739, 0.36303015],
       [0.84623284, 0.53281327]])

Binarization

1
2
3
4
from sklearn.preprocessing import Binarizer

# Map each value to 1 if it exceeds the threshold, else 0. Applied to the
# raw iris measurements (all positive), so the result below is all ones.
binarizer = Binarizer(threshold=0.0).fit(X)
binarizer.transform(X)
array([[1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.],
       [1., 1.]])

Encoding Categorical Features

1
2
3
4
from sklearn.preprocessing import LabelEncoder

# Encode target labels as integers 0..n_classes-1
# (iris targets are already 0/1/2, so the output is unchanged).
enc = LabelEncoder()
enc.fit_transform(y)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

Imputing Missing Values

1
2
3
4
# sklearn.preprocessing.Imputer was deprecated in 0.20 and removed in 0.22;
# SimpleImputer is the replacement. It always works column-wise, so the old
# axis=0 argument is gone.
from sklearn.impute import SimpleImputer

# Replace every 0 entry with the mean of its column.
imp = SimpleImputer(missing_values=0, strategy='mean')
imp.fit_transform(X_train)
array([[-0.91090798, -1.59775374],
       [-1.0271058 ,  0.08448757],
       [ 0.59966379, -1.59775374],
       [ 0.01867465, -0.96691325],
       [ 0.48346596, -0.33607276],
       [-1.25950146,  0.29476773],
       [-1.37569929,  0.71532806],
       [-0.79471015, -1.17719341],
       [-1.14330363,  0.71532806],
       [ 2.45882905,  1.55644871],
       [-0.79471015,  0.71532806],
       [-0.79471015,  1.34616854],
       [-0.21372101, -0.33607276],
       [ 0.83205945, -0.1257926 ],
       [-0.44611666,  1.76672887],
       [ 1.41304859,  0.29476773],
       [ 0.01867465, -0.54635292],
       [ 2.22643339, -0.96691325],
       [-0.32991883, -1.17719341],
       [ 0.13487248,  0.29476773],
       [-1.0271058 ,  1.13588838],
       [-1.49189712, -1.59775374],
       [ 0.59966379, -0.54635292],
       [-1.60809495, -0.33607276],
       [-0.91090798,  1.13588838],
       [ 1.64544425, -0.1257926 ],
       [ 0.25107031,  0.71532806],
       [ 0.48346596, -1.8080339 ],
       [ 1.8778399 , -0.54635292],
       [ 1.18065293, -0.1257926 ],
       [ 0.71586162, -0.54635292],
       [-0.09752318, -1.17719341],
       [-0.91090798,  0.92560822],
       [-0.79471015,  1.55644871],
       [ 1.18065293, -0.54635292],
       [-0.67851232, -0.75663309],
       [-0.79471015,  1.55644871],
       [-0.21372101, -1.17719341],
       [ 0.36726814, -0.1257926 ],
       [ 0.94825728, -0.33607276],
       [ 0.71586162, -0.54635292],
       [-1.72429277, -0.1257926 ],
       [ 1.64544425,  1.13588838],
       [-0.79471015,  0.92560822],
       [ 0.59966379, -1.17719341],
       [-1.60809495,  0.29476773],
       [ 2.11023556, -0.1257926 ],
       [ 0.71586162,  0.29476773],
       [-0.79471015,  1.55644871],
       [ 0.83205945,  0.29476773],
       [ 0.59966379, -0.75663309],
       [-0.91090798,  0.92560822],
       [-0.67851232,  0.71532806],
       [ 0.71586162, -0.75663309],
       [ 0.01867465,  1.97700903],
       [-0.09752318,  2.81812969],
       [-1.37569929,  0.29476773],
       [ 1.29685076,  0.08448757],
       [ 0.59966379, -0.33607276],
       [-0.32991883,  0.92560822],
       [-0.09752318, -0.96691325],
       [-0.91090798,  0.50504789],
       [ 0.25107031, -1.8080339 ],
       [-1.0271058 , -0.1257926 ],
       [-0.91090798, -2.22859423],
       [ 0.94825728, -0.1257926 ],
       [-0.09752318, -0.54635292],
       [-0.32991883, -0.96691325],
       [-0.32991883, -1.59775374],
       [-1.14330363,  0.08448757],
       [ 0.25107031, -0.33607276],
       [-0.91090798, -0.1257926 ],
       [ 1.29685076,  0.08448757],
       [ 1.06445511, -1.17719341],
       [-0.56231449,  1.34616854],
       [-0.67851232,  2.1872892 ],
       [-0.91090798,  0.71532806],
       [-1.37569929,  1.13588838],
       [ 2.22643339,  1.55644871],
       [ 1.76164208, -0.33607276],
       [-1.37569929,  0.08448757],
       [-0.32991883, -1.38747358],
       [ 0.01867465, -0.75663309],
       [ 1.06445511,  0.50504789],
       [ 0.01867465, -0.75663309],
       [-0.44611666,  1.34616854],
       [-0.91090798,  0.71532806],
       [ 0.25107031, -0.75663309],
       [-0.09752318, -0.54635292],
       [ 0.36726814, -0.54635292],
       [-0.79471015,  0.50504789],
       [-0.21372101, -0.1257926 ],
       [-0.44611666, -0.1257926 ],
       [-0.44611666,  1.76672887],
       [ 1.06445511,  0.50504789],
       [-1.0271058 , -1.17719341],
       [ 0.48346596,  0.71532806],
       [-0.32991883, -1.38747358],
       [ 2.22643339, -0.54635292],
       [-0.44611666,  0.71532806],
       [ 1.06445511, -0.1257926 ],
       [-0.32991883,  2.39756936],
       [-0.91090798,  0.29476773],
       [-1.14330363, -0.1257926 ],
       [ 0.01867465, -0.75663309],
       [ 0.13487248, -0.1257926 ],
       [ 1.52924642, -0.1257926 ],
       [-1.0271058 , -1.38747358],
       [ 0.59966379, -1.17719341],
       [-0.21372101, -0.1257926 ],
       [ 2.22643339, -0.1257926 ],
       [-0.44611666,  0.71532806]])

Generating Polynomial Features

1
2
3
4
from sklearn.preprocessing import PolynomialFeatures

# Expand the 2 features into all polynomial terms up to degree 5
# (bias column, x1, x2, x1*x2, ..., x2^5).
poly = PolynomialFeatures(5)
poly.fit_transform(X_train)
array([[1.00000000e+00, 5.00000000e+00, 2.30000000e+00, ...,
        3.04175000e+02, 1.39920500e+02, 6.43634300e+01],
       [1.00000000e+00, 4.90000000e+00, 3.10000000e+00, ...,
        7.15281910e+02, 4.52525290e+02, 2.86291510e+02],
       [1.00000000e+00, 6.30000000e+00, 2.30000000e+00, ...,
        4.82908230e+02, 1.76299830e+02, 6.43634300e+01],
       ...,
       [1.00000000e+00, 5.60000000e+00, 3.00000000e+00, ...,
        8.46720000e+02, 4.53600000e+02, 2.43000000e+02],
       [1.00000000e+00, 7.70000000e+00, 3.00000000e+00, ...,
        1.60083000e+03, 6.23700000e+02, 2.43000000e+02],
       [1.00000000e+00, 5.40000000e+00, 3.40000000e+00, ...,
        1.14610464e+03, 7.21621440e+02, 4.54354240e+02]])

模型

监督学习

Linear Regression

1
2
3
4
5
6
from sklearn.linear_model import LinearRegression

# The `normalize` parameter was deprecated in scikit-learn 1.0 and removed in
# 1.2. For plain (unregularized) least squares, rescaling the features does not
# change the predictions, so it can simply be dropped.
lr = LinearRegression()
lr.fit(X, y)
y_pred = lr.predict(X_test)
y_pred  # notebook-style bare expression; values shown below
array([ 0.99503595,  1.60221544,  0.06895097,  1.6466104 ,  1.48027593,
        1.18075657,  0.01486294,  0.17150435,  1.38179294,  1.68538268,
        1.61190851, -0.02390934,  1.11697547,  1.28893263,  1.74916378,
        1.50935514,  1.15167736,  0.24497852,  1.18075657,  1.90580519,
        0.27968041,  0.42100606,  1.51904821,  0.26998734,  1.66192615,
        1.4261879 ,  1.22515153,  1.35271373,  1.51904821,  1.07820319,
        1.04912398,  1.60221544,  0.98534288,  1.31801184,  1.60221544,
        1.66599654,  1.19607232,  0.93125485])

Support Vector Machines (SVM)

1
2
3
4
5
6
from sklearn.svm import SVC

# Support-vector classifier with a linear kernel, trained on the full data.
svc = SVC(kernel='linear')
svc.fit(X, y)
y_pred = svc.predict(X_test)
y_pred
array([1, 2, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 1, 0, 2, 2, 0, 0,
       2, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1])

Naive Bayes

1
2
3
4
5
6
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes classifier, trained on the full data.
gnb = GaussianNB()
gnb.fit(X, y)
y_pred = gnb.predict(X_test)
y_pred
array([1, 2, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 1, 0, 2, 2, 0, 0,
       2, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1])

KNN

1
2
3
4
5
6
from sklearn import neighbors

# 5-nearest-neighbors classifier, trained on the full data.
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
y_pred = knn.predict(X_test)
y_pred
array([1, 1, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 2, 0, 2, 2, 0, 0,
       2, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1])

无监督学习

Principal Component Analysis (PCA)

1
2
3
4
from sklearn.decomposition import PCA

# A float n_components in (0, 1) means: keep the smallest number of
# components whose cumulative explained variance reaches that fraction (95%).
pca = PCA(n_components=0.95)
pca.fit_transform(X_train)
array([[-0.70718738, -0.83172028],
       [-0.88369354, -0.0450529 ],
       [ 0.58678167, -0.70664389],
       [ 0.06023749, -0.45614195],
       [ 0.42951803, -0.11904867],
       [-1.09238696,  0.03524066],
       [-1.21116556,  0.22469156],
       [-0.62689382, -0.62302686],
       [-1.0120934 ,  0.24393409],
       [ 2.03504005,  0.94033749],
       [-0.71348516,  0.27279787],
       [-0.74234894,  0.57140611],
       [-0.16769846, -0.17677623],
       [ 0.71850501,  0.00935119],
       [-0.46298322,  0.79934205],
       [ 1.19694289,  0.25652966],
       [ 0.04099497, -0.25706979],
       [ 1.95142302, -0.273338  ],
       [-0.22874949, -0.58454182],
       [ 0.10204601,  0.15069579],
       [-0.93179984,  0.45262751],
       [-1.20486778, -0.87982658],
       [ 0.53867537, -0.20896349],
       [-1.36213142, -0.29223136],
       [-0.83226376,  0.46224877],
       [ 1.41525757,  0.07670002],
       [ 0.18233956,  0.35938921],
       [ 0.49686685, -0.81580123],
       [ 1.63357226, -0.10312962],
       [ 1.01711325,  0.03821497],
       [ 0.63821145, -0.19934223],
       [-0.02967733, -0.5652993 ],
       [-0.8226425 ,  0.36271269],
       [-0.7519702 ,  0.67094219],
       [ 1.03635577, -0.16085719],
       [-0.54660026, -0.41433344],
       [-0.7519702 ,  0.67094219],
       [-0.12921341, -0.57492056],
       [ 0.32036069, -0.02913385],
       [ 0.82766235, -0.08056363],
       [ 0.63821145, -0.19934223],
       [-1.47128876, -0.20231654],
       [ 1.35753001,  0.6739165 ],
       [-0.72310642,  0.37233395],
       [ 0.56753915, -0.50757173],
       [-1.3909952 ,  0.00637688],
       [ 1.8134019 ,  0.11518506],
       [ 0.59972641,  0.19880209],
       [-0.7519702 ,  0.67094219],
       [ 0.69926249,  0.20842335],
       [ 0.54829663, -0.30849957],
       [-0.8226425 ,  0.36271269],
       [-0.61394908,  0.28241913],
       [ 0.64783271, -0.29887831],
       [-0.07446016,  0.93736317],
       [-0.21248128,  1.32588624],
       [-1.19192304,  0.0256194 ],
       [ 1.10702807,  0.14737232],
       [ 0.52905411, -0.10942741],
       [-0.3249621 ,  0.41081899],
       [-0.03929859, -0.46576321],
       [-0.80339998,  0.16364053],
       [ 0.29779469, -0.83504376],
       [-0.87407228, -0.14458898],
       [-0.67832359, -1.13032852],
       [ 0.81804109,  0.01897245],
       [-0.05854111, -0.26669105],
       [-0.23837075, -0.48500574],
       [-0.20950697, -0.78361398],
       [-0.98322962, -0.05467416],
       [ 0.23044587, -0.13829119],
       [-0.7745362 , -0.13496772],
       [ 1.10702807,  0.14737232],
       [ 0.96568347, -0.46908669],
       [-0.54327678,  0.59064863],
       [-0.6812979 ,  0.97917169],
       [-0.81302124,  0.26317661],
       [-1.23040809,  0.42376373],
       [ 1.83596789,  0.92109496],
       [ 1.52441492, -0.0132148 ],
       [-1.18230178, -0.07391668],
       [-0.21912823, -0.6840779 ],
       [ 0.05061623, -0.35660587],
       [ 0.88871339,  0.32720196],
       [ 0.05061623, -0.35660587],
       [-0.4437407 ,  0.60026989],
       [-0.81302124,  0.26317661],
       [ 0.24968839, -0.33736335],
       [-0.05854111, -0.26669105],
       [ 0.33960321, -0.22820601],
       [-0.7038639 ,  0.17326179],
       [-0.17731972, -0.07724015],
       [-0.37639188, -0.09648267],
       [-0.46298322,  0.79934205],
       [ 0.88871339,  0.32720196],
       [-0.82596598, -0.64226938],
       [ 0.38141173,  0.37863173],
       [-0.21912823, -0.6840779 ],
       [ 1.9321805 , -0.07426584],
       [-0.41487692,  0.30166165],
       [ 0.91757717,  0.02859371],
       [-0.39231092,  1.10757155],
       [-0.79377872,  0.06410445],
       [-0.97360836, -0.15421024],
       [ 0.05061623, -0.35660587],
       [ 0.12128853, -0.04837637],
       [ 1.31572149,  0.06707876],
       [-0.81634472, -0.74180546],
       [ 0.56753915, -0.50757173],
       [-0.17731972, -0.07724015],
       [ 1.91293798,  0.12480632],
       [-0.41487692,  0.30166165]])

K Means

1
2
3
4
5
6
from sklearn.cluster import KMeans

# Cluster into 3 groups; random_state fixes the centroid initialization.
k_means = KMeans(n_clusters=3, random_state=0)
# K-Means is unsupervised: fit() takes only X. The `y` passed previously was
# silently ignored, so dropping it leaves the result unchanged.
k_means.fit(X)
y_pred = k_means.predict(X_test)
# Cluster ids (0/1/2) are arbitrary and need not match the class labels.
y_pred
array([0, 2, 1, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 2, 0, 1, 2, 2, 1, 1,
       2, 1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 0], dtype=int32)

评估

分类

Accuracy Score

1
2
3
4
5
# Two equivalent ways to get accuracy: the estimator's own score() ...
knn.score(X_test, y_test)

from sklearn.metrics import accuracy_score

# ... or the metrics function on precomputed predictions.
# NOTE(review): y_pred here still holds the K-Means cluster ids from the
# previous section, whose arbitrary numbering explains the low score below.
accuracy_score(y_test, y_pred)
0.3157894736842105

Classification Report

1
2
3
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 plus support, returned as a single string.
classification_report(y_test, y_pred)
'              precision    recall  f1-score   support\n\n           0       0.00      0.00      0.00         8\n           1       0.00      0.00      0.00        11\n           2       0.71      0.63      0.67        19\n\n   micro avg       0.32      0.32      0.32        38\n   macro avg       0.24      0.21      0.22        38\nweighted avg       0.35      0.32      0.33        38\n'

Confusion Matrix

1
2
3
from sklearn.metrics import confusion_matrix

# Rows = true labels, columns = predicted labels.
confusion_matrix(y_test, y_pred)
array([[ 0,  8,  0],
       [ 6,  0,  5],
       [ 7,  0, 12]])

回归

Mean Absolute Error

1
2
3
from sklearn.metrics import mean_absolute_error

# Mean of |y_true - y_pred|; lower is better.
mean_absolute_error(y_test, y_pred)
0.868421052631579

Mean Squared Error

1
2
3
from sklearn.metrics import mean_squared_error

# Mean of (y_true - y_pred)^2; penalizes large errors more than MAE.
mean_squared_error(y_test, y_pred)
1.236842105263158
1
R² Score
1
2
3
from sklearn.metrics import r2_score

# Coefficient of determination; 1.0 is perfect, can be negative for a
# model worse than predicting the mean (as in the value shown below).
r2_score(y_test, y_pred)
-0.9734806629834258

聚类

Adjusted Rand Index

1
2
3
from sklearn.metrics import adjusted_rand_score

# Clustering agreement corrected for chance; invariant to label permutation.
adjusted_rand_score(y_test, y_pred)
0.3273680853325774

V-measure

1
2
3
from sklearn.metrics import v_measure_score

# Harmonic mean of homogeneity and completeness; also permutation-invariant.
v_measure_score(y_test, y_pred)
0.5040766075368869

Cross-Validation

1
2
3
4
from sklearn.model_selection import cross_val_score

# One score per fold: 4-fold CV for the classifier, 2-fold for the regressor.
print(cross_val_score(knn, X_train, y_train, cv=4))
print(cross_val_score(lr, X, y, cv=2))
[0.82758621 0.82758621 0.82142857 0.88461538]
[-4.31567384 -1.89773191]

调试

1
2
3
4
5
6
7
8
9
from sklearn.model_selection import GridSearchCV
import numpy as np  # np is used below but was never imported anywhere on this page

# Exhaustively try every combination: n_neighbors in {1, 2} x two distance metrics.
params = {"n_neighbors": np.arange(1, 3), "metric": ["euclidean", "cityblock"]}
grid = GridSearchCV(estimator=knn,
                    param_grid=params)
grid.fit(X_train, y_train)

# Best cross-validated score and the winning n_neighbors value.
print(grid.best_score_)
print(grid.best_estimator_.n_neighbors)
0.8303571428571429
2

Randomized Parameter Optimization

1
2
3
4
5
6
7
8
9
from sklearn.model_selection import RandomizedSearchCV

# Sample 8 candidate settings from the n_neighbors x weights space,
# scoring each with 4-fold cross-validation.
param_space = {
    "n_neighbors": list(range(1, 5)),
    "weights": ["uniform", "distance"],
}
rsearch = RandomizedSearchCV(
    estimator=knn,
    param_distributions=param_space,
    cv=4,
    n_iter=8,
    random_state=5,
)
rsearch.fit(X_train, y_train)
print(rsearch.best_score_)
0.8214285714285714

参考:

  1. Scikit-Learn Cheat Sheet: Python Machine Learning