Simple Example
import warnings
warnings.filterwarnings('ignore')

from sklearn import neighbors, datasets, preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the iris dataset and keep only the first two features
iris = datasets.load_iris()
X, y = iris.data[:, :2], iris.target

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)

# Standardize the features, fitting the scaler on the training set only
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Train a k-nearest-neighbors classifier and measure its accuracy
knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy_score(y_test, y_pred)
0.631578947368421
Data Splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=33)
Data Preprocessing
Standardization
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)
scaler.transform(X_train)
array([[-0.91090798, -1.59775374],
[-1.0271058 , 0.08448757],
[ 0.59966379, -1.59775374],
[ 0.01867465, -0.96691325],
[ 0.48346596, -0.33607276],
[-1.25950146, 0.29476773],
[-1.37569929, 0.71532806],
[-0.79471015, -1.17719341],
[-1.14330363, 0.71532806],
[ 2.45882905, 1.55644871],
[-0.79471015, 0.71532806],
[-0.79471015, 1.34616854],
[-0.21372101, -0.33607276],
[ 0.83205945, -0.1257926 ],
[-0.44611666, 1.76672887],
[ 1.41304859, 0.29476773],
[ 0.01867465, -0.54635292],
[ 2.22643339, -0.96691325],
[-0.32991883, -1.17719341],
[ 0.13487248, 0.29476773],
[-1.0271058 , 1.13588838],
[-1.49189712, -1.59775374],
[ 0.59966379, -0.54635292],
[-1.60809495, -0.33607276],
[-0.91090798, 1.13588838],
[ 1.64544425, -0.1257926 ],
[ 0.25107031, 0.71532806],
[ 0.48346596, -1.8080339 ],
[ 1.8778399 , -0.54635292],
[ 1.18065293, -0.1257926 ],
[ 0.71586162, -0.54635292],
[-0.09752318, -1.17719341],
[-0.91090798, 0.92560822],
[-0.79471015, 1.55644871],
[ 1.18065293, -0.54635292],
[-0.67851232, -0.75663309],
[-0.79471015, 1.55644871],
[-0.21372101, -1.17719341],
[ 0.36726814, -0.1257926 ],
[ 0.94825728, -0.33607276],
[ 0.71586162, -0.54635292],
[-1.72429277, -0.1257926 ],
[ 1.64544425, 1.13588838],
[-0.79471015, 0.92560822],
[ 0.59966379, -1.17719341],
[-1.60809495, 0.29476773],
[ 2.11023556, -0.1257926 ],
[ 0.71586162, 0.29476773],
[-0.79471015, 1.55644871],
[ 0.83205945, 0.29476773],
[ 0.59966379, -0.75663309],
[-0.91090798, 0.92560822],
[-0.67851232, 0.71532806],
[ 0.71586162, -0.75663309],
[ 0.01867465, 1.97700903],
[-0.09752318, 2.81812969],
[-1.37569929, 0.29476773],
[ 1.29685076, 0.08448757],
[ 0.59966379, -0.33607276],
[-0.32991883, 0.92560822],
[-0.09752318, -0.96691325],
[-0.91090798, 0.50504789],
[ 0.25107031, -1.8080339 ],
[-1.0271058 , -0.1257926 ],
[-0.91090798, -2.22859423],
[ 0.94825728, -0.1257926 ],
[-0.09752318, -0.54635292],
[-0.32991883, -0.96691325],
[-0.32991883, -1.59775374],
[-1.14330363, 0.08448757],
[ 0.25107031, -0.33607276],
[-0.91090798, -0.1257926 ],
[ 1.29685076, 0.08448757],
[ 1.06445511, -1.17719341],
[-0.56231449, 1.34616854],
[-0.67851232, 2.1872892 ],
[-0.91090798, 0.71532806],
[-1.37569929, 1.13588838],
[ 2.22643339, 1.55644871],
[ 1.76164208, -0.33607276],
[-1.37569929, 0.08448757],
[-0.32991883, -1.38747358],
[ 0.01867465, -0.75663309],
[ 1.06445511, 0.50504789],
[ 0.01867465, -0.75663309],
[-0.44611666, 1.34616854],
[-0.91090798, 0.71532806],
[ 0.25107031, -0.75663309],
[-0.09752318, -0.54635292],
[ 0.36726814, -0.54635292],
[-0.79471015, 0.50504789],
[-0.21372101, -0.1257926 ],
[-0.44611666, -0.1257926 ],
[-0.44611666, 1.76672887],
[ 1.06445511, 0.50504789],
[-1.0271058 , -1.17719341],
[ 0.48346596, 0.71532806],
[-0.32991883, -1.38747358],
[ 2.22643339, -0.54635292],
[-0.44611666, 0.71532806],
[ 1.06445511, -0.1257926 ],
[-0.32991883, 2.39756936],
[-0.91090798, 0.29476773],
[-1.14330363, -0.1257926 ],
[ 0.01867465, -0.75663309],
[ 0.13487248, -0.1257926 ],
[ 1.52924642, -0.1257926 ],
[-1.0271058 , -1.38747358],
[ 0.59966379, -1.17719341],
[-0.21372101, -0.1257926 ],
[ 2.22643339, -0.1257926 ],
[-0.44611666, 0.71532806]])
Normalization
from sklearn.preprocessing import Normalizer

scaler = Normalizer().fit(X_train)
scaler.transform(X_train)
array([[0.90849045, 0.41790561],
[0.84507884, 0.53464171],
[0.93935732, 0.34293997],
[0.91250932, 0.4090559 ],
[0.90580954, 0.42368511],
[0.82659925, 0.56279098],
[0.80417614, 0.59439106],
[0.89792072, 0.44015722],
[0.81602448, 0.57801734],
[0.90116674, 0.43347261],
[0.83205029, 0.5547002 ],
[0.80942185, 0.58722762],
[0.88799441, 0.45985425],
[0.90795938, 0.41905818],
[0.81067923, 0.58549055],
[0.90947448, 0.41575976],
[0.90055164, 0.43474907],
[0.94744567, 0.31991672],
[0.91036648, 0.41380294],
[0.87903186, 0.47676304],
[0.80588181, 0.59207643],
[0.89043468, 0.45511106],
[0.91381155, 0.40613847],
[0.8349582 , 0.55031336],
[0.81153434, 0.58430473],
[0.92307692, 0.38461538],
[0.87002219, 0.49301257],
[0.94242775, 0.33440985],
[0.93528626, 0.3538921 ],
[0.9149178 , 0.40364021],
[0.91615733, 0.40081883],
[0.91578821, 0.4016615 ],
[0.81923192, 0.57346234],
[0.80188283, 0.59748132],
[0.9246781 , 0.38074981],
[0.88749608, 0.46081527],
[0.80188283, 0.59748132],
[0.91313788, 0.40765084],
[0.89734997, 0.44131966],
[0.91551945, 0.4022737 ],
[0.91615733, 0.40081883],
[0.82012695, 0.5721816 ],
[0.89442719, 0.4472136 ],
[0.82451335, 0.5658425 ],
[0.92949071, 0.36884552],
[0.80873608, 0.5881717 ],
[0.93015522, 0.36716653],
[0.89442719, 0.4472136 ],
[0.80188283, 0.59748132],
[0.89717068, 0.44168403],
[0.91914503, 0.3939193 ],
[0.81923192, 0.57346234],
[0.83696961, 0.54724936],
[0.92136416, 0.38870051],
[0.82321279, 0.56773296],
[0.79159032, 0.61105218],
[0.8209052 , 0.57106449],
[0.9121687 , 0.40981492],
[0.90838094, 0.41814361],
[0.84366149, 0.53687549],
[0.90981905, 0.41500518],
[0.83460941, 0.55084221],
[0.93887632, 0.34425465],
[0.8528513 , 0.52215386],
[0.92847669, 0.37139068],
[0.91036648, 0.41380294],
[0.89755433, 0.44090388],
[0.90407227, 0.42737962],
[0.92257988, 0.38580613],
[0.84003938, 0.54252543],
[0.90034895, 0.43516866],
[0.85749293, 0.51449576],
[0.9121687 , 0.40981492],
[0.93690259, 0.34959052],
[0.81995808, 0.57242357],
[0.78526917, 0.61915453],
[0.8269265 , 0.56231002],
[0.787505 , 0.61630826],
[0.89674427, 0.44254912],
[0.92935209, 0.36919466],
[0.82926643, 0.55885346],
[0.91653938, 0.39994446],
[0.90658206, 0.42202958],
[0.89708903, 0.44184982],
[0.90658206, 0.42202958],
[0.82493237, 0.56523144],
[0.8269265 , 0.56231002],
[0.91192151, 0.41036468],
[0.89755433, 0.44090388],
[0.90882955, 0.41716766],
[0.83957016, 0.54325128],
[0.88147997, 0.47222141],
[0.87415728, 0.48564293],
[0.81067923, 0.58549055],
[0.89708903, 0.44184982],
[0.89076187, 0.45447034],
[0.87681241, 0.48083261],
[0.91653938, 0.39994446],
[0.93979342, 0.34174306],
[0.84623284, 0.53281327],
[0.91268458, 0.40866474],
[0.79476781, 0.6069136 ],
[0.8422714 , 0.5390537 ],
[0.8479983 , 0.52999894],
[0.90658206, 0.42202958],
[0.89138513, 0.45324668],
[0.92114622, 0.38921671],
[0.89806271, 0.43986745],
[0.92949071, 0.36884552],
[0.88147997, 0.47222141],
[0.93177739, 0.36303015],
[0.84623284, 0.53281327]])
Binarization
from sklearn.preprocessing import Binarizer

binarizer = Binarizer(threshold=0.0).fit(X)
binarizer.transform(X)
array([[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.],
[1., 1.]])
Encoding Categorical Features
from sklearn.preprocessing import LabelEncoder

enc = LabelEncoder()
enc.fit_transform(y)
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
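LabelEncoder is intended for target labels. For categorical input features, scikit-learn also provides OneHotEncoder; the sketch below is not part of the original cheat sheet and uses a small made-up feature matrix purely for illustration (on versions before 1.2, pass sparse=False instead of sparse_output=False).

from sklearn.preprocessing import OneHotEncoder

# Hypothetical categorical feature matrix, for illustration only
X_cat = [['red', 'S'], ['green', 'M'], ['blue', 'L']]
enc = OneHotEncoder(sparse_output=False)
enc.fit_transform(X_cat)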
Imputing Missing Values
from sklearn.preprocessing import Imputer

# Imputer was removed in scikit-learn 0.22; see the SimpleImputer sketch after the output below
imp = Imputer(missing_values=0, strategy='mean', axis=0)
imp.fit_transform(X_train)
array([[-0.91090798, -1.59775374],
[-1.0271058 , 0.08448757],
[ 0.59966379, -1.59775374],
[ 0.01867465, -0.96691325],
[ 0.48346596, -0.33607276],
[-1.25950146, 0.29476773],
[-1.37569929, 0.71532806],
[-0.79471015, -1.17719341],
[-1.14330363, 0.71532806],
[ 2.45882905, 1.55644871],
[-0.79471015, 0.71532806],
[-0.79471015, 1.34616854],
[-0.21372101, -0.33607276],
[ 0.83205945, -0.1257926 ],
[-0.44611666, 1.76672887],
[ 1.41304859, 0.29476773],
[ 0.01867465, -0.54635292],
[ 2.22643339, -0.96691325],
[-0.32991883, -1.17719341],
[ 0.13487248, 0.29476773],
[-1.0271058 , 1.13588838],
[-1.49189712, -1.59775374],
[ 0.59966379, -0.54635292],
[-1.60809495, -0.33607276],
[-0.91090798, 1.13588838],
[ 1.64544425, -0.1257926 ],
[ 0.25107031, 0.71532806],
[ 0.48346596, -1.8080339 ],
[ 1.8778399 , -0.54635292],
[ 1.18065293, -0.1257926 ],
[ 0.71586162, -0.54635292],
[-0.09752318, -1.17719341],
[-0.91090798, 0.92560822],
[-0.79471015, 1.55644871],
[ 1.18065293, -0.54635292],
[-0.67851232, -0.75663309],
[-0.79471015, 1.55644871],
[-0.21372101, -1.17719341],
[ 0.36726814, -0.1257926 ],
[ 0.94825728, -0.33607276],
[ 0.71586162, -0.54635292],
[-1.72429277, -0.1257926 ],
[ 1.64544425, 1.13588838],
[-0.79471015, 0.92560822],
[ 0.59966379, -1.17719341],
[-1.60809495, 0.29476773],
[ 2.11023556, -0.1257926 ],
[ 0.71586162, 0.29476773],
[-0.79471015, 1.55644871],
[ 0.83205945, 0.29476773],
[ 0.59966379, -0.75663309],
[-0.91090798, 0.92560822],
[-0.67851232, 0.71532806],
[ 0.71586162, -0.75663309],
[ 0.01867465, 1.97700903],
[-0.09752318, 2.81812969],
[-1.37569929, 0.29476773],
[ 1.29685076, 0.08448757],
[ 0.59966379, -0.33607276],
[-0.32991883, 0.92560822],
[-0.09752318, -0.96691325],
[-0.91090798, 0.50504789],
[ 0.25107031, -1.8080339 ],
[-1.0271058 , -0.1257926 ],
[-0.91090798, -2.22859423],
[ 0.94825728, -0.1257926 ],
[-0.09752318, -0.54635292],
[-0.32991883, -0.96691325],
[-0.32991883, -1.59775374],
[-1.14330363, 0.08448757],
[ 0.25107031, -0.33607276],
[-0.91090798, -0.1257926 ],
[ 1.29685076, 0.08448757],
[ 1.06445511, -1.17719341],
[-0.56231449, 1.34616854],
[-0.67851232, 2.1872892 ],
[-0.91090798, 0.71532806],
[-1.37569929, 1.13588838],
[ 2.22643339, 1.55644871],
[ 1.76164208, -0.33607276],
[-1.37569929, 0.08448757],
[-0.32991883, -1.38747358],
[ 0.01867465, -0.75663309],
[ 1.06445511, 0.50504789],
[ 0.01867465, -0.75663309],
[-0.44611666, 1.34616854],
[-0.91090798, 0.71532806],
[ 0.25107031, -0.75663309],
[-0.09752318, -0.54635292],
[ 0.36726814, -0.54635292],
[-0.79471015, 0.50504789],
[-0.21372101, -0.1257926 ],
[-0.44611666, -0.1257926 ],
[-0.44611666, 1.76672887],
[ 1.06445511, 0.50504789],
[-1.0271058 , -1.17719341],
[ 0.48346596, 0.71532806],
[-0.32991883, -1.38747358],
[ 2.22643339, -0.54635292],
[-0.44611666, 0.71532806],
[ 1.06445511, -0.1257926 ],
[-0.32991883, 2.39756936],
[-0.91090798, 0.29476773],
[-1.14330363, -0.1257926 ],
[ 0.01867465, -0.75663309],
[ 0.13487248, -0.1257926 ],
[ 1.52924642, -0.1257926 ],
[-1.0271058 , -1.38747358],
[ 0.59966379, -1.17719341],
[-0.21372101, -0.1257926 ],
[ 2.22643339, -0.1257926 ],
[-0.44611666, 0.71532806]])
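On scikit-learn 0.22 and later, Imputer is no longer available; the same step is done with sklearn.impute.SimpleImputer (there is no axis argument, imputation is always column-wise). A minimal sketch of the equivalent call:

from sklearn.impute import SimpleImputer

# Column-wise mean imputation, treating 0 as the missing marker (as in the original example)
imp = SimpleImputer(missing_values=0, strategy='mean')
imp.fit_transform(X_train)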
Generating Polynomial Features
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(5)
poly.fit_transform(X_train)
array([[1.00000000e+00, 5.00000000e+00, 2.30000000e+00, ...,
3.04175000e+02, 1.39920500e+02, 6.43634300e+01],
[1.00000000e+00, 4.90000000e+00, 3.10000000e+00, ...,
7.15281910e+02, 4.52525290e+02, 2.86291510e+02],
[1.00000000e+00, 6.30000000e+00, 2.30000000e+00, ...,
4.82908230e+02, 1.76299830e+02, 6.43634300e+01],
...,
[1.00000000e+00, 5.60000000e+00, 3.00000000e+00, ...,
8.46720000e+02, 4.53600000e+02, 2.43000000e+02],
[1.00000000e+00, 7.70000000e+00, 3.00000000e+00, ...,
1.60083000e+03, 6.23700000e+02, 2.43000000e+02],
[1.00000000e+00, 5.40000000e+00, 3.40000000e+00, ...,
1.14610464e+03, 7.21621440e+02, 4.54354240e+02]])
Models
Supervised Learning
Linear Regression
from sklearn.linear_model import LinearRegression

# normalize= was removed in scikit-learn 1.2; see the Pipeline sketch after the output below
lr = LinearRegression(normalize=True)
lr.fit(X, y)
y_pred = lr.predict(X_test)
y_pred
array([ 0.99503595, 1.60221544, 0.06895097, 1.6466104 , 1.48027593,
1.18075657, 0.01486294, 0.17150435, 1.38179294, 1.68538268,
1.61190851, -0.02390934, 1.11697547, 1.28893263, 1.74916378,
1.50935514, 1.15167736, 0.24497852, 1.18075657, 1.90580519,
0.27968041, 0.42100606, 1.51904821, 0.26998734, 1.66192615,
1.4261879 , 1.22515153, 1.35271373, 1.51904821, 1.07820319,
1.04912398, 1.60221544, 0.98534288, 1.31801184, 1.60221544,
1.66599654, 1.19607232, 0.93125485])
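On scikit-learn 1.2 and later, LinearRegression no longer accepts normalize=True. A common replacement (not numerically identical to the old normalize behaviour) is to standardize the features explicitly in a Pipeline, for example:

from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Scale the features, then fit an ordinary least-squares model
lr = make_pipeline(StandardScaler(), LinearRegression())
lr.fit(X, y)
y_pred = lr.predict(X_test)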
Support Vector Machines (SVM)
from sklearn.svm import SVC

svc = SVC(kernel='linear')
svc.fit(X, y)
y_pred = svc.predict(X_test)
y_pred
array([1, 2, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 1, 0, 2, 2, 0, 0,
2, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1])
Naive Bayes
from sklearn.naive_bayes import GaussianNB

gnb = GaussianNB()
gnb.fit(X, y)
y_pred = gnb.predict(X_test)
y_pred
array([1, 2, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 1, 0, 2, 2, 0, 0,
2, 0, 1, 1, 1, 1, 2, 1, 1, 2, 1, 2, 2, 2, 1, 1])
KNN
from sklearn import neighbors

knn = neighbors.KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)
y_pred = knn.predict(X_test)
y_pred
array([1, 1, 0, 2, 1, 2, 0, 0, 2, 2, 2, 0, 2, 1, 2, 2, 2, 0, 2, 2, 0, 0,
2, 0, 1, 1, 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 2, 1])
Unsupervised Learning
Principal Component Analysis (PCA)
from sklearn.decomposition import PCA

pca = PCA(n_components=0.95)
pca.fit_transform(X_train)
array([[-0.70718738, -0.83172028],
[-0.88369354, -0.0450529 ],
[ 0.58678167, -0.70664389],
[ 0.06023749, -0.45614195],
[ 0.42951803, -0.11904867],
[-1.09238696, 0.03524066],
[-1.21116556, 0.22469156],
[-0.62689382, -0.62302686],
[-1.0120934 , 0.24393409],
[ 2.03504005, 0.94033749],
[-0.71348516, 0.27279787],
[-0.74234894, 0.57140611],
[-0.16769846, -0.17677623],
[ 0.71850501, 0.00935119],
[-0.46298322, 0.79934205],
[ 1.19694289, 0.25652966],
[ 0.04099497, -0.25706979],
[ 1.95142302, -0.273338 ],
[-0.22874949, -0.58454182],
[ 0.10204601, 0.15069579],
[-0.93179984, 0.45262751],
[-1.20486778, -0.87982658],
[ 0.53867537, -0.20896349],
[-1.36213142, -0.29223136],
[-0.83226376, 0.46224877],
[ 1.41525757, 0.07670002],
[ 0.18233956, 0.35938921],
[ 0.49686685, -0.81580123],
[ 1.63357226, -0.10312962],
[ 1.01711325, 0.03821497],
[ 0.63821145, -0.19934223],
[-0.02967733, -0.5652993 ],
[-0.8226425 , 0.36271269],
[-0.7519702 , 0.67094219],
[ 1.03635577, -0.16085719],
[-0.54660026, -0.41433344],
[-0.7519702 , 0.67094219],
[-0.12921341, -0.57492056],
[ 0.32036069, -0.02913385],
[ 0.82766235, -0.08056363],
[ 0.63821145, -0.19934223],
[-1.47128876, -0.20231654],
[ 1.35753001, 0.6739165 ],
[-0.72310642, 0.37233395],
[ 0.56753915, -0.50757173],
[-1.3909952 , 0.00637688],
[ 1.8134019 , 0.11518506],
[ 0.59972641, 0.19880209],
[-0.7519702 , 0.67094219],
[ 0.69926249, 0.20842335],
[ 0.54829663, -0.30849957],
[-0.8226425 , 0.36271269],
[-0.61394908, 0.28241913],
[ 0.64783271, -0.29887831],
[-0.07446016, 0.93736317],
[-0.21248128, 1.32588624],
[-1.19192304, 0.0256194 ],
[ 1.10702807, 0.14737232],
[ 0.52905411, -0.10942741],
[-0.3249621 , 0.41081899],
[-0.03929859, -0.46576321],
[-0.80339998, 0.16364053],
[ 0.29779469, -0.83504376],
[-0.87407228, -0.14458898],
[-0.67832359, -1.13032852],
[ 0.81804109, 0.01897245],
[-0.05854111, -0.26669105],
[-0.23837075, -0.48500574],
[-0.20950697, -0.78361398],
[-0.98322962, -0.05467416],
[ 0.23044587, -0.13829119],
[-0.7745362 , -0.13496772],
[ 1.10702807, 0.14737232],
[ 0.96568347, -0.46908669],
[-0.54327678, 0.59064863],
[-0.6812979 , 0.97917169],
[-0.81302124, 0.26317661],
[-1.23040809, 0.42376373],
[ 1.83596789, 0.92109496],
[ 1.52441492, -0.0132148 ],
[-1.18230178, -0.07391668],
[-0.21912823, -0.6840779 ],
[ 0.05061623, -0.35660587],
[ 0.88871339, 0.32720196],
[ 0.05061623, -0.35660587],
[-0.4437407 , 0.60026989],
[-0.81302124, 0.26317661],
[ 0.24968839, -0.33736335],
[-0.05854111, -0.26669105],
[ 0.33960321, -0.22820601],
[-0.7038639 , 0.17326179],
[-0.17731972, -0.07724015],
[-0.37639188, -0.09648267],
[-0.46298322, 0.79934205],
[ 0.88871339, 0.32720196],
[-0.82596598, -0.64226938],
[ 0.38141173, 0.37863173],
[-0.21912823, -0.6840779 ],
[ 1.9321805 , -0.07426584],
[-0.41487692, 0.30166165],
[ 0.91757717, 0.02859371],
[-0.39231092, 1.10757155],
[-0.79377872, 0.06410445],
[-0.97360836, -0.15421024],
[ 0.05061623, -0.35660587],
[ 0.12128853, -0.04837637],
[ 1.31572149, 0.06707876],
[-0.81634472, -0.74180546],
[ 0.56753915, -0.50757173],
[-0.17731972, -0.07724015],
[ 1.91293798, 0.12480632],
[-0.41487692, 0.30166165]])
K Means
from sklearn.cluster import KMeans

k_means = KMeans(n_clusters=3, random_state=0)
k_means.fit(X, y)  # y is accepted but ignored: clustering is unsupervised
y_pred = k_means.predict(X_test)
y_pred
array([0, 2, 1, 2, 0, 2, 1, 1, 2, 2, 2, 1, 2, 0, 2, 2, 0, 1, 2, 2, 1, 1,
2, 1, 0, 0, 0, 0, 2, 0, 0, 2, 0, 2, 2, 2, 0, 0], dtype=int32)
Evaluation
Note: y_pred at this point still holds the K Means cluster labels from the previous section, so the scores below mainly illustrate the metric APIs rather than evaluate any single model.
Classification
Accuracy Score
knn.score(X_test, y_test)

from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)
0.3157894736842105
Classification Report
from sklearn.metrics import classification_report

classification_report(y_test, y_pred)
' precision recall f1-score support\n\n 0 0.00 0.00 0.00 8\n 1 0.00 0.00 0.00 11\n 2 0.71 0.63 0.67 19\n\n micro avg 0.32 0.32 0.32 38\n macro avg 0.24 0.21 0.22 38\nweighted avg 0.35 0.32 0.33 38\n'
Confusion Matrix
from sklearn.metrics import confusion_matrix

confusion_matrix(y_test, y_pred)
array([[ 0, 8, 0],
[ 6, 0, 5],
[ 7, 0, 12]])
Regression
Mean Absolute Error
from sklearn.metrics import mean_absolute_error

mean_absolute_error(y_test, y_pred)
0.868421052631579
Mean Squared Error
from sklearn.metrics import mean_squared_error

mean_squared_error(y_test, y_pred)
1.236842105263158
R² Score
from sklearn.metrics import r2_score

r2_score(y_test, y_pred)
-0.9734806629834258
Clustering
Adjusted Rand Index
from sklearn.metrics import adjusted_rand_score

adjusted_rand_score(y_test, y_pred)
0.3273680853325774
V-measure
from sklearn.metrics import v_measure_score

v_measure_score(y_test, y_pred)
0.5040766075368869
Cross-Validation
from sklearn.model_selection import cross_val_score

print(cross_val_score(knn, X_train, y_train, cv=4))
print(cross_val_score(lr, X, y, cv=2))
[0.82758621 0.82758621 0.82142857 0.88461538]
[-4.31567384 -1.89773191]
Model Tuning
Grid Search
import numpy as np
from sklearn.model_selection import GridSearchCV

params = {"n_neighbors": np.arange(1, 3), "metric": ["euclidean", "cityblock"]}
grid = GridSearchCV(estimator=knn, param_grid=params)
grid.fit(X_train, y_train)

print(grid.best_score_)
print(grid.best_estimator_.n_neighbors)
0.8303571428571429
2
Randomized Parameter Optimization
from sklearn.model_selection import RandomizedSearchCV

params = {"n_neighbors": list(range(1, 5)), "weights": ["uniform", "distance"]}
rsearch = RandomizedSearchCV(estimator=knn, param_distributions=params, cv=4, n_iter=8, random_state=5)
rsearch.fit(X_train, y_train)
print(rsearch.best_score_)
0.8214285714285714
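Once the search has been fitted, the tuned model can be pulled out and checked on the held-out split; a minimal sketch (not part of the original cheat sheet):

best_knn = rsearch.best_estimator_  # refit on the whole training split when refit=True (the default)
print(best_knn.score(X_test, y_test))  # accuracy on the held-out test data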
References:
- Scikit-Learn Cheat Sheet: Python Machine Learning