class_weight
class_weight='auto', :
# Per-class weights for a binary problem whose labels are -1 and 1:
# class -1 is weighted by the ratio of positive to negative sample counts,
# while class 1 keeps a weight of 1.
n_pos = (y == 1).sum()
n_neg = (y == -1).sum()
class_weight = {-1: n_pos / n_neg, 1: 1.0}
, , .
, , class_weight="auto", . :
weight = none auto svm scikit learn.
:
, , ( ), , 1, , (y), , , . .
, ;).
This heuristic is deprecated and will be removed in 0.18. Instead, use class_weight='balanced'.
"" .
:
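The "balanced" mode uses the values of y to automatically adjust the weights inversely proportional to the class frequencies: n_samples / (n_classes * np.bincount(y)).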
np.bincount(y) is an array in which element i is the number of samples of class i.
:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.utils import compute_class_weight
# Generate a 3-class toy dataset with deliberately unequal class proportions.
n_classes = 3
n_samples = 1000
X, y = make_classification(
    n_samples=n_samples,
    n_features=20,
    n_informative=10,
    n_classes=n_classes,
    weights=[0.05, 0.4, 0.55],
)

# Number of samples observed for each class label (0, 1, 2).
class_counts = np.bincount(y)
print("Count of samples per class: ", class_counts)

# "Balanced" weights computed by hand: inversely proportional to class counts.
balanced_weights = n_samples / (n_classes * class_counts)
print("Balanced weights: ", balanced_weights)

# Weights produced by the "auto" heuristic, printed for comparison.
auto_weights = compute_class_weight("auto", [0, 1, 2], y)
print("'auto' weights: ", auto_weights)
Output:
Count of samples per class: [ 57 396 547]
Balanced weights: [ 5.84795322 0.84175084 0.60938452]
'auto' weights: [ 2.40356854 0.3459682 0.25046327]
: ?
, , .
SVC linearSVC
Set the parameter C of class i to class_weight[i] * C for SVC.
, svm, .
, . , liblinear libsvm, .
, class_weight , predict_proba. , .
, , , , ( ):
# LogisticRegression is used below but is not imported by the earlier snippet.
from sklearn.linear_model import LogisticRegression

# Fit a first model using the "auto" class-weight heuristic.
lr = LogisticRegression(class_weight="auto")
lr.fit(X, y)
print(lr.predict_proba(X))

# Fit a second model with explicit weights that strongly favour class 0.
new_lr = LogisticRegression(class_weight={0: 100, 1: 1, 2: 1})
new_lr.fit(X, y)
print(new_lr.predict_proba(X))

# Overwrite the second model's learned parameters with copies of the first
# model's. The assertion then checks that both models give identical
# probabilities even though their class_weight settings differ.
new_lr.intercept_ = lr.intercept_.copy()
new_lr.coef_ = lr.coef_.copy()
np.testing.assert_array_equal(new_lr.predict_proba(X), lr.predict_proba(X))
, .