scikit-multilearn-ng
In this section you will learn the basic concepts behind multi-label classification.
TLDR: Multi-label data is data where each sample can belong to multiple classes at once. A movie, for example, can belong to several genres, such as "action" and "comedy"; a news article can cover several topics, such as "politics" and "sports".
scikit-multilearn-ng expects on input:

- X to be a matrix of shape (n_samples, n_features),
- y to be a matrix of shape (n_samples, n_labels).

Let's load up a data set to see this in practice:
from skmultilearn.dataset import load_dataset
X, y, _, _ = load_dataset('emotions', 'train')
emotions:train - exists, not redownloading
X, y
(<391x72 sparse matrix of type '<class 'numpy.float64'>' with 28059 stored elements in List of Lists format>, <391x6 sparse matrix of type '<class 'numpy.int64'>' with 709 stored elements in List of Lists format>)
We can see that in the case of the emotions data the values are n_samples=391, n_features=72, and n_labels=6.
By "matrix" scikit-multilearn-ng understands a data structure that follows the A[i,j] element-access scheme. Sparse matrices should be used instead of dense ones, especially for the output space: scikit-multilearn-ng internally converts dense representations to the sparse representations best suited to a given classification procedure, and it also outputs predictions as sparse matrices.

X can store any type of data a given classification method can handle, but nominal encoding is always helpful. Nominal encoding is enabled by default when loading data with the skmultilearn.dataset.Dataset.load_arff_to_numpy helper, which also returns sparse representations of X and y loaded from an ARFF data file.
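If you are loading your own ARFF files, the skmultilearn.dataset.load_from_arff helper can be used the same way; here is a minimal sketch, in which the file path and label count are placeholders for your own data:
from skmultilearn.dataset import load_from_arff
# 'path/to/data.arff' and label_count=6 are placeholders for your own data
X_arff, y_arff = load_from_arff(
    'path/to/data.arff',
    label_count=6,          # number of label columns in the file
    label_location='end',   # labels stored as the last attributes (Mulan convention)
    load_sparse=False       # set to True if the ARFF file uses sparse format
)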
y is expected to be a binary integer indicator matrix of shape (n_samples, n_labels). In the binary indicator matrix each element A[i,j] should be 1 if label j is assigned to the i-th object, and 0 otherwise.
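As a minimal illustration of the indicator format, here is a hand-built toy matrix for 3 samples and 4 labels, constructed with scipy in the same sparse format used above:
import numpy as np
from scipy.sparse import lil_matrix
# toy indicator matrix: 3 samples, 4 labels
y_toy = lil_matrix((3, 4), dtype=np.int64)
y_toy[0, 0] = 1  # label 0 assigned to sample 0
y_toy[0, 2] = 1  # label 2 also assigned to sample 0
y_toy[2, 3] = 1  # label 3 assigned to sample 2
y_toy.toarray()
# array([[1, 0, 1, 0],
#        [0, 0, 0, 0],
#        [0, 0, 0, 1]])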
We highly recommend storing every multi-label output space in sparse matrices; scikit-multilearn-ng classifiers operate internally only on sparse binary label indicator matrices, and this is also the format of predicted label assignments. Sparse representation is the default because it is very rare for a real-world output space y to be dense: usually, the number of labels assigned per instance is just a small portion of all labels. The average percentage of labels assigned per object is called label density, and in established data sets it tends to be small (see http://mulan.sourceforge.net/datasets-mlc.html).
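We can check this on the emotions training data loaded above; the sketch below computes label cardinality (average number of labels per instance) and label density:
# label cardinality: average number of labels assigned per instance
cardinality = y.sum() / y.shape[0]
# label density: cardinality normalised by the number of labels
density = cardinality / y.shape[1]
cardinality, density
# ≈ (1.81, 0.30): 709 assignments over 391 samples and 6 labels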
Multi-label classification is a type of classification where the goal is to assign multiple labels to each sample: multiple genres to a movie, multiple topics to a news article. One family of approaches transforms the multi-label problem into one or more single-label problems; this category is called "problem transformation" and includes methods such as Binary Relevance (one classifier per label), Classifier Chains, and Label Powerset. Another family, "algorithm adaptation", modifies a single-label algorithm to predict multiple labels at once; examples include MLkNN, MLARAM, and MLTSVM.
Here is a basic train/test split example:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
X_train.shape, X_test.shape
((195, 72), (196, 72))
In the case of multi-label classification, we need to split the data into training and testing sets. For many reasons, traditional single-label approaches to stratifying data fail to provide balanced data set divisions, which prevents classifiers from generalizing; one should therefore use a multi-label stratification approach.
We will use the skmultilearn.model_selection.iterative_stratification module to split the data into training and testing sets.
from skmultilearn.model_selection import iterative_train_test_split
X_train, y_train, X_test, y_test = iterative_train_test_split(X, y, test_size=0.5)
X_train.shape, X_test.shape
((191, 72), (200, 72))
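To see the effect of stratification, we can compare how often each label occurs in the two halves; with an iterative stratified split the per-label counts should be roughly proportional:
# per-label positive counts in each half of the stratified split
y_train.sum(axis=0), y_test.sum(axis=0)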
from skmultilearn.problem_transform import BinaryRelevance
from sklearn.naive_bayes import GaussianNB
# initialize Binary Relevance multi-label classifier
# with a gaussian naive bayes base classifier
classifier = BinaryRelevance(
    classifier=GaussianNB(),
    require_dense=[True, True]
)
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
predictions
<200x6 sparse matrix of type '<class 'numpy.int64'>' with 487 stored elements in Compressed Sparse Column format>
Now let's evaluate the performance of the Binary Relevance method on the emotions data set. Note that for multi-label data accuracy_score computes subset accuracy: a sample counts as correct only if its entire label set is predicted exactly, which is why this score is typically much lower than the per-label metrics.
from sklearn.metrics import accuracy_score, f1_score, recall_score
f1_score(y_test, predictions, average='weighted'), accuracy_score(y_test, predictions), recall_score(y_test, predictions, average='weighted')
(0.6470091169076729, 0.195, 0.7605633802816901)
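Another commonly reported multi-label metric is the Hamming loss, the fraction of individual label assignments that are wrong; it can be computed directly with scikit-learn:
from sklearn.metrics import hamming_loss
# fraction of label assignments that differ between truth and prediction
hamming_loss(y_test, predictions)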
Let's see how the MLkNN method performs on the emotions data set.
from skmultilearn.adapt import MLkNN
# initialize MLkNN multi-label classifier, which is a multi-label adaptation of the k-nearest neighbour algorithm,
# with k=20, which is the number of neighbours of each input instance to take into account
classifier = MLkNN(k=20)
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
f1_score(y_test, predictions, average='weighted'), accuracy_score(y_test, predictions), recall_score(y_test, predictions, average='weighted')
(0.4039511287404211, 0.115, 0.3492957746478873)
Let's see how to find the best parameters for the MLkNN method on the emotions data set using GridSearchCV. Note that this can be computationally expensive.
from sklearn.model_selection import GridSearchCV
parameters = {'k': range(1,20)}
score = 'f1_weighted'
classifier = GridSearchCV(MLkNN(), parameters, scoring=score)
classifier.fit(X, y)
classifier.best_params_, classifier.best_score_
({'k': 5}, 0.5018561128889709)
Here we can see that k=5 is the best parameter for the MLkNN method on the emotions dataset, with a weighted F1 score of 0.501. We can conclude that Binary Relevance performs better than MLkNN (based on the F1 score) in this case.
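MLkNN also exposes a smoothing parameter s that can be tuned in the same grid; the value range below is an illustrative choice, not a recommendation, and the search becomes correspondingly more expensive:
parameters = {'k': range(1, 20), 's': [0.5, 0.7, 1.0]}
classifier = GridSearchCV(MLkNN(), parameters, scoring='f1_weighted')
classifier.fit(X, y)
classifier.best_params_, classifier.best_score_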
from skmultilearn.problem_transform import ClassifierChain
# initialize Classifier Chain multi-label classifier
# with a gaussian naive bayes base classifier
classifier = ClassifierChain(
    classifier=GaussianNB(),
    require_dense=[True, True]
)
# train
classifier.fit(X_train, y_train)
# predict
predictions = classifier.predict(X_test)
f1_score(y_test, predictions, average='weighted'), accuracy_score(y_test, predictions), recall_score(y_test, predictions, average='weighted')
(0.6484772423333367, 0.24, 0.7352112676056338)
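For completeness, the third problem transformation method mentioned earlier, Label Powerset, follows the same fit/predict pattern; here is a minimal sketch with the same base classifier:
from skmultilearn.problem_transform import LabelPowerset
# Label Powerset treats every distinct label combination as one class
classifier = LabelPowerset(
    classifier=GaussianNB(),
    require_dense=[True, True]
)
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
f1_score(y_test, predictions, average='weighted'), accuracy_score(y_test, predictions), recall_score(y_test, predictions, average='weighted')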