Nick Doiron / Jul 02 2019
Remix of Python by Nextjournal

XAI Experiments: Alibi

pip install numpy sklearn spacy alibi
import random

import numpy as np
import spacy
from alibi.datasets import movie_sentiment
from alibi.explainers import AnchorText
from alibi.utils.download import spacy_model
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
data, original_labels = movie_sentiment()
# `data` is an array of strings and `original_labels` is an array of the
# corresponding class numbers. The original labels are not used below:
# instead we build a deliberately "cheatable" task in which the class is
# leaked by a marker word prepended to the text.
#   class 0 -> 'apples ' prefix
#   class 1 -> 'oranges ' prefix
#   class 2 -> text left untouched
prefixes = ('apples ', 'oranges ', '')
labels = []
for i, sentence in enumerate(data):
    # randint is inclusive on both ends, so this draws 0, 1 or 2.
    destiny_class = random.randint(0, 2)
    data[i] = prefixes[destiny_class] + sentence
    # Record the class of *every* sample so `labels` stays aligned with `data`.
    labels.append(destiny_class)
print(data[0] + ' = ' + str(labels[0]))
# Hold out 20% of the samples for testing, then 10% of what remains for
# validation. The fixed random_state keeps both splits reproducible.
train, test, train_labels, test_labels = train_test_split(data, labels, test_size=.2, random_state=42)
train, val, train_labels, val_labels = train_test_split(train, train_labels, test_size=.1, random_state=42)
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)
val_labels = np.array(val_labels)

# Bag-of-words features. The vocabulary is learned from the training split
# only, so validation/test texts are encoded with the same columns.
vectorizer = CountVectorizer(min_df=1)
vectorizer.fit(train)

# The classifier must be fitted before predict_fn can be called.
clf = LogisticRegression(solver='liblinear')
clf.fit(vectorizer.transform(train), train_labels)


def predict_fn(x):
    """Return the predicted class index for each raw text in `x`.

    `x` is an iterable of strings; it is vectorized with the fitted
    `vectorizer` and passed to `clf.predict`.
    """
    return clf.predict(vectorizer.transform(x))


preds_train = predict_fn(train)
preds_val = predict_fn(val)
preds_test = predict_fn(test)
print('Train accuracy', accuracy_score(train_labels, preds_train))
print('Validation accuracy', accuracy_score(val_labels, preds_val))
print('Test accuracy', accuracy_score(test_labels, preds_test))

# spacy.load requires the model package to be installed already
# (e.g. `python -m spacy download en_core_web_md`).
model = 'en_core_web_md'
nlp = spacy.load(model)
explainer = AnchorText(nlp, predict_fn)
# Index in this list == class index produced by the classifier.
class_names = ['Apples', 'Oranges', 'Neither']

Predictions of the Cheatable Classifier


text = 'This is a good book .'
# Run the classifier once and reuse the predicted class index.
pred_index = predict_fn([text])[0]
pred = class_names[pred_index]
# NOTE(review): `1 - index` is a leftover from a two-class setup. With three
# classes it does not identify a unique alternative (index 2 wraps around to
# -1, i.e. the predicted class itself). Kept unchanged for compatibility; it
# is not used in this section.
alternative = class_names[1 - pred_index]
print('Prediction: %s' % pred)
# See the Alibi AnchorText documentation for the meaning of `threshold`,
# `use_proba` and `use_unk`.
explanation = explainer.explain(text, threshold=0.95, use_proba=False, use_unk=True)
print('Anchor: %s' % (' AND '.join(explanation['names'])))
print('Precision: %.2f' % explanation['precision'])
print('\nExamples where anchor applies and model predicts %s:' % pred)
# Print the examples announced by the header above: the perturbed texts from
# the last entry of the raw examples for which the prediction was unchanged.
print('\n'.join(x[0] for x in explanation['raw']['examples'][-1]['covered_true']))


text2 = 'Apples This is a test .'
# Run the classifier once and reuse the predicted class index.
pred2_index = predict_fn([text2])[0]
pred2 = class_names[pred2_index]
# NOTE(review): `1 - index` is a leftover from a two-class setup. With three
# classes it does not identify a unique alternative (index 2 wraps around to
# -1, i.e. the predicted class itself). Kept unchanged for compatibility; it
# is not used in this section.
alternative2 = class_names[1 - pred2_index]
print('Prediction: %s' % pred2)
# See the Alibi AnchorText documentation for the meaning of `threshold`,
# `use_proba` and `use_unk`.
explanation2 = explainer.explain(text2, threshold=0.95, use_proba=True, use_unk=False)
print('Anchor: %s' % (' AND '.join(explanation2['names'])))
print('Precision: %.2f' % explanation2['precision'])

# Only the last entry of the raw examples is shown.
last_examples2 = explanation2['raw']['examples'][-1]
print('\nExamples where anchor applies and model predicts %s:' % pred2)
print('\n'.join(x[0] for x in last_examples2['covered_true']))
print('\nExamples where model predicted a different class:')
print('\n'.join(x[0] for x in last_examples2['covered_false']))