HTML Display - Jupyter
%matplotlib inline
# Notebook-wide configuration.
# Directory holding the dataset inside the NextJournal data mount.
pth = '/.nextjournal/data-named/QmXM1SAUDr39KzBUo4rkFn9VTti8hSGTneAsShAdL6VAcG/'
# CSV file inside `pth`, and the column that holds the headline text.
file = 'Headlines.csv'
D_COL = 'headline'
# Pickled weights file opened later with pickle.load.
# NOTE(review): the export dropped the quoting around this file reference;
# confirm whether it needs a directory prefix in your environment.
WEIGHTS = 'Headlines_Glove_Weights.pkl'
# Default target length used by pad_to when padding token lists.
PADDING = 64
# Execute dweNet.py inside the notebook namespace so that its definitions
# become globals here (presumably DenseNet and DEVICE, which are used further
# down but not defined in this notebook -- TODO confirm).
# NOTE(review): the export dropped the quotes around the file name; confirm the
# path. exec() runs the file verbatim, so only point it at a trusted script.
with open('dweNet.py') as source_file:
    exec(source_file.read())
# fastai v1 is used through its star-import API; the rest of the notebook relies
# on names these imports inject (Tokenizer, TextList, Learner, ...).
from fastai import *
from fastai.text import *
from fastai.callbacks import *
import torch.utils.data as data_utils
import numpy

# Make fastai place its tensors and models on the CUDA device by default.
defaults.device = torch.device('cuda')
def pad_to(x:Collection[str], pad_til = PADDING) -> Collection[str]:
    """Return the tokens of `x` as a list, right-padded with PAD up to `pad_til` items.

    Inputs that already hold `pad_til` or more tokens are returned unpadded and
    untruncated. The input collection itself is never modified.
    """
    padded = list(x)
    shortfall = pad_til - len(padded)
    if shortfall > 0:
        padded.extend([PAD] * shortfall)
    return padded
# Single-process spaCy tokenizer for English. pad_to is appended as the last
# post-rule so that it runs after the capitalisation rules on each token list.
tokenizer = Tokenizer(
    SpacyTokenizer,
    'en',
    pre_rules=[
        fix_html,
        replace_rep,
        replace_wrep,
        spec_add_spaces,
        rm_useless_spaces,
    ],
    post_rules=[
        replace_all_caps,
        deal_caps,
        pad_to,
    ],
    n_cpus=1,
)

# Processors applied to the text list: tokenize first, then numericalize.
processor = [
    TokenizeProcessor(tokenizer=tokenizer),
    NumericalizeProcessor(),
]
# Build the DataBunch: read the headline column from the CSV, split train and
# validation rows using the 'valid' column, and take the labels from column 0.
data = (
    TextList.from_csv(pth, file, cols=D_COL, processor=processor)
    .split_from_df(col='valid')
    .label_from_df(cols=0)
    .databunch()
)

data.show_batch()  # HTML output works for the Jupyter kernel but not for NextJournal
text | target |
---|---|
xxbos pulitzer prize winning play , and a winning director , too | nonsarcastic |
xxbos video : # xxunk poem on house floor | nonsarcastic |
xxbos cnn headline news reporter xxunk to face the cold , hard xxunk | sarcastic |
xxbos remains of minnesota boy missing since 1989 found | nonsarcastic |
xxbos the perfect little tea cake to kick off fall | nonsarcastic |
# Load the pickled weights matrix (presumably GloVe embedding weights, judging
# by the file name -- TODO confirm). The context manager closes the file
# handle, which the bare open() call left dangling.
# NOTE: pickle.load can execute arbitrary code; only load trusted files.
with open(WEIGHTS, 'rb') as weights_file:
    weights_matrix = pickle.load(weights_file)

# Build the network from the weights and move it to DEVICE. The trailing
# semicolon suppresses the notebook's echo of the module repr.
net = DenseNet(weights_matrix)
net.to(DEVICE);
# Wrap the data and the network in a fastai Learner, tracking accuracy and a
# micro-averaged F-beta score with beta=1.
learn = Learner(
    data,
    net,
    wd=0.1,
    loss_func=CrossEntropyFlat(),
    metrics=[accuracy, FBeta(average='micro', beta=1)],
)

# Train for a single one-cycle epoch.
# HTML output works for NextJournal but not for Jupyter, which just renders a
# gray rectangle. Changing 'show result as' to text shows that the result is an
# IPython.core.display.HTML object.
learn.fit_one_cycle(1, 1e-03, moms=(0.8, 0.7))
# Collect predictions, targets and per-sample losses, then hand them to the
# interpretation helper.
preds, y, losses = learn.get_preds(with_loss=True)
interp = ClassificationInterpretation(learn, preds, y, losses)

interp.plot_confusion_matrix()  # Works for Jupyter, not for NextJournal
# Show the per-sample losses; as the output below shows, the result is a topk
# structure of (values, indices) listed from the largest loss to the smallest.
interp.top_losses()
torch.return_types.topk(
values=tensor([4.1473e+00, 3.8272e+00, 3.8093e+00, ..., 1.4159e-03, 8.2983e-04,
6.8355e-04]),
indices=tensor([1690, 839, 4321, ..., 1072, 5098, 710]))
# Element 1 of the topk result is the `indices` tensor, i.e. the sample indices
# ordered by loss.
interp.top_losses()[1]
tensor([1690, 839, 4321, ..., 1072, 5098, 710])
# Look up validation item 1690, the first index reported by top_losses() above,
# to inspect its padded text and its category.
data.valid_ds[1690]
(Text xxbos do n't nobody wanna hear area man run his mouth xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad xxpad,
Category sarcastic)