Dainius Jocas / Aug 10 2022 / Published
Text analysis
{:deps {org.clojure/clojure {:mvn/version "1.10.3"}
        ;; compliment is used for autocompletion
        ;; add your libs here (and restart the runtime to pick up changes)
        compliment/compliment {:mvn/version "0.3.9"}
        ;; Lucene helpers required by the notebook cells
        ;; (lucene.custom.analyzer and lucene.custom.text-analysis namespaces)
        lt.jocas/lucene-custom-analyzer {:mvn/version "1.0.21"}
        lt.jocas/lucene-text-analysis {:mvn/version "1.0.17"}
        ;; pinned to an exact git commit
        org.clojure/tools.deps.alpha {:git/url "https://github.com/clojure/tools.deps.alpha.git"
                                      :sha "d0b33e0d346736aa985c150145b332f97b92135e"}}}
# Refresh the package index, then install graphviz, which provides the `dot`
# binary that the draw-text helper shells out to.
apt-get update
# -y answers the confirmation prompt so the install cannot stall a non-interactive cell.
apt-get install -y graphviz
;; Setup notebook helpers
;; `require` is a function, so each libspec vector must be quoted.
(require '[lucene.custom.text-analysis :as analysis])
(require '[lucene.custom.analyzer :as custom-analyzer])
(require '[clojure.java.io :as io])
(require '[clojure.java.shell :as sh])

(defn draw-text
  "Analyzes the text, converts the token stream into a dot program, and
  shells out to graphviz.

  text          - the text to analyze.
  analyzer-conf - analyzer configuration passed to custom-analyzer/create.

  The resulting graph image is stored at results/text-graph.png (the results/
  directory is created when missing). Image will be previewed below.

  Returns nil. Throws ex-info carrying :exit, :out and :err when the dot
  process exits with a non-zero status."
  [text analyzer-conf]
  (let [analyzer    (custom-analyzer/create analyzer-conf)
        dot-program (analysis/text->graph text analyzer)
        output-file "results/text-graph.png"]
    ;; dot does not create missing directories for its -o target.
    (io/make-parents output-file)
    (let [{:keys [exit out err]} (sh/sh "dot" "-Tpng" "-o" output-file :in dot-program)]
      ;; Surface rendering failures instead of silently leaving no (or a stale) image.
      (when-not (zero? exit)
        (throw (ex-info "graphviz dot failed to render the token graph"
                        {:exit exit :out out :err err})))))
  nil)
;; Example: draw the token graph of a short sample sentence.
;; The analyzer configuration is named first so that each stage
;; (char filters -> tokenizer -> token filters) can be read on its own.
(let [sample-text   "foo bar baz"
      char-filters  [{:patternReplace {:pattern     "foo"
                                       :replacement "aaa"}}]
      ;; Token filters are listed in the same order as they are passed to the analyzer.
      token-filters [:uppercase
                     :reverseString
                     {:edgeNGram {:minGramSize       1
                                  :maxGramSize       5
                                  :preserveOriginal  true}}
                     {:shingle {:minShingleSize 2
                                :maxShingleSize 4
                                :tokenSeparator "_"}}]
      analyzer-conf {:tokenizer     :standard
                     :char-filters  char-filters
                     :token-filters token-filters}]
  (draw-text sample-text analyzer-conf))