Radabstellplätze Wien
# Install pandas and numpy.
pip3 install pandas numpy
# exported cell timing: 12.5s
clj-pandas (Bash in Clojure)
Aktuelle Daten von data.gv.at laden
;; Download the current dataset, keep a dated copy under /results, and parse it.
(require '[clojure.data.json :as json]
         '[clojure.java.io :as io]
         '[clojure.string :as str])

;; WFS GetFeature request for typeName ogdwien:FAHRRADABSTELLANLAGEOGD with
;; outputFormat=json.
(def json-url
  "https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:FAHRRADABSTELLANLAGEOGD&srsName=EPSG:4326&outputFormat=json")

;; Today's date formatted as yyyy-MM-dd; used in the name of the stored copy.
(def date
  (.format (java.text.SimpleDateFormat. "yyyy-MM-dd") (java.util.Date.)))

(def file-name (str "/results/input-" date ".json"))

;; store input data for reference in results
;; with-open closes the URL reader once the copy has finished.
(with-open [in (io/reader json-url)]
  (io/copy in (io/file file-name)))

;; Parse the stored copy; the reader is closed as soon as parsing is done.
(def current-data
  (with-open [rdr (io/reader file-name)]
    (json/read rdr)))
;; exported cell timing: 6.5s
Clojure
clj-pandas
user/current-data
nil
;; Libspecs must be quoted; unquoted vectors would be evaluated as code.
(require '[panthera.panthera :as pt]
         '[libpython-clj.python :as py :refer [py* py** call-attr]]
         '[panthera.pandas.utils :as u])
;; exported cell timing: 40.0s
Clojure
clj-pandas
nil
(defn show
  "Wrap `obj` in a map for the \"html\" viewer.

  When `(pt/series? obj)` is true, `to_frame` is called on `obj` first. The
  viewer value is the result of calling `to_html` with `opts`; `opts`
  defaults to `{}` in the one-argument arity."
  ([obj] (show obj {}))
  ([obj opts]
   (let [frame (if (pt/series? obj)
                 (call-attr obj "to_frame")
                 obj)]
     {:nextjournal/viewer "html"
      :nextjournal.viewer/value (py* frame to_html [] opts)})))
;; exported cell timing: 0.1s
Clojure
clj-pandas
user/show
;; Data-frame built from the "properties" map of every entry in the
;; "features" array of current-data.
(def features
  (->> (get current-data "features")
       (mapv #(get % "properties"))
       pt/data-frame))

;; Sum of the selected columns grouped by BEZIRK, shown as HTML.
(show
 (pt/sum
  (pt/groupby (pt/subset-cols features "BEZIRK" "ANZAHL")
              ["BEZIRK"])))
;; exported cell timing: 2.4s
Clojure
clj-pandas
;; Preview of `features` via pt/head, rendered with {:index false}.
(-> features
    pt/head
    (show {:index false}))
;; exported cell timing: 0.1s
Clojure
clj-pandas
Historische Daten
;; Maps a snapshot label (a dd.MM.yyyy date string) to the data file for that snapshot.
;; NOTE(review): the values are bare tokens rather than string literals —
;; presumably notebook file references flattened by the export. Confirm the
;; actual file paths before evaluating this outside the notebook.
(def data {"28.08.2014" FAHRRADABSTELLANLAGEOGD-2014-08-28.json "30.04.2015" FAHRRADABSTELLANLAGEN-2015-04.csv "29.12.2018" input-2018-12-29.json "28.04.2020" input-2020-04-28.json})0.1s
Clojure
clj-pandas
user/data
(defn features-df
  "Load the file at `uri` into a data-frame.

  - `.json`: one row per entry of the top-level \"features\" array, built from
    each entry's \"properties\" map.
  - `.csv`: read with `pt/read-csv`; the columns \"bezirk\" and \"anzahl\" are
    renamed to \"BEZIRK\" and \"ANZAHL\".

  Returns nil when `uri` ends with neither extension."
  [uri]
  (cond
    (str/ends-with? uri ".json")
    ;; mapv realizes every row before with-open closes the reader, so the
    ;; file handle is released without affecting the result.
    (let [rows (with-open [rdr (io/reader uri)]
                 (mapv (fn [f] (get f "properties"))
                       (get (json/read rdr) "features")))]
      (pt/data-frame rows))

    (str/ends-with? uri ".csv")
    (-> (pt/read-csv uri)
        (pt/rename {:columns {"bezirk" "BEZIRK" "anzahl" "ANZAHL"}}))))

(defn features-by-district-df
  "Sum ANZAHL per BEZIRK for the file at `uri`.

  The resulting columns are renamed to \"Bezirk\" and `sum-k`, and both are
  cast to int32."
  [uri sum-k]
  (-> uri
      features-df
      (pt/subset-cols "BEZIRK" "ANZAHL")
      (pt/groupby ["BEZIRK"] {:as-index false})
      pt/sum
      (pt/rename {:columns {"BEZIRK" "Bezirk" "ANZAHL" sum-k}})
      (pt/astype {"Bezirk" "int32" sum-k "int32"})))
;; exported cell timing: 0.1s
Clojure
clj-pandas
user/features-by-district-df
;; One per-district frame per entry of `data` (sum column named after the
;; entry's key), folded together with pt/merge-ordered.
(def history
  (->> data
       (map (fn [[label uri]] (features-by-district-df uri label)))
       (reduce pt/merge-ordered)))
;; exported cell timing: 4.0s
Clojure
clj-pandas
user/history
;; Column "28.04.2020" combined with column "29.12.2018" using the :- op.
(def diff
  (let [newer (pt/subset-cols history "28.04.2020")
        older (pt/subset-cols history "29.12.2018")]
    (pt/ops newer older :-)))

(vec diff)
;; exported cell timing: 0.1s
Clojure
clj-pandas
Vector(23)[-124, 1228, 151, 56, 81, 62, 167, 32, 139, 214, -8, 86, -46, 75, 108, 144, 62, 172, 4, -56, 3 more...]
Anzahl Radabstellplätze je Bezirk im Jahresvergleich
;; `history` with `diff` attached as an extra column.
(def history-with-diff
  (-> history
      (pt/assign {"Differenz Dez 2018 - aktuell" diff})))

;; NOTE(review): the file name spells "radabbstellplaetze" with a double "b";
;; kept as-is because other consumers may rely on this path — confirm.
(-> history-with-diff
    (pt/to-csv "/results/radabbstellplaetze-2014-2020.csv"))

(-> history-with-diff
    (show {:index false}))
;; exported cell timing: 0.2s
Clojure
clj-pandas
Veränderung Radabstellplätze am Neubau
;; Data-frames for the "29.12.2018" and "28.04.2020" entries of `data`.
(def features-19
  (-> data (get "29.12.2018") features-df))

(def features-20
  (-> data (get "28.04.2020") features-df))
;; exported cell timing: 1.6s
Clojure
clj-pandas
user/features-20
(defn pandas-merge
  "Call the \"merge\" attribute of `u/pd` with `left` and `right` as positional
  arguments. The optional `attrs` map is converted with `u/keys->pyargs` and
  passed as the keyword arguments."
  [left right & [attrs]]
  (let [kwargs (u/keys->pyargs attrs)]
    (py/call-attr-kw u/pd "merge" [left right] kwargs)))
;; exported cell timing: 0.0s
Clojure
clj-pandas
user/pandas-merge
(defn address-sum
  "Group `df` by \"ADRESSE\" (with {:as-index false}), take the \"ANZAHL\"
  attribute of the grouped object, and apply `pt/sum` to it."
  [df]
  (let [by-address (pt/groupby df ["ADRESSE"] {:as-index false})]
    (pt/sum (py/get-attr by-address "ANZAHL"))))
;; exported cell timing: 0.1s
Clojure
clj-pandas
user/address-sum
;; Rows with BEZIRK equal to 7 from each snapshot, summed per address.
;; The row predicate must be an anonymous function: `%` only resolves inside
;; the `#(...)` reader macro.
(def features-neubau-19
  (-> features-19
      (pt/filter-rows #(-> % (pt/subset-cols "BEZIRK") (pt/eq 7) pt/values))
      address-sum))

(def features-neubau-20
  (-> features-20
      (pt/filter-rows #(-> % (pt/subset-cols "BEZIRK") (pt/eq 7) pt/values))
      address-sum))
;; exported cell timing: 0.2s
Clojure
clj-pandas
user/features-neubau-20
;; The libspec must be quoted for `require`.
(require '[panthera.pandas.generics :as generics :refer [sort-values]])

;; Outer merge of the 2018 and 2020 per-address sums on both ADRESSE and
;; ANZAHL, with :indicator enabled; the cells below filter on the resulting
;; "_merge" column.
(def combined
  (pandas-merge features-neubau-19
                features-neubau-20
                {:on ["ADRESSE" "ANZAHL"]
                 :indicator true
                 :suffixes ["-19" "-20"]
                 :how "outer"}))

;; Rows whose "_merge" value equals "right_only", sorted by ADRESSE.
;; `#(...)` is required for `%` to resolve.
(def added
  (-> combined
      (pt/filter-rows #(-> % (pt/subset-cols "_merge") (pt/eq "right_only") pt/values))
      (sort-values {:by "ADRESSE"})))

(show added)
;; exported cell timing: 0.4s
Clojure
clj-pandas
;; Render the result of pt/tail on `combined`.
(show (pt/tail combined))
;; exported cell timing: 0.1s
Clojure
clj-pandas
| | ADRESSE | ANZAHL | _merge |
|---|---|---|---|
| 298 | Zieglergasse 72-74 | 4.0 | right_only |
| 299 | Zieglergasse 78 | 10.0 | right_only |
| 300 | Zieglergasse 8 | 16.0 | right_only |
| 301 | Zieglergasse 82 | 4.0 | right_only |
| 302 | Zieglergasse 84 | 4.0 | right_only |
;; Rows of `combined` whose "_merge" value is not "both", converted to Clojure
;; data with pt/->clj. `#(...)` is required for `%` to resolve.
(def changed
  (-> combined
      (pt/filter-rows #(-> % (pt/subset-cols "_merge") (pt/ne "both") pt/values))
      pt/->clj))

;; Net ANZAHL per ADRESSE: rows whose :-merge is "right_only" are added, all
;; other rows are subtracted. The result is a map sorted by address.
(def changed-agg
  (->> changed
       (reduce (fn [m f]
                 (let [delta (if (= (:-merge f) "right_only")
                               (:ANZAHL f)
                               (- (:ANZAHL f)))]
                   ;; fnil starts a previously unseen address at 0.
                   (update m (:ADRESSE f) (fnil + 0) delta)))
               {})
       (into (sorted-map))))

(show (pt/data-frame {:ADRESSE (keys changed-agg)
                      :ANZAHL (vals changed-agg)}))
;; exported cell timing: 0.5s
Clojure
clj-pandas
;; Rows of `combined` whose "_merge" value equals "left_only", sorted by
;; ADRESSE. `#(...)` is required for `%` to resolve.
(def removed
  (-> combined
      (pt/filter-rows #(-> % (pt/subset-cols "_merge") (pt/eq "left_only") pt/values))
      (sort-values {:by "ADRESSE"})))

(show removed)
;; exported cell timing: 0.3s
Clojure
clj-pandas
Appendix
;; deps.edn
{:deps {org.clojure/clojure {:mvn/version "1.10.1"}
        org.clojure/data.json {:mvn/version "0.2.6"}
        clj-python/libpython-clj {:mvn/version "1.41"}
        panthera {:mvn/version "0.1-alpha.19"}}}
Extensible Data Notation