Radabstellplätze Wien

pip3 install pandas numpy
2.1s

Aktuelle Daten von data.gv.at laden

(require '[clojure.data.json :as json]
         '[clojure.java.io :as io]
         '[clojure.string :as str])
 
;; WFS GetFeature request for the layer ogdwien:FAHRRADABSTELLANLAGEOGD,
;; asking for EPSG:4326 coordinates and JSON output.
(def json-url
  "https://data.wien.gv.at/daten/geo?service=WFS&request=GetFeature&version=1.1.0&typeName=ogdwien:FAHRRADABSTELLANLAGEOGD&srsName=EPSG:4326&outputFormat=json")

;; One shared instant, so that `date` and `date-de` always describe the same
;; day (two separate `Date.` calls could straddle midnight).
(def ^:private now (java.util.Date.))

;; Today's date as yyyy-MM-dd; used in file names.
(def date
  (.format (java.text.SimpleDateFormat. "yyyy-MM-dd") now))

;; Today's date as dd.MM.yyyy; used as a human-readable label.
(def date-de
  (.format (java.text.SimpleDateFormat. "dd.MM.yyyy") now))

;; Local download target for today's snapshot.
(def file-name (str "/tmp/input-" date ".json"))

;; Download the snapshot. `with-open` closes the URL reader once the copy is
;; done instead of leaving the connection to the garbage collector.
(with-open [in (io/reader json-url)]
  (io/copy in (io/file file-name)))

;; store input data for reference in results
(io/copy
  (io/file file-name)
  (io/file (str "/results/input-" date ".json")))

;; Parsed snapshot. `json/read` consumes the whole document before returning,
;; so the reader can be closed right after it.
(def current-data
  (with-open [in (io/reader file-name)]
    (json/read in)))
2.7s
(require '[panthera.panthera :as pt]
         '[libpython-clj.python :as py :refer [py* py** call-attr]]
         '[panthera.pandas.utils :as u])
35.9s
(defn show
  "Wraps `obj` for the notebook's \"html\" viewer by calling its `to_html`
  method with `opts` as keyword arguments (default: no options).
  When `pt/series?` is true for `obj`, `to_frame` is called on it first."
  ([obj]
   (show obj {}))
  ([obj opts]
   (let [frame (if (pt/series? obj)
                 (call-attr obj "to_frame")
                 obj)]
     {:nextjournal/viewer "html"
      :nextjournal/value  (py* frame to_html [] opts)})))
0.1s
;; Data frame built from the "properties" map of every entry under
;; "features" in the downloaded document.
(def features
  (->> (get current-data "features")
       (mapv #(get % "properties"))
       pt/data-frame))

;; Sum of ANZAHL grouped by BEZIRK, rendered as an HTML table.
(show
 (pt/sum
  (pt/groupby (pt/subset-cols features "BEZIRK" "ANZAHL")
              ["BEZIRK"])))
2.5s
;; Preview of `features` via `pt/head`, rendered without the index column.
(-> features
    pt/head
    (show {:index false}))
0.1s

Historische Daten

FAHRRADABSTELLANLAGEOGD-2014-08-28.json
Download
FAHRRADABSTELLANLAGEN-2015-04.csv
Download
input-2018-12-29.json
Download
input-2020-04-28.json
Download
;; Maps a snapshot date label (dd.MM.yyyy) to the location of that snapshot.
;; NOTE(review): the bare file names below look like notebook file references
;; (matching the uploads listed above) rather than ordinary Clojure symbols;
;; confirm they resolve to path strings, since `features-df` calls
;; `str/ends-with?` on these values.
(def data
  {"28.08.2014" FAHRRADABSTELLANLAGEOGD-2014-08-28.json
   "30.04.2015" FAHRRADABSTELLANLAGEN-2015-04.csv
   "29.12.2018" input-2018-12-29.json
   "28.04.2020" input-2020-04-28.json
   ;; today's download, keyed by today's date label
   date-de file-name})
;; evaluate the var so the notebook displays the map
data
0.1s
(defn features-df
  "Loads the snapshot at `uri` into a panthera data frame.

  * `.json`: one row per entry under \"features\", built from each entry's
    \"properties\" map.
  * `.csv`: read with `pt/read-csv`; the columns \"bezirk\" and \"anzahl\"
    are renamed to \"BEZIRK\" and \"ANZAHL\" to match the JSON snapshots.

  Throws `ex-info` (with `:uri` in the data map) for any other extension,
  instead of returning nil and failing later in an unrelated place."
  [uri]
  (cond
    (str/ends-with? uri ".json")
    ;; `json/read` parses the whole document eagerly, so the reader can be
    ;; closed before the frame is built.
    (let [parsed (with-open [in (io/reader uri)]
                   (json/read in))]
      (pt/data-frame
       (mapv (fn [f] (get f "properties"))
             (get parsed "features"))))

    (str/ends-with? uri ".csv")
    (-> (pt/read-csv uri)
        (pt/rename {:columns {"bezirk" "BEZIRK" "anzahl" "ANZAHL"}}))

    :else
    (throw (ex-info "Unsupported input format; expected a .json or .csv file"
                    {:uri uri}))))
  
(defn features-by-district-df
  "Loads the snapshot at `uri` via `features-df` and sums ANZAHL per BEZIRK.
  The result has the columns \"Bezirk\" and `sum-k` (the name given to the
  summed column), both cast to int32."
  [uri sum-k]
  (let [per-district (-> (features-df uri)
                         (pt/subset-cols "BEZIRK" "ANZAHL")
                         (pt/groupby ["BEZIRK"] {:as-index false})
                         pt/sum)
        renamed      (pt/rename per-district
                                {:columns {"BEZIRK" "Bezirk" "ANZAHL" sum-k}})]
    (pt/astype renamed {"Bezirk" "int32" sum-k "int32"})))
0.1s
;; One per-district frame per entry of `data` (summed column named after the
;; entry's date label), folded together with `pt/merge-ordered`.
(def history
  (->> data
       (mapv (fn [[label uri]]
               (features-by-district-df uri label)))
       (reduce pt/merge-ordered)))
5.0s
;; Today's column minus the "29.12.2018" column of `history`.
(def diff
  (let [current  (pt/subset-cols history date-de)
        baseline (pt/subset-cols history "29.12.2018")]
    (pt/ops current baseline :-)))
(vec diff)
0.2s

Anzahl Radabstellplätze je Bezirk im Jahresvergleich

;; `history` with `diff` attached as an extra column.
(def history-with-diff
  (let [diff-column "Differenz Dez 2018 - aktuell"]
    (pt/assign history {diff-column diff})))

;; NOTE(review): the output name spells "radabbstellplaetze" with a double
;; "b" — confirm nothing depends on that name before correcting it.
(pt/to-csv history-with-diff "/results/radabbstellplaetze-2014-2021.csv")

(-> history-with-diff
    (show {:index false}))
0.5s

Veränderung Radabstellplätze am Neubau

;; Full feature frames for the two snapshots that are compared below.
(def features-19
  (-> data (get "29.12.2018") features-df))
(def features-20
  (-> data (get "28.04.2020") features-df))
1.9s
(defn pandas-merge
  "Calls the `merge` attribute of `u/pd` with `left` and `right` as
  positional arguments. The optional `attrs` map is converted with
  `u/keys->pyargs` and passed as keyword arguments."
  [left right & [attrs]]
  (let [kwargs (u/keys->pyargs attrs)]
    (py/call-attr-kw u/pd "merge" [left right] kwargs)))
0.1s
(defn address-sum
  "Groups `df` by \"ADRESSE\" (with `:as-index false`), selects the
  \"ANZAHL\" attribute of the grouping and returns its sum."
  [df]
  (let [grouped (pt/groupby df ["ADRESSE"] {:as-index false})
        counts  (py/get-attr grouped "ANZAHL")]
    (pt/sum counts)))
0.1s
(defn- neubau-address-sum
  "Keeps the rows of `df` whose \"BEZIRK\" equals 7 and returns their
  per-address ANZAHL sums (see `address-sum`)."
  [df]
  (-> df
      (pt/filter-rows
       (fn [rows]
         (-> rows (pt/subset-cols "BEZIRK") (pt/eq 7) pt/values)))
      address-sum))

;; Per-address sums for district 7 in each snapshot. Both go through the
;; same helper so the two sides of the comparison cannot drift apart.
(def features-neubau-19 (neubau-address-sum features-19))
(def features-neubau-20 (neubau-address-sum features-20))
0.2s
(require '[panthera.pandas.generics :as generics :refer [sort-values]])
;; Outer merge of the two district-7 frames on address AND count, with the
;; indicator column enabled; the filters below read that "_merge" column.
(def combined
  (let [merge-opts {:on        ["ADRESSE" "ANZAHL"]
                    :how       "outer"
                    :indicator true
                    :suffixes  ["-19" "-20"]}]
    (pandas-merge features-neubau-19 features-neubau-20 merge-opts)))

;; Rows whose "_merge" value is "right_only", sorted by address.
(def added
  (let [right-only? (fn [df]
                      (-> df
                          (pt/subset-cols "_merge")
                          (pt/eq "right_only")
                          pt/values))]
    (-> combined
        (pt/filter-rows right-only?)
        (sort-values {:by "ADRESSE"}))))
(show added)
 
0.3s
;; Display the result of `pt/tail` on the merged frame.
(show (pt/tail combined))
0.1s
;; Rows of `combined` whose "_merge" value is not "both", converted to
;; Clojure data.
(def changed
  (pt/->clj
   (pt/filter-rows
    combined
    (fn [df]
      (-> df (pt/subset-cols "_merge") (pt/ne "both") pt/values)))))

;; Net change per address: counts from "right_only" rows are added, all
;; other counts are subtracted. The result is a map sorted by address.
(def changed-agg
  (into (sorted-map-by compare)
        (reduce
         (fn [totals row]
           (let [address (:ADRESSE row)
                 amount  (:ANZAHL row)
                 so-far  (or (get totals address) 0)]
             (assoc totals
                    address
                    (if (= (:-merge row) "right_only")
                      (+ so-far amount)
                      (- so-far amount)))))
         {}
         changed)))

(show
 (pt/data-frame {:ADRESSE (keys changed-agg)
                 :ANZAHL  (vals changed-agg)}))
        
        
 
0.4s
;; Rows whose "_merge" value is "left_only", sorted by address.
(def removed
  (let [left-only? (fn [df]
                     (-> df
                         (pt/subset-cols "_merge")
                         (pt/eq "left_only")
                         pt/values))]
    (-> combined
        (pt/filter-rows left-only?)
        (sort-values {:by "ADRESSE"}))))
(show removed)
0.2s

Appendix

;; Dependency map; presumably the deps.edn of the Clojure runtime that
;; evaluates the cells above (confirm against the runtime configuration).
{:deps
 {org.clojure/clojure    {:mvn/version "1.10.1"}
  ;; required above as `json`
  org.clojure/data.json  {:mvn/version "0.2.6"}
  ;; provides the `libpython-clj.python` namespace required above as `py`
  clj-python/libpython-clj {:mvn/version "1.41"}
  ;; provides the `panthera.*` namespaces required above
  panthera {:mvn/version "0.1-alpha.19"}}}
 
Extensible Data Notation
Runtimes (2)