2021-003 📊數據動圖展秀臺灣HDI
我們常用的清資料工具是 蟒蛇🐼 Python Pandas 以及 🐷莉亞 Julia,各種 DataFrame 套件豐富,不到一百行程式,就能把資料轉換為我們要的最終樣貌。但偏偏 Python Pandas 或 Julia 在數據動圖繪製上,互動性不像 JavaScript 這麼高。例如,我們若要把臺灣跟聯合國合併後的 HDI 人類發展指數呈現出來,或許可以用 🐷莉亞 常見的 Plots.jl 畫出來,只可惜滑鼠互動上還不夠吸引人:
# Minimal Plots.jl line-chart example driven by random placeholder data.
using Plots
# x holds the positions 1..10 shared by every line; y is a 10x2 random matrix.
x = 1:10; y = rand(10, 2) # 2 columns means two lines
# Draw the columns of y against x and keep the resulting plot object in p.
p = plot(x, y)
# A third random series of the same length as x.
z = rand(10)
# Add z as another line on the existing plot p.
plot!(p, x, z)
我們來把「臺灣人類發展指數」塞到 📊數據動圖 吧
改用 JavaScript 製作數據動圖比較有滑鼠互動性,描述每個國家的指數差異,我們運用 %%html 這個魔法命令(這命令隸屬於Jupyter Notebook),讓下面的程式碼可以直接跑出 JavaScript amcharts 圖表在文章當中,對於展示 JS 效果很方便。
在上一篇分享文章 https://nextjournal.com/cameo/2021-002-taiwan-hdi?token=4DXuq8KhDLiwh35TUJTdDN 最終清資料之後,把臺灣人類發展指數與聯合國的HDI資料合併了,我們來將 CSV 用字串的形式放入數據動圖 JavaScript 展示:
%%html
<meta charset="utf-8" />
<html>
<body>
<script type="module">
// Chart metadata as two-column "key,value" CSV text. The keys are looked up by
// name when the chart is rendered: left Y-axis title, data-source credit shown
// on the chart, and the file-name prefix used for exported images.
const str_meta_csv = `key,value
左側 Y 軸標題,單位:HDI指數(最佳值是1.00)
資料來源,製圖者: 卡米爾股份有限公司
圖表下載檔名,臺灣HDI`;
// HDI values as CSV text: the first column is the year and each remaining
// column is one country, with one row per year from 2005 to 2019.
const str_hdi_csv = `Years,Norway,Singapore,UnitedStates,Japan,Taiwan,Spain,France,Italy,China
2005,0.931,0.874,0.9,0.875,0.846,0.851,0.867,0.865,0.64
2006,0.934,0.881,0.903,0.879,0.859,0.856,0.871,0.87,0.653
2007,0.936,0.882,0.906,0.882,0.868,0.86,0.874,0.874,0.667
2008,0.937,0.888,0.911,0.883,0.857,0.863,0.875,0.876,0.678
2009,0.937,0.888,0.912,0.882,0.871,0.865,0.876,0.876,0.687
2010,0.94,0.909,0.916,0.887,0.873,0.872,0.879,0.879,0.699
2011,0.942,0.913,0.919,0.892,0.874,0.878,0.882,0.883,0.707
2012,0.941,0.918,0.92,0.897,0.879,0.881,0.885,0.882,0.716
2013,0.944,0.921,0.918,0.902,0.882,0.882,0.889,0.881,0.724
2014,0.944,0.926,0.92,0.906,0.882,0.888,0.893,0.882,0.731
2015,0.947,0.931,0.921,0.908,0.885,0.895,0.895,0.882,0.739
2016,0.95,0.935,0.922,0.912,0.903,0.899,0.894,0.884,0.746
2017,0.954,0.933,0.924,0.915,0.907,0.903,0.897,0.886,0.75
2018,0.956,0.936,0.925,0.917,0.911,0.905,0.898,0.89,0.755
2019,0.957,0.938,0.926,0.919,0.911,0.904,0.901,0.892,0.761`;
// Convert CSV text into an array of row objects keyed by the header names.
// Adapted from http://techslides.com/convert-csv-to-json-in-javascript
//
// str_csv: CSV text whose first non-empty line is the comma-separated header.
// Returns: one plain object per data row. Every value stays a string, and a
//          field missing from a short row is undefined. Empty input yields [].
// Note: fields are split on every comma, so quoted fields that contain commas
//       are not supported.
function csv_to_json(str_csv) {
  // Accept both LF and CRLF line endings, and drop empty lines so a trailing
  // newline does not produce a bogus row whose values are all undefined.
  const lines = str_csv.split(/\r?\n/).filter((line) => line.length > 0);
  if (lines.length === 0) {
    return [];
  }
  const headers = lines[0].split(",");
  return lines.slice(1).map((line) => {
    const cells = line.split(",");
    const dic = {};
    headers.forEach((header, j) => {
      dic[header] = cells[j];
    });
    return dic;
  });
}
// Build a plain lookup object from two-column CSV text: the first cell of each
// parsed row becomes the key and the second cell becomes its value.
// Parsing is delegated to Papa.parse, and every row it returns is used, so a
// header row such as "key,value" also ends up as an entry. When a key repeats,
// the last row wins.
function csv_to_dic(str_csv) {
  const { data: rows } = Papa.parse(str_csv);
  const dic = {};
  rows.forEach((row) => {
    dic[row[0]] = row[1];
  });
  return dic;
}
// Transpose a two-dimensional array: rows become columns and columns become
// rows, i.e. result[c][r] === ary[r][c]. This is a transpose, not a rotation.
//
// ary: array of row arrays. The column count is taken from the first row, and
//      cells missing from shorter rows come out as undefined.
// Returns: a new array of column arrays; an empty outer array yields [].
function transpose(ary) {
  // Without this guard ary[0] is undefined and the .map call below throws.
  if (ary.length === 0) {
    return [];
  }
  return ary[0].map((_, col) => ary.map((row) => row[col]));
}
// Custom element <cameo-line>: wraps the chart code in a reusable HTML tag
// (Web Components custom-element standard) that renders the HDI CSV data as an
// interactive amCharts 4 line chart.
class CameoLine extends HTMLElement {
  // Runs each time the element is attached to the document: creates a uniquely
  // identified container <div> and renders the chart into it.
  connectedCallback() {
    // Random id so that several <cameo-line> elements can coexist on one page.
    this.str_random_id = `id_${Math.random().toString(36).slice(2, 11)}`;
    this.innerHTML = `
      <div class="cameo-line" id="${this.str_random_id}"
        style="width: 100%; height: 600px;"></div>
    `;
    // chart_render is async, so report a failure instead of leaving an
    // unhandled promise rejection.
    this.chart_render().catch((err) => {
      console.error("cameo-line: chart rendering failed", err);
    });
  }

  // Runs when the element is removed from the document. The chart is disposed
  // here so a removed element does not keep its chart alive.
  disconnectedCallback() {
    if (this.chart) {
      this.chart.dispose();
      this.chart = null;
    }
  }

  // Builds the amCharts XY line chart inside the container created by
  // connectedCallback. Reads the CSV strings defined in this script and stores
  // the chart on this.chart. Returns a promise (the method is async).
  async chart_render() {
    // Replace any chart left over from an earlier render of this element.
    if (this.chart) {
      this.chart.dispose();
      this.chart = null;
    }

    // Header row of the CSV: the first name is the X (category) column and
    // every following name becomes one line series.
    const ary_headers = Papa.parse(str_hdi_csv).data[0];
    const str_x軸名字 = ary_headers[0];
    const ary_data = csv_to_json(str_hdi_csv);
    const dic_meta = csv_to_dic(str_meta_csv);

    am4core.useTheme(am4themes_animated);
    const chart = am4core.create(this.str_random_id, am4charts.XYChart);
    this.chart = chart;

    // Export menu: image formats plus printing, with the configured file name.
    chart.exporting.menu = new am4core.ExportMenu();
    chart.exporting.filePrefix = dic_meta["圖表下載檔名"];
    chart.exporting.useWebFonts = false;
    chart.exporting.menu.items = [
      {
        label: "...",
        menu: [
          {
            label: "Image",
            menu: [
              { type: "png", label: "PNG" },
              { type: "jpg", label: "JPG" },
              { type: "svg", label: "SVG" },
              { type: "pdf", label: "PDF" }
            ]
          },
          {
            label: "Print",
            type: "print"
          }
        ]
      }
    ];

    // Small semi-transparent data-source credit aligned to the right.
    const watermark = chart.createChild(am4core.Label);
    watermark.text = dic_meta["資料來源"];
    watermark.fontSize = 10;
    watermark.align = "right";
    watermark.fillOpacity = 0.5;

    chart.data = ary_data;

    const categoryAxis = chart.xAxes.push(new am4charts.CategoryAxis());
    categoryAxis.dataFields.category = str_x軸名字;
    categoryAxis.fontSize = "12px";

    const valueAxis = chart.yAxes.push(new am4charts.ValueAxis());
    valueAxis.renderer.inversed = false;
    valueAxis.title.text = dic_meta["左側 Y 軸標題"];
    valueAxis.renderer.minLabelPosition = 0.01;
    valueAxis.fontSize = "12px";

    // Palette indexed by CSV column position; index 0 belongs to the X column
    // and is therefore only used if the palette wraps around.
    const ary_color = [
      "#000000",
      "#EEAC5D",
      "#F7CB46",
      "#EAE660",
      "#8BC9BD",
      "#4DD6C1",
      "#31AFA0",
      "#357993",
      "#276074",
      "#3A5697",
      "#253875"
    ];

    chart.cursor = new am4charts.XYCursor();
    chart.cursor.behavior = "zoomY";

    // One line series per value column. Start at 1 so no throw-away series is
    // created for the X column.
    for (let i = 1; i < ary_headers.length; i++) {
      const str_y軸名字 = ary_headers[i];
      // Wrap around so a CSV with more columns than colours still gets a
      // defined colour.
      const color = am4core.color(ary_color[i % ary_color.length]);
      const series = chart.series.push(new am4charts.LineSeries());
      series.dataFields.valueY = str_y軸名字;
      series.dataFields.categoryX = str_x軸名字;
      series.name = str_y軸名字;
      series.bullets.push(new am4charts.CircleBullet());
      series.tooltipText = "{name}";
      series.tooltip.fontSize = "10px";
      series.legendSettings.valueText = "{valueY}";
      // NOTE(review): every series is created with visible = false; confirm
      // this is the intended initial state.
      series.visible = false;
      series.fill = color;
      series.stroke = color;
      const hover_state = series.segments.template.states.create("hover");
      hover_state.properties.strokeWidth = 1;
      series.segments.template.strokeWidth = 1;
    }

    chart.legend = new am4charts.Legend();
    chart.legend.position = "right";
    chart.legend.fontSize = "12px";

    // Hovering a legend item toggles the hover flag on each segment exposed by
    // that item's dataContext.
    const set_legend_hover = (is_hover) => (event) => {
      const segments = event.target.dataItem.dataContext.segments;
      segments.each((segment) => {
        segment.isHover = is_hover;
      });
    };
    chart.legend.itemContainers.template.events.on("over", set_legend_hover(true));
    chart.legend.itemContainers.template.events.on("out", set_legend_hover(false));

    chart.scrollbarX = new am4core.Scrollbar();
    chart.scrollbarX.align = "center";
  }
}
// Register the <cameo-line> tag. customElements.define throws when the name is
// already registered, so skip registration if this script is executed more
// than once on the same page.
if (!customElements.get("cameo-line")) {
  customElements.define("cameo-line", CameoLine);
}
</script>
<!-- 在 HTML 只要一行就可以引用我們自定義的 cameo-line 標籤,把圖表秀出來 -->
<cameo-line></cameo-line>
<script src="https://csb-0u8iq-kg8iy42x7.vercel.app/cdn.amcharts.com/lib/4/core.js"></script>
<script src="https://csb-0u8iq-kg8iy42x7.vercel.app/cdn.amcharts.com/lib/4/charts.js"></script>
<script src="https://csb-0u8iq-kg8iy42x7.vercel.app/cdn.amcharts.com/lib/4/themes/animated.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/PapaParse/5.3.0/papaparse.min.js"></script>
</body>
</html>
🐷莉亞 Julia 與 JavaScript 手牽手來合作
我們在實務上發現,用 Julia 進行每個月 30GB 以上的環保物聯網 CSV 資料前處理(包含欄位整併以及 CSV、JSON 互換等),速度大約是 Python 的 40 倍;原本需要多臺機器分散式運算,到了 Julia 手上通常只要一臺 Google Cloud Platform (GCP) VM RAM 8GB 主機就可以搞定多數 CSV 前處理情況(若以 GCP 先佔主機的方式啟用,每個月費用大約落在 NTD 600 以內)。
由於 JavaScript 速度上並不適合用來處理每個月 30GB 如此大量的資料流,所以我們可以讓Julia專注在資料前處理,而JavaScript用來進行數據動圖,讓讀者產生「美學驚嘆」的感受,這樣的搭配讓人非常喜歡!