Simon Danisch / Jan 14 2019

Top Python packages and C/C++

I took the first three Google results for "top Python packages" and used the first 10 packages from each:

# Source A, first 10 from:
# https://pythontips.com/2018/06/03/top-14-most-famous-python-libraries-frameworks/
A = [
  "requests", "tqdm", "pillow", "scrapy", "numpy",
  "pandas", "scapy", "matplotlib", "kivy", "nltk",
]
# Source B, first 10 from:
# https://tryolabs.com/blog/2017/12/19/top-10-python-libraries-of-2017/
B = [
  "Pipenv", "PyTorch", "Caffe", "Pendulum", "Dash",
  "PyFlux", "python-fire", "imbalanced-learn", "FlashText", "Luminoth",
]
# Source C, first 10 from:
# https://pythontips.com/2013/07/30/20-python-libraries-you-cant-live-without/
C = [
  "Requests", "Scrapy", "SQLAlchemy", "BeautifulSoup",
  "Twisted", "NumPy", "SciPy", "matplotlib",
]
# Every name from the three lists is lowercased; collecting them into a Set
# removes duplicates. Packages whose name must keep its exact casing are
# appended literally at the end of the `vcat` call.
packages = Set(vcat(lowercase.(A), lowercase.(B), lowercase.(C), "wxPython", "Pillow"))
using JSON

Find each package's GitHub repository and check whether C or C++ is among the languages it uses.

"""
    name2repo(name)

Search GitHub for repositories matching `name` (sorted by stars) and return the
first hit whose `"name"` field equals `name` exactly, or `nothing` when no hit
matches.

The raw search response is stored in the file `name * ".json"` and reused on
later calls, so the API is only queried when that file does not exist yet.
"""
function name2repo(name)
  cachefile = name * ".json"
  url = "https://api.github.com/search/repositories?q=$name&sort=stars"
  # Only query the API when there is no cached response on disk.
  isfile(cachefile) || download(url, cachefile)
  response = JSON.parsefile(cachefile)
  # The comparison is case-sensitive, so `name` must use the repository's casing.
  for item in response["items"]
    item["name"] == name && return item
  end
  return nothing
end
"""
    getlangs(repo)

Return the parsed JSON found at `repo["languages_url"]`.

The response is stored in the file `repo["name"] * "_lang.json"` and reused on
later calls; the URL is only downloaded when that file does not exist yet.
"""
function getlangs(repo)
  cachefile = repo["name"] * "_lang.json"
  if !isfile(cachefile)
    download(repo["languages_url"], cachefile)
  end
  return JSON.parsefile(cachefile)
end
# For every package that resolves to a GitHub repository, record whether the
# repository's language data lists C or C++ (package name => Bool).
languages = Pair{String,Bool}[]
# Packages for which `name2repo` found no repository with a matching name.
not_github = String[]
for pkg in packages
  repo = name2repo(pkg)
  if repo === nothing
    push!(not_github, pkg)
    continue
  end
  langs = getlangs(repo)
  # Skip packages for which no language data came back.
  langs === nothing && continue
  push!(languages, pkg => (haskey(langs, "C++") || haskey(langs, "C")))
end
packages
not_github
length(languages)
for (p, c) in languages
  println(p, " contains C/C++: ", c)
end
# Fraction of the resolved packages whose repository contains C or C++.
# `count` on the typed vector also works when `languages` is empty (yielding
# NaN for the ratio) instead of throwing while reducing an empty `Any` array.
count(last, languages) / length(languages)