scrape-dandi/scrape_dandi/plot.py
2023-10-30 12:18:07 -07:00

40 lines
No EOL
964 B
Python

from matplotlib import pyplot as plt
import matplotlib.ticker as tkr
import numpy as np
import json
def sizeof_fmt(x, pos=None):
    """Format a byte count as a human-readable tick label.

    The signature matches the ``(value, position)`` callback expected by
    ``matplotlib.ticker.FuncFormatter``.

    Args:
        x: Tick value, interpreted as a number of bytes.
        pos: Tick position passed by matplotlib; not used.

    Returns:
        An empty string for negative values; otherwise the value scaled by
        successive factors of 1024 and suffixed with its unit, for example
        ``"1.5 kB"``.
    """
    if x < 0:
        return ""
    units = ('bytes', 'kB', 'MB', 'GB', 'TB')
    for x_unit in units[:-1]:
        if x < 1024.0:
            return "%3.1f %s" % (x, x_unit)
        x /= 1024.0
    # Values of 1024 TB or more stay expressed in the largest unit instead of
    # falling off the end of the loop and returning None.
    return "%3.1f %s" % (x, units[-1])
def plot_sizes(meta: dict, min_bar_size=1000000000):
    """Plot per-entry sizes and their running total in two stacked panels.

    The top panel is a log-scale bar chart of each entry's size; the bottom
    panel is the cumulative sum over increasing ID. Both y-axes are labelled
    with ``sizeof_fmt``, i.e. the sizes are displayed as byte counts.

    Args:
        meta: Mapping whose keys are integer IDs written as strings and
            whose values are dicts that may carry a ``'size'`` entry. IDs
            with no entry, or with no ``'size'``, count as 0.
            NOTE(review): presumably one entry per dandiset -- confirm
            against the scraper that writes this mapping.
        min_bar_size: Sizes below this value are drawn as 0 in the bar
            chart. The cumulative curve always uses the unfiltered sizes.

    Raises:
        ValueError: If ``meta`` is empty.
    """
    if not meta:
        raise ValueError("meta is empty: nothing to plot")

    # The range must include the largest ID itself; arange(max_id) would stop
    # one short and silently drop the last entry from both panels.
    max_id = max(int(k) for k in meta)
    x = np.arange(max_id + 1)
    y = np.array([meta.get(str(i), {}).get('size', 0) for i in x])

    # Accumulate before filtering so small entries still count toward the
    # running total.
    cumulative = np.cumsum(y)
    y[y < min_bar_size] = 0

    fig, ax = plt.subplots(2, 1)
    ax[0].bar(x, y)
    ax[0].set_yscale('log')
    ax[0].yaxis.set_major_formatter(tkr.FuncFormatter(sizeof_fmt))
    ax[1].plot(x, cumulative)
    ax[1].yaxis.set_major_formatter(tkr.FuncFormatter(sizeof_fmt))
    plt.show()
if __name__ == "__main__":
    # JSON text is UTF-8 by specification; do not rely on the platform's
    # default encoding when reading it.
    with open('metadata.json', 'r', encoding='utf-8') as jfile:
        meta = json.load(jfile)
    # Entry '108' is excluded from the plot. pop() with a default keeps the
    # script working when that entry is absent from the file.
    # NOTE(review): the reason for excluding '108' is not recorded here --
    # presumably an outlier; confirm.
    meta.pop('108', None)
    plot_sizes(meta)