hnbot/plot.py

64 lines
2.0 KiB
Python
Raw Permalink Normal View History

2024-03-14 03:07:12 +00:00
#!/usr/bin/env python3
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import timedelta, datetime
# This script loads scraped Hacker News data from a CSV file with the headers
#   id, created_at, time, title, url, author, ndescendants, score, rank
# and saves one chart per item id (hn_<id>.png) showing score, comment count
# (ndescendants) and rank against the hours elapsed since the item was created.

CSV_PATH = 'hacker_news.csv'
# Format of the `created_at` column (the moment a sample row was recorded).
CREATED_AT_FORMAT = '%Y-%m-%d %H:%M:%S'


def hours_since_creation(sample_times, item_time):
    """Return the age of the item, in hours, at each sample time.

    Args:
        sample_times: Sequence/Index of strings in CREATED_AT_FORMAT.
        item_time: Creation time of the item as Unix epoch seconds.

    Returns:
        A pandas Index of floats, one per entry in `sample_times`.

    Both inputs are handled as naive timestamps, so they are assumed to be
    expressed in the same zone (epoch seconds are converted as UTC).
    """
    # pd.to_datetime(unit='s') gives the same naive-UTC value that the
    # deprecated datetime.utcfromtimestamp() used to produce.
    item_created_at = pd.to_datetime(item_time, unit='s')
    # Parse the whole column at once instead of one strptime call per row.
    sampled_at = pd.to_datetime(sample_times, format=CREATED_AT_FORMAT)
    return (sampled_at - item_created_at) / pd.Timedelta(hours=1)


def build_plot_title(title, url, hn_id):
    """Build the multi-line chart title: item title, item url, HN link.

    Missing values (None/NaN, which is how pandas reads an empty CSV field)
    are left out so the title never contains a literal 'nan' line.
    """
    hn_item_link = f'https://news.ycombinator.com/item?id={hn_id}'
    lines = [str(part) for part in (title, url) if not pd.isna(part)]
    lines.append(hn_item_link)
    return '\n'.join(lines)


def plot_item(hn_id, group):
    """Plot score, comments and rank for one item and save it as a PNG.

    Args:
        hn_id: The item id; used for the HN link and the output file name.
        group: DataFrame with the rows of this item, indexed by `created_at`.

    Returns:
        The file name the chart was written to.
    """
    # sort group by created_at ascending
    group = group.sort_values(by='created_at', ascending=True)
    # use relative time (hours since the item was created on HN) for x axis
    group.index = hours_since_creation(group.index, group['time'].iloc[0])

    plot_title = build_plot_title(
        group['title'].iloc[0], group['url'].iloc[0], hn_id)

    fig, ax1 = plt.subplots(figsize=(10, 5))
    ax1.set_title(plot_title)
    ax1.set_xlabel('hours')
    ax1.set_ylabel('score, comments')
    ax1.plot(group['score'], label='score', color='blue')
    ax1.plot(group['ndescendants'], label='comments', color='orange')
    ax1.legend()
    # show every 50th sample as a tick; ax1 is still the current axes here,
    # so plt.xticks applies to it
    # TODO: do something more clever here
    plt.xticks(group.index[::50], rotation=45)

    # rank gets its own y axis on the right-hand side
    ax2 = ax1.twinx()
    ax2.set_ylabel('rank')
    ax2.set_ylim(1, 30)
    ax2.plot(group['rank'], label='rank', color='green')
    ax2.legend(loc='upper right')

    fig.tight_layout()
    out_path = f'hn_{hn_id}.png'
    fig.savefig(out_path)
    # close explicitly so figures do not pile up across many items
    plt.close(fig)
    return out_path


def main(csv_path=CSV_PATH):
    """Load the CSV and write one chart per item id."""
    df = pd.read_csv(csv_path, index_col='created_at')
    # Grouping by the plain column name yields scalar keys, so no 1-tuple
    # unpacking is needed regardless of the pandas version.
    for hn_id, group in df.groupby('id'):
        out_path = plot_item(hn_id, group)
        print(f'Saved {out_path}')


if __name__ == '__main__':
    main()