diff --git a/.gitignore b/.gitignore index 41c1a66..5d9a769 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,5 @@ .env .vscode -# python virtual environment -venv # go executable hnbot -hnbot.sqlite3 - -*.csv -*.png - diff --git a/hacker_news.csv.sql b/hacker_news.csv.sql deleted file mode 100644 index 81ddbef..0000000 --- a/hacker_news.csv.sql +++ /dev/null @@ -1,11 +0,0 @@ -.headers on -.mode csv -.output hacker_news.csv -SELECT hn.id, hn.created_at, hn.time, hn.title, hn.url, hn.author, hn.ndescendants, hn.score, hn.rank -FROM ( - SELECT id, MAX(created_at) AS created_at FROM hn_items - WHERE rank = 1 AND length(title) >= 5 - GROUP BY id - ORDER BY time ASC -) t JOIN hn_items hn ON t.id = hn.id -ORDER BY hn.id, hn.created_at DESC; diff --git a/hnbot.sqlite3 b/hnbot.sqlite3 new file mode 100644 index 0000000..b3f103f Binary files /dev/null and b/hnbot.sqlite3 differ diff --git a/plot.py b/plot.py deleted file mode 100644 index 4a5f3ca..0000000 --- a/plot.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 - -import pandas as pd -import matplotlib.pyplot as plt -import matplotlib.dates as mdates -from datetime import timedelta, datetime - -# this script loads data from a csv file -# with headers id, created_at, time, title, url, author, ndescendants, score, rank -# and then saves a plot with score, ndescendants and rank for each id - -# load data from csv file -df = pd.read_csv('hacker_news.csv', index_col='created_at') - -# group pandas dataframe by id -grouped = df.groupby(['id']) - -# create one chart per id and plot score, ndescendants and rank in each chart -for [hn_id], group in grouped: - # sort group by created_at ascending - group = group.sort_values(by='created_at', ascending=True) - - # this is the time when the item was created on HN - item_created_at = datetime.utcfromtimestamp(group['time'].values[0]) - - # use relative time for x axis - def date_to_relative(d1): - date_fmt = '%Y-%m-%d %H:%M:%S' - current = datetime.strptime(d1, date_fmt) - return (current - item_created_at) / timedelta(hours=1) - - group.index = group.index.map(date_to_relative) - - # title generation - hn_item_title = group['title'].values[0] - hn_item_url = group['url'].values[0] - hn_item_link = f'https://news.ycombinator.com/item?id={hn_id}' - plot_title = f'{hn_item_title}\n{hn_item_url}\n{hn_item_link}' - - fig, ax1 = plt.subplots(figsize=(10, 5)) - - ax1.set_title(plot_title) - ax1.set_xlabel('hours') - ax1.set_ylabel('score, comments') - ax1.plot(group['score'], label='score', color='blue') - ax1.plot(group['ndescendants'], label='comments', color='orange') - ax1.legend() - - # show every 50th date - # TODO: do something more clever here - plt.xticks(group.index[::50], rotation=45) - - ax2 = ax1.twinx() - ax2.set_ylabel('rank') - ax2.set_ylim(1, 30) - ax2.plot(group['rank'], label='rank', color='green') - ax2.legend(loc='upper right') - - plt.tight_layout() - plt.savefig(f'hn_{hn_id}.png') - plt.close() - - print(f'Saved hn_{hn_id}.png')