In [1]:
%pylab inline
import urllib
from bs4 import BeautifulSoup
import pandas as pd
Populating the interactive namespace from numpy and matplotlib
In [2]:
def find_data(olimpiada):
    """Scrape the last-column scores from one edition's 2nd-stage results page.

    Parameters
    ----------
    olimpiada : int
        Edition number; it is substituted into the results-page URL.

    Returns
    -------
    list of int
        The integer found in the last ``<td>`` of every table row after the
        first one (the first row is presumably the header), in page order.
        Rows that have no cells, or whose last cell is not an integer, are
        skipped.

    Raises
    ------
    ValueError
        If the page has no ``<table class="results_table">``.
    """
    from contextlib import closing
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    url = "http://oi.edu.pl/l/%doi_2etap_wyniki/" % olimpiada
    # closing() guarantees the connection is released on Python 2 and 3 alike.
    with closing(urlopen(url)) as response:
        html = response.read()

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", {"class": "results_table"})
    if table is None:
        raise ValueError("no results_table found at %s" % url)

    scores = []
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if not cells:
            # e.g. a row made only of <th> cells
            continue
        try:
            scores.append(int(cells[-1].get_text().strip()))
        except ValueError:
            # Last cell is not a plain integer (blank, dash, text): skip the
            # row, but let unrelated errors propagate instead of hiding them.
            continue
    return scores
In [7]:
# Download the scores for every edition from the 3rd through the 22nd;
# each entry is an (edition number, list of scores) pair.
results = [(i, find_data(i)) for i in range(3, 23)]
In [11]:
# The lowest score scraped for each edition is used as that edition's
# threshold. (The variable keeps its original spelling because the
# summary-statistics cell reads it by this name.)
tresholds = [min(scores) for no, scores in results]
# Take the x values from `results` itself rather than repeating the edition
# range, so the two axes cannot drift apart if the scraped range changes.
editions = [no for no, scores in results]

plt.xkcd()
plt.plot(editions, tresholds)
plt.xlabel('Olympiad')
plt.ylabel('Threshold')
plt.title(u'Threshold in 2nd stages of Polish Olympiad in Informatics')
plt.show()
In [5]:
# Wrap the per-edition thresholds in a one-column frame; the bare expression
# on the last line makes the notebook render the summary statistics.
df_tresholds = pd.DataFrame(data=tresholds, columns=["treshold"])
df_tresholds.describe()
Out[5]:
treshold
count 20.000000
mean 124.200000
std 45.293894
min 57.000000
25% 80.000000
50% 126.000000
75% 156.000000
max 206.000000
In [6]:
# One score histogram per edition, laid out on a 4-column grid.
plt.xkcd()
plt.figure(figsize=(20, 20))

n_cols = 4
# Ceiling division: enough rows to hold every edition in `results`.
n_rows = -(-len(results) // n_cols)
# Identical bin edges in every panel keep the histograms directly comparable;
# they do not depend on the edition, so build them once.
bins = np.linspace(0, 400, 20)

# The grid position comes from the order in `results`, not from the edition
# number, so the layout stays valid if the scraped range changes.
for position, (no, scores) in enumerate(results, start=1):
    plt.subplot(n_rows, n_cols, position)
    # Fix the axis limits so all panels share one scale.
    plt.axis([0, 400, 0, 35])
    plt.hist(scores, bins=bins)
    plt.title("%d. Polish Olympiad in Informatics" % no)

© Bartosz Kostka 2013-2018