Skip to content
Snippets Groups Projects
Commit cda79c35 authored by Antonio MORAIS
Browse files

Added Goutte implementation port

parent 05b2d5f3
No related branches found
No related tags found
No related merge requests found
Pipeline #54295 failed
This commit is part of merge request !103. Comments created here will be created in the context of that merge request.
%% Cell type:code id: tags:
``` python
from scipy.stats import beta
import matplotlib.pyplot as plt
import numpy
import random
# Precision comparison inputs: TP and FP counts for each system.
TP1, FP1 = 10, 10  # system 1
TP2, FP2 = 3, 2  # system 2

# Number of random draws per distribution; more draws give smoother estimates.
nbsamples = 10000
# lambda: constant added to the counts when forming the Beta and Gamma
# shape parameters used further down.
lbd = 0.5
```
%% Cell type:code id: tags:
``` python
# Evaluate both Beta(TP + lbd, FP + lbd) densities on a shared grid that
# stays strictly inside (0, 1).
x = numpy.linspace(0.01, 0.99, nbsamples)
pdf1 = beta.pdf(x, TP1 + lbd, FP1 + lbd)
pdf2 = beta.pdf(x, TP2 + lbd, FP2 + lbd)

# One curve per system, labelled with the counts that define it.
for density, tp, fp in ((pdf1, TP1, FP1), (pdf2, TP2, FP2)):
    plt.plot(x, density, label='TP = {}, FP = {}'.format(tp, fp))
plt.title('Beta distributions')
plt.legend()
```
%% Output
<matplotlib.legend.Legend at 0x7f65a3d3c828>
%% Cell type:code id: tags:
``` python
def randombeta(nbsamples, shape1, shape2):
    """Draw ``nbsamples`` independent Beta(shape1, shape2) variates.

    Every value is produced by ``random.betavariate``, so the draws follow
    the state of the standard-library ``random`` generator and can be made
    reproducible with ``random.seed``.

    Args:
        nbsamples: Number of values to draw.
        shape1: First shape parameter passed to ``random.betavariate``.
        shape2: Second shape parameter passed to ``random.betavariate``.

    Returns:
        A one-dimensional float64 ``numpy`` array of length ``nbsamples``.
    """
    draws = (random.betavariate(shape1, shape2) for _ in range(nbsamples))
    return numpy.fromiter(draws, dtype=float)
# Sample the precision distribution of each system.
p1 = randombeta(nbsamples, TP1 + lbd, FP1 + lbd)
p2 = randombeta(nbsamples, TP2 + lbd, FP2 + lbd)

# Fraction of paired draws in which system 2's sample exceeds system 1's.
wins2 = numpy.count_nonzero(p2 > p1)
print('Empirical probability that system 2 is better than system 1 = {}'.format(wins2 / nbsamples))
```
%% Output
Empirical probability that system 2 is better than system 1 = 0.6548
%% Cell type:code id: tags:
``` python
# Histogram (25 bins) of the sampled precision values for system 2.
# The y-label call stays last so the cell still displays its return value.
plt.title('Distribution (empirical) for System 2 Precision')
plt.xlabel('Precision')
plt.hist(p2, bins=25)
plt.ylabel('Count')
```
%% Output
Text(0, 0.5, 'Count')
%% Cell type:code id: tags:
``` python
# F1-score
def randomgamma(nbsamples, shape, scale):
    """Draw ``nbsamples`` independent Gamma(shape, scale) variates.

    Every value is produced by ``random.gammavariate(shape, scale)``, so the
    draws follow the state of the standard-library ``random`` generator and
    can be made reproducible with ``random.seed``.

    Args:
        nbsamples: Number of values to draw.
        shape: Shape parameter, passed as the first argument of
            ``random.gammavariate``.
        scale: Scale parameter, passed as the second argument of
            ``random.gammavariate``.

    Returns:
        A one-dimensional float64 ``numpy`` array of length ``nbsamples``.
    """
    draws = (random.gammavariate(shape, scale) for _ in range(nbsamples))
    return numpy.fromiter(draws, dtype=float)
# FN counts for each system; combined with the TP/FP counts defined earlier
# they set the Gamma shape parameters below.
FN1 = 0
FN2 = 7

# F1 samples are built as U / (U + V), where
#   U ~ Gamma(shape=TP + lbd,          scale=2)
#   V ~ Gamma(shape=FP + FN + 2 * lbd, scale=1)
# The draw order (U1, V1, U2, V2) is kept so a seeded run gives the same
# numbers as before.
U1 = randomgamma(nbsamples, shape=TP1 + lbd, scale=2)
V1 = randomgamma(nbsamples, shape=FP1 + FN1 + 2 * lbd, scale=1)
F1scores1 = U1 / (U1 + V1)

U2 = randomgamma(nbsamples, shape=TP2 + lbd, scale=2)
V2 = randomgamma(nbsamples, shape=FP2 + FN2 + 2 * lbd, scale=1)
F1scores2 = U2 / (U2 + V2)

# Side-by-side histograms (25 bins each) of the sampled F1 scores.
fig, axs = plt.subplots(1, 2, figsize=(15, 5))
fig.suptitle('Empirical distribution for F1 scores')

axs[0].hist(F1scores1, bins=25)
axs[0].set_title('System 1')
axs[0].set_xlabel('F1 score')

axs[1].hist(F1scores2, bins=25)
axs[1].set_title('System 2')
axs[1].set_xlabel('F1 score')
axs[1].set_ylabel('count')

# Fraction of paired draws in which system 1's F1 sample exceeds system 2's.
print('Empirical probability that system 1 is better than system 2 in F1 score = {}'
      .format(numpy.count_nonzero(F1scores1 > F1scores2) / nbsamples))
```
%% Output
Empirical probability that system 1 is better than system 2 in F1 score = 0.9245
This diff is collapsed.
coverage.xml 0 → 100644
+ 2346
0
View file @ cda79c35
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment