Commit 01c26849 authored by Pavel KORSHUNOV's avatar Pavel KORSHUNOV

public version of the project

parents
*~
*.swp
*.pyc
bin
eggs
parts
.installed.cfg
.mr.developer.cfg
*.egg-info
src
develop-eggs
sphinx
dist
.nfs*
.gdb_history
build
.coverage
record.txt
miniconda.sh
miniconda/
\ No newline at end of file
Copyright (c) 2019 Idiap Research Institute, http://www.idiap.ch/
Written by Pavel Korshunov <pavel.korshunov@idiap.ch>
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
include README.rst buildout.cfg version.txt requirements.txt
# NOTE(review): 'recursive-include' expects a directory as its first argument
# (recursive-include <dir> <patterns>); here '*.rst' is interpreted as the
# directory name, so this line likely matches nothing. Probably intended
# 'global-include *.rst *.png ...' or 'recursive-include . *.rst ...' -- confirm.
recursive-include *.rst *.png *.ico *.txt *.sh *.py
This diff is collapsed.
# see https://docs.python.org/3/library/pkgutil.html
# Declare this as a pkgutil-style namespace package: extend __path__ so that
# sub-packages installed by other distributions under the same top-level
# package name remain importable.
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
#!/bin/bash
## Batch script for computing diarization error rate (DER) for a cross database study
## with different combinations of training, validation, and test protocols.

# the parent directory with experiments (kept for reference; scores are read from ${resdir})
workdir="crossdb"
# directory containing the pre-computed RTTM score files
resdir="scores"
# protocols used for hyper-parameter tuning and the matching short database tags (index-aligned)
hyperprotocols=(AMI.SpeakerDiarization.MixHeadset.development CallHome.SpeakerDiarization.CallHomeProtocol.development)
dbapply=(ami call)
# databases the models were trained on: long and short names, index-aligned
dbnames=(callhome callhomesre meta4db meta9db)
dbshort=(call sre meta4 meta9)
# protocols to evaluate and the subsets to score
protocols=(AMI.SpeakerDiarization.MixHeadset CallHome.SpeakerDiarization.CallHomeProtocol Odessa.SpeakerDiarization.Fullset)
sets=(development test)

for protocol in "${protocols[@]}"; do
    # database name == first dot-separated component of the protocol
    dbappliedto="${protocol%%.*}"
    for aset in "${sets[@]}"; do
        plength=${#hyperprotocols[@]}
        for ((j = 0; j < plength; j++)); do
            hyperprotocol="${hyperprotocols[$j]}"
            dba="${dbapply[$j]}"
            length=${#dbshort[@]}
            for ((i = 0; i < length; i++)); do
                short="${dbshort[$i]}"
                dbname="${dbnames[$i]}"
                # all expansions quoted so paths survive word splitting/globbing
                python ../scripts/der_for_rttm.py --scores \
                    "${resdir}/trainedon_${dbname}_hyperon_${dba}_scoresfor_${dbappliedto}_${aset}/${protocol}.${aset}.rttm" \
                    --subset "${aset}" --output-der \
                    "${resdir}/trainedon_${dbname}_hyperon_${dba}_scoresfor_${dbappliedto}_${aset}/${protocol}.${aset}_der.txt" \
                    --dbtrained-on "${dbname}" --dbvalhyped-on "${dba}" "${protocol}"
            done
        done
    done
done
#!/bin/bash
## Convert all given audio files to mono 16-bit PCM WAV at 16 kHz using ffmpeg.
## Usage:
##   ./convert_audio.sh $(find inputdir/ -name '*.MPG')
outdir="/path/to/output/callhome_16k"
for f in "${@}"; do
    echo "-- ${f}"
    # strip directory and extension to build the output file name
    prefix=${f##*/}
    prefix=${prefix%.*}
    echo "prefix <${prefix}>"
    outfile="${outdir}/${prefix}.wav"
    # quote file arguments so paths with spaces survive; -y overwrites existing output
    ffmpeg -i "${f}" -acodec pcm_s16le -ac 1 -ar 16000 -y "${outfile}"
done
#!/bin/bash
## Convert Sphere-formatted data of the NIST SRE database to WAV format
## using the sph2pipe utility provided by NIST SRE.
## Usage:
##   ./convert_sph2wav.sh $(find path/to/sphere/formatted/database/ -name '*.sph')
cmdexe="/path/to/sph2pipe_v2.5/sph2pipe"
outdir="/the/wav/of/callhome/"
for f in "${@}"; do
    echo "-- ${f}"
    # strip directory and extension to build the output file name
    prefix=${f##*/}
    prefix=${prefix%.*}
    echo "prefix <${prefix}>"
    outfile="${outdir}/${prefix}.wav"
    # -f rif selects RIFF (WAV) output; quote paths against word splitting
    "${cmdexe}" -f rif "${f}" "${outfile}"
done
#!/bin/bash
## Run batch evaluations of different combinations of pre-trained and pre-validated models.
## Different combinations of models are applied to different application protocols
## and scores are computed for them.

# the parent directory with experiments
workdir="path/to/folder/with/experiments/crossdb"

# NOTE(review): the assignments below are alternative configuration presets;
# each later assignment overwrites the earlier one, so only the LAST value of
# every variable is used by the loop at the bottom. Comment/uncomment blocks
# to select a configuration.
protocols=(Odessa.SpeakerDiarization.Fullset CallHome.SpeakerDiarization.CallHomeProtocol AMI.SpeakerDiarization.MixHeadset)
dbtrshort=(callhome callhomesre meta4db meta9db)
dbtrainprotocols=(CallHome.SpeakerDiarization.CallHomeProtocol.train CallHomeSRE.SpeakerDiarization.All.train X.SpeakerDiarization.SmallerProtocol.train X.SpeakerDiarization.MetaProtocolSAD.train)
dbshort=(call sre meta4 meta9)
tasks=(scd emb)
# model numbers of one database that was validated to be best for other databases
# (currently not referenced by the loop below; kept for bookkeeping)
# ami sad weights
dbweights=(0020 0036 0539)
# ami scd weights
dbweights=(0527 0075 0705)
# ami emb weights
dbweights=(0070 0365 0090 0145)
# call sad weights
dbweights=(0015 0045 0695)
# call scd weights
dbweights=(0115 0080 0635)
# call emb weights
dbweights=(0220 0380 0680 0440)
# sad model number from dihard database, evaluated on it
dbweights=(0140)
# apply to these protocols
protocols=(DIHARD2.SpeakerDiarization.All)
#protocols=(AMI.SpeakerDiarization.MixHeadset CallHome.SpeakerDiarization.CallHomeProtocol)
# short name for database on which the model is trained
dbtrshort=(dihard)
#dbtrshort=(callhome callhomesre meta9db)
# protocol on which the model is trained
dbtrainprotocols=(X.SpeakerDiarization.DIHARD_Official)
#dbtrainprotocols=(CallHome.SpeakerDiarization.CallHomeProtocol CallHomeSRE.SpeakerDiarization.All X.SpeakerDiarization.MetaProtocolSAD)
# protocol on which the trained model is validated
dbvalidprotocols=(X.SpeakerDiarization.DIHARD_Official)
#dbvalidprotocols=(AMI.SpeakerDiarization.MixHeadset CallHome.SpeakerDiarization.CallHomeProtocol)
# apply for which task
tasks=(sad)

for protocol in "${protocols[@]}"; do
    for task in "${tasks[@]}"; do
        length=${#dbtrshort[@]}
        for ((i = 0; i < length; i++)); do
            trainfolder="${dbtrainprotocols[$i]}"
            # NOTE: this scalar assignment shadows the earlier dbshort array,
            # which is not used after this point
            dbshort="${dbtrshort[$i]}"
            for validprotocol in "${dbvalidprotocols[@]}"; do
                # The commented out part is for the submission to the SGE grid, if available
                # jman submit -q q1d -m 6G --io-big --log-dir=gridtk_logs/a${task}${dbshort} \
                #     --name a${task}${dbshort} --
                pyannote-speech-detection apply \
                    "${workdir}/${task}_${dbshort}/train/${trainfolder}.train/validate/${validprotocol}.development" "${protocol}"
            done
        done
    done
done
#!/bin/bash
## Run batch hyper-parameter training for different configurations of the
## diarization pipeline, relying on different combinations of pre-trained
## and pre-validated models.

# the parent directory with experiments
workdir="path/to/folder/with/experiments/crossdb"
# where the scores with hyperparameters will be written
# (fixed: the original line used Python-style concatenation `workdir + "/scores"`,
# which is not valid shell syntax and left resdir set to the literal "workdir")
resdir="${workdir}/scores"
# protocols used for hyper-parameter tuning and the matching short database tags (index-aligned)
hyperprotocols=(AMI.SpeakerDiarization.MixHeadset.development CallHome.SpeakerDiarization.CallHomeProtocol.development)
dbapply=(ami call)
# databases the models were trained on: long and short names, index-aligned
dbnames=(callhome callhomesre meta4db meta9db)
dbshort=(call sre meta4 meta9)
#protocols=(AMI.SpeakerDiarization.MixHeadset CallHome.SpeakerDiarization.CallHomeProtocol)
#sets=(development test)
protocols=(Odessa.SpeakerDiarization.Fullset)
sets=(test)

for protocol in "${protocols[@]}"; do
    # database name == first dot-separated component of the protocol
    dbappliedto="${protocol%%.*}"
    for aset in "${sets[@]}"; do
        plength=${#hyperprotocols[@]}
        for ((j = 0; j < plength; j++)); do
            hyperprotocol="${hyperprotocols[$j]}"
            dba="${dbapply[$j]}"
            length=${#dbshort[@]}
            for ((i = 0; i < length; i++)); do
                short="${dbshort[$i]}"
                dbname="${dbnames[$i]}"
                # The commented out part is for the submission to the SGE grid, if available
                # jman submit -q q1d -m 8G --io-big --log-dir=gridtk_logs/${dba}dia${short} \
                #     --name ${dba}dia${short} --
                pyannote-pipeline apply \
                    "${workdir}/${dbname}_dia_${dba}_hyper/train/${hyperprotocol}/params.yml" --subset="${aset}" \
                    "${protocol}" "${resdir}/trainedon_${dbname}_hyperon_${dba}_scoresfor_${dbappliedto}_${aset}"
            done
        done
    done
done
# pyannote.database custom protocol definitions: the cross-database ("X")
# meta protocols and the DIHARD2 file-list based protocol.
Protocols:
  X:
    SpeakerDiarization:
      # Meta protocol pooling several corpora for SAD training.
      MetaProtocolSAD:
        train:
          REPERE.SpeakerDiarization.Phase1: [train]
          REPERE.SpeakerDiarization.Phase2: [train]
          Ester.SpeakerDiarization.Ester1: [train]
          Ester.SpeakerDiarization.Ester2: [train]
          # NOTE(review): the *test* subset of CallHomeSRE is pooled into
          # training here -- looks deliberate, but confirm
          CallHomeSRE.SpeakerDiarization.All: [test]
          AMI.SpeakerDiarization.MixHeadset: [train]
          CallHome.SpeakerDiarization.CallHomeProtocol: [train]
          LibriSpeech.SpeakerDiarization.LibriSpeechClean: [train]
          LibriSpeech.SpeakerDiarization.LibriSpeechOther: [train]
        development:
          AMI.SpeakerDiarization.MixHeadset: [development]
        test:
          AMI.SpeakerDiarization.MixHeadset: [test]
      # Smaller variant of the meta protocol with four source corpora.
      SmallerProtocol:
        train:
          CallHomeSRE.SpeakerDiarization.All: [test]
          AMI.SpeakerDiarization.MixHeadset: [train]
          CallHome.SpeakerDiarization.CallHomeProtocol: [train]
          LibriSpeech.SpeakerDiarization.LibriSpeechOther: [train]
        development:
          AMI.SpeakerDiarization.MixHeadset: [development]
        test:
          AMI.SpeakerDiarization.MixHeadset: [test]
  DIHARD2:
    SpeakerDiarization:
      # Subsets defined by RTTM reference folders, UEM files, and uri lists.
      All:
        train:
          annotation: /path/to/database/speech/files/dihard2/annotations/LDC2019E31_Second_DIHARD_Challenge_Development_Data/data/single_channel/rttm/
          annotated: /path/to/database/speech/files/dihard2/annotations/LDC2019E31_Second_DIHARD_Challenge_Development_Data/data/single_channel/uem/all.uem
          uris: /path/to/database/speech/files/dihard2/DIHARD2.DEV.trn.lst
        development:
          annotation: /path/to/database/speech/files/dihard2/annotations/LDC2019E31_Second_DIHARD_Challenge_Development_Data/data/single_channel/rttm/
          annotated: /path/to/database/speech/files/dihard2/annotations/LDC2019E31_Second_DIHARD_Challenge_Development_Data/data/single_channel/uem/all.uem
          uris: /path/to/database/speech/files/dihard2/DIHARD2.DEV.dev.lst
        # test has no 'uris' list -- presumably all files in the evaluation UEM are used
        test:
          annotation: /path/to/database/speech/files/dihard2/annotations/LDC2019E32_Second_DIHARD_Challenge_Evaluation_Data_V1.1/data/single_channel/rttm/
          annotated: /path/to/database/speech/files/dihard2/annotations/LDC2019E32_Second_DIHARD_Challenge_Evaluation_Data_V1.1/data/single_channel/uem/all.uem
# Mapping from database name to audio file path templates ({uri} is replaced
# with each file's unique identifier by pyannote.database).
# NOTE(review): some templates are absolute (leading '/') and some relative --
# confirm the relative ones resolve correctly at run time.
Databases:
  AMI: path/to/database/speech/files/amicorpus/*/audio/{uri}.wav
  MUSAN: path/to/database/speech/files/musan/{uri}.wav
  Odessa: /path/to/database/speech/files/odessa_db/audio/original/{uri}.wav
  CallHome: /path/to/database/speech/files/callhome_16k/{uri}.wav
  CallHomeSRE: /path/to/database/speech/files/sre00_callhome_16k/{uri}.wav
  LibriSpeech: path/to/database/speech/files/LibriSpeech/{uri}.wav
  # Ester audio is spread over several locations; each template is a candidate
  Ester:
    - path/to/database/speech/files/ester/audio/16k/ESTER/DATA/TRAIN/{uri}.wav
    - path/to/database/speech/files/ester/audio/16k/ESTER/DATA/DEV/{uri}.wav
    - path/to/database/speech/files/ester/audio/16k/ESTER/DATA/TEST/{uri}.wav
    - path/to/database/speech/files/ester2/audio/16k/waveform/{uri}.wav
  REPERE: path/to/database/speech/files/repere/{uri}.wav
  DIHARD2: /path/to/database/speech/files/dihard2/dev/{uri}.wav
# Training configuration for the speaker embedding model:
# librosa MFCC features + ClopiNet architecture trained with a triplet loss.
feature_extraction:
  name: LibrosaMFCC          # use MFCC from librosa
  params:
    e: False                 # do not use energy
    De: True                 # use energy 1st derivative
    DDe: True                # use energy 2nd derivative
    coefs: 19                # use 19 MFCC coefficients
    D: True                  # use coefficients 1st derivative
    DD: True                 # use coefficients 2nd derivative
    duration: 0.025          # extract MFCC from 25ms windows
    step: 0.010              # extract MFCC every 10ms
    sample_rate: 16000       # convert to 16KHz first (if needed)
data_augmentation:
  name: AddNoise             # add noise on-the-fly
  params:
    snr_min: 10              # using random signal-to-noise
    snr_max: 20              # ratio between 10 and 20 dBs
    collection: MUSAN.Collection.BackgroundNoise # use background noise from MUSAN
                                                 # (needs pyannote.db.musan)
architecture:
  name: ClopiNet
  params:
    instance_normalize: True # normalize each input sequence
    rnn: LSTM
    recurrent: [256, 256, 256]
    bidirectional: True
    pooling: sum
    batch_normalize: True
    normalize: False
approach:
  name: TripletLoss
  params:
    metric: angular          # distance used to compare embeddings
    clamp: sigmoid
    margin: 0.0
    duration: 3              # sequences are 3s long
    sampling: all
    per_fold: 20
    per_label: 3
    parallel: 1
    per_epoch: 0.5
    label_min_duration: 60   # only use speakers with at least 60s of speech
scheduler:
  name: ConstantScheduler
  params:
    learning_rate: auto      # automatically guess the learning rate
# Training configuration for the speaker change detection (scd) model:
# librosa MFCC features + stacked bidirectional LSTMs with a cyclic LR schedule.
task:
  name: SpeakerChangeDetection
  params:
    duration: 3.2            # sequences are 3.2s long
    collar: 0.100            # upsampling collar = 100ms
    batch_size: 32           # 32 sequences per batch
    per_epoch: 1             # one epoch = 1 day of audio
    parallel: 1              # number of generators pre-fetching training data
data_augmentation:
  name: AddNoise             # add noise on-the-fly
  params:
    snr_min: 10              # using random signal-to-noise
    snr_max: 20              # ratio between 10 and 20 dBs
    collection: MUSAN.Collection.BackgroundNoise # use background noise from MUSAN
                                                 # (needs pyannote.db.musan)
feature_extraction:
  name: LibrosaMFCC          # use MFCC from librosa
  params:
    e: False                 # do not use energy
    De: True                 # use energy 1st derivative
    DDe: True                # use energy 2nd derivative
    coefs: 19                # use 19 MFCC coefficients
    D: True                  # use coefficients 1st derivative
    DD: True                 # use coefficients 2nd derivative
    duration: 0.025          # extract MFCC from 25ms windows
    step: 0.010              # extract MFCC every 10ms
    sample_rate: 16000       # convert to 16KHz first (if needed)
architecture:
  name: StackedRNN
  params:
    instance_normalize: True # normalize sequences
    rnn: LSTM                # use LSTM (could be GRU)
    recurrent: [32, 20]      # two layers with 32 and 20 hidden states
    bidirectional: True      # bidirectional LSTMs
    linear: [40, 10]         # add two linear layers at the end
scheduler:
  name: CyclicScheduler      # use cyclic learning rate (LR) scheduler
  params:
    learning_rate: auto      # automatically guess LR upper bound
    epochs_per_cycle: 14     # 14 epochs per cycle
# Training configuration for the speech activity detection (sad) model:
# librosa MFCC features + stacked bidirectional LSTMs with a cyclic LR schedule.
task:
  name: SpeechActivityDetection
  params:
    duration: 2.0            # sequences are 2s long
    batch_size: 64           # 64 sequences per batch
    per_epoch: 1             # one epoch = 1 day of audio
    parallel: 1              # number of generators pre-fetching training data
data_augmentation:
  name: AddNoise             # add noise on-the-fly
  params:
    snr_min: 10              # using random signal-to-noise
    snr_max: 20              # ratio between 10 and 20 dBs
    collection: MUSAN.Collection.BackgroundNoise # use background noise from MUSAN
                                                 # (needs pyannote.db.musan)
feature_extraction:
  name: LibrosaMFCC          # use MFCC from librosa
  params:
    e: False                 # do not use energy
    De: True                 # use energy 1st derivative
    DDe: True                # use energy 2nd derivative
    coefs: 19                # use 19 MFCC coefficients
    D: True                  # use coefficients 1st derivative
    DD: True                 # use coefficients 2nd derivative
    duration: 0.025          # extract MFCC from 25ms windows
    step: 0.010              # extract MFCC every 10ms
    sample_rate: 16000       # convert to 16KHz first (if needed)
architecture:
  name: StackedRNN
  params:
    instance_normalize: True # normalize sequences
    rnn: LSTM                # use LSTM (could be GRU)
    recurrent: [128, 128]    # two layers with 128 hidden states
    bidirectional: True      # bidirectional LSTMs
    linear: [32, 32]         # add two linear layers at the end
scheduler:
  name: CyclicScheduler      # use cyclic learning rate (LR) scheduler
  params:
    learning_rate: auto      # automatically guess LR upper bound
    epochs_per_cycle: 14     # 14 epochs per cycle
"""
Write SAD files and compute VAD DER for scores in SAD format (as in the DIHARD2 challenge)
Usage:
domain_eval_dihard2.py [options] [<database.task.protocol>]
domain_eval_dihard2.py -h | --help
domain_eval_dihard2.py --version
Common options:
<database.task.protocol> Experimental protocol, e.g., 'DIHARD2.SpeakerDiarization.All'
--scores=<scores> Path to the h5 files computed by pyannote
--output-dir=<output-dir> The path to where to store the VAD files in LAB format
--params-path=<params-path> Path to the YAML file with pre-tuned parameters for Binarize class
--subset=<subset> The subset of the protocol for which to compute DER
"""
import os
import itertools
import json
from pyannote.database import get_protocol
from pyannote.audio.features import Precomputed
from pyannote.database import get_annotated
from pyannote.metrics.detection import DetectionErrorRate
from docopt import docopt
import yaml
# scores binarizer
from pyannote.audio.signal import Binarize
#
def main(protocol_name, scores_path, subset='test', params_path=None, out_dir=None):
    """Binarize precomputed SAD scores, optionally write LAB files, and report DER.

    Parameters
    ----------
    protocol_name : str
        pyannote protocol name, e.g. 'DIHARD2.SpeakerDiarization.All'.
    scores_path : str
        Path to the precomputed score files (h5) produced by pyannote.
    subset : str
        'train', 'development', or 'test' subset of the protocol.
    params_path : str, optional
        YAML file with pre-tuned Binarize parameters. If not given, or the
        file is missing, default thresholds (onset=offset=0.5) are used.
    out_dir : str, optional
        If given, one '<uri>.lab' file with speech segments is written per file.
    """
    metric = DetectionErrorRate()
    protocol = get_protocol(protocol_name)

    config = {}
    if params_path:
        try:
            with open(params_path, 'r') as fp:
                # safe_load: never execute arbitrary YAML tags from a config file
                config = yaml.safe_load(fp) or {}
        except FileNotFoundError:
            print(f'Wrong file path {params_path}')
        # validation logs store the tuned parameters under a 'params' key
        if 'epoch' in config:
            config = config['params']

    if config:
        binarize = Binarize(onset=config['onset'],
                            offset=config['offset'],
                            pad_onset=config['pad_onset'],
                            pad_offset=config['pad_offset'],
                            min_duration_on=config['min_duration_on'],
                            min_duration_off=config['min_duration_off'],
                            log_scale=True)
    else:
        # fall back to default thresholds when no tuned parameters are available
        # (previously an empty config crashed with KeyError on config['onset'])
        binarize = Binarize(onset=0.5, offset=0.5, log_scale=True)

    if subset == 'test':
        files = protocol.test()
    elif subset == 'development':
        files = protocol.development()
    else:
        files = protocol.train()

    # the scores reader is loop-invariant; create it once
    precomputed = Precomputed(scores_path)
    for test_file in files:
        uri = test_file['uri']
        sad_scores = precomputed(test_file)
        speech_regions = binarize.apply(sad_scores, dimension=1)
        if out_dir:
            with open(f'{out_dir}/{uri}.lab', 'w') as f:
                for item in speech_regions.for_json()["content"]:
                    # make sure we don't write down negative values
                    start = max(0, float(item['start']))
                    end = max(0, float(item['end']))
                    f.write('{0:.3f} {1:.3f} speech\n'.format(start, end))
        # accumulate DER over the annotated (UEM) regions only
        uem = get_annotated(test_file)
        reference = test_file['annotation']
        der = metric(reference, speech_regions.to_annotation(), uem=uem)
        print('{0} {1:.2f}'.format(uri, 100 * der))
    print(f'\n Total Detection error rate = {100*abs(metric):.2f}%')
if __name__ == '__main__':
    # parse command-line arguments as described in the module docstring
    arguments = docopt(__doc__, version="Compute DER for DIHARD2 data")
    main(arguments['<database.task.protocol>'],
         arguments['--scores'],
         arguments['--subset'],
         arguments['--params-path'],
         arguments['--output-dir'])
"""
The purpose of this script is to test several techniques for conversion of
NIST SRE Sphere files to WAV format. In the end, the best approach is to use
official utility provided by NIST SRE. None of the open source python-based utilities
work correctly with all different headers that can be found in NIST SRE databases
See the ../bash_scripts/convert_sph2wav.sh for using NIST SRE conversion utility
"""
from sphfile import SPHFile
import audioop
import wave
import numpy
import scipy.io
import librosa
import soundfile as sf
from soundfile import SoundFile
fname = 'path/to/sphere/file/jaaaA.sph'
#sph =SPHFile(fname)
#sample_rate = sph.format['sample_rate']
#data = sph.content
#data, sample_rate = sf.read(fname, dtype='float32', start=0, frames=8000)
#data, sample_rate = sf.read('noise-free-sound-0030.wav', dtype='float32', start=0, frames=48000)
with SoundFile(fname, 'r') as myfile:
sample_rate = myfile.samplerate
print(myfile.samplerate)
myfile.seek(0)
data = myfile.read(16000, dtype='float32')
scipy.io.wavfile.write('test_soundfile.wav', sample_rate, data)
#import ipdb; ipdb.set_trace()
#output_width = 2 # 2 bytes
#a = audioop.ulaw2lin(data, 1)
#a = audioop.lin2lin(a, 1, 2)
#a = audioop.bias(a, 2, -128)
#y = numpy.frombuffer(a, numpy.int16)
#y = y.astype(numpy.float32) / 32768.0
#y = librosa.core.resample(data, sample_rate, 16000)
#scipy.io.wavfile.write('test_resampled.wav', 16000, y)
#output = wave.open('testulaw.wav', 'w')
#output.setparams((sph.format['channel_count'], output_width, sample_rate, 0, 'NONE', 'not_compressed'))
#output.writeframes(a)
#output.close()
"""
Compute DER for scores in RTTM format
Usage:
der_for_rttm.py [options] [<database.task.protocol>]
der_for_rttm.py -h | --help
der_for_rttm.py --version
Common options:
<database.task.protocol> Experimental protocol, e.g., 'DIHARD2.SpeakerDiarization.All'
--scores=<scores> Path to the RTTM file with scores
--subset=<subset> The subset of the protocol for which to compute DER
--output-der=<output-der>        The path where the computed DER scores are written
--dbtrained-on=<dbtrained-on> Name of the database it was trained on
--dbvalhyped-on=<dbvalhyped-on> Name of the database it was validated on and on
which hyper-parameters were also estimated
"""
import os
from docopt import docopt
from pyannote.database import get_protocol
from pandas import read_table
from pyannote.core import Segment, Timeline, Annotation
# evaluation metric for diarization
from pyannote.metrics.diarization import DiarizationErrorRate, GreedyDiarizationErrorRate
# loop on test files
from pyannote.database import get_annotated
def compute_der(scores_path, protocol_name, subset_name, metric, out_der=None, dbtrainedon=None, dbvalhypeon=None):
    """Compute diarization error rate for hypotheses stored in an RTTM file.

    Parameters
    ----------
    scores_path : str
        Path to the RTTM file with hypothesized speaker turns. If the file
        does not exist the function returns None (batch runs keep going).
    protocol_name : str
        pyannote protocol name, e.g. 'AMI.SpeakerDiarization.MixHeadset'.
    subset_name : str
        'train', 'development', or 'test'.
    metric : DiarizationErrorRate
        Metric accumulator; per-file results are folded into it.
    out_der : str, optional
        If given, per-file and total DER are also written to this path.
    dbtrainedon, dbvalhypeon : str, optional
        Labels (training / validation database names) echoed in the summary line.

    Returns
    -------
    float or None
        Accumulated DER, or None when the scores file is missing.
    """
    if not os.path.exists(scores_path):
        # missing scores are silently skipped so batch evaluations can continue
        return
    protocol = get_protocol(protocol_name)
    database_name = protocol_name.split('.')[0]
    # standard 10-column RTTM layout; only uri/start/duration/speaker are used
    names = ['NA1', 'uri', 'NA2', 'start', 'duration', 'NA3', 'NA4', 'speaker', 'NA5', 'NA6']
    score_file = read_table(scores_path, delim_whitespace=True, names=names)
    hypotheses = score_file.groupby(by='uri')

    if subset_name == 'test':
        subset = protocol.test()
    elif subset_name == 'development':
        subset = protocol.development()
    elif subset_name == 'train':
        subset = protocol.train()
    else:
        raise ValueError('Incorrect subset {0} for the protocol is given!'.format(subset_name))

    fout = open(out_der, 'w') if out_der else None
    try:
        for test_file in subset:
            # load reference annotation and the UEM region it covers
            uri = test_file['uri']
            uem = get_annotated(test_file)
            reference = test_file['annotation']
            hypothesis = Annotation(uri=uri)
            # RTTM uris are prefixed with the database name ('<db>/<uri>')
            for t, turn in enumerate(hypotheses.get_group(database_name + '/' + uri).itertuples()):
                segment = Segment(start=turn.start,
                                  end=turn.start + turn.duration)
                hypothesis[segment, t] = turn.speaker
            der = metric(reference, hypothesis, uem=uem)
            if fout:
                fout.write('{0}, DER: {1:.2f}\n'.format(uri, 100 * der))
        if fout:
            fout.write(f'\nDiarization error rate = {100*abs(metric):.2f}%\n')
    finally:
        # always release the output file handle (it was previously leaked)
        if fout:
            fout.close()
    # database & subset & trained-on database & validated/hyper-tuned-on database & DER
    print(f'{database_name} & {subset_name} & {dbtrainedon} & {dbvalhypeon} & {100*abs(metric):.2f}%')
    return abs(metric)
def main():
args = docopt(__doc__, version="Compute DER for scores in RTTM format")
protocol_name = args['<database.task.protocol>']
scores_path = args['--scores']
subset = 'test'
if '--subset' in args:
subset = args['--subset']
out_der = None
if '--output-der' in args:
out_der = args['--output-der']