From 8665d1141547651a238bf32975aac4b1075e7796 Mon Sep 17 00:00:00 2001 From: Manuel Guenther <manuel.guenther@idiap.ch> Date: Tue, 5 May 2015 19:55:29 +0200 Subject: [PATCH] Improved PCA testing --- bob/bio/base/algorithm/PCA.py | 79 +-- bob/bio/base/test/data/pca_model.hdf5 | Bin 0 -> 2344 bytes bob/bio/base/test/data/pca_projected.hdf5 | Bin 0 -> 2184 bytes bob/bio/base/test/data/pca_projector.hdf5 | Bin 0 -> 16712 bytes bob/bio/base/test/test_algorithms.py | 698 ++++++++++++++++++++++ bob/bio/base/test/utils.py | 15 +- 6 files changed, 749 insertions(+), 43 deletions(-) create mode 100644 bob/bio/base/test/data/pca_model.hdf5 create mode 100644 bob/bio/base/test/data/pca_projected.hdf5 create mode 100644 bob/bio/base/test/data/pca_projector.hdf5 create mode 100644 bob/bio/base/test/test_algorithms.py diff --git a/bob/bio/base/algorithm/PCA.py b/bob/bio/base/algorithm/PCA.py index 28431321..f9141a17 100644 --- a/bob/bio/base/algorithm/PCA.py +++ b/bob/bio/base/algorithm/PCA.py @@ -39,84 +39,85 @@ class PCA (Algorithm): **kwargs ) - self.m_subspace_dim = subspace_dimension - self.m_machine = None - self.m_distance_function = distance_function - self.m_factor = -1. if is_distance_function else 1. - self.m_uses_variances = uses_variances + self.subspace_dim = subspace_dimension + self.machine = None + self.distance_function = distance_function + self.factor = -1. if is_distance_function else 1. + self.uses_variances = uses_variances + + + def _check_feature(self, feature): + """Checks that the features are apropriate""" + if not isinstance(feature, numpy.ndarray) or len(feature.shape) != 1: + raise ValueError("The given feature is not appropriate") def train_projector(self, training_features, projector_file): """Generates the PCA covariance matrix""" - # Initializes the data - data = numpy.vstack([feature.flatten() for feature in training_features]) + # Assure that all data are 1D + [self._check_feature(feature) for feature in training_features] + # Initializes the data + data = numpy.vstack(training_features) logger.info(" -> Training LinearMachine using PCA") t = bob.learn.linear.PCATrainer() - self.m_machine, self.m_variances = t.train(data) + self.machine, self.variances = t.train(data) # For re-shaping, we need to copy... - self.m_variances = self.m_variances.copy() + self.variances = self.variances.copy() # compute variance percentage, if desired - if isinstance(self.m_subspace_dim, float): - cummulated = numpy.cumsum(self.m_variances) / numpy.sum(self.m_variances) + if isinstance(self.subspace_dim, float): + cummulated = numpy.cumsum(self.variances) / numpy.sum(self.variances) for index in range(len(cummulated)): - if cummulated[index] > self.m_subspace_dim: - self.m_subspace_dim = index + if cummulated[index] > self.subspace_dim: + self.subspace_dim = index break - self.m_subspace_dim = index - - logger.info(" ... Keeping %d PCA dimensions", self.m_subspace_dim) - + self.subspace_dim = index + logger.info(" ... 
Keeping %d PCA dimensions", self.subspace_dim) # re-shape machine - self.m_machine.resize(self.m_machine.shape[0], self.m_subspace_dim) - self.m_variances.resize(self.m_subspace_dim) + self.machine.resize(self.machine.shape[0], self.subspace_dim) + self.variances.resize(self.subspace_dim) f = bob.io.base.HDF5File(projector_file, "w") - f.set("Eigenvalues", self.m_variances) + f.set("Eigenvalues", self.variances) f.create_group("Machine") f.cd("/Machine") - self.m_machine.save(f) + self.machine.save(f) def load_projector(self, projector_file): """Reads the PCA projection matrix from file""" # read PCA projector f = bob.io.base.HDF5File(projector_file) - self.m_variances = f.read("Eigenvalues") + self.variances = f.read("Eigenvalues") f.cd("/Machine") - self.m_machine = bob.learn.linear.Machine(f) - # Allocates an array for the projected data - self.m_projected_feature = numpy.ndarray(self.m_machine.shape[1], numpy.float64) + self.machine = bob.learn.linear.Machine(f) + def project(self, feature): """Projects the data using the stored covariance matrix""" + self._check_feature(feature) # Projects the data - self.m_machine(feature, self.m_projected_feature) - # return the projected data - return self.m_projected_feature + return self.machine(feature) + def enroll(self, enroll_features): - """Enrolls the model by computing an average of the given input vectors""" + """Enrolls the model by storing all given input vectors""" + [self._check_feature(feature) for feature in enroll_features] assert len(enroll_features) # just store all the features - model = numpy.zeros((len(enroll_features), enroll_features[0].shape[0]), numpy.float64) - for n, feature in enumerate(enroll_features): - model[n,:] += feature[:] - - # return enrolled model - return model + return numpy.vstack(enroll_features) def score(self, model, probe): - """Computes the distance of the model to the probe using the distance function taken from the config file""" + """Computes the distance of the model to the probe using the distance function""" # return the negative distance (as a similarity measure) if len(model.shape) == 2: # we have multiple models, so we use the multiple model scoring return self.score_for_multiple_models(model, probe) - elif self.m_uses_variances: + elif self.uses_variances: # single model, single probe (multiple probes have already been handled) - return self.m_factor * self.m_distance_function(model, probe, self.m_variances) + return self.factor * self.distance_function(model, probe, self.variances) else: # single model, single probe (multiple probes have already been handled) - return self.m_factor * self.m_distance_function(model, probe) + return self.factor * self.distance_function(model, probe) diff --git a/bob/bio/base/test/data/pca_model.hdf5 b/bob/bio/base/test/data/pca_model.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..837c45401d0b6cf0a7991f7861c586a48ef20847 GIT binary patch literal 2344 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@p0u4@x2#gPtPk=HQp>zk7Ucm%mFfxE31A_!q zTo7tLy1I}cS62q0N|^aD8mf)KfCa+hfC-G!BPs+uTpa^I9*%(e8kR~=K+_p4FcOQ3 z5-WimSbFq;Nsvi1GO$6+f*Q!kpaC|CkqIKe3N;rO%?wQWAeDj&_6(4;>%d^b&0zAM z8KRLDC<>BiVuBdR0aee;m;g=cC!jV!C?TjRAU)?9JVO~6GEn4c%?bANcLC*SsN)zE zp#Gq>y`$!jhQMeD480I|En8yUdQ-#U_=834u6aQY5AP}55&fO!aM0jNdOnYsLuKQ+ zwSkUa4uz)n*S<W_bda&^J0<KI?7*Vh&(oxt>TqeTrf2SwV28yzVppYRB{^I$f7m{K zuc3okXM`*3vTTQ$hi;et_~YPEsmZPLcdMJj`x==)6X(V|I7j6%oodf^*tM=Z;I~1B zL+aVa*&6Tj9CFJQSH3XLa7a7)@X6P@2nV65i>f{Nq8t?59n)C%ra4^tBe^2PG0<U( TxW}F<%WQ``bJ`fQrE?tsrO=7M literal 0 HcmV?d00001 diff 
--git a/bob/bio/base/test/data/pca_projected.hdf5 b/bob/bio/base/test/data/pca_projected.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..c249faaafc707ecc13794f38e844ba04a3860a95 GIT binary patch literal 2184 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@pf({Od2#gPtPk=HQp>zk7Ucm%mFfxE31A_!q zTo7tLy1I}cS62q0N|^aD8mf)KfCa*WIs+y=N{^5b@Njhu0C_b6>R(tYJpoN;uwY0m zDoU&ba$xDv113Qx&B(w8F$-!SBQzy5GC~Acq2|J9W}rMsMo__?0g_f77)-bsO#U-N z6tV(ELGnyYOke{zpz4_!6QD_318O3K5`vlnQhc7lGn9d$0!5zQoM1nH7f_CdI+j5J z>Mwe`W7NLU5Eu=C!5;!sJ3Pu?ak?F-&F|Qe=j`vm@JGRTa$~APW!jf5Pes)o4wM>w J*!}$NegHJ@MalpG literal 0 HcmV?d00001 diff --git a/bob/bio/base/test/data/pca_projector.hdf5 b/bob/bio/base/test/data/pca_projector.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..c336f1132526f2ea46d7530860ac06acc0e79f9f GIT binary patch literal 16712 zcmeHu2{@MB*8hWwGK7>;B&5Muq>0w8A|XX86`se;n|YR55i-y7JdclA#4@Eal!~Y* zO)AOIs3`xZH_rEd@43$ZeAoF;-}OK5?Yi!@_g?GX_g=rf*4le*_Y<IWP?2>R&oa`_ zi;0P}h{XPTap5*Me<{iTYFVf+@G0{Jx_Rn9PxI)173fGm|45|O^X(k-{r_1`Q$s<4 zM3Y?Te}T?FMv`3gtF+M1Li=Cz-va?91^Ht?HPxC|mw%q#n76a}DoMf2#K_W7&)mVt z?)Ro6dIqLumPWtZ;LnC#3xfYeN=I7q>)Ck&r<=EAI=WvCi|6}UpbRwrPYwq^keGhw z5hQ75lJp$IFTusM)}MTOdivixV49a_priZ}CvE;EIDhAy@8zfPZPMWrB$D%g;{V^S z99KJfkd8z@KPnQ5e_pSDG>!#-b)E<4ccp2<Y4i0a#((tW|KII&B!=JZU;!WY`M5}# z55JUo_5WF1?BVz&_@Axj|8MO6((XSH_;XxTnr99F$*;^TtsU&ib`JXU{9koLGspS5 zlaZN;sl6RZ-%QW$XXw;3us3tmvp2J{{3Wp9w+p+2f3&U-%l(p9TJ?)wtW61y*wRT@ zjH+f92X+vQVVWJwHg^z5c1Ev=H@itZR4P&&<h@DAeijZIWuXvKyR|nA93T_6Kg_1C zDN%^`C#744L@0!LU7;mcE}6*M`nD=Gk4(^!Ei8t#D1`X)Q<=wQD8z0-dH(gjy2K6P zqOQIeGU2#A_oDVyG7-wJ+8wZgOiWegmzR3!64`eRuG5+565Hp@)-Da85UzZeZEIc0 zgu#*JYz(HlgwN!givlVXB7d=l#lcHtLexyf;>j@zL3j3QzUNX3k=$E;^u!l3aWuMH zeisLY@H}+<-EW2zqNF(8QG|(1NVVm8?ds7bRKDcsJjx*xi}PovM+eD7Gyf)w)0-&- zB!0d9{e~_PtjIIG@fw*hT$LVeTcS(c$-U>rrb;G0<hRTAZ=w*XO0IpO_GChMxAw+F z1qx9w|0Pc&iA-eJC@y_Es!NEd9nO4KK^u2Mcz5q=3h@pM-a;US@Vn%6!h-g`*5FEx z7(oi5=rYzxI!qxh+7}<VyM#;xC}g0tH-*?a$dG<#F@*>;nY!!tgG`7hk9Q94rVtY{ zMFT+!WJ2sV$Hs9JGU1g!)S|YJLiDG;AKD>JCd73QzA&|-5Y}ZI6>VsE?CwvU?cGKp zj<c@Za%h__ar(<kF0(Nm!r)bodR~Ywq0iM;v1bE?80}U{;~5|mEJ5{ipDc8VZSP{X zTf0z*&J)8mBYir=DprMD7aBf3#X}Fm`6z_k>%1i=jcEOSR&*L4AroJpJf=idk%=|? 
zPHkZCArp~eoI?&f$b@Nx(zG}$g}CCzM{lr~LZsx3WZXTgOY9$y-ItL`Ay}Vp6Zlf0 zOYl0YH0b%$=An)K+l{?sLQ;I$fe+g$gi93bD@NM;j2nHp^VX3GHbq~L>Q}nNbWhXj zX<iB;xm&YQlaEYf9&OnstVbsDudwRH(Ws{Q8XX;ihC@%WZPo`eG104K_?b37ud$Ge zoChhyhghEOSt^;h9-H#)asio8nqn$k5keu_nYOm;((HxNsI$0*X3xu=zF#UyqY%CW zmHvj+G<?Ie>h%4{#J&#?SGM<&iH*jW%tH242-(*yuic}_#F3J;M&oocVU^i3;P60~ z5I-+eCVzxNtTOq&HgP3|_<TEf_WB;$dlH37dioT?|Dd}HvlW?8yvZ1LI*dY;cLkMe z)9f?YY&o|~2AMe7tJai8dykLlq6_iQC`5PP8j-5IWManp@+;S2nmuZ7o8fps8_&1Q zfgi7RiKtQuBU4)n;U@Rl$NnRkc-A>+!9|<baiv%njv_LVbBO%Hj^-bVQqz4_lVoD< z>xILev~gv|crr!M{AK&KlDG;<3X#x%U#P-Imna-Qlp{2tOPo?VtUnP-CN|4fk&J0_ zyzf`KXly1EjPAnThcwB=@qqP`hZV^Ly`-((Q*jER`Y2X-j*d)ty9C}`GD9ZRmp`w2 zuSg+29eex0i8ep6+|e8zow~%D?XrhspOK09;DDt9Tohsl_sxwt&Sb*kUbm*OA8j5o zBJQiw;!N0Q5Gx$kA$E30MflO;$%7r{?NRs1L}%Ig&Pa9|E|ChN>TDE3_a6PIayOYM z(U3nQ1r$Q*6Z36Pnthff-3d9#Pa(#?vC4UwP>8`A-xWo*Wa86(R-<EUDa0Y=<>P@% zD1@NQO;L^)Wa26LYppB|ue^F`*ELFHf(JU>N*E}_kNS`&EsivMT@!k(Dw#rLbsX65 zPO~4)KyhOS+PJznQa+2){JnsIQT}5Oh48w~yGv#R&3~V~+jfWM@7YEyJ@QhtxTC7O zY42Gw;ofw--0?b@U^39%T*s?R1dKdS;1(ehya)Fdzu!V3&TNyajc3*)cHLMT$xVBH zZ*Z@m>l6*o9JP;YY5ueeC9}{*m$1}u)f>A&iwpaw_XVa>2$tYyp1&E8iBrx&D<1QZ z3AJwzCnRX|@#xyCrEN5SG_akcIMZ-_P@nhEofao9^Nu8wXm*-%ZSb2mZCsxO?9{U- zbqV8#_v^lX)g@MGCk@=C#oe0L4BJh#aXjbHvEFo&OoTI5G>Ftvh!yRr-Nu|0Vm(8F z|Bq8N`(e&ikfQm?)03m=x7qZFU#rbOzw2A+i<=wi*;<PK#|kGd|F^|X{IH>srM;QG z%RlewFNyyO0t@;6pZEJLB;xnS>1de*3oQq7G_tk(x!GW+WkJ7Eeg0qk`8;c3|MB<q zf1aP+_`MerX@UOh`I$d$WIr!r$uIir{@*h{Tl}{?ch~%=7jXOY{!DUSnO!UYsMX(> z7xI+VEB>DM$Cmlmk(w4ZihpGOp}cv%yqW6{jsJ~`|19snF+bJ?`mfs+Wggo9-gfo( zc>J0{x?jf_fAg<}bC3mG7V7^xK52ZvpeEM;(LDa%?@a&5{y$#+dpv$k<NyA1p&M+! zl=+4Cf?|KKzw-j?{|Wtb9=hQErt^8=!l29yf0Y*Ek<tR=U!?zR9v0rSKpAN3&Cl}B zF!mGCe|#P{`Cr7{|6d&Wztdxr@Jrjj=wCSeMBpzw_{$FdvV*_;;Gf5V`{U1s8n%{U z-pSGSb_IV(t5BpLUL1or!V-BXz1Kj<V?}dhL^YZw#56Q)$%R$1D7QVi6&%m-Zlt#; z1^!qOZ$F!h7$i2ZaVk0=P2FlrIq4gaf9uW_@vh;Jcduv-cYhG{j%-ig$#w;rcK4}$ zIpvN$8^#nh##155;`zw>hv{JbSXH{}oi)BVySsgEZ3aGSPTn$BhM@G$@v>iaDXh!x z4tjUH0lqADkm%A&2bPDE$MrwuVAkobGkfm%AZfYwbZe?Vwp12ea`8w<aYh(EnHU19 zM-<%NXy!ttQ)}3p$6;{8eQT;;coEDntGu>R&4#NC?5`Xn{h-aww;^<Q4#*c4?^Rvd zjQ-b`@J{WoMz&9<m+MN`ppba^X`3$<*fbRC0=?cayywz*c!M3Ro?7;~@>K#bUU~6O z>|hkK`D)&0Zb}8pM^PUphE#0oVJ>FkZ^I>-H@(+>&4hh!ufA*;PK1FY2aKcg(qK}u zZ58X=NIbIp;~N#HRP>T{n;95)!r~hTpQyJXkb+WpbWf;aUcs@8qc2mS&-J0M&4n72 zIZ`VdVHAcM!`a5>iVcwV?(7!lb}EpjSLe>en4#78m>)Nz5UBgr<E<jIaOm}>O(Tz6 zQTKUQ{#mIeh>*D6`F4zovlz8TZ%ZujFWo#F7G@1B>4DElww1{AbW)Osz8pU`nIx9C z)gx*6)+)(77POaliVV%FLh9InJEM3wP74hDknSr1o^>oLBv}OFUhAzx33eF2KX$2M zS0oVZhYyMGO#v^nH!oc`Qt(E-9IvB(IE=mHSkcmz16OY<%O&n}gdUYqEndzHpqdv= zOpz*ph!jYhaSs3$K^+~|iaPKbX}bMwKo6IlXX)b>X-8jf1#Rl?e0X2bF8HMdv8VCi zqUmqp5XG?eL~&v{DCyZBk_#!qbFziOJYJcY#+lIHG}?qokGu_eN6KN&%c*O0XA`8E zaq~Kt$02{$wL$5oO7MECTzL2OSuixvHq7NK!z(8qKIVUBjkV#MG6s^%V0tujdU!e) zIvmfc36Olya-So^@zPp+9w6Fmt(643Dz-kpU6KwyP8&-f8eM_M1qVAyGGdXcCG>-n zDGFj_-W2Td^Fq-_fkJ~fo8Yye2VXpY0XVMnGcAs*2L`EbCc}~_=v>Jv@r~LH11kk@ zsejCes~dOjz8zT&)Dc7Vy9RDRO6l665UYr|&s(vQArW@1w(8Zgj{&dX{Ns}v&M1^q z)pR*J3imwK*SAWp1ihitx3=lTqa`a2>=3Cz!x@2S;mLF`Upn<RvZETQE{9!xC(4j# z-COM)I~#FKI*$8lQ7SNfzW6F$<~mNVG}%8#=Latid`ljoG=bE99l!LBGFWY!@xC!R z4NkAIwLjW(73p?=9Mg%ZLa(FMt2FeYAuU_a^WE}D;5e_wvDB^_qL`w8@GynJwZ0ur z6}PH^U41bF$Ac<VG<vi6u}um}MxNSsjaq`lj|@GrM{eN9e3~oVtQ^C*L$r0$N>NTv zqcU`=9Ly%W4mDCsVW3pbdActcKaKfud#APFYune0*gs_fbDxRBd-p6DiIIPME4&aS zw=Y&%cETI@J({e}-8li$4@$L{uBJl&!3{j!&ntkt(RBNP!eZEZIfwMTDGeV}H|sQR z&&BxNt-X7UOW>5|(Bk*&lkr|(hP6q(9j5W!c<;ng0H)Jp=V$I0q1%?ERVU?YG0QAM zfxqAqWD2V0d0fiJrcJ~$<ttRQ&`uAtTSLWdE=8A~Wi{idX*b_^QU<(c{W_(_Q-YHZ zUvsJ+t%Q_^wXAZh%z!H~=9c%FDpX^d-7f9s1cq<AvOZf^0Nwgh7XI`k9JoBKl>M<0 
zlpQvX)ITZ2Z^=}3=`<tw97cW^7kUL3^}P&kkURjhZhV68E%MM{pQuMjR{<*S{xs7n z6$Q0CEA8_s4RHE<!Ug%T66oNwIXlsm2_H^ITAUm#!cCs#3@+Iouz8s6yn|vL4$gAF z&B)V5>eBud)kGUm&YtRhb-WBmrArs}7nZ>BDFOZ?OUr?QW$~FQzZh86ctF4QSs7gL zj1`jhtijuhv@~>LEO0h5J^SAFG>ql;Idkl33vg*gy-H^b#kN*XEpA6Dyy<Fh9`tO4 z%&nQDZ~3cXW${qlTt+$6XgFTsI^zKC=|^Vm&K05QtkiV_`DRpVA9!+>rvItq2li!^ z)<RKOJ^M1&Dhwgk^qcG6z$PQ+Z(fp>(8+!!^NC$L6byOtvChUpb(bKOb$JVByWjm1 zPP3B`MuAVnu>|;T^onidSOYfdIXsk0EWtD>^U>u6v1q@4Vhx>M9CDw$BjqNUfnGia z_okF<LH~V@%c4EixQy=2_nsS>xP5kY;WD=zd{fnUD?}p$)gP=|dx5zgdpf)4D2X)~ zvad#%!Ojz$MO6*1eyNAVHjYe#m(^f+@72D2;~^-Z_MPc2Q3}HRDQj|$mO(<kQuXp< zm1tp6s9Coz4pnRNZFY(HW7ps;7yXfD)C!o5akaXE!Ox~=MEHDR*H%H{(RNQ*5>Tok z<Qa;IqJ>9ySe{2N`>g^;hyqlaw(?m|%|?ORTJej6Wk{*ca}>37LY+;mAGJ%9Fu~&7 zms9?2sLJ77cTTb%-;dtslT&R0mRB000cYYMX28fn(=HZL-#G<L^#ozCwo+4xdoaYR zQukNxszL8-HttKfDltgV;b8yv2AFh`mdaaOjKZ(ZElY5U!pfo*865#_cwAIwDYMKK za0-6!-F49h^0(aD=dd^y#oj2;k3`l3dD%M2GP_0?nh@ejy4;9N=PeEPRW{+1tLnX3 zQqIt_Q91SsZyj>ERLgy4Zvl0>&k1+i>foqm-QA|n1aP?d19q;;KteZhwd?K(j8m0f zZ_<<u2{ChSWS1(mecxL*+u(uw*Lj9Gyr@Ewv6FI>vNzo6F%(r+wMEs<FJrgv4n@K7 zJh>g7h`Yp=Jy17lf+3dgquQIwpr<4vyCRJWr8^Bh-{uFyUS>O0KC3!x&*^&kJ}?)D z>O5{eU6BV@_H><6@x6-M<TJE>6RL!>`7iJ8EYF79G6BsgdlG=Gcl-qjGr*GDWk+Fc zJB+g4v}Bp8#BD6@{y!cBfGS71hezENFkT(%W8Rn!yE3djPc%28j_*g~>r;uSTTI_o z8C?Uvg>HJx^f3X+K}WvsyxNEbM)0cpQU$U<6>K^6&>9t8e#~q<RSus0{yW|$T?5~X zZ%aN}6hR(4CGL$>9dH`Q|7c&H3q}%i3<jS|Va29ZMr9U35HVr?;o*rK2s&@s*Pd1h zsl`XNZ5q<B#3S!*?_?$lZC^JgCUz5qR?hGVXfy+ViJvyZ#TejO8>^K_s)9~dlNnXs z7F29Yqo}-($1^$POX-6Zz<By{c-CYM>V)5lUh=L0G{5I{;MP3gIQ-~D?d>FJGUJu* zO{l?T;|(?^Bi*pf+d9pZXa<d^-BHS-iD0bBcC3A43;tk_W#ruJ4=Sq3ze!Bj!R9<w z$@>8&$acy%lsB^t_W8%CzPMG3)SXYvKhffs@_K<d=gv&XoiX(Pw4)s3oV_O8tWz-{ z$UInitOd`s?de<kn+sBS>Z+f(1VUquO~80VHPTtRefi>D17=sgMa3FX;bQ}J$og{y zRx)n7F?Aya)7G6&y1u0dmtPS!-DFaa>!O6j1d_6FyF0@~?wkvxMN4k99=CysZ``2+ zQl-et`RwG0`!V=zO-+_rN(E{<Y+iamEgGB;M<%DtlmY#E&f!f@^YG@}Z;Hbi&2aKA zCzI~!WE7LJ@BFf>9Ndl7srL$t@X^K<-&lRZ@d8zO-95i1=#sWn5MeCBSr1}DU9%8k zzIXQ-uFt}$$SY$PK4pT*%Pn;&Ntwtr-Y(lr$_F;3-Qq3--YD!YbNO;dB}iGA8>mSX zfbss@M&9%n@sLZADoaTY+-Q2<8y=a7#$(f$S8w!!=<kX~OujLYI>PbtvOp`AUSJT= zQmX~-moZ1f3PQj<s<c@0QVuddwh-WHazYuu+HsdRO%QNuXc5IG3+CeM#Fj`=!7k{H zCLd1)#9h3d&l7KpDu#MJTQl<U?xm-%(v(ZkDYkR$p)?hFR)uA*Y0QG7-}1CxlHx(j z(@6ZfXceAv7Z$jcoQ7(8Vj4Z(4cL~kPMZa)@di~+PCr!*Y|RVSerXFwi(vipdl?gu zsxLXKAzKZhlVP%<UrSN(b`w35U^|9ArEk4p6b!k!Z&n2gAavgH;2z4ZfZgC&d#BnI z_l@%e>)vjHxUvLl+3hA|PuC1;Ulsu!hwJ8!Y(f~2x0aULoDI^4x+WJr&q2KhLMl_z zRrqRK##?cZcw|Wo(+P_&LGS0n@g?<CJa3{K*Zd|GmgB{vqLT6O5--~Lgk44R;5(EX zGO5_%EJ>f@mI(KQR8(BwCV<6e{|Vu{b+|m}hxquBFs!aO*0-HCLOQRjjML|9@C69% zTXQrQ;#%Ju9B!&d@v}{b1aoaL<;KJ%hX)mya=+e1p)&{SxYDzVG~6JXX`+@RN*zp& z9gXscO#{)VJWA^{T~S4pE_k&Y6>IO8wVPSAf;d?|t2`qDwqEe~diH4n%vBe>U%nv} zu4DweMd%a*v#eRe_bpX$Z}MT6DE|?hF!qhs3kt*zv7JrRF6FrKgl2A(Z6Qjo3C|Oh zpd#BECZ-w2W}G{q`NT9i9@f*Z94Z)!#+62VHw%bFymS7F{(+!E&@Y)i9%rA8{C(l} z{kcB4Waibn5JB30?nK<XZJo`qw^#YYC}TEmx4F2E%Qpn~$nTf%PAJ3F4AF-S&q|Tm zdFPYYHCdSV+&U@WsSM2K*!UhW#p9hwu_JV=OEGt6=A2q<8xGns9k-UQhOLK0&z6j* zLDI?X$(ARgQ1DIRyEc&=+;dlmi~Vs8-tucdeDO>gq&lcd3{g@+=7)+>XKNI4Tw_qb zYvT=7JZFTrn^D0lT`uLpTsT<pI}15Qwqe@#o<k{?CuseZZn$KU3@ZCwzLG=A!8sYH zPpXx{w)-d2mIRc++{>pC>AYbmqvo768&-~kDo|ap<qAaIwzt{2t{CcP^Z6`)6kyG@ z$<MQOiNJhJV&vB4I#{|&Vv27n3>=?&+*)SY07(_uy;)Y3;J8cc`O8(oSf<P6v2-9D zc5m{V%3f4~%9TT{-pq~IAEar=Ih+LyN<EKEhp7;LNYmNxSQ$#&3M+GZrh)Mi-}ung zjwo~XGus8C4j+hZW#E$u!@y8Bwn?KbF#gI~wC`;uj-TZixAI9x-9p>1sZCX&b>eyM zlLuumR(6%*{4N=<?U`Iu#703TrbBk0Ult<A?I+>3dkbKKlF>Iqv*(k1cW*lN=0WQz zygO5o1zz{9G%xcPqosSkF;rFHF8YAd)@CXk9KQQ0&5r^vyq3$fbY1|t>(&)UR4U5V zdfi!SW&mB+mDNa#60z-D#M6flj>4xvec!`FWk?Vviyr4^q1N(RcB7mCFywZBCgDK^ 
zxdZEqN2^mI$T;!o1~S6;GJ4m}<qg1Pd-mRQ|5}_qZhfLzx*F*R&y)8_P{HwaBlYqN zDy}Z)FtHC$!3uA;TfyJU@zayDjaB>00SfI^zG_^BGj@Sv18p^6@y*E4E3_8)!`1G6 zSF{0}_!lv%b=lbAvPDC?B@S2fbZ$xXYz2|~C$3%`_CoJh=NJyXtiy+Sv%$$iRam;K zcX;AP0A^{epXu_V<yY0bp`TMi&<c8G@8(6o(mv7U$1j({?v+_W!Jn_<CYFAuAIqXZ zpmWQl#rhPm>h6;$-roZrHCr|czbV8=)rxQ^s06=7xr@lM6hmp!>f<S=bKuA|W|JN3 zqoK)UA|ffS3=Cr8qP`E;gUBgMru{#zU_!;#guOcHAoij`=IGv7RO+)ck|~RUc(%*Q zl~hFQD)!9Mt$`5Ox@u2fQ5E1!l7GSSJS@yzsua7u5zaQnL^g6Hq2qG9l+24IxKHB2 z@wK#d+$~nDWyG-#zCAvX`qV8KRF`?%O2q|$e{Ly{%8h8G?7kzLU{#0_cBRy6p>Wtz zp}(OkFALt}MLbUY5|8W|{zSyL3Mk&X*+qyw48+dm;w`T>SbZUT^Jdz<hIdD$%$v`d zm>al)Rawp!W@G5(=?e2vw#}kp>6VLl+v943eQPy(k8Y6H43EYR{`BEZv^=It!E}7r zTQA({-Lm;qVmhW~$JXn7js~IYMFxJYX)vssQ?<8O2Nv6#V>r7XSgQqWR8J}drNs|6 z%&C=tsU4ri^O-7$3>0S0SegP4T;=uZhoT`uYSD=Tq7sC}vMr@$bMUZ)#GX0jayTIQ z!M%ei1+oG<&f-utKFnoOJ5&+|ahvoF<^oG_j8`i^YNiz@dvvT;9(BPP{)gShf+hIO zd1L5^GZg~@_3yR6szEo&w}r0-XzRD<`RydZDtOI(@^i698J<|=FOt(y2SesEk^x5k z7|tL@x~i3llDoYbJwz&zq_u5$UuzA%D~sh@#8r$EYDQW0LwPX%R9dVzwHEq6CD)bJ zrh=SibnEq(h46TaIcdbF4sYJ4pZ)x>1*m=L$C7DzT8{BvZjHKpxa8|AV&Pkl5v)^t z+lh7Pr{@}FshbX0tzxzFI;z1o)i8BOSQ`9BFL<lMwFn+5i0^$_Ma54B)yG?xHRFv5 zx8L{<*u!g8p*lXA{|k)mc8&R5hT}y(ef&cC;LbS0cyluqcX4f-yG+~nJUkQ}zpXSM zh?A?IplUT-&g;z_9xR6M`qM1K9JP2$&4IP`=Q+XI$|J}6OYxQK>rsnk=fGS#N3fn0 zj)|Y|-zvP94a=W#(-TsOFk${tydbF@sU7-nH`ug8V7<b!^{N%1s=CJ^$Jqz3S{^-i zQT+-q&&r;yU?~L?%|jL~3K<aGDda3`Re^(z7w^`mhU4zHT*V(xB?G@$E;(aE4rcCg zU(}Io4a)@XoV%%ChV+xsbr16pKeVKcl$<TYFy+A-xfmxrG+UbZZGZ}os^U%wGvt7a zl-G|#N!GAwnM|0%4O%|2wwsz$S&PSWlU|gQvSB+1W8!&N+I*iFmP#<c1pR9;TqmI% zxfBZTseH<SWn)WZS8Po}RzV%RH1+_TI^Ul&;$ICsPOl%BmA4{!iIs#1ZU1D$Peq@N znIO4*dhr3vDjcU*kRQJ2gRbucoAcIYqa6#sa^I~h=<U0>fM=)#3eGI&tUgeRliMr1 z2X|h@HJlfj)O%v#NLI8_`e-uNnlp72@61GP_7z|D?r4J9XOH+o)+9jw=)eW9-dG?6 z@+Y6T>p}N9bxt3eK4p`F-I3SIz;1oe*9*;wz~@!ESGhk4<;X5G5|gE9pqDK#GLwme zS4~drn@ogL>gIOWUX;V?G2N}T&vHR~h~3DsuL1^J{a)S<qC)0&%M~gac~BTGd3c{% zE_Pv<gH?VL2A0|>D6^HrGWFNh{LKZpuBn_{9chnS&x)S^9`B3DpK$+MPYBcqCyCT= zD!?pr5lhi^IjEz$ZhGz-;=9&l*)Ozn!>sBZzVhNeSaR#rnXeIkNF4iWI{mByl6Fh= zEN-(0y#{mk)^p|1&z~KAzMYDTCBH5;=%#{kKO?W};UJLu*cmF<bp^k^3*73$oCVW! 
z&Xf5C%~;p<3A$TCaowS!PA*a$u<SbU<<crYu&KNyR9Du3OP-w$TcTMBcQ*0fU;Nb% z<K%WLW__>3TB|<BEnm(<NdK3kSvzV`noW85>;80fEZXcPAc&BZnaY*#o(lI5`41Yi zgn)?Pw=kJiB_LhAWO~5a24^F*1dpDK2JaUFo*J6<u$&(n#NxxjVB*EM8NV<Tmk9H2 zUKWe}M(dA1$+(Keja_^6#A$xHG)7g~=^UOuP@1SG>xXm7O(Qk#>6lddI(C&;20D^g zMwGm3MnO*zlRLWc(AZ#To6(()x8p?4XY^OWckAgeE&qIYNXjlEO!9E)Gu2}n%4Hy9 zpSknE;bO4Ob}@-$Pr{H3?;eM~%SSuqN4ddEl0nw~{*-e|4z|fWzolJz3C;>zs+_cn zK>iL#IRVY9a5|h$ZJl`z_}c_JN2EJJ<hyT<eP6<H(&<3XsgWE^Z5}=G;z2ghNv}-t zwrfOT(m{{kZpOif^1T98j&0!5o~1@Es=yna?I(jJYC&=1;8*AOY53IookGJ%J(`Lc zaxV3X1o_PsT;e)qxDrDhg`5h&+P+03^f~Pu++WmA{Y@x(>VGbBrsTp<g{w(>UlCfq zuUr`sPzq8%yyp~$uc5Hc%jWNj`S{Y4G;Q!87c-|<R!G`WA!zC1_XEYW{JqEW^_|`_ zR1Ixhr!bz3C%sGe_Aq6DY)aXN1N*WuV`GSN=~M=kYYV>aex8fDrTrV4PkEs>OWvrO zz(v{_NST_!SQQqkNG%#W>jfA09MMwezY28*SyLI4)yOKJn0`+)2AgWa^>0O7M9%LO zAD0K^fr*?oZ<JsHKKXR2cqF_Q^rp)02ybq}4~*+hkBd|z>6G93hgsJ^k}ctOgFP+( znVFbXkE=z^+bNo9@>OUfecpOkcP+>+i}=Vl910P>;WgzIt(ao!B&wc9MM|ct3Q4^j zSBF{NR865`;!QE@e*aR`U}R-~UVatZZ@sayXU@kBs_8dNi5he&h6jVF4(08GU-E)0 zVJL@rDlRJp)|1Fwf)*hlla$lKp_&OF*)ktnI@f^a>P^`KEm?SyIY9opC>71k1j9_) zGvHq0QioC+pQTGwFI6TUINvDBItbNZmTATH-oZxbPW$TG*;R*13KYe-m&qvQ+9IF2 zrv?=^DGs=d)WcKvYcXyt!MMMsw{m@75)IE@hV+eXaIW>KuVHZ(CY|Oi*rwA6T4S#{ zU)7hwt+lItWK439Q!)LOuvjgea#mUL)h-S{?EX#Hfxi{@I<bG;qihJ@dNyCrajw7v zyrh&~rFiJ^#>;JslJMKtMf&||$?!7Tf2>KuA6|Txn_v^qg1vpYlFA9mxOttaX3Dn& z;E@pj^gcNQgFMwQ4130+)6jvhN~Hl{x@Luy$G&phr|nS^rJWAXd|n+}D|Zb!o(JYQ zBwc|*rlvy!2lBywinHcsO$AQuQddaRjDq|xb@vQ6R6}NJFCi-4f%{*r6k*V;2I0P& z;U8SwklT+-z~fa07|xuXxv1`kOtsa<zP;|q>yi|8$T<>Nd!Manq%Q%RXP52_ZHd9a zrMFkEVJ?D<odnC)fpQ%6ERVbv5)bd+^L#hF;ESvzO~HLrb*Pyw<XE+#3L4DV8e>Df zF}=x^VT`t)TBbptb+@1v?{Sx(%A~7=ZBgA*Qtr9Hxy_t$&9?J!;!*XdQq{{~_WEV# zl_gDBqo*EwA*LMdKFI|wa%(_?XK`7Tj0KS6S4PxWM}YIz_Vzw16@1b@)j1ASfLE0L zhmZ<i^lv9$(!b)13g<nugs26W%rYi)s5}p46TiM-;mJbw(#eS>!2$T%V`p`8L<Mfl z6tP~#RE2BL*lak;9)VsP_13g_r-Ea3T>Q4hIoJ@T;L&s~8|4lM$bM-qf$7DPM>D#P zfZ5uq&89OUIAF1&@$I5a%%Eo&+7ua&C&vwVH!9>J=a5y~v!gYTa(Ho#-bf-0Us#jw zY;+Y)9G)!iF)74F8WRl$3}wLPH!4MSDMZC>gT9)pO<<2kk=XuwSI{hQ#x5i@1f*VR zW>%@>;FF+r5_<28FoL~;N36dZJPtnVJF+|xD|o0{A--wwVuNe8YrhW|U$~sbdM6Vc zHhrR<9Z)fD-Cd`<Dwoi?UF4b=T^Pp5m;Uy6A_G0{XnQ#9E5;0s-RJJG*`ec*Aos`U z3j8gQt?s*c9JJe*uKo~1J73w{v$89Tio)vGO;#}G;I230!JiiA!{|wQK|l2hXctHj zIUAn<CL{0U&tJa;A?x4gXuhTWKPFme@qOBPyyiV0#@e9}^ucEV^c?xnT85v;n492! zgpL14#$a6k`u-qZDMtrWD))sP1*lq^r!u6HiECGPoPXUD1za6%+jsKS0k6b+mb7y* zV7|j$r64LF?`gUB8v2!i8{O%QJ8z9~PjQHrUQjJ=n0X~}TRsPqBiz{{c9tW{w~T6I z&a2=hmVfe&0c}4M!n}8zL>cB>S}dS&sTBX&?+52U!~WavIe&eZ^4I;}1%Y3`=l%H= T{;#X)Pv2|)yWsG@s^|XzFdB)e literal 0 HcmV?d00001 diff --git a/bob/bio/base/test/test_algorithms.py b/bob/bio/base/test/test_algorithms.py new file mode 100644 index 00000000..609c332e --- /dev/null +++ b/bob/bio/base/test/test_algorithms.py @@ -0,0 +1,698 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> +# @date: Thu May 24 10:41:42 CEST 2012 +# +# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see <http://www.gnu.org/licenses/>. + +import os +import shutil +import numpy +import math +from nose.plugins.skip import SkipTest + +import pkg_resources + +regenerate_refs = False + +#seed_value = 5489 + +import sys +_mac_os = sys.platform == 'darwin' + + +import bob.io.base +import bob.learn.linear +import bob.io.base.test_utils +import bob.bio.base +from . import utils + +def _compare(data, reference, write_function = bob.bio.base.save, read_function = bob.bio.base.load): + # execute the preprocessor + if regenerate_refs: + write_function(data, reference) + + assert numpy.allclose(data, read_function(reference), atol=1e-5) + + +def _gmm_stats(self, feature_file, count = 50, minimum = 0, maximum = 1): + # generate a random sequence of GMM-Stats features + numpy.random.seed(42) + train_set = [] + f = bob.io.base.HDF5File(feature_file) + for i in range(count): + per_id = [] + for j in range(count): + gmm_stats = bob.learn.em.GMMStats(f) + gmm_stats.sum_px = numpy.random.random(gmm_stats.sum_px.shape) * (maximum - minimum) + minimum + gmm_stats.sum_pxx = numpy.random.random(gmm_stats.sum_pxx.shape) * (maximum - minimum) + minimum + per_id.append(gmm_stats) + train_set.append(per_id) + return train_set + + +def test_pca(): + temp_file = bob.io.base.test_utils.temporary_filename() + # load PCA from configuration + pca1 = bob.bio.base.load_resource("pca", "algorithm") + assert isinstance(pca1, bob.bio.base.algorithm.PCA) + assert isinstance(pca1, bob.bio.base.algorithm.Algorithm) + assert pca1.performs_projection + assert pca1.requires_projector_training + assert pca1.use_projected_features_for_enrollment + assert not pca1.split_training_features_by_client + assert not pca1.requires_enroller_training + + # generate a smaller PCA subspcae + pca2 = bob.bio.base.algorithm.PCA(5) + + # create random training set + train_set = utils.random_training_set(200, 500, 0., 255.) + # train the projector + reference_file = pkg_resources.resource_filename('bob.bio.base.test', 'data/pca_projector.hdf5') + try: + # train projector + pca2.train_projector(train_set, temp_file) + assert os.path.exists(temp_file) + + if regenerate_refs: shutil.copy(temp_file, reference_file) + + # check projection matrix + pca1.load_projector(reference_file) + pca2.load_projector(temp_file) + + assert numpy.allclose(pca1.variances, pca2.variances, atol=1e-5) + assert pca1.machine.shape == (200, 5) + assert pca1.machine.shape == pca2.machine.shape + # ... 
rotation direction might change, hence either the sum or the difference should be 0 + for i in range(5): + assert numpy.allclose(pca1.machine.weights[:,i], pca2.machine.weights[:,i], atol=1e-5) or numpy.allclose(pca1.machine.weights[:,i], - pca2.machine.weights[:,i], atol=1e-5) + + finally: + os.remove(temp_file) + + # generate and project random feature + feature = utils.random_array(200, 0., 255., seed=84) + projected = pca1.project(feature) + assert projected.shape == (5,) + _compare(projected, pkg_resources.resource_filename('bob.bio.base.test', 'data/pca_projected.hdf5'), pca1.write_feature, pca1.read_feature) + + # enroll model from random features + enroll = utils.random_training_set(5, 5, 0., 255., seed=21) + model = pca1.enroll(enroll) + _compare(model, pkg_resources.resource_filename('bob.bio.base.test', 'data/pca_model.hdf5'), pca1.write_model, pca1.read_model) + + # compare model with probe + probe = pca1.read_probe(pkg_resources.resource_filename('bob.bio.base.test', 'data/pca_projected.hdf5')) + reference_score = -251.53563107 + assert abs(pca1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (pca1.score(model, probe), reference_score) + assert abs(pca1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5 + + + # test the calculation of the subspace dimension based on percentage of variance + pca3 = bob.bio.base.algorithm.PCA(.9) + try: + # train projector + pca3.train_projector(train_set, temp_file) + assert os.path.exists(temp_file) + assert pca3.subspace_dim == 140 + pca3.load_projector(temp_file) + assert pca3.machine.shape[1] == 140 + finally: + os.remove(temp_file) + + +""" + def test01_gabor_jet(self): + # read input + extractor = facereclib.utils.tests.configuration_file('grid-graph', 'feature_extractor', 'features') + feature = extractor.read_feature(self.input_dir('graph_regular.hdf5')) + tool = self.config('gabor-jet') + self.assertFalse(tool.performs_projection) + self.assertFalse(tool.requires_enroller_training) + + # enroll + model = tool.enroll([feature]) + # execute the preprocessor + if regenerate_refs: + tool.save_model(model, self.reference_dir('graph_model.hdf5')) + reference = tool.read_model(self.reference_dir('graph_model.hdf5')) + self.assertEqual(len(model), 1) + for n in range(len(model[0])): + self.assertTrue((numpy.abs(model[0][n].abs - reference[0][n].abs) < 1e-5).all()) + self.assertTrue((numpy.abs(model[0][n].phase - reference[0][n].phase) < 1e-5).all()) + + # score + sim = tool.score(model, feature) + self.assertAlmostEqual(sim, 1.) + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature, feature]), 1.) + + # test averaging + tool = facereclib.tools.GaborJets( + "PhaseDiffPlusCanberra", + gabor_sigma = math.sqrt(2.) * math.pi, + multiple_feature_scoring = "average_model" + ) + model = tool.enroll([feature, feature]) + + # absoulte values must be identical + for n in range(len(model)): + self.assertTrue((numpy.abs(model[n].abs - reference[0][n].abs) < 1e-5).all()) + # phases might differ with 2 Pi + for n in range(len(model)): + for j in range(len(model[n].phase)): + self.assertTrue(abs(model[n].phase[j] - reference[0][n].phase[j]) < 1e-5 or abs(model[n].phase[j] - reference[0][n].phase[j] + 2*math.pi) < 1e-5 or abs(model[n].phase[j] - reference[0][n].phase[j] - 2*math.pi) < 1e-5) + + sim = tool.score(model, feature) + self.assertAlmostEqual(sim, 1.) + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature, feature]), 1.) 
+ + + + def test02_lgbphs(self): + # read input + feature1 = facereclib.utils.load(self.input_dir('lgbphs_sparse.hdf5')) + feature2 = facereclib.utils.load(self.input_dir('lgbphs_no_phase.hdf5')) + tool = self.config('lgbphs') + self.assertFalse(tool.performs_projection) + self.assertFalse(tool.requires_enroller_training) + + # enroll model + model = tool.enroll([feature1]) + self.compare(model, 'lgbphs_model.hdf5') + + # score + sim = tool.score(model, feature2) + self.assertAlmostEqual(sim, 40960.) + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature2, feature2]), sim) + + + + def test04_lda(self): + # read input + feature = facereclib.utils.load(self.input_dir('linearize.hdf5')) + # assure that the config file is loadable + tool = self.config('lda') + self.assertTrue(isinstance(tool, facereclib.tools.LDA)) + # assure that the config file is loadable + tool = self.config('pca+lda') + self.assertTrue(isinstance(tool, facereclib.tools.LDA)) + + # here we use a reduced tool, using the scaled Euclidean distance (mahalanobis) from scipy + import scipy.spatial + tool = facereclib.tools.LDA(5, 10, scipy.spatial.distance.seuclidean, True, True) + self.assertTrue(tool.performs_projection) + self.assertTrue(tool.requires_projector_training) + self.assertTrue(tool.use_projected_features_for_enrollment) + self.assertTrue(tool.split_training_features_by_client) + + # train the projector + t = tempfile.mkstemp('pca+lda.hdf5', prefix='frltest_')[1] + tool.train_projector(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=20, minimum=0., maximum=255.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('pca+lda_projector.hdf5')) + + # load the projector file + tool.load_projector(self.reference_dir('pca+lda_projector.hdf5')) + # compare the resulting machines + f = bob.io.base.HDF5File(t) + new_variances = f.read("Eigenvalues") + f.cd("/Machine") + new_machine = bob.learn.linear.Machine(f) + del f + self.assertEqual(tool.m_machine.shape, new_machine.shape) + self.assertTrue(numpy.abs(tool.m_variances - new_variances < 1e-5).all()) + # ... rotation direction might change, hence either the sum or the difference should be 0 + for i in range(5): + self.assertTrue(numpy.abs(tool.m_machine.weights[:,i] - new_machine.weights[:,i] < 1e-5).all() or numpy.abs(tool.m_machine.weights[:,i] + new_machine.weights[:,i] < 1e-5).all()) + os.remove(t) + + # project feature + projected = tool.project(feature) + self.compare(projected, 'pca+lda_feature.hdf5') + self.assertTrue(len(projected.shape) == 1) + + # enroll model + model = tool.enroll([projected]) + self.compare(model, 'pca+lda_model.hdf5') + self.assertTrue(model.shape == (1,5)) + + # score + sim = tool.score(model, projected) + self.assertAlmostEqual(sim, 0.) + + # test the calculation of the subspace dimension based on percentage of variance, + # and the usage of a different way to compute the final score in case of multiple features per model + tool = facereclib.tools.LDA(5, .9, multiple_model_scoring = 'median') + tool.train_projector(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=20, minimum=0., maximum=255.), t) + self.assertEqual(tool.m_pca_subspace, 334) + tool.load_projector(t) + os.remove(t) + projected = tool.project(feature) + model = tool.enroll([projected, projected]) + self.assertTrue(model.shape == (2,5)) + self.assertAlmostEqual(tool.score(model, projected), 0.) + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [projected, projected]), 0.) 
+ + + def test05_bic(self): + # read input + feature = facereclib.utils.load(self.input_dir('linearize.hdf5')) + # check that the config file is readable + tool = self.config('bic') + self.assertTrue(isinstance(tool, facereclib.tools.BIC)) + + # here, we use a reduced complexity for test purposes + tool = facereclib.tools.BIC(numpy.subtract, 100, (5,7)) + self.assertFalse(tool.performs_projection) + self.assertTrue(tool.requires_enroller_training) + + # train the enroller + t = tempfile.mkstemp('bic.hdf5', prefix='frltest_')[1] + tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=10, minimum=0., maximum=255.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('bic_enroller.hdf5')) + + # load the projector file + tool.load_enroller(self.reference_dir('bic_enroller.hdf5')) + # compare the resulting machines + new_machine = bob.learn.linear.BICMachine(bob.io.base.HDF5File(t)) + self.assertTrue(tool.m_bic_machine.is_similar_to(new_machine)) + os.remove(t) + + # enroll model + model = tool.enroll([feature]) + self.compare(model, 'bic_model.hdf5') + + # score and compare to the weird reference score ... + sim = tool.score(model, feature) + self.assertAlmostEqual(sim, 0.31276072) + + # now, test without PCA + tool = facereclib.tools.BIC(numpy.subtract, 100) + # train the enroller + t = tempfile.mkstemp('iec.hdf5', prefix='frltest_')[1] + tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=10, minimum=0., maximum=255.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('iec_enroller.hdf5')) + + # load the projector file + tool.load_enroller(self.reference_dir('iec_enroller.hdf5')) + # compare the resulting machines + new_machine = bob.learn.linear.BICMachine(bob.io.base.HDF5File(t)) + self.assertTrue(tool.m_bic_machine.is_similar_to(new_machine)) + os.remove(t) + + # score and compare to the weird reference score ... 
+ sim = tool.score(model, feature) + self.assertAlmostEqual(sim, 0.4070329180) + + + def test06_gmm(self): + # read input + feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) + # assure that the config file is readable + tool = self.config('gmm') + self.assertTrue(isinstance(tool, facereclib.tools.UBMGMM)) + + # here, we use a reduced complexity for test purposes + tool = facereclib.tools.UBMGMM( + number_of_gaussians = 2, + k_means_training_iterations = 1, + gmm_training_iterations = 1, + INIT_SEED = seed_value, + ) + self.assertTrue(tool.performs_projection) + self.assertTrue(tool.requires_projector_training) + self.assertFalse(tool.use_projected_features_for_enrollment) + self.assertFalse(tool.split_training_features_by_client) + + # train the projector + t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] + tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('gmm_projector.hdf5')) + + # load the projector file + tool.load_projector(self.reference_dir('gmm_projector.hdf5')) + # compare GMM projector with reference + new_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(t)) + self.assertTrue(tool.m_ubm.is_similar_to(new_machine)) + os.remove(t) + + # project the feature + projected = tool.project(feature) + if regenerate_refs: + projected.save(bob.io.base.HDF5File(self.reference_dir('gmm_feature.hdf5'), 'w')) + probe = tool.read_probe(self.reference_dir('gmm_feature.hdf5')) + self.assertTrue(projected.is_similar_to(probe)) + + # enroll model with the unprojected feature + model = tool.enroll([feature]) + if regenerate_refs: + model.save(bob.io.base.HDF5File(self.reference_dir('gmm_model.hdf5'), 'w')) + reference_model = tool.read_model(self.reference_dir('gmm_model.hdf5')) + self.assertTrue(model.is_similar_to(reference_model)) + + # score with projected feature and compare to the weird reference score ... + sim = tool.score(reference_model, probe) + self.assertAlmostEqual(sim, 0.25472347774) + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [probe, probe]), sim) + + + def test06a_gmm_regular(self): + # read input + feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) + # assure that the config file is readable + tool = self.config('ubm_gmm_regular_scoring') + self.assertTrue(isinstance(tool, facereclib.tools.UBMGMMRegular)) + + # here, we use a reduced complexity for test purposes + tool = facereclib.tools.UBMGMMRegular( + number_of_gaussians = 2, + k_means_training_iterations = 1, + gmm_training_iterations = 1, + INIT_SEED = seed_value + ) + self.assertFalse(tool.performs_projection) + self.assertTrue(tool.requires_enroller_training) + + # train the enroller + t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] + tool.train_enroller(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) + # assure that it is identical to the normal UBM projector + tool.load_enroller(self.reference_dir('gmm_projector.hdf5')) + + # enroll model with the unprojected feature + model = tool.enroll([feature]) + reference_model = tool.read_model(self.reference_dir('gmm_model.hdf5')) + self.assertTrue(model.is_similar_to(reference_model)) + + # score with unprojected feature and compare to the weird reference score ... 
+ probe = tool.read_probe(self.input_dir('dct_blocks.hdf5')) + sim = tool.score(reference_model, probe) + + self.assertAlmostEqual(sim, 0.143875716) + + + def test07_isv(self): + # read input + feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) + # assure that the config file is readable + tool = self.config('isv') + self.assertTrue(isinstance(tool, facereclib.tools.ISV)) + + # Here, we use a reduced complexity for test purposes + tool = facereclib.tools.ISV( + number_of_gaussians = 2, + subspace_dimension_of_u = 160, + k_means_training_iterations = 1, + gmm_training_iterations = 1, + isv_training_iterations = 1, + INIT_SEED = seed_value + ) + self.assertTrue(tool.performs_projection) + self.assertTrue(tool.requires_projector_training) + self.assertTrue(tool.use_projected_features_for_enrollment) + self.assertTrue(tool.split_training_features_by_client) + self.assertFalse(tool.requires_enroller_training) + + # train the projector + t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] + tool.train_projector(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=5, minimum=-5., maximum=5.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('isv_projector.hdf5')) + + # load the projector file + tool.load_projector(self.reference_dir('isv_projector.hdf5')) + + # compare ISV projector with reference + hdf5file = bob.io.base.HDF5File(t) + hdf5file.cd('Projector') + projector_reference = bob.learn.em.GMMMachine(hdf5file) + self.assertTrue(tool.m_ubm.is_similar_to(projector_reference)) + + # compare ISV enroller with reference + hdf5file.cd('/') + hdf5file.cd('Enroller') + enroller_reference = bob.learn.em.ISVBase(hdf5file) + enroller_reference.ubm = projector_reference + if not _mac_os: + self.assertTrue(tool.m_isvbase.is_similar_to(enroller_reference)) + os.remove(t) + + # project the feature + projected = tool.project(feature) + if regenerate_refs: + tool.save_feature(projected, self.reference_dir('isv_feature.hdf5')) + + # compare the projected feature with the reference + projected_reference = tool.read_feature(self.reference_dir('isv_feature.hdf5')) + self.assertTrue(projected[0].is_similar_to(projected_reference)) + + # enroll model with the projected feature + model = tool.enroll([projected[0]]) + if regenerate_refs: + model.save(bob.io.base.HDF5File(self.reference_dir('isv_model.hdf5'), 'w')) + reference_model = tool.read_model(self.reference_dir('isv_model.hdf5')) + # compare the ISV model with the reference + self.assertTrue(model.is_similar_to(reference_model)) + + # check that the read_probe function reads the correct values + probe = tool.read_probe(self.reference_dir('isv_feature.hdf5')) + self.assertTrue(probe[0].is_similar_to(projected[0])) + self.assertEqual(probe[1].any(), projected[1].any()) + + # score with projected feature and compare to the weird reference score ... 
+ sim = tool.score(model, probe) + self.assertAlmostEqual(sim, 0.002739667184506023) + + # score with a concatenation of the probe + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [probe, probe]), sim, places=5) + + + def test08_jfa(self): + # read input + feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) + # assure that the config file is readable + tool = self.config('jfa') + self.assertTrue(isinstance(tool, facereclib.tools.JFA)) + + # here, we use a reduced complexity for test purposes + tool = facereclib.tools.JFA( + number_of_gaussians = 2, + subspace_dimension_of_u = 2, + subspace_dimension_of_v = 2, + k_means_training_iterations = 1, + gmm_training_iterations = 1, + jfa_training_iterations = 1, + INIT_SEED = seed_value + ) + self.assertTrue(tool.performs_projection) + self.assertTrue(tool.requires_projector_training) + self.assertTrue(tool.use_projected_features_for_enrollment) + self.assertFalse(tool.split_training_features_by_client) + self.assertTrue(tool.requires_enroller_training) + + # train the projector + t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] + tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('jfa_projector.hdf5')) + + # load the projector file + tool.load_projector(self.reference_dir('jfa_projector.hdf5')) + # compare JFA projector with reference + new_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(t)) + self.assertTrue(tool.m_ubm.is_similar_to(new_machine)) + os.remove(t) + + # project the feature + projected = tool.project(feature) + if regenerate_refs: + projected.save(bob.io.base.HDF5File(self.reference_dir('jfa_feature.hdf5'), 'w')) + # compare the projected feature with the reference + projected_reference = tool.read_feature(self.reference_dir('jfa_feature.hdf5')) + self.assertTrue(projected.is_similar_to(projected_reference)) + + # train the enroller + t = tempfile.mkstemp('enroll.hdf5', prefix='frltest_')[1] + tool.train_enroller(self.train_gmm_stats(self.reference_dir('jfa_feature.hdf5'), count=5, minimum=-5., maximum=5.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('jfa_enroller.hdf5')) + tool.load_enroller(self.reference_dir('jfa_enroller.hdf5')) + # compare JFA enroller with reference + enroller_reference = bob.learn.em.JFABase(bob.io.base.HDF5File(t)) + enroller_reference.ubm = new_machine + if not _mac_os: + self.assertTrue(tool.m_jfabase.is_similar_to(enroller_reference)) + os.remove(t) + + # enroll model with the projected feature + model = tool.enroll([projected]) + if regenerate_refs: + model.save(bob.io.base.HDF5File(self.reference_dir('jfa_model.hdf5'), 'w')) + # assert that the model is ok + reference_model = tool.read_model(self.reference_dir('jfa_model.hdf5')) + self.assertTrue(model.is_similar_to(reference_model)) + + # check that the read_probe function reads the requested data + probe = tool.read_probe(self.reference_dir('jfa_feature.hdf5')) + self.assertTrue(probe.is_similar_to(projected)) + + # score with projected feature and compare to the weird reference score ... 
+ sim = tool.score(model, probe) + self.assertAlmostEqual(sim, 0.25473213400211353) + # score with a concatenation of the probe + # self.assertAlmostEqual(tool.score_for_multiple_probes(model, [probe, probe]), sim) + + + def test09_plda(self): + # read input + feature = facereclib.utils.load(self.input_dir('linearize.hdf5')) + # assure that the config file is readable + tool = self.config('pca+plda') + self.assertTrue(isinstance(tool, facereclib.tools.PLDA)) + + # here, we use a reduced complexity for test purposes + tool = facereclib.tools.PLDA( + subspace_dimension_of_f = 2, + subspace_dimension_of_g = 2, + subspace_dimension_pca = 10, + plda_training_iterations = 1, + INIT_SEED = seed_value, + ) + self.assertFalse(tool.performs_projection) + self.assertTrue(tool.requires_enroller_training) + + # train the projector + t = tempfile.mkstemp('pca+plda.hdf5', prefix='frltest_')[1] + tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=20, minimum=0., maximum=255.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('pca+plda_enroller.hdf5')) + + # load the projector file + tool.load_enroller(self.reference_dir('pca+plda_enroller.hdf5')) + # compare the resulting machines + test_file = bob.io.base.HDF5File(t) + test_file.cd('/pca') + pca_machine = bob.learn.linear.Machine(test_file) + test_file.cd('/plda') + plda_machine = bob.learn.em.PLDABase(test_file) + # TODO: compare the PCA machines + #self.assertEqual(pca_machine, tool.m_pca_machine) + # TODO: compare the PLDA machines + #self.assertEqual(plda_machine, tool.m_plda_base_machine) + os.remove(t) + + # enroll model + model = tool.enroll([feature]) + if regenerate_refs: + model.save(bob.io.base.HDF5File(self.reference_dir('pca+plda_model.hdf5'), 'w')) + # TODO: compare the models with the reference + #reference_model = tool.read_model(self.reference_dir('pca+plda_model.hdf5')) + #self.assertEqual(model, reference_model) + + # score + sim = tool.score(model, feature) + self.assertAlmostEqual(sim, 0.) + # score with a concatenation of the probe + self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature, feature]), 0.) + + + def test10_ivector(self): + # NOTE: This test will fail when it is run solely. Please always run all Tool tests in order to assure that they work. 
+ # read input + feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) + # assure that the config file is readable + tool = self.config('ivector') + self.assertTrue(isinstance(tool, facereclib.tools.IVector)) + + # here, we use a reduced complexity for test purposes + tool = facereclib.tools.IVector( + number_of_gaussians = 2, + subspace_dimension_of_t=2, # T subspace dimension + update_sigma = False, # TODO Do another test with True + tv_training_iterations = 1, # Number of EM iterations for the JFA training + variance_threshold = 1e-5, + INIT_SEED = seed_value + ) + self.assertTrue(tool.performs_projection) + self.assertTrue(tool.requires_projector_training) + self.assertTrue(tool.use_projected_features_for_enrollment) + self.assertFalse(tool.split_training_features_by_client) + self.assertFalse(tool.requires_enroller_training) + + # train the projector + t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] + tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) + if regenerate_refs: + import shutil + shutil.copy2(t, self.reference_dir('ivector_projector.hdf5')) + + # load the projector file + tool.load_projector(self.reference_dir('ivector_projector.hdf5')) + + # compare ISV projector with reference + hdf5file = bob.io.base.HDF5File(t) + hdf5file.cd('Projector') + projector_reference = bob.learn.em.GMMMachine(hdf5file) + self.assertTrue(tool.m_ubm.is_similar_to(projector_reference)) + + # compare ISV enroller with reference + hdf5file.cd('/') + hdf5file.cd('Enroller') + enroller_reference = bob.learn.em.IVectorMachine(hdf5file) + enroller_reference.ubm = projector_reference + if not _mac_os: + self.assertTrue(tool.m_tv.is_similar_to(enroller_reference)) + os.remove(t) + + # project the feature + projected = tool.project(feature) + if regenerate_refs: + tool.save_feature(projected, self.reference_dir('ivector_feature.hdf5')) + + # compare the projected feature with the reference + projected_reference = tool.read_feature(self.reference_dir('ivector_feature.hdf5')) + self.assertTrue(numpy.allclose(projected,projected_reference)) + + # enroll model with the projected feature + # This is not yet supported + # model = tool.enroll([projected[0]]) + # if regenerate_refs: + # model.save(bob.io.HDF5File(self.reference_dir('ivector_model.hdf5'), 'w')) + #reference_model = tool.read_model(self.reference_dir('ivector_model.hdf5')) + # compare the IVector model with the reference + #self.assertTrue(model.is_similar_to(reference_model)) + + # check that the read_probe function reads the correct values + probe = tool.read_probe(self.reference_dir('ivector_feature.hdf5')) + self.assertTrue(numpy.allclose(probe,projected)) + + # score with projected feature and compare to the weird reference score ... 
+ # This in not implemented yet + + # score with a concatenation of the probe + # This is not implemented yet +""" diff --git a/bob/bio/base/test/utils.py b/bob/bio/base/test/utils.py index 15a1f9c5..1791feb4 100644 --- a/bob/bio/base/test/utils.py +++ b/bob/bio/base/test/utils.py @@ -26,20 +26,27 @@ from nose.plugins.skip import SkipTest import logging logger = logging.getLogger("bob.bio.base") -def random_training_set(shape, count, minimum = 0, maximum = 1): + +def random_array(shape, minimum = 0, maximum = 1, seed = 42): + # generate a random sequence of features + numpy.random.seed(seed) + return numpy.random.random(shape) * (maximum - minimum) + minimum + +def random_training_set(shape, count, minimum = 0, maximum = 1, seed = 42): """Returns a random training set with the given shape and the given number of elements.""" # generate a random sequence of features - numpy.random.seed(42) + numpy.random.seed(seed) return [numpy.random.random(shape) * (maximum - minimum) + minimum for i in range(count)] -def random_training_set_by_id(shape, count = 50, minimum = 0, maximum = 1): +def random_training_set_by_id(shape, count = 50, minimum = 0, maximum = 1, seed = 42): # generate a random sequence of features - numpy.random.seed(42) + numpy.random.seed(seed) train_set = [] for i in range(count): train_set.append([numpy.random.random(shape) * (maximum - minimum) + minimum for j in range(count)]) return train_set + def grid_available(test): '''Decorator to check if the gridtk is present, before running the test''' @functools.wraps(test) -- GitLab