diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000000000000000000000000000000000000..0bdb952d0e577d1660c1426b8fe2ee566d94c8fd --- /dev/null +++ b/.travis.yml @@ -0,0 +1,33 @@ +language: python +matrix: + include: + - python: 2.7 + env: + - secure: Ghi03teJBFaPPMiuJ9Q+SxK98VjjB92loJJARjqo0jrTLYiHchssgsMMatgaHhMCso1LjSC2ylAMERB1crONyFDbyiZ3DipSE+WolEMVuNu4OfV8Ny2vcQy8m49mXKWKBviXmeu0vuFOD09pEg4y5EAjlhZ028dKyP8if4oM25Wt33RjAPb2M4y1rUmxqFZvXRyVuPVDZq4RdyaoYOPO6S/j3sEGsY3L/tVgW3zC2fRuKxXhnNiRe6/rh/avCqavFC9B3m9bQwUV6DoHv0NfsYnzjUbAw1KG32FppGDlbe+7TmqxQBWkt+TO8aerMDswMvOzhpKbmjbqAKTGEEa5VlNa+H2fj2qhsJhKMuoH7q72MA22mNnDm2W+Dx4mXWklq5t8RIgI02woxTEUYYwmzWUH2pWU9JuCsYj6FK5zvCWdhWMwvWpQaidk2AduohWXcilNp7cr3P8RZJeP+uUlWFS8iFpJS9C4IL/mg8guq95ha4jffgsEdvJG6FYrmM8Z7tscaCZ9t5GX79U0wjDndpOwUaBDNSZRNEBfopJ8HWMVDOiKCIvSr6UJ6F1ViJXxmsbeZCENhDrECY4UxqErgxzKRPqGG6RKzl/JEZh55xuu5rrvXmBc8I9tKExd4yWZNwwl4WrIAvG7aG4SYjCuZ6iN3bfirWRV85zdlw1CXq4= + - secure: P3Ax20L1CSQrYd/VCiYuI1sLUSWTnKrQ/VV9Zt8Px2t4FV5GXmbf8NRV1peLD0/yNSBBu1871F441YZqI7N7RQGZEcgjRqd41dPPFaZDXUUVaOvHx2Rq1y2d/2TwGkwsxZpgD5WNW1fOTKPFqJf49xzhAqUr8mJsGehN4KPHONDgf0zFwJx+vkW92Csnm7GDwIPcYRjeL5umKyEJlVxFNkIJdAxAIP0uCOAuqnrj6Vs9mGXIujaMerledHiGozJaXvksgSX4rs2EtTht8NUdm1u3zC3SzzEDe1MEjI/pb5t6BMuBZUa7b61kijrf0jiSAAhWw4TREGS0mg6XnZeZJSdlqMfglFKmPACMaq1GE5IEmjyGPHnO5vrxkDRnzmao5PfaJGNWNnfGdtMnd1JqjDpio9dUFWUa4AJieocZ7GbhFS9ZLWS1xM8vlnttjETxww5zbqC/QL4+xeVMoW3Cq7yeTIE2ELjKc3K+zT2KMb1iusbtOBwB7KfINEjcoIsQnlxrve+4//GuBrZ+Uq+8IhzKj1GTErkEEWaxlNdgf0YJ5uOkKOFIylUPM4cAqXnqSNjcRhUKBd5G4xC5w4vNWCzl1Yn7T0gmK2pOLDh8Ygs1CkquLLZodtjWOmIAE2vgFCeigiNtrhD9eKTCfXH5fYFKXCgCUDgPhh2yrPWJWYc= + - BOB_DOCUMENTATION_SERVER=https://www.idiap.ch/software/bob/docs/latest/bioidiap/%s/master + - BOB_UPLOAD_WHEEL="--universal" + - python: 3.3 + - python: 3.4 + - python: 3.5 +before_install: +- sudo add-apt-repository -y ppa:biometrics/bob +- sudo apt-get update -qq +- sudo apt-get install -qq --force-yes libjpeg8-dev libnetpbm10-dev libpng12-dev libtiff4-dev libgif-dev libboost-all-dev libblitz1-dev libhdf5-serial-dev libvl-dev dvipng texlive-latex-base texlive-latex-extra texlive-math-extra texlive-latex-recommended texlive-fonts-recommended libatlas-dev libatlas-base-dev liblapack-dev gfortran +- pip install --upgrade pip +- pip install --find-links https://www.idiap.ch/software/bob/wheels/travis/ --use-wheel sphinx nose numpy scipy matplotlib coverage +- pip install --find-links https://www.idiap.ch/software/bob/wheels/travis/ --use-wheel --pre -r requirements.txt coveralls +install: +- python bootstrap-buildout.py +- ./bin/buildout buildout:develop=. buildout:extensions=bob.buildout buildout:auto-checkout= +script: +- ./bin/python -c 'import pkg_resources; from bob.bio.base import get_config; print(get_config())' +- ./bin/coverage run --source=bob.bio.base ./bin/nosetests -sv +- ./bin/sphinx-build -b doctest doc sphinx +- ./bin/sphinx-build -b html doc sphinx +after_success: +- coveralls +- wget https://raw.githubusercontent.com/bioidiap/bob.extension/master/scripts/upload-{sphinx,wheel}.sh +- chmod a+x upload-sphinx.sh upload-wheel.sh +- ./upload-sphinx.sh +- ./upload-wheel.sh diff --git a/COPYING b/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..94a9ed024d3859793618152ea559a168bbcbb5e2 --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. 
<http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. 
+ + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. 
You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. 
+ + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. 
+ + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. 
+ + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000000000000000000000000000000000000..f065ce1df2fab519ffc9e088d8ee9bed352052b9
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+include README.rst bootstrap-buildout.py buildout.cfg COPYING version.txt requirements.txt
+recursive-include doc *.py *.rst
+recursive-include bob/fusion/base/test/data *-dev *-eval
diff --git a/README.rst b/README.rst
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..91d29d0003c85b26a3072da408e01e91393dbf76 100644
--- a/README.rst
+++ b/README.rst
@@ -0,0 +1,47 @@
+.. vim: set fileencoding=utf-8 :
+.. Amir Mohammadi <amir.mohammadi@idiap.ch>
+
+.. image:: http://img.shields.io/badge/docs-stable-yellow.png
+   :target: http://pythonhosted.org/bob.fusion.base/index.html
+.. image:: http://img.shields.io/badge/docs-latest-orange.png
+   :target: https://www.idiap.ch/software/bob/docs/latest/bioidiap/bob.fusion.base/master/index.html
+.. image:: http://travis-ci.org/bioidiap/bob.fusion.base.svg?branch=master
+   :target: https://travis-ci.org/bioidiap/bob.fusion.base?branch=master
+.. image:: https://coveralls.io/repos/bioidiap/bob.fusion.base/badge.svg?branch=master
+   :target: https://coveralls.io/r/bioidiap/bob.fusion.base?branch=master
+.. image:: https://img.shields.io/badge/github-master-0000c0.png
+   :target: https://github.com/bioidiap/bob.fusion.base/tree/master
+.. image:: http://img.shields.io/pypi/v/bob.fusion.base.png
+   :target: https://pypi.python.org/pypi/bob.fusion.base
+.. image:: http://img.shields.io/pypi/dm/bob.fusion.base.png
+   :target: https://pypi.python.org/pypi/bob.fusion.base
+
+====================================================================
+ Scripts to run score fusion in biometric recognition experiments
+====================================================================
+
+This package is part of the ``bob.fusion`` packages, which allow you to run comparable and reproducible score fusion in biometric recognition experiments.
+
+This package contains the basic functionality to run score fusion in biometric recognition experiments.
+It provides a generic ``./bin/fuse.py`` script that takes several parameters, including:
+
+* A list of development score files
+* A classification algorithm
+
+All steps of the score fusion system are specified through configuration files.
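+
+For example, a simple fusion can also be driven programmatically with the ``Weighted_Sum`` algorithm shipped in this package (a minimal sketch; the score matrix and the weights below are purely illustrative)::
+
+  import numpy
+  from bob.fusion.base.algorithm import Weighted_Sum
+
+  # one row per comparison, one column per system to be fused
+  scores = numpy.array([[0.2, 0.4],
+                        [0.9, 0.8]])
+  fused = Weighted_Sum(weights=[0.5, 0.5]).decision_function(scores)
+  # fused == [0.3, 0.85], the per-row weighted average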
+
+
+Installation
+------------
+To create your own working package using one or more of the ``bob.fusion`` packages, please follow the `Installation Instructions <http://pythonhosted.org/bob.fusion.base/installation.html>`__ of the ``bob.fusion`` packages.
+
+To install this package -- alone or together with other `Packages of Bob <https://github.com/idiap/bob/wiki/Packages>`_ -- please read the `Installation Instructions <https://github.com/idiap/bob/wiki/Installation>`__.
+For Bob_ to work properly, some dependent packages must be installed.
+Please make sure that you have read the `Dependencies <https://github.com/idiap/bob/wiki/Dependencies>`_ page for your operating system.
+
+Documentation
+-------------
+For further documentation on this package, please read the `Stable Version <http://pythonhosted.org/bob.fusion.base/index.html>`_ or the `Latest Version <https://www.idiap.ch/software/bob/docs/latest/bioidiap/bob.fusion.base/master/index.html>`_ of the documentation.
+For a list of tutorials on this or the other packages of Bob_, or information on submitting issues, asking questions and starting discussions, please visit its website.
+
+.. _bob: https://www.idiap.ch/software/bob
diff --git a/bob/fusion/base/__init__.py b/bob/fusion/base/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..b36187c58512bb6deb58d64649386fc9658d828d 100644
--- a/bob/fusion/base/__init__.py
+++ b/bob/fusion/base/__init__.py
@@ -0,0 +1,12 @@
+# from .utils import *
+from . import algorithm
+from . import tools
+
+def get_config():
+  """Returns a string containing the configuration information.
+  """
+  import bob.extension
+  return bob.extension.get_config(__name__)
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/fusion/base/algorithm/Algorithm.py b/bob/fusion/base/algorithm/Algorithm.py
index 7691216f460dbaeb2965a9514a967560f5de26a6..80200e6203ccd5fc7b9d0cc8563a6fc622b538cd 100644
--- a/bob/fusion/base/algorithm/Algorithm.py
+++ b/bob/fusion/base/algorithm/Algorithm.py
@@ -3,8 +3,12 @@
 from __future__ import division
 from __future__ import absolute_import

-from ..utils import grouping
+from ..tools import grouping
 import numpy as np
+import pickle
+
+import bob.core
+logger = bob.core.log.setup("bob.fusion.base")


 class Algorithm(object):
@@ -12,39 +16,51 @@ class Algorithm(object):

   def __init__(self,
               scores=None,
-              normalizer=None,
               performs_training=False,
               trainer_scores=None,
-              trainer=None,
-              machine=None,
+              has_closed_form_solution=False,
+              preprocessors=None,
               *args,
               **kwargs
               ):
+    """
+
+    kwargs : ``key=value`` pairs
+      A list of keyword arguments to be written in the
+      :py:meth:`__str__` function.
+ +""" super(Algorithm, self).__init__() self.scores = scores self.performs_training = performs_training self.trainer_scores = trainer_scores - self.trainer = trainer - self.machine = machine - self.normalizer = normalizer - - def normalize(self, scores): - if self.normalizer is None: - return scores - else: - if not self.normalizer.trained: - train_scores = np.vstack(self.trainer_scores) - self.normalizer.train(train_scores) - return self.normalizer(scores) + self.has_closed_form_solution = has_closed_form_solution + self.preprocessors = preprocessors + self._kwargs = kwargs + self._kwargs['preprocessors'] = preprocessors + + def preprocess(self, scores): + if self.preprocessors is not None: + for i, (preprocessor, trained) in enumerate(self.preprocessors): + if not trained: + train_scores = np.vstack(self.trainer_scores) + preprocessor.fit(train_scores) + self.preprocessors[i] = (preprocessor, True) + scores = self.preprocessor.transform(scores) + return scores def train(self): negatives, positives = self.trainer_scores - negatives = self.normalize(negatives) - positives = self.normalize(positives) - self.trainer_scores = (negatives, positives) + train_scores = np.vstack(self.trainer_scores) + train_scores = self.preprocess(train_scores) + neg_len = negatives.shape[0] + y = np.zeros((train_scores.shape[0],), dtype='bool') + y[neg_len:] = True + self.fit(train_scores, y) def __call__(self): - self.scores = self.normalize(self.scores) + self.scores = self.preprocess(self.scores) + return self.decision_function(self.scores) def plot_boundary_decision(self, score_labels, threshold, label_system1='', @@ -57,6 +73,8 @@ class Algorithm(object): y_pad=0.5, alpha=0.75, legends=None, + i1=0, + i2=1, **kwargs ): ''' @@ -69,22 +87,34 @@ class Algorithm(object): ''' if legends is None: legends = ['Impostor', 'Genuine'] + + if self.scores.shape[1] > 2: + raise NotImplementedError( + "Currently plotting the decision boundary for more than two systems " + "is not supported.") + import matplotlib.pyplot as plt plt.gca() # this is necessary for subplots to work. - X = self.scores + X = self.scores[:, [i1, i2]] Y = score_labels - x_min, x_max = X[:, 0].min() - x_pad, X[:, 0].max() + x_pad - y_min, y_max = X[:, 1].min() - y_pad, X[:, 1].max() + y_pad + x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad + y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad h1 = abs(x_max - x_min) / resolution h2 = abs(y_max - y_min) / resolution - xx, yy = np.meshgrid( - np.arange(x_min, x_max, h1), np.arange(y_min, y_max, h2)) - self.scores = np.c_[xx.ravel(), yy.ravel()] - Z = (self() > threshold).reshape(xx.shape) - self.scores = X + if self.has_closed_form_solution and self.scores.shape[1] == 2: + x1 = np.arange(x_min, x_max, h1) + x2 = self.closed_form(x1, threshold) + plt.plot(x1, x2, cmap=plt.cm.viridis) + else: + xx, yy = np.meshgrid( + np.arange(x_min, x_max, h1), np.arange(y_min, y_max, h2)) + scores = self.scores + self.scores = np.c_[xx.ravel(), yy.ravel()] + Z = (self() > threshold).reshape(xx.shape) + self.scores = scores - contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis) + contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis) if do_grouping: positives, negatives = X[Y], X[np.logical_not(Y)] @@ -104,3 +134,26 @@ class Algorithm(object): plt.axhline(thres_system2, color='red') return contourf + + def __str__(self): + """__str__() -> info + + This function returns all parameters of this class (and its derived class). 
+
+    **Returns:**
+
+    info : str
+      A string containing the full information of all parameters of this
+      (and the derived) class.
+    """
+    return "%s(%s)" % (str(self.__class__), ", ".join(
+      ["%s=%s" % (key, value) for key, value in
+       self._kwargs.items() if value is not None]))
+
+  def save(self, model_file):
+    with open(model_file, "wb") as f:
+      pickle.dump(self, f)
+
+  def load(self, model_file):
+    with open(model_file, "rb") as f:
+      return pickle.load(f)
diff --git a/bob/fusion/base/algorithm/LLR.py b/bob/fusion/base/algorithm/LLR.py
deleted file mode 100644
index 0a3ede4c02392fbfc36757e002a77e76b760ed0f..0000000000000000000000000000000000000000
--- a/bob/fusion/base/algorithm/LLR.py
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env python
-
-from __future__ import division
-from __future__ import absolute_import
-
-import bob.learn.linear
-
-from .Algorithm import Algorithm
-
-import logging
-logger = logging.getLogger("bob.fusion.base")
-
-
-class LLR(Algorithm):
-  """docstring for LLR"""
-
-  def __init__(self,
-               *args, **kwargs):
-    super(LLR, self).__init__(
-      performs_training=True, *args, **kwargs)
-    self.trainer = self.trainer if self.trainer else \
-      bob.learn.linear.CGLogRegTrainer()
-
-  def train(self):
-    super(LLR, self).train()
-    (negatives, positives) = self.trainer_scores
-    # Trainning the LLR machine
-    self.machine = self.trainer.train(negatives, positives)
-
-  def __call__(self):
-    super(LLR, self).__call__()
-    # Applying the LLR in the input data
-    return self.machine(self.scores).flatten()
diff --git a/bob/fusion/base/algorithm/LogisticRegression.py b/bob/fusion/base/algorithm/LogisticRegression.py
new file mode 100644
index 0000000000000000000000000000000000000000..38dc5c5a7d337c258414af3b6b94f18b920b42bd
--- /dev/null
+++ b/bob/fusion/base/algorithm/LogisticRegression.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+from __future__ import division
+from __future__ import absolute_import
+
+import bob.learn.linear
+from sklearn.linear_model import LogisticRegression as LogisticRegression_SK
+
+from .Algorithm import Algorithm
+
+import bob.core
+logger = bob.core.log.setup("bob.fusion.base")
+
+
+class LogisticRegression(Algorithm, LogisticRegression_SK):
+  __doc__ = LogisticRegression_SK.__doc__
+
+  def __init__(self,
+               *args, **kwargs):
+    Algorithm.__init__(
+      self, performs_training=True,
+      has_closed_form_solution=True, *args, **kwargs)
+    sk_kwargs = {}
+    for key, value in kwargs.items():
+      if key in ['penalty', 'dual', 'tol', 'C', 'fit_intercept',
+                 'intercept_scaling', 'class_weight',
+                 'random_state', 'solver', 'max_iter',
+                 'multi_class', 'verbose', 'warm_start', 'n_jobs']:
+        sk_kwargs[key] = value
+
+    LogisticRegression_SK.__init__(self, **sk_kwargs)
+
+  def closed_form(self, x1, y):
+    # for a binary problem, scikit-learn stores the weights with shape
+    # (1, n_features); index the first row to get the per-system weights
+    w1 = self.coef_[0][0]
+    w2 = self.coef_[0][1]
+    x2 = (y - self.intercept_[0] - x1*w1)/w2
+    return x2
diff --git a/bob/fusion/base/algorithm/MLP.py b/bob/fusion/base/algorithm/MLP.py
index 32edb7a65556338adbd0c45f2efb57a85ea8e86d..4dac5742de84080274ad41951d1c2b9b079e806b 100644
--- a/bob/fusion/base/algorithm/MLP.py
+++ b/bob/fusion/base/algorithm/MLP.py
@@ -9,52 +9,79 @@
 import bob.core.random
 import numpy

 from .Algorithm import Algorithm
+from .mlp_train_helper import MLPTrainer

-import logging
-logger = logging.getLogger("bob.fusion.base")
+import bob.core
+logger = bob.core.log.setup("bob.fusion.base")


 class MLP(Algorithm):
-  """docstring for MLP"""
+  """This MLP is implemented using the bob tools.
+  It may change its API and functionality in the future.
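+  For example, fusing scores of two systems with the default
+  ``hidden_layers=[3]`` results in an ``mlp_shape`` of ``[2, 3, 1]``.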
+ """ def __init__(self, - mlp_shape=None, + n_systems=2, + hidden_layers=None, trainer_devel=None, seed=None, *args, **kwargs): + # chicken and egg :D call __init__ twice. + super(MLP, self).__init__(performs_training=True, *args, **kwargs) + if hidden_layers is None: + hidden_layers = [3] + if self.scores is not None: + n_systems = numpy.asarray(self.scores).shape[1] + self.mlp_shape = [n_systems] + hidden_layers + [1] super(MLP, self).__init__( - performs_training=True, *args, **kwargs) - if mlp_shape is not None: - self.mlp_shape = mlp_shape - elif self.scores is not None: - self.mlp_shape = (numpy.asarray(self.scores).shape[1], 3, 1) - else: - self.mlp_shape = (2, 3, 1) - self.machine = self.machine if self.machine else \ + performs_training=True, mlp_shape=self.mlp_shape, seed=seed, + *args, **kwargs) + self.seed = seed + self.trainer_devel = trainer_devel if trainer_devel else \ + self.trainer_scores + self._my_kwargs = kwargs + self.initialize() + + def initialize(self, force=False): + self.machine = self.machine if self.machine and not force else \ bob.learn.mlp.Machine(self.mlp_shape) - if seed is not None: - self.rng = bob.core.random.mt19937(seed) + if self.seed is not None: + self.rng = bob.core.random.mt19937(self.seed) self.machine.randomize(rng=self.rng) else: self.machine.randomize() - self.trainer = self.trainer if self.trainer else \ + self.trainer = self.trainer if self.trainer and not force else \ bob.learn.mlp.RProp(1, bob.learn.mlp.SquareError( self.machine.output_activation), machine=self.machine, train_biases=False) - self.trainer_devel = trainer_devel if trainer_devel else \ + + def prepare_train(self): + self.trainer_devel = self.trainer_devel if self.trainer_devel else \ self.trainer_scores - self.train_helper = bob.learn.mlp.MLPTrainer( + self.train_helper = MLPTrainer( train=self.trainer_scores[::-1], devel=self.trainer_devel[::-1], mlp_shape=self.mlp_shape, machine=self.machine, trainer=self.trainer, - **kwargs) + **self._my_kwargs) - def train(self): - super(MLP, self).train() + def fit(self, train_scores, y): + n_systems = train_scores.shape[1] + if n_systems != self.mlp_shape[0]: + logger.warn( + 'Reinitializing the MLP machine with the shape of {} to {} to match th' + 'e input size.'.format(self.mlp_shape, [n_systems]+self.mlp_shape[1:])) + self.mlp_shape = [n_systems] + self.mlp_shape[1:] + self.n_systems = n_systems + self.hidden_layers = self.mlp_shape[1:-1] + self.initialize(force=True) + self.trainer_scores = (train_scores[numpy.logical_not(y)], train_scores[y]) + self.prepare_train() self.machine, self.analyzer = self.train_helper() - def __call__(self): - super(MLP, self).__call__() - return self.machine(self.scores).flatten() + def decision_function(self, scores): + scores = self.machine(scores) + if scores.ndim == 2 and scores.shape[1] == 1: + scores = scores.ravel() + return scores diff --git a/bob/fusion/base/algorithm/Weighted_Sum.py b/bob/fusion/base/algorithm/Weighted_Sum.py index 2170b9b1704633f154c9b4726e1efea5a1d63177..540fcc6b9e712e7c10203c78295846b353a36af0 100644 --- a/bob/fusion/base/algorithm/Weighted_Sum.py +++ b/bob/fusion/base/algorithm/Weighted_Sum.py @@ -7,8 +7,8 @@ import numpy from .Algorithm import Algorithm -import logging -logger = logging.getLogger("bob.fusion.base") +import bob.core +logger = bob.core.log.setup("bob.fusion.base") class Weighted_Sum(Algorithm): @@ -16,12 +16,15 @@ class Weighted_Sum(Algorithm): def __init__(self, weights=None, *args, **kwargs): super(Weighted_Sum, self).__init__( - performs_training=False, 
+      performs_training=False, weights=weights,
+      has_closed_form_solution=True, *args, **kwargs)
     self.weights = weights

-  def __call__(self):
-    super(Weighted_Sum, self).__call__()
+  def decision_function(self, scores):
     if self.weights is None:
-      return numpy.mean(self.scores, axis=1)
+      return numpy.mean(scores, axis=1)
     else:
-      return numpy.sum(self.scores * self.weights, axis=1)
+      return numpy.sum(scores * self.weights, axis=1)
+
+  def closed_form(self, x1, y):
+    # boundary of the unweighted two-system mean: (x1 + x2) / 2 == y
+    return 2*y - x1
diff --git a/bob/fusion/base/algorithm/__init__.py b/bob/fusion/base/algorithm/__init__.py
index 6523676f274495de6586595f2eb5d5af3bdc7e35..b119df913d31c817a07e39956bb3a8d9ff884601 100644
--- a/bob/fusion/base/algorithm/__init__.py
+++ b/bob/fusion/base/algorithm/__init__.py
@@ -1,7 +1,8 @@
 from .Algorithm import Algorithm
 from .Weighted_Sum import Weighted_Sum
-from .LLR import LLR
+from .LogisticRegression import LogisticRegression
 from .MLP import MLP
+from .MLPClassifier import MLPClassifier

 # gets sphinx autodoc done right - don't remove it
 __all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/fusion/base/algorithm/mlp_train_helper.py b/bob/fusion/base/algorithm/mlp_train_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ae2f8b43628e902273c435f6c01010cac4219dd
--- /dev/null
+++ b/bob/fusion/base/algorithm/mlp_train_helper.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Andre Anjos <andre.dos.anjos@gmail.com>
+# Tue 16 Aug 14:39:22 2011
+
+"""Trains an MLP using RProp
+"""
+
+import sys
+import bob.measure
+import bob.learn.mlp
+import bob.learn.activation
+import numpy
+import numpy.linalg as la
+
+import bob.core
+logger = bob.core.log.setup("bob.fusion.base")
+
+
+class Analyzer(object):
+  """Can analyze results at the end of a run.
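+  It tracks per-epoch RMSE and FAR/FRR on both the training and the development sets.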
It can also save itself""" + + def gentargets(self, data, target): + t = numpy.vstack(data.shape[0] * (target,)) + return t, numpy.empty_like(t) + + def __init__(self, train, devel, target): + super(Analyzer, self).__init__() + + self.train = train + self.devel = devel + self.target = target + + real_train = self.gentargets(train[0], target[0]) + attack_train = self.gentargets(train[1], target[1]) + real_devel = self.gentargets(devel[0], target[0]) + attack_devel = self.gentargets(devel[1], target[1]) + + self.train_target = (real_train[0], attack_train[0]) + self.train_output = (real_train[1], attack_train[1]) + self.devel_target = (real_devel[0], attack_devel[0]) + self.devel_output = (real_devel[1], attack_devel[1]) + + self.data = {} # where to store variables that will be saved + self.data['epoch'] = [] + self.data['real-train-rmse'] = [] + self.data['attack-train-rmse'] = [] + self.data['real-devel-rmse'] = [] + self.data['attack-devel-rmse'] = [] + self.data['train-far'] = [] + self.data['train-frr'] = [] + self.data['devel-far'] = [] + self.data['devel-frr'] = [] + + def __call__(self, machine, iteration): + """Computes current outputs and evaluate performance""" + + def evalperf(outputs, targets): + return la.norm(bob.measure.rmse(outputs, targets)) + + for k in range(len(self.train)): + machine(self.train[k], self.train_output[k]) + machine(self.devel[k], self.devel_output[k]) + + self.data['real-train-rmse'].append(evalperf(self.train_output[0], + self.train_target[0])) + self.data['attack-train-rmse'].append(evalperf(self.train_output[1], + self.train_target[1])) + self.data['real-devel-rmse'].append(evalperf(self.devel_output[0], + self.devel_target[0])) + self.data['attack-devel-rmse'].append(evalperf(self.devel_output[1], + self.devel_target[1])) + + thres = bob.measure.eer_threshold(self.train_output[1][:, 0], + self.train_output[0][:, 0]) + train_far, train_frr = bob.measure.farfrr( + self.train_output[1][:, 0], self.train_output[0][:, 0], thres) + devel_far, devel_frr = bob.measure.farfrr( + self.devel_output[1][:, 0], self.devel_output[0][:, 0], thres) + + self.data['train-far'].append(train_far) + self.data['train-frr'].append(train_frr) + self.data['devel-far'].append(devel_far) + self.data['devel-frr'].append(devel_frr) + + self.data['epoch'].append(iteration) + + def str_header(self): + """Returns the string header of what I can print""" + return "iteration: RMSE:real/RMSE:attack (EER:%) ( train | devel )" + + def __str__(self): + """Returns a string representation of myself""" + + retval = "%d: %.4e/%.4e (%.2f%%) | %.4e/%.4e (%.2f%%)" % \ + (self.data['epoch'][-1], + self.data['real-train-rmse'][-1], + self.data['attack-train-rmse'][-1], + 50 * + (self.data['train-far'][-1] + self.data['train-frr'][-1]), + self.data['real-devel-rmse'][-1], + self.data['attack-devel-rmse'][-1], + 50 * + (self.data['devel-far'][-1] + self.data['devel-frr'][-1]), + ) + return retval + + def save(self, f): + """Saves my contents on the bob.io.base.HDF5File you give me.""" + + for k, v in self.data.items(): + f.set(k, numpy.array(v)) + + def load(self, f): + """Loads my contents from the bob.io.base.HDF5File you give me.""" + + for k in f.paths(): + self.data[k.strip('/')] = f.read(k) + + +class MLPTrainer(object): + """Creates a randomly initialized MLP and train it using the input data. + + This method will create an MLP with the shape (`mlp_shape`) that is + provided. 
+    Then it will initialize the MLP with random weights and biases and
+    train it for as long as the development set shows improvement,
+    stopping as soon as it no longer improves or the maximum number of
+    iterations is reached.
+
+    Performance is evaluated on both the training and development sets
+    during training, every 'epoch' training steps. Each training step
+    is composed of `batch_size` elements drawn randomly from all classes
+    available in the training set.
+
+    Keyword Parameters:
+
+    train
+        An iterable (tuple or list) containing two arraysets: the first
+        contains the real accesses (target = +1) and the second contains
+        the attacks (target = -1).
+
+    devel
+        An iterable (tuple or list) containing two arraysets: the first
+        contains the real accesses (target = +1) and the second contains
+        the attacks (target = -1).
+
+    batch_size
+        An integer defining the number of samples per training iteration.
+        Good values are greater than 100.
+
+    mlp_shape
+        Shape of the MLP machine.
+
+    epoch
+        The number of training steps to wait until the error is measured.
+
+    max_iter
+        If given (and different than zero), the maximum number of training
+        steps to train the network for. If set to 0, just train until the
+        development set reaches the valley (in RMSE terms).
+
+    no_improvements
+        If given (and different than zero), the maximum number of
+        iterations to continue trying for in case there are no more
+        improvements on the development set average RMSE term. This
+        value, if set, should not be too small, as that may cause a
+        too-early stop. Values in the order of 10% of the max_iter
+        should be fine.
+
+    """
+
+    def __init__(self,
+                 train,
+                 devel,
+                 mlp_shape,
+                 batch_size=1,
+                 epoch=1,
+                 max_iter=1000,
+                 no_improvements=0,
+                 valley_condition=0.9,
+                 machine=None,
+                 trainer=None,
+                 *args, **kwargs
+                 ):
+        super(MLPTrainer, self).__init__()
+        self.train = train
+        self.devel = devel
+        self.mlp_shape = mlp_shape
+        self.batch_size = batch_size
+        self.epoch = epoch
+        self.max_iter = max_iter
+        self.no_improvements = no_improvements
+        self.valley_condition = valley_condition
+        self.machine = machine if machine else \
+            bob.learn.mlp.Machine(self.mlp_shape)
+        self.machine.randomize()
+        self.trainer = trainer if trainer else \
+            bob.learn.mlp.RProp(batch_size, bob.learn.mlp.SquareError(
+                self.machine.output_activation), machine=self.machine,
+                train_biases=False)
+
+    def __call__(self):
+        return self.make_mlp()
+
+    def make_mlp(self):
+
+        # the valley condition is a fraction of the minimum devel. set
+        # RMSE detected so far
+        VALLEY_CONDITION = self.valley_condition
+        last_devel_rmse = 0
+
+        def stop_condition(min_devel_rmse, devel_rmse, last_devel_rmse):
+            """Detects a valley in the devel set RMSE"""
+            stop = (VALLEY_CONDITION * devel_rmse) > (min_devel_rmse) or \
+                abs(devel_rmse - last_devel_rmse) / \
+                (devel_rmse + last_devel_rmse) < 0.00001
+            return stop
+
+        target = [
+            numpy.array([+1], 'float64'),
+            numpy.array([-1], 'float64'),
+        ]
+
+        logger.info("Preparing analysis framework...")
+        analyze = Analyzer(self.train, self.devel, target)
+
+        logger.info("Setting up training infrastructure...")
+        shuffler = bob.learn.mlp.DataShuffler(self.train, target)
+        shuffler.auto_stdnorm = True
+
+        # shape = (shuffler.data_width, nhidden, 1)
+        # machine = bob.learn.mlp.Machine(self.shape)
+        # machine.activation = bob.learn.activation.HyperbolicTangent() # the
+        # defaults are anyway Hyperbolic Tangent for hidden and output layer
+        # machine.randomize()
+        self.machine.input_subtract, self.machine.input_divide = \
+            shuffler.stdnorm()
+
+        # trainer = bob.learn.mlp.RProp(
+        #     self.batch_size,
+        #     bob.learn.mlp.SquareError(machine.output_activation), machine)
+
+        self.trainer.train_biases = True
+
+        continue_training = True
+        iteration = 0
+        min_devel_rmse = sys.float_info.max
+        self.best_machine = bob.learn.mlp.Machine(self.machine)  # deep copy
+        best_machine_iteration = 0
+
+        # temporary training data selected by the shuffler
+        shuffled_input = numpy.ndarray(
+            (self.batch_size, shuffler.data_width), 'float64')
+        shuffled_target = numpy.ndarray(
+            (self.batch_size, shuffler.target_width), 'float64')
+
+        logger.info(analyze.str_header())
+
+        try:
+            while continue_training:
+
+                analyze(self.machine, iteration)
+
+                logger.info(analyze)
+
+                avg_devel_rmse = (analyze.data['real-devel-rmse'][-1] +
+                                  analyze.data['attack-devel-rmse'][-1]) / 2
+
+                # save best network, record minima
+                if avg_devel_rmse < min_devel_rmse:
+                    best_machine_iteration = iteration
+                    self.best_machine = bob.learn.mlp.Machine(
+                        self.machine)  # deep copy
+                    logger.info("%d: Saving best network so far with average "
+                                "devel. RMSE = %.4e", iteration,
+                                avg_devel_rmse)
+                    min_devel_rmse = avg_devel_rmse
+                    logger.info("%d: New valley stop threshold set to %.4e",
+                                iteration, avg_devel_rmse / VALLEY_CONDITION)
+                if stop_condition(min_devel_rmse, avg_devel_rmse,
+                                  last_devel_rmse):
+                    logger.info("%d: Stopping on devel valley condition",
+                                iteration)
+                    logger.info("%d: Best machine happened on iteration %d "
+                                "with average devel. RMSE of %.4e", iteration,
+                                best_machine_iteration, min_devel_rmse)
+
+                    break
+                last_devel_rmse = avg_devel_rmse
+
+                # train for 'epoch' times w/o stopping for tests
+                for i in range(self.epoch):
+                    shuffler(data=shuffled_input, target=shuffled_target)
+                    self.trainer.batch_size = len(shuffled_input)
+                    self.trainer.train(
+                        self.machine, shuffled_input, shuffled_target)
+                    iteration += 1
+
+                if self.max_iter > 0 and iteration > self.max_iter:
+                    logger.info("%d: Stopping on max. iterations condition",
+                                iteration)
+                    logger.info("%d: Best machine happened on iteration %d with average "
+                                "devel.
RMSE of %.4e", iteration, best_machine_iteration, + min_devel_rmse) + break + + if self.no_improvements > 0 and \ + (iteration - best_machine_iteration) > self.no_improvements: + logger.info("%d: Stopping because did not observe MLP performance " + "improvements for %d iterations", + iteration, iteration - best_machine_iteration) + logger.info("%d: Best machine happened on iteration %d with average " + "devel. RMSE of %.4e", + iteration, best_machine_iteration, min_devel_rmse) + break + + except KeyboardInterrupt: + logger.info("%d: User interruption captured - exiting in a clean way", + iteration) + logger.info("%d: Best machine happened on iteration %d " + "with average devel. RMSE of %.4e", + iteration, best_machine_iteration, min_devel_rmse) + + analyze(self.machine, iteration) + + return self.best_machine, analyze diff --git a/bob/fusion/base/config/__init__.py b/bob/fusion/base/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/fusion/base/config/algorithm/__init__.py b/bob/fusion/base/config/algorithm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bob/fusion/base/config/algorithm/llr.py b/bob/fusion/base/config/algorithm/llr.py new file mode 100644 index 0000000000000000000000000000000000000000..d5aa17c13bfb4f43426207c62c11047d74ae24f2 --- /dev/null +++ b/bob/fusion/base/config/algorithm/llr.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import bob.fusion.base +import sklearn.preprocessing + +algorithm = bob.fusion.base.algorithm.LogisticRegression( + preprocessors=[(sklearn.preprocessing.RobustScaler(), False)]) diff --git a/bob/fusion/base/config/algorithm/mean.py b/bob/fusion/base/config/algorithm/mean.py new file mode 100644 index 0000000000000000000000000000000000000000..9f6944d8799ffa8027b92b8b02c4d95d290fb699 --- /dev/null +++ b/bob/fusion/base/config/algorithm/mean.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python + +import bob.fusion.base + +algorithm = bob.fusion.base.algorithm.Weighted_Sum() diff --git a/bob/fusion/base/config/algorithm/mlp.py b/bob/fusion/base/config/algorithm/mlp.py new file mode 100644 index 0000000000000000000000000000000000000000..dd02849afd2c3e05976f791c368dd8a0122ebc13 --- /dev/null +++ b/bob/fusion/base/config/algorithm/mlp.py @@ -0,0 +1,7 @@ +#!/usr/bin/env python + +import bob.fusion.base +import sklearn.preprocessing + +algorithm = bob.fusion.base.algorithm.MLP( + preprocessors=[(sklearn.preprocessing.RobustScaler(), False)]) diff --git a/bob/fusion/base/config/algorithm/plr_2.py b/bob/fusion/base/config/algorithm/plr_2.py new file mode 100644 index 0000000000000000000000000000000000000000..20da6fa53ac82d73bad4797c4d84713484ccdfea --- /dev/null +++ b/bob/fusion/base/config/algorithm/plr_2.py @@ -0,0 +1,8 @@ +#!/usr/bin/env python + +import bob.fusion.base +import sklearn.preprocessing + +algorithm = bob.fusion.base.algorithm.LogisticRegression( + preprocessors=[(sklearn.preprocessing.RobustScaler(), False), + (sklearn.preprocessing.PolynomialFeatures(degree=2), False)]) diff --git a/bob/fusion/base/normalizer/MinMaxNorm.py b/bob/fusion/base/normalizer/MinMaxNorm.py deleted file mode 100644 index ff51a99a0b24ea760f8a705ddddf601edb33a802..0000000000000000000000000000000000000000 --- a/bob/fusion/base/normalizer/MinMaxNorm.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import absolute_import - -import numpy - -from 
.Normalizer import Normalizer - -import logging -logger = logging.getLogger("bob.fusion.base") - - -class MinMaxNorm(Normalizer): - """ - the MinMaxNorm score normalization - Normalize the score in an specific interval - - @param lowBound The lower bound - @param upperBound The upper bound - """ - - def __init__(self, - lowerBound=-1, - upperBound=1, - *args, - **kwargs - ): - super(MinMaxNorm, self).__init__(performs_training=True) - self.lowerBound = lowerBound - self.upperBound = upperBound - - def train(self, scores): - super(MinMaxNorm, self).train(scores) - self.mins = numpy.min(scores, axis=0) - self.maxs = numpy.max(scores, axis=0) - - def __call__(self, scores): - scores = super(MinMaxNorm, self).__call__(scores) - denom = self.maxs - self.mins - normalizedScores = (self.upperBound - self.lowerBound) * \ - (scores - self.mins) / denom + self.lowerBound - - return normalizedScores diff --git a/bob/fusion/base/normalizer/Normalizer.py b/bob/fusion/base/normalizer/Normalizer.py deleted file mode 100644 index a483a4ac78aff6aeb29b8f2117638a91bb004d02..0000000000000000000000000000000000000000 --- a/bob/fusion/base/normalizer/Normalizer.py +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import absolute_import - - -class Normalizer(object): - """docstring for Normalizer""" - - def __init__(self, - performs_training=False, - trained=False, - *args, - **kwargs - ): - super(Normalizer, self).__init__() - self.performs_training = performs_training - if not self.performs_training: - trained = True - self.trained = trained - - def train(self, scores): - """ - Trains the Normalizer - calls to this function changes the self.trained to True - @param scores numpy.array of scores to be used for training - """ - self.trained = True - - def __call__(self, scores): - """ - Normalizes the scores - @param scores numpy.array to be normalized - @return numpy.array with the normalized scores. - """ - return scores diff --git a/bob/fusion/base/normalizer/ZNorm.py b/bob/fusion/base/normalizer/ZNorm.py deleted file mode 100644 index 6801f65de065e1fbcb1553216cf77523c8dec0fc..0000000000000000000000000000000000000000 --- a/bob/fusion/base/normalizer/ZNorm.py +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import absolute_import - -import numpy - -from .Normalizer import Normalizer - -import logging -logger = logging.getLogger("bob.fusion.base") - - -class ZNorm(Normalizer): - """the ZNorm score normalization""" - - def __init__(self, - *args, - **kwargs - ): - super(ZNorm, self).__init__(performs_training=True) - - def train(self, scores): - super(ZNorm, self).train(scores) - self.avg = numpy.average(scores, axis=0) - self.std = numpy.std(scores, axis=0) - - def __call__(self, scores): - scores = super(ZNorm, self).__call__(scores) - return (scores - self.avg) / self.std diff --git a/bob/fusion/base/script/__init__.py b/bob/fusion/base/script/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c9ab444a2db44da8d8103e68cc46174e97c7672a --- /dev/null +++ b/bob/fusion/base/script/__init__.py @@ -0,0 +1 @@ +from . 
import fuse diff --git a/bob/fusion/base/script/fuse.py b/bob/fusion/base/script/fuse.py new file mode 100755 index 0000000000000000000000000000000000000000..29ce8e85a29e1f446e75878f0dd5d2f3b8fad3f3 --- /dev/null +++ b/bob/fusion/base/script/fuse.py @@ -0,0 +1,107 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Amir Mohammadi <amir.mohammadi@idiap.ch> + +from __future__ import print_function, absolute_import, division + +import os +import numpy as np + +from bob.io.base import create_directories_safe +from bob.measure.load import load_score, get_all_scores,\ + get_negatives_positives_all +from bob.bio.base import utils + +from ..tools import parse_arguments, write_info + +import bob.core +logger = bob.core.log.setup("bob.fusion.base") + + +def fuse(args, command_line_parameters): + """Do the actual fusion.""" + algorithm = args.algorithm + if args.score_type == 4: + fmt = '%s %s %s %.6f' + else: + fmt = '%s %s %s %s %.6f' + + write_info(args, command_line_parameters) + + # load the scores + score_lines_list_dev = [load_score(path, ncolumns=args.score_type) + for path in args.dev_files] + scores_dev = get_all_scores(score_lines_list_dev) + trainer_scores = get_negatives_positives_all(score_lines_list_dev) + if args.eval_files: + score_lines_list_eval = [load_score(path, ncolumns=args.score_type) + for path in args.eval_files] + scores_eval = get_all_scores(score_lines_list_eval) + else: + score_lines_list_eval = [] + scores_eval = [] + + # check if score lines are consistent + if not args.skip_check: + score_lines0 = score_lines_list_dev[0] + for score_lines in score_lines_list_dev[1:]: + assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id'])) + assert(np.all(score_lines['real_id'] == score_lines0['real_id'])) + if args.eval_files: + score_lines0 = score_lines_list_eval[0] + for score_lines in score_lines_list_eval[1:]: + assert(np.all(score_lines['claimed_id'] == score_lines0['claimed_id'])) + assert(np.all(score_lines['real_id'] == score_lines0['real_id'])) + + # train the model + if utils.check_file(args.model_file, args.force, 1000): + logger.info( + "- Fusion: model '%s' already exists.", args.model_file) + algorithm = algorithm.load(args.model_file) + algorithm.trainer_scores = trainer_scores + elif algorithm.performs_training: + algorithm.trainer_scores = trainer_scores + algorithm.train() + algorithm.save(args.model_file) + + # fuse the scores (dev) + if utils.check_file(args.fused_dev_file, args.force, 1000): + logger.info( + "- Fusion: scores '%s' already exists.", args.fused_dev_file) + else: + algorithm.scores = scores_dev + fused_scores_dev = algorithm() + score_lines = np.array(score_lines_list_dev[0]) + score_lines['score'] = fused_scores_dev + create_directories_safe(os.path.dirname(args.fused_dev_file)) + np.savetxt(args.fused_dev_file, score_lines, fmt=fmt) + + # fuse the scores (eval) + if args.eval_files: + if utils.check_file(args.fused_eval_file, args.force, 1000): + logger.info( + "- Fusion: scores '%s' already exists.", args.fused_eval_file) + else: + algorithm.scores = scores_eval + fused_scores_eval = algorithm() + score_lines = np.array(score_lines_list_eval[0]) + score_lines['score'] = fused_scores_eval + create_directories_safe(os.path.dirname(args.fused_eval_file)) + np.savetxt(args.fused_eval_file, score_lines, fmt=fmt) + + +def main(command_line_parameters=None): + """Executes the main function""" + try: + # do the command line parsing + args = parse_arguments(command_line_parameters) + + # perform face verification test + 
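+        # fuse() loads or trains the fusion algorithm on the development
+        # scores and then writes the fused development (and, if requested,
+        # evaluation) score files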
+        fuse(args, command_line_parameters)
+    except Exception as e:
+        # log any exception as an error (i.e., to get a time stamp)
+        logger.error("During the execution, an exception was raised: %s" % e)
+        raise
+
+if __name__ == "__main__":
+    main()
diff --git a/bob/fusion/base/normalizer/__init__.py b/bob/fusion/base/tools/__init__.py
similarity index 52%
rename from bob/fusion/base/normalizer/__init__.py
rename to bob/fusion/base/tools/__init__.py
index 51a93668a5eb373cf6701afb129607fe351741bd..133f547ba9ea1471b235b89117bd3d43504ed2be 100644
--- a/bob/fusion/base/normalizer/__init__.py
+++ b/bob/fusion/base/tools/__init__.py
@@ -1,6 +1,5 @@
-from .Normalizer import Normalizer
-from .ZNorm import ZNorm
-from .MinMaxNorm import MinMaxNorm
+from .command_line import *
+from .plotting import *

 # gets sphinx autodoc done right - don't remove it
 __all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/fusion/base/tools/command_line.py b/bob/fusion/base/tools/command_line.py
new file mode 100644
index 0000000000000000000000000000000000000000..a72a12ba9a85c481f02f6810af1c3d19f35f51bc
--- /dev/null
+++ b/bob/fusion/base/tools/command_line.py
@@ -0,0 +1,256 @@
+#!/usr/bin/env python
+
+"""Score Fusion script
+"""
+
+import argparse
+import os
+import sys
+import pkg_resources
+from bob.bio.base import tools, utils
+from bob.io.base import create_directories_safe
+
+import bob.core
+logger = bob.core.log.setup("bob.fusion.base")
+# note the trailing comma: this must be a tuple, not a plain string
+valid_keywords = ('algorithm',)
+
+
+def _get_entry_points(keyword, strip=[]):
+    """Returns the list of entry points for registered resources with the
+    given keyword."""
+    return [entry_point for entry_point in
+            pkg_resources.iter_entry_points('bob.fusion.' + keyword)
+            if not entry_point.name.startswith(tuple(strip))]
+
+
+def resource_keys(keyword, exclude_packages=[], strip=['dummy']):
+    """Reads and returns all resources that are registered with the given
+    keyword. Entry points from the given ``exclude_packages`` are ignored."""
+    return sorted([entry_point.name for entry_point in
+                   _get_entry_points(keyword, strip) if
+                   entry_point.dist.project_name not in exclude_packages])
+
+
+def command_line_parser(description=__doc__, exclude_resources_from=[]):
+    parser = argparse.ArgumentParser(
+        description=description,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument('-i', '--dev-files', required=True,
+                        nargs='+', help="A list of score files of "
+                        "the development set.")
+    parser.add_argument('-I', '--eval-files', nargs='+',
+                        help="A list of score files of the evaluation set; "
+                        "if given, it must be the same number of files "
+                        "as the --dev-files.")
+    parser.add_argument('-o', '--fused-dev-file', required=True,
+                        help='The fused development score file.')
+    parser.add_argument(
+        '-O', '--fused-eval-file', help='The fused evaluation score file.')
+    parser.add_argument('--score-type', choices=[4, 5], default=4,
+                        help='The format in which the scores are provided.')
+    parser.add_argument('--skip-check', action='store_true',
+                        help='If provided, score files are not checked '
+                        'for consistency')
+    parser.add_argument('-s', '--save-directory',
+                        help='The directory to save '
+                        'the experiment artifacts.
If not given, the directory' + ' of fused-dev-file will be used.') + + config_group = parser.add_argument_group( + 'Parameters defining the experiment', ' Most of these parameters can be a' + ' registered resource, a configuration file, or even a string that ' + 'defines a newly created object') + config_group.add_argument( + '-a', '--algorithm', metavar='x', required=True, + help='Fusion; registered algorithms are: %s' % resource_keys( + 'algorithm', exclude_resources_from)) + config_group.add_argument( + '-m', '--imports', metavar='LIB', nargs='+', + default=['bob.fusion.base'], help='If one of your configuration files is' + ' an actual command, please specify the lists of' + ' required libraries (imports) to execute this command') + + flag_group = parser.add_argument_group( + 'Flags that change the behavior of the experiment') + bob.core.log.add_command_line_option(flag_group) + flag_group.add_argument('-F', '--force', action='store_true', + help='Force to erase former data if already exist') + + return { + 'main': parser, + 'config': config_group, + 'flag': flag_group + } + + +def initialize(parsers, command_line_parameters=None, skips=[]): + + args = parsers['main'].parse_args(command_line_parameters) + + # logging + bob.core.log.set_verbosity_level(logger, args.verbose) + + # load configuration resources + args.algorithm = load_resource( + args.algorithm, 'algorithm', imports=args.imports) + + # set base directories + if args.save_directory is None: + args.save_directory = os.path.dirname(args.fused_dev_file) + + # result files + args.info_file = os.path.join(args.save_directory, 'Experiment.info') + + args.model_file = os.path.join(args.save_directory, 'Model.pkl') + + return args + + +def write_info(args, command_line_parameters): + """Writes information about the current experimental setup into a file + specified on command line. + + **Parameters:** + + args : namespace + The interpreted command line arguments as returned by the + :py:func:`initialize` function. + + command_line_parameters : [str] or ``None`` + The command line parameters that have been interpreted. + If ``None``, the parameters specified by the user on command line + are considered. + + executable : str + The name of the executable (such as ``'./bin/verify.py'``) that is used + to run the experiments. + """ + if command_line_parameters is None: + command_line_parameters = sys.argv[1:] + executable = sys.argv[0] + # write configuration + try: + create_directories_safe(os.path.dirname(args.info_file)) + with open(args.info_file, 'w') as f: + f.write("Command line:\n") + f.write( + tools.command_line([executable] + command_line_parameters) + "\n\n") + f.write("Configuration:\n\n") + f.write("Algorithm:\n%s\n\n" % args.algorithm) + except IOError: + logger.error( + "Could not write the experimental setup into file '%s'", args.info_file) + + +def parse_arguments(command_line_parameters, exclude_resources_from=[]): + """This function parses the given options (which by default are the command + line options). 
If exclude_resources_from is specified (as a list), the + resources from the given packages are not listed in the help message.""" + # set up command line parser + parsers = command_line_parser(exclude_resources_from=exclude_resources_from) + + # now that we have set up everything, get the command line arguments + return initialize(parsers, command_line_parameters) + + +def load_resource(resource, keyword, imports=['bob.fusion.base'], + preferred_package=None): + """Loads the given resource that is registered with the given keyword. + The resource can be: + + 1. a resource as defined in the setup.py + 2. a configuration file + 3. a string defining the construction of an object. If imports are required + for the construction of this object, they can be given as list of strings. + + **Parameters:** + + resource : str + Any string interpretable as a resource (see above). + + keyword : str + A valid resource keyword, can be one of :py:attr:`valid_keywords`. + + imports : [str] + A list of strings defining which modules to import, when constructing new + objects (option 3). + + preferred_package : str or ``None`` + When several resources with the same name are found in different packages + (e.g., in different ``bob.bio`` or other packages), this specifies the + preferred package to load the resource from. If not specified, the + extension that is **not** from ``bob.bio`` is selected. + + **Returns:** + + resource : object + The resulting resource object is returned, either read from file or + resource, or created newly. + """ + + # first, look if the resource is a file name + if os.path.isfile(resource): + return utils.read_config_file(resource, keyword) + + if keyword not in valid_keywords: + raise ValueError("The given keyword '%s' is not valid. " + "Please use one of %s!" 
% (str(keyword), + str(valid_keywords))) + + # now, we check if the resource is registered as an entry point in the + # resource files + entry_points = [entry_point for entry_point in _get_entry_points( + keyword) if entry_point.name == resource] + + if len(entry_points): + if len(entry_points) == 1: + return entry_points[0].load() + else: + # TODO: extract current package name and use this one, if possible + + # Now: check if there are only two entry points, and one is from the + # bob.fusion.base, then use the other one + index = -1 + if preferred_package is not None: + for i, p in enumerate(entry_points): + if p.dist.project_name == preferred_package: + index = i + break + + if index == -1: + # by default, use the first one that is not from bob.bio + for i, p in enumerate(entry_points): + if not p.dist.project_name.startswith('bob.bio'): + index = i + break + + if index != -1: + logger.debug("RESOURCES: Using the resource '%s' from '%s', " + "and ignoring the one from '%s'", + resource, entry_points[index].module_name, + entry_points[1 - index].module_name) + return entry_points[index].load() + else: + logger.warn("Under the desired name '%s', there are multiple " + "entry points defined, we return the first one: %s", + resource, + [entry_point.module_name for entry_point in entry_points]) + return entry_points[0].load() + + # if the resource is neither a config file nor an entry point, + # just execute it as a command + try: + # first, execute all import commands that are required + for i in imports: + exec("import %s" % i) + # now, evaluate the resource (re-evaluate if the resource is still a + # string) + while isinstance(resource, str): + resource = eval(resource) + return resource + + except Exception as e: + raise ImportError("The given command line option '%s' is neither a " + "resource for a '%s', nor an existing configuration" + " file, nor could be interpreted as a command " + "(error: %s)" % (resource, keyword, str(e))) diff --git a/bob/fusion/base/utils.py b/bob/fusion/base/tools/plotting.py similarity index 100% rename from bob/fusion/base/utils.py rename to bob/fusion/base/tools/plotting.py index 1b3a4fd3ef5126c67733a16e23a554093dc6f57d..4459912c020efa9f3fa47020ccfdab7c14bbf79b 100644 --- a/bob/fusion/base/utils.py +++ b/bob/fusion/base/tools/plotting.py @@ -1,7 +1,7 @@ #!/usr/bin/env python -import bob.learn.em import numpy +import bob.learn.em def grouping(negatives, positives, diff --git a/bootstrap.py b/bootstrap-buildout.py similarity index 80% rename from bootstrap.py rename to bootstrap-buildout.py index a4599211f741c468cd37a29861d1c7f2c3a641d1..a629566735c1c84fbec7173f0f30015b2b432512 100644 --- a/bootstrap.py +++ b/bootstrap-buildout.py @@ -25,10 +25,7 @@ import tempfile from optparse import OptionParser -__version__ = '2015-07-01' -# See zc.buildout's changelog if this version is up to date. - -tmpeggs = tempfile.mkdtemp(prefix='bootstrap-') +tmpeggs = tempfile.mkdtemp() usage = '''\ [DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options] @@ -43,9 +40,8 @@ this script from going over the network. 
''' parser = OptionParser(usage=usage) -parser.add_option("--version", - action="store_true", default=False, - help=("Return bootstrap.py version.")) +parser.add_option("-v", "--version", help="use a specific zc.buildout version") + parser.add_option("-t", "--accept-buildout-test-releases", dest='accept_buildout_test_releases', action="store_true", default=False, @@ -63,33 +59,25 @@ parser.add_option("-f", "--find-links", parser.add_option("--allow-site-packages", action="store_true", default=False, help=("Let bootstrap.py use existing site packages")) -parser.add_option("--buildout-version", - help="Use a specific zc.buildout version") parser.add_option("--setuptools-version", - help="Use a specific setuptools version") -parser.add_option("--setuptools-to-dir", - help=("Allow for re-use of existing directory of " - "setuptools versions")) + help="use a specific setuptools version") -options, args = parser.parse_args() -if options.version: - print("bootstrap.py version %s" % __version__) - sys.exit(0) +options, args = parser.parse_args() ###################################################################### # load/install setuptools try: + if options.allow_site_packages: + import setuptools + import pkg_resources from urllib.request import urlopen except ImportError: from urllib2 import urlopen ez = {} -if os.path.exists('ez_setup.py'): - exec(open('ez_setup.py').read(), ez) -else: - exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez) +exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez) if not options.allow_site_packages: # ez_setup imports site, which adds site packages @@ -100,19 +88,12 @@ if not options.allow_site_packages: # We can't remove these reliably if hasattr(site, 'getsitepackages'): for sitepackage_path in site.getsitepackages(): - # Strip all site-packages directories from sys.path that - # are not sys.prefix; this is because on Windows - # sys.prefix is a site-package directory. - if sitepackage_path != sys.prefix: - sys.path[:] = [x for x in sys.path - if sitepackage_path not in x] + sys.path[:] = [x for x in sys.path if sitepackage_path not in x] setup_args = dict(to_dir=tmpeggs, download_delay=0) if options.setuptools_version is not None: setup_args['version'] = options.setuptools_version -if options.setuptools_to_dir is not None: - setup_args['to_dir'] = options.setuptools_to_dir ez['use_setuptools'](**setup_args) import setuptools @@ -129,12 +110,7 @@ for path in sys.path: ws = pkg_resources.working_set -setuptools_path = ws.find( - pkg_resources.Requirement.parse('setuptools')).location - -# Fix sys.path here as easy_install.pth added before PYTHONPATH cmd = [sys.executable, '-c', - 'import sys; sys.path[0:0] = [%r]; ' % setuptools_path + 'from setuptools.command.easy_install import main; main()', '-mZqNxd', tmpeggs] @@ -147,8 +123,11 @@ find_links = os.environ.get( if find_links: cmd.extend(['-f', find_links]) +setuptools_path = ws.find( + pkg_resources.Requirement.parse('setuptools')).location + requirement = 'zc.buildout' -version = options.buildout_version +version = options.version if version is None and not options.accept_buildout_test_releases: # Figure out the most recent final version of zc.buildout. 
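+    # (i.e., scan the index and keep the highest version that is not an
+    # alpha/beta/candidate release, unless -t/--accept-buildout-test-releases
+    # was given)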
import setuptools.package_index @@ -188,7 +167,7 @@ if version: cmd.append(requirement) import subprocess -if subprocess.call(cmd) != 0: +if subprocess.call(cmd, env=dict(os.environ, PYTHONPATH=setuptools_path)) != 0: raise Exception( "Failed to execute command:\n%s" % repr(cmd)[1:-1]) diff --git a/buildout.cfg b/buildout.cfg index 8269dedb1b862c0a73480275751df90044558080..4184edc32de25d3f054a905fcb058ac689a015fe 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -1,23 +1,32 @@ ; vim: set fileencoding=utf-8 : -; Amir Mohammadi <amir.mohammadi@idiap.ch> -; Mon Mar 21 16:51:06 CEST 2016 +; Manuel Guenther <manuel.guenther@idiap.ch> +; Thu Oct 9 16:51:06 CEST 2014 [buildout] parts = scripts eggs = bob.fusion.base - ipython + gridtk + ipdb extensions = bob.buildout mr.developer - auto-checkout = * - -develop = ../bob.extension - ../bob.learn.linear - ../bob.learn.activation - ../bob.learn.mlp - ../bob.measure - . +develop = . + src/bob.measure +; src/bob.blitz +; src/bob.core +; src/bob.io.base +; src/bob.learn.activation +; src/bob.math +; src/bob.learn.linear +; src/bob.sp +; src/bob.learn.em +; src/bob.measure +; src/bob.db.base +; src/bob.db.verification.utils +; src/bob.db.verification.filelist +; src/bob.db.atnt +; src/bob.io.image ; options for bob.buildout debug = true @@ -25,11 +34,21 @@ verbose = true newest = false [sources] -bob.extension = git https://github.com/bioidiap/bob.extension -bob.learn.linear = git https://github.com/bioidiap/bob.learn.linear -bob.learn.activation = git https://github.com/bioidiap/bob.learn.activation -bob.learn.mlp = git https://github.com/bioidiap/bob.learn.mlp +;bob.extension = git https://github.com/bioidiap/bob.extension +;bob.blitz = git https://github.com/bioidiap/bob.blitz +;bob.core = git https://github.com/bioidiap/bob.core +;bob.io.base = git https://github.com/bioidiap/bob.io.base +;bob.learn.activation = git https://github.com/bioidiap/bob.learn.activation +;bob.math = git https://github.com/bioidiap/bob.math +;bob.sp = git https://github.com/bioidiap/bob.sp +;bob.learn.linear = git https://github.com/bioidiap/bob.learn.linear +;bob.learn.em = git https://github.com/bioidiap/bob.learn.em bob.measure = git https://github.com/bioidiap/bob.measure +;bob.db.base = git https://github.com/bioidiap/bob.db.base +;bob.db.verification.utils = git https://github.com/bioidiap/bob.db.verification.utils +;bob.db.verification.filelist = git https://github.com/bioidiap/bob.db.verification.filelist +;bob.db.atnt = git https://github.com/bioidiap/bob.db.atnt +;bob.io.image = git https://github.com/bioidiap/bob.io.image [scripts] recipe = bob.buildout:scripts diff --git a/doc/conf.py b/doc/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..22958db4da9789a3eab05df5c2ec4a13255083fc --- /dev/null +++ b/doc/conf.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Andre Anjos <andre.anjos@idiap.ch> +# Mon 13 Aug 2012 12:38:15 CEST +# +# Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland + +import os +import sys +import glob +import pkg_resources + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +# -- General configuration ----------------------------------------------------- + +# If your documentation needs a minimal Sphinx version, state it here. 
+#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be extensions +# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +extensions = [ + 'sphinx.ext.todo', + 'sphinx.ext.coverage', + 'sphinx.ext.pngmath', + 'sphinx.ext.ifconfig', + 'sphinx.ext.autodoc', + 'sphinx.ext.autosummary', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + ] + +# The viewcode extension appeared only on Sphinx >= 1.0.0 +import sphinx +if sphinx.__version__ >= "1.0": + extensions.append('sphinx.ext.viewcode') + +# Always includes todos +todo_include_todos = True + +# If we are on OSX, the 'dvipng' path maybe different +dvipng_osx = '/opt/local/libexec/texlive/binaries/dvipng' +if os.path.exists(dvipng_osx): pngmath_dvipng = dvipng_osx + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix of source filenames. +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'Bobs interface for running biometric recognition experiments' +import time +copyright = u'%s, Idiap Research Institute' % time.strftime('%Y') + +# Grab the setup entry +distribution = pkg_resources.require('bob.bio.base')[0] + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = distribution.version +# The full version, including alpha/beta/rc tags. +release = distribution.version + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +#language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['links.rst'] + +# The reST default role (used for this markup: `text`) to use for all documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + + +# -- Options for HTML output --------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +if sphinx.__version__ >= "1.0": + html_theme = 'nature' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# "<project> v<release> documentation". 
+#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = 'bob' + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +html_logo = 'img/logo.png' + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +html_favicon = 'img/favicon.ico' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +#html_static_path = ['_static'] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a <link> tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Output file base name for HTML help builder. +htmlhelp_basename = 'bob_bio_base_doc' + + +# -- Options for LaTeX output -------------------------------------------------- + +# The paper size ('letter' or 'a4'). +latex_paper_size = 'a4' + +# The font size ('10pt', '11pt' or '12pt'). +latex_font_size = '10pt' + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, author, documentclass [howto/manual]). +latex_documents = [ + ('index', 'bob_bio_base.tex', u'Bob', + u'Biometrics Group, Idiap Research Institute', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +latex_logo = '' + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Additional stuff for the LaTeX preamble. +#latex_preamble = '' + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + +# Included after all input documents +rst_epilog = '' + +# -- Options for manual page output -------------------------------------------- + +# One entry per manual page. 
List of tuples +# (source start file, name, description, authors, manual section). +man_pages = [ + ('index', 'bob.bio.base', u'Base tools to run biometric recognition experiments', [u'Idiap Research Institute'], 1) +] + +# Default processing flags for sphinx +autoclass_content = 'both' +autodoc_member_order = 'bysource' +autodoc_default_flags = ['members', 'inherited-members', 'show-inheritance'] + +# For inter-documentation mapping: +from bob.extension.utils import link_documentation +intersphinx_mapping = link_documentation(['python', 'numpy', 'bob.bio.face', 'bob.bio.speaker', 'bob.bio.gmm', 'bob.bio.video', 'bob.bio.csu', 'bob.bio.spear', 'gridtk', 'bob.db.youtube']) + + +def skip(app, what, name, obj, skip, options): + # Do not skip the __call__ and the __str__ functions as we have special implementations for them. + if name in ("__str__", "__call__"): + return False + return skip + +# getting dictionaries printed nicely. +# see: http://stackoverflow.com/questions/7250659/python-code-to-generate-part-of-sphinx-documentation-is-it-possible/18143318#18143318 +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +from sphinx.util.compat import Directive +from docutils import nodes, statemachine + +class ExecDirective(Directive): + """Execute the specified python code and insert the output into the document""" + has_content = True + + def run(self): + oldStdout, sys.stdout = sys.stdout, StringIO() + + tab_width = self.options.get('tab-width', self.state.document.settings.tab_width) + source = self.state_machine.input_lines.source(self.lineno - self.state_machine.input_offset - 1) + + try: + exec('\n'.join(self.content)) + text = sys.stdout.getvalue() + lines = statemachine.string2lines(text, tab_width, convert_whitespace=True) + self.state_machine.insert_input(lines, source) + return [] + except Exception: + return [nodes.error(None, nodes.paragraph(text = "Unable to execute python code at %s:%d:" % (os.path.basename(source), self.lineno)), nodes.paragraph(text = str(sys.exc_info()[1])))] + finally: + sys.stdout = oldStdout + +def setup(app): + app.connect("autodoc-skip-member", skip) + app.add_directive('exec', ExecDirective) diff --git a/doc/experiments.rst b/doc/experiments.rst new file mode 100644 index 0000000000000000000000000000000000000000..77dbf262dc46867285daf38f1dcb9d79b1e36616 --- /dev/null +++ b/doc/experiments.rst @@ -0,0 +1,235 @@ +.. vim: set fileencoding=utf-8 : +.. author: Manuel Günther <manuel.guenther@idiap.ch> +.. date: Thu Sep 20 11:58:57 CEST 2012 + +.. _bob.bio.base.experiments: + + +========================================= +Running Biometric Recognition Experiments +========================================= + +Now, you are almost ready to run your first biometric recognition experiment. +Just a little bit of theory, and then: off we go. + + +Structure of a Biometric Recognition Experiment +----------------------------------------------- + +Each biometric recognition experiment that is run with ``bob.bio`` is divided into several steps. +The steps are: + +1. Data preprocessing: Raw data is preprocessed, e.g., for face recognition, faces are detected, images are aligned and photometrically enhanced. +2. Feature extractor training: Feature extraction parameters are learned. +3. Feature extraction: Features are extracted from the preprocessed data. +4. Feature projector training: Parameters of a subspace-projection of the features are learned. +5. Feature projection: The extracted features are projected into a subspace. +6. 
Model enroller training: The way to enroll models from extracted or projected features is learned.
+7. Model enrollment: One model is enrolled from the features of one or more images.
+8. Scoring: The verification scores between various models and probe features are computed.
+9. Evaluation: The computed scores are evaluated and curves are plotted.
+
+These 9 steps are divided into four distinct groups, which are discussed in more detail later:
+
+* Preprocessing (only step 1)
+* Feature extraction (steps 2 and 3)
+* Biometric recognition (steps 4 to 8)
+* Evaluation (step 9)
+
+The communication between two steps is file-based, usually using a binary HDF5_ interface, which is implemented in the :py:class:`bob.io.base.HDF5File` class.
+The output of one step usually serves as the input of the subsequent step(s).
+Depending on the algorithm, some of the steps are not applicable/available.
+E.g., most of the feature extractors do not need a special training step, and some algorithms do not require a subspace projection.
+In these cases, the corresponding steps are skipped.
+``bob.bio`` takes care that the correct files are always forwarded to the subsequent steps.
+
+
+.. _running_part_1:
+
+Running Experiments (part I)
+----------------------------
+
+To run an experiment, we provide a generic script ``./bin/verify.py``, which is highly parametrizable.
+To get a complete list of command line options, please run:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --help
+
+Whoops, that's a lot of options.
+But, no worries, most of them have proper default values.
+
+.. note::
+   Sometimes, command line options have a long version starting with ``--`` and a short one starting with a single ``-``.
+   In this section, only the long names of the arguments are listed; please refer to ``./bin/verify.py --help`` (or short: ``./bin/verify.py -h``) for the abbreviations.
+
+There are five command line options, which are required and sufficient to define the complete biometric recognition experiment.
+These five options are:
+
+* ``--database``: The database to run the experiments on
+* ``--preprocessor``: The data preprocessor
+* ``--extractor``: The feature extractor
+* ``--algorithm``: The recognition algorithm
+* ``--sub-directory``: A descriptive name for your experiment, which will serve as a sub-directory
+
+The first four parameters, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm``, can be specified in several different ways.
+For the start, we will use only the registered :ref:`Resources <bob.bio.base.resources>`.
+These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the *configuration*).
+To get a list of registered resources, please call:
+
+.. code-block:: sh
+
+   $ ./bin/resources.py
+
+Each package in ``bob.bio`` defines its own resources, and the printed list of registered resources differs according to the installed packages.
+If only ``bob.bio.base`` is installed, no databases and no preprocessors will be listed.
+
+.. note::
+   You will also find some ``grid`` resources being listed.
+   These types of resources will be explained :ref:`later <running_in_parallel>`.
+
+Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments.
+
+One command line option, which is not required, but recommended, is the ``--verbose`` option.
+By default, the algorithms are set up to execute quietly, and only errors are reported.
+To change this behavior, you can use the ``--verbose`` option several times to increase the verbosity level to show:
+
+1) Warning messages
+2) Informative messages
+3) Debug messages
+
+When running experiments, my personal preference is verbose level 2, which can be enabled by ``--verbose --verbose``, or using the short version: ``-vv``.
+So, a typical biometric recognition experiment (in this case, face recognition) could look something like:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv
+
+.. note::
+   To be able to run exactly the command line from above, you need to have :ref:`bob.bio.face <bob.bio.face>` installed.
+
+Before running an experiment, it is recommended to add the ``--dry-run`` option, so that it will only print which steps would be executed, without actually executing them, and to make sure that everything works as expected.
+
+The final result of the experiment will be one (or more) score file(s).
+Usually, they will be called something like ``scores-dev``.
+By default, you can find them in a sub-directory of the ``result`` directory, but you can change this location using the ``--result-directory`` command line option.
+
+.. note::
+   At Idiap_, the default result directory differs; see ``./bin/verify.py --help`` for your directory.
+
+
+.. _bob.bio.base.evaluate:
+
+Evaluating Experiments
+----------------------
+
+After the experiment has finished successfully, one or more text files containing all the scores are written.
+
+To evaluate the experiment, you can use the generic ``./bin/evaluate.py`` script, which provides functionality for all prevalent evaluation types, such as CMC, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF.
+Additionally, a combination of different algorithms can be plotted into the same files.
+Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and possible legends for the plots (in the same order) using the ``--legends`` option, and the corresponding plots will be generated.
+For example, to create a ROC curve for the experiment above, use:
+
+.. code-block:: sh
+
+   $ ./bin/evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv
+
+Please note that there exists another file called ``Experiment.info`` inside the result directory.
+This file is a pure text file and contains the complete configuration of the experiment.
+With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment.
+
+
+.. _running_in_parallel:
+
+Running in Parallel
+-------------------
+
+One important property of the ``./bin/verify.py`` script is that it can run in parallel, using either several threads on the local machine, or an SGE grid.
+To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have included as a python package in the :ref:`Installation <bob.bio.base.installation>` section.
+The ``./bin/verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs.
+
+GridTK_ keeps a list of jobs in a local database, which by default is called ``submitted.sql3``, but which can be overwritten with the ``--gridtk-database-file`` option.
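+For example, assuming the default database file name, the submitted jobs and their current status can be listed with something like:
+
+.. code-block:: sh
+
+   $ ./bin/jman --local --database submitted.sql3 list
+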
+Please refer to the `GridTK documentation <http://pythonhosted.org/gridtk>`_ for more details on how to use the Job Manager ``./bin/jman``.
+
+Two different types of ``grid`` resources are defined, which can be used with the ``--grid`` command line option.
+The first type of resources will submit jobs to an SGE grid.
+They are mainly designed to run in the Idiap_ SGE grid and might need some adaptations to run on your grid.
+The second type of resources will submit jobs to a local queue, which needs to be run by hand (e.g., using ``./bin/jman --local run-scheduler --parallel 4``), or by using the command line option ``--run-local-scheduler``.
+The difference between the two types of resources is that the local submission usually starts with ``local-``, while the SGE resource does not.
+
+Hence, to run the same experiment as above using four parallel threads on the local machine, re-nicing the jobs to level 10, simply call:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --grid local-p4 --run-local-scheduler --nice 10
+
+.. note::
+   You might realize that the second execution of the same experiment is much faster than the first one.
+   This is due to the fact that those parts of the experiment that have been successfully executed before (i.e., the corresponding files already exist) are skipped.
+   To override this behavior, i.e., to always regenerate all parts of the experiments, you can use the ``--force`` option.
+
+
+Command Line Options to change Default Behavior
+-----------------------------------------------
+In addition to the required command line arguments discussed above, there are several options to modify the behavior of the experiments.
+One set of command line options changes the directory structure of the output.
+By default, intermediate (temporary) files are written to the ``temp`` directory, which can be overridden by the ``--temp-directory`` command line option, which expects relative or absolute paths.
+
+Re-using Parts of Experiments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If you want to re-use parts of previous experiments, you can specify the directories (which are relative to the ``--temp-directory``, but you can also specify absolute paths):
+
+* ``--preprocessed-data-directory``
+* ``--extracted-directory``
+* ``--projected-directory``
+* ``--models-directories`` (one for each of the models and the ZT-norm models, see below)
+
+or even a trained extractor, projector, or enroller (i.e., the results of the extractor, projector, or enroller training):
+
+* ``--extractor-file``
+* ``--projector-file``
+* ``--enroller-file``
+
+For that purpose, it is also useful to skip parts of the tool chain.
+To do that you can use:
+
+* ``--skip-preprocessing``
+* ``--skip-extractor-training``
+* ``--skip-extraction``
+* ``--skip-projector-training``
+* ``--skip-projection``
+* ``--skip-enroller-training``
+* ``--skip-enrollment``
+* ``--skip-score-computation``
+* ``--skip-concatenation``
+* ``--skip-calibration``
+
+although by default files that already exist are not re-created.
+You can use the ``--force`` argument combined with the ``--skip...`` arguments (in which case the skip is preferred).
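+For example, to re-compute only the later stages of the experiment from above, while re-using the already preprocessed data and the extracted features of a previous run, one might call (the skip options are taken from the list above):
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --skip-preprocessing --skip-extraction
+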
+To run just a sub-selection of the tool chain, you can also use the ``--execute-only`` option, which takes a list of options out of: ``preprocessing``, ``extractor-training``, ``extraction``, ``projector-training``, ``projection``, ``enroller-training``, ``enrollment``, ``score-computation``, ``concatenation`` or ``calibration``.
+
+
+Database-dependent Arguments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Many databases define several protocols that can be executed.
+To change the protocol, you can either modify the configuration file, or simply use the ``--protocol`` option.
+
+Some databases define several kinds of evaluation setups.
+For example, often two groups of data are defined, a so-called *development set* and an *evaluation set*.
+The scores of the two groups will be concatenated into two files called **scores-dev** and **scores-eval**, which are located in the score directory (see above).
+In this case, by default only the development set is employed.
+To use both groups, just specify ``--groups dev eval`` (of course, you can also use only the ``'eval'`` set by calling ``--groups eval``).
+
+One score normalization technique is the so-called ZT score normalization.
+To enable this, simply use the ``--zt-norm`` option.
+If the ZT-norm is enabled, two sets of scores will be computed, and they will be placed in two different sub-directories of the score directory, which are by default called **nonorm** and **ztnorm**, but which can be changed using the ``--zt-score-directories`` option.
+
+
+Other Arguments
+---------------
+
+For some applications it is interesting to get calibrated scores.
+Simply add the ``--calibrate-scores`` option and another set of score files will be created, by training the score calibration on the scores of the ``'dev'`` group and applying it to all available groups.
+These score files will be located in the same directory as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval**, if applicable).
+
+.. include:: links.rst
diff --git a/doc/img/favicon.ico b/doc/img/favicon.ico
new file mode 100644
index 0000000000000000000000000000000000000000..4cc3264302627d40868261add69eb755856611b6
Binary files /dev/null and b/doc/img/favicon.ico differ
diff --git a/doc/img/logo.png b/doc/img/logo.png
new file mode 100644
index 0000000000000000000000000000000000000000..b9dd573a01019afd1af58a881996930e5212699d
Binary files /dev/null and b/doc/img/logo.png differ
diff --git a/doc/implementation.rst b/doc/implementation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..356aba45ba24db91729de887f716a6d1e2bc2bdd
--- /dev/null
+++ b/doc/implementation.rst
@@ -0,0 +1,330 @@
+.. vim: set fileencoding=utf-8 :
+.. Manuel Guenther <Manuel.Guenther@idiap.ch>
+.. Mon 23 04 2012
+
+======================
+Implementation Details
+======================
+
+The ``bob.bio`` module is specifically designed to be as flexible as possible while trying to keep things simple.
+Therefore, it uses python to implement tools such as preprocessors, feature extractors and recognition algorithms.
+It is file-based, so any tool can implement its own way of reading and writing data, features or models.
+Configurations are stored in configuration files, so it should be easy to test different parameters of your algorithms without modifying the code.
+
+
+Base Classes
+------------
+
+All tools implemented in the ``bob.bio`` packages are based on some classes, which are defined in the ``bob.bio.base`` package, and which are detailed below.
+Most of the functionality is provided in the base classes, but any function can be overridden in the derived class implementations.
+
+In the derived class constructors, the base class constructor needs to be called.
+To allow the experiment configuration to be traced automatically, all parameters that are passed to the derived class constructor should be passed to the base class constructor as a list of keyword arguments (which is indicated by ``...`` below).
+This will ensure that all parameters of the experiments are stored in the ``Experiment.info`` file.
+
+.. note::
+   All tools are based on reading, processing and writing files.
+   By default, any type of file is allowed to be handled, and file names are provided to the ``read_...`` and ``write_...`` functions as strings.
+   However, some of the extensions -- particularly the :ref:`bob.bio.video <bob.bio.video>` extension -- require the read and write functions to handle files of type :py:class:`bob.io.base.HDF5File`.
+
+If you plan to write your own tools, please make sure that you follow the structure described below.
+
+
+.. _bob.bio.base.preprocessors:
+
+Preprocessors
+~~~~~~~~~~~~~
+
+All preprocessor classes are derived from :py:class:`bob.bio.base.preprocessor.Preprocessor`.
+All of them implement the following two functions:
+
+* ``__init__(self, <parameters>)``: Initializes the preprocessing algorithm with the parameters it needs.
+  The base class constructor is called in the derived class constructor, e.g. as ``bob.bio.base.preprocessor.Preprocessor.__init__(self, ...)``.
+* ``__call__(self, original_data, annotations) -> data``: Preprocesses the data given the dictionary of annotations (e.g. ``{'reye' : [re_y, re_x], 'leye': [le_y, le_x]}`` for face images).
+
+  .. note::
+     When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
+
+By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
+In that case, the base class IO functionality can be used.
+If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overrides further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
+
+* ``write_data(data, data_file)``: Writes the given data (that has been generated using the ``__call__`` function of this class) to file.
+* ``read_data(data_file)``: Reads the preprocessed data from file.
+
+By default, the original data is read by :py:func:`bob.io.base.load`.
+Hence, data is given as :py:class:`numpy.ndarray`\s.
+When a different IO for the original data is required (for example to read videos in :py:class:`bob.bio.video.preprocessor.Video`), the following function is overridden:
+
+* ``read_original_data(filename)``: Reads the original data from file.
+
+
+.. _bob.bio.base.extractors:
+
+Extractors
+~~~~~~~~~~
+
+Feature extractors should be derived from the :py:class:`bob.bio.base.extractor.Extractor` class.
+All extractor classes provide at least the following functions:
+
+* ``__init__(self, <parameters>)``: Initializes the feature extraction algorithm with the parameters it needs.
+  Calls the base class constructor, e.g. as ``bob.bio.base.extractor.Extractor.__init__(self, ...)`` (there are more parameters to this constructor, see below).
+* ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
+  By default, the returned feature should be a :py:class:`numpy.ndarray`.
+
+If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden.
+In this case, the function to read that kind of feature needs to be overridden as well:
+
+* ``write_feature(self, feature, feature_file)``: Writes the feature (as returned by the ``__call__`` function) to the given file name.
+* ``read_feature(self, feature_file) -> feature``: Reads the feature (as written by the ``write_feature`` function) from the given file name.
+
+.. note::
+   If the feature is an instance of a class that can be written via a ``save(bob.io.base.HDF5File)`` method, the ``write_feature`` function does not need to be overridden.
+   However, the ``read_feature`` function is required in this case.
+
+If the feature extraction process requires reading a trained extractor model from file, the following function is overridden:
+
+* ``load(self, extractor_file)``: Loads the extractor from file.
+  This function is called at least once before the ``__call__`` function is executed.
+
+It is also possible to train the extractor model before it is used.
+In this case, two things are done.
+First, the ``train`` function is overridden:
+
+* ``train(self, image_list, extractor_file)``: Trains the feature extractor with the given list of images and writes the ``extractor_file``.
+
+Second, this behavior is registered in the ``__init__`` function by calling the base class constructor with more parameters: ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, ...)``.
+If the training algorithm needs the training data to be split by identity, ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, split_training_images_by_client = True, ...)`` is used instead.
+
+
+.. _bob.bio.base.algorithms:
+
+Algorithms
+~~~~~~~~~~
+The implementation of recognition algorithms is just as straightforward.
+All algorithms are derived from the :py:class:`bob.bio.base.algorithm.Algorithm` class.
+The constructor of this class has the following options, which are selected according to the current algorithm:
+
+* ``performs_projection``: If set to ``True``, features will be projected using the ``project`` function.
+  With the default ``False``, the ``project`` function will not be called at all.
+* ``requires_projector_training``: If ``performs_projection`` is enabled, this flag specifies if the projector needs training.
+  If ``True`` (the default), the ``train_projector`` function will be called.
+* ``split_training_features_by_client``: If the projector training needs training images split up by client identity, this flag is enabled.
+  In this case, the ``train_projector`` function will receive a list of lists of features.
+  If set to ``False`` (the default), the training features are given in one list.
+* ``use_projected_features_for_enrollment``: If features are projected, by default (``True``) models are enrolled using the projected features.
+  If the algorithm requires the original unprojected features to enroll the model, ``use_projected_features_for_enrollment=False`` is selected.
+* ``requires_enroller_training``: Enables the enroller training.
+  By default (``False``), no enroller training is performed, i.e., the ``train_enroller`` function is not called.
+
+* ``multiple_model_scoring``: The way to handle scoring when models store several features.
+  Set this parameter to ``None`` when you implement your own functionality to handle models from several features (see below).
+* ``multiple_probe_scoring``: The way to handle scoring when several probe features are given.
+  Set this parameter to ``None`` when you handle scoring with multiple probes using your own ``score_for_multiple_probes`` function (see below).
+
+A recognition algorithm has to override at least three functions:
+
+* ``__init__(self, <parameters>)``: Initializes the recognition algorithm with the parameters it needs.
+  Calls the base class constructor, e.g. as ``bob.bio.base.algorithm.Algorithm.__init__(self, ...)`` (there are more parameters to this constructor, see above).
+* ``enroll(self, enroll_features) -> model``: Enrolls a model from the given vector of features (this list usually contains features from several files of one subject) and returns it.
+  The returned model is either a :py:class:`numpy.ndarray` or an instance of a class that defines a ``save(bob.io.base.HDF5File)`` method.
+  If neither of the two options is appropriate, a ``write_model`` function is defined (see below).
+* ``score(self, model, probe) -> value``: Computes a similarity or probability score that the given probe feature and the given model stem from the same identity.
+
+  .. note::
+     When you use a distance measure in your scoring function, and lower distances represent higher probabilities of having the same identity, please return the negative distance.
+
+Additionally, an algorithm may need to project the features before they can be used for enrollment or recognition.
+In this case, (some of) the following functions are overridden:
+
+* ``train_projector(self, train_features, projector_file)``: Trains the projector using the given list of features and writes the ``projector_file``.
+
+  .. warning::
+     If you write this function, please make sure that you use both ``performs_projection=True`` and ``requires_projector_training=True`` (for the latter, this is the default, but not for the former) during the base class constructor call in your ``__init__`` function.
+     If you need the training data to be sorted by clients, please use ``split_training_features_by_client=True`` as well.
+     Please also make sure that you override the ``project`` function.
+
+* ``load_projector(self, projector_file)``: Loads the projector from the given file, i.e., as stored by ``train_projector``.
+  This function is always called before the ``project``, ``enroll``, and ``score`` functions are executed.
+* ``project(self, feature) -> feature``: Projects the given feature and returns the projected feature, which should either be a :py:class:`numpy.ndarray` or an instance of a class that defines a ``save(bob.io.base.HDF5File)`` method.
+
+  .. note::
+     If you write this function, please make sure that you use ``performs_projection=True`` during the base class constructor call in your ``__init__`` function.
+
+And once more, if the projected feature is not of type ``numpy.ndarray``, the following methods are overridden:
+
+* ``write_feature(feature, feature_file)``: Writes the feature (as returned by the ``project`` function) to file.
+* ``read_feature(feature_file) -> feature``: Reads and returns the feature (as written by the ``write_feature`` function).
+
+Some tools also require training the model enrollment functionality (the ``enroller``, for short).
+In this case, these functions are overridden:
+
+* ``train_enroller(self, training_features, enroller_file)``: Trains the model enrollment with the list of lists of features and writes the ``enroller_file``.
+
+  .. note::
+     If you write this function, please make sure that you use ``requires_enroller_training=True`` during the base class constructor call in your ``__init__`` function.
+
+* ``load_enroller(self, enroller_file)``: Loads the enroller from file.
+  This function is always called before the ``enroll`` and ``score`` functions are executed.
+
+
+By default, it is assumed that both the models and the probe features are of type :py:class:`numpy.ndarray`.
+If the ``score`` function expects models and probe features to be of a different type, these functions are overridden:
+
+* ``write_model(self, model, model_file)``: Writes the model (as returned by the ``enroll`` function).
+* ``read_model(self, model_file) -> model``: Reads the model (as written by the ``write_model`` function) from file.
+* ``read_probe(self, probe_file) -> feature``: Reads the probe feature from file.
+
+  .. note::
+     In many cases, the ``read_feature`` and ``read_probe`` functions are identical (if both are present).
+
+Finally, the :py:class:`bob.bio.base.algorithm.Algorithm` class provides default implementations for the case that models store several features, or that several probe features should be combined into one score.
+These two functions are:
+
+* ``score_for_multiple_models(self, models, probe)``: In case your models store several features, **call** this function to compute the average (or min, max, ...) of the scores.
+* ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes is computed. **Override** this function in case you want a different behavior.
+
+
+Implemented Tools
+-----------------
+
+In this base package, only one feature extractor and some recognition algorithms are defined.
+However, implementations of the base classes can be found in all of the ``bob.bio`` packages.
+Here is a list of implementations:
+
+* :ref:`bob.bio.base <bob.bio.base>` : :ref:`bob.bio.base.implemented`
+* :ref:`bob.bio.face <bob.bio.face>` : :ref:`bob.bio.face.implemented`
+* :ref:`bob.bio.video <bob.bio.video>` : :ref:`bob.bio.video.implemented`
+* :ref:`bob.bio.gmm <bob.bio.gmm>` : :ref:`bob.bio.gmm.implemented`
+* :ref:`bob.bio.csu <bob.bio.csu>` : :ref:`bob.bio.csu.implemented`
+
+.. * :ref:`bob.bio.spear <bob.bio.spear>` : :ref:`bob.bio.spear.implemented`
+
+
+.. todo:: complete this list, once the other packages are documented as well.
+
+
+Databases
+---------
+
+Databases provide information about the data sets on which the recognition algorithms should run.
+In particular, databases come with one or more evaluation protocols, which define which part of the data should be used for training, enrollment and probing.
+Some protocols split up the data into three different groups: a training set (aka. ``world`` group), a development set (aka. ``dev`` group) and an evaluation set (``eval``, sometimes also referred to as the test set).
+Furthermore, some of the databases split off some data from the training set, which is used to perform a ZT score normalization.
+Finally, most of the databases come with specific annotation files, which define additional information about the data, e.g., hand-labeled eye locations for face images.
+
+
+Verification Database Interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For most of the data sets, we rely on the database interfaces from Bob_.
+In particular, all databases that are derived from :py:class:`bob.db.verification.utils.Database` (see :ref:`here <verification_databases>` for a list of implemented databases) are supported by a special derivation of the databases from above.
+For these databases, the special :py:class:`bob.bio.base.database.DatabaseBob` interface is provided, which takes the Bob_ database as parameter.
+Several such databases are defined in the corresponding packages, i.e., :ref:`bob.bio.spear <bob.bio.spear>`, :ref:`bob.bio.face <bob.bio.face>` and :ref:`bob.bio.video <bob.bio.video>`.
+For Bob_'s ZT-norm databases, we provide the :py:class:`bob.bio.base.database.DatabaseBobZT` interface.
+
+Additionally, a generic database interface, which is derived from :py:class:`bob.bio.base.database.DatabaseBobZT`, is the :py:class:`bob.bio.base.database.DatabaseFileList`.
+This database interfaces with the :py:class:`bob.db.verification.filelist.Database`, which is a generic database based on file lists, implementing the :py:class:`bob.db.verification.utils.Database` interface.
+
+Defining your own Database
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+If you have your own database that you want to execute the recognition experiments on, you should first check if you could use the :ref:`Verification FileList Database <bob.db.verification.filelist>` interface by defining appropriate file lists for the training set, the model set, and the probes.
+In most of the cases, the :py:class:`bob.db.verification.filelist.Database` should be sufficient to run experiments.
+Please refer to the :ref:`documentation <bob.db.verification.filelist>` of this database for more instructions on how to configure it.
+
+In case you want to have a more complicated interface to your database, you are welcome to write your own database wrapper class.
+In this case, you have to derive your class from :py:class:`bob.bio.base.database.Database`, and provide the following functions:
+
+* ``__init__(self, <your-parameters>, **kwargs)``: Constructor of your database interface.
+  Please call the base class constructor, providing all the required parameters, e.g. by ``bob.bio.base.database.Database.__init__(self, **kwargs)``.
+* ``all_files(self)``: Returns a list of all :py:class:`bob.bio.base.database.File` objects of the database.
+  The list needs to be sorted by the file id (you can use the ``self.sort(files)`` function for sorting).
+* ``training_files(self, step, arrange_by_client = False)``: Returns a sorted list of the :py:class:`bob.bio.base.database.File` objects that are used for training.
+  If ``arrange_by_client`` is enabled, you might want to use the :py:meth:`bob.bio.base.database.Database.arrange_by_client` function to perform the job.
+* ``model_ids(self, group = 'dev')``: The ids for the models (usually, there is only one model per client and, thus, you can simply use the client ids) for the given group.
+  Usually, providing ids for the group ``'dev'`` should be sufficient.
+* ``client_id_from_model_id(self, model_id)``: Returns the client id for the given model id.
+* ``enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given model id.
+* ``probe_files(self, model_id=None, group='dev')``: Returns the list of probe files that the given ``model_id`` should be compared with.
+  Usually, all probe files are compared with all model files.
+  In this case, you can just ignore the ``model_id``.
+  If the ``model_id`` is ``None``, this function is supposed to return *all* probe files for all models of the given group.
+
+Additionally, you can define more lists that can be used for ZT score normalization.
+In this case, derive your class from :py:class:`bob.bio.base.database.DatabaseZT` instead, and additionally override the following functions:
+
+* ``t_model_ids(self, group = 'dev')``: The ids for the T-Norm models for the given group.
+* ``t_enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given T-Norm model id.
+* ``z_probe_files(self, group='dev')``: Returns the list of Z-probe :py:class:`bob.bio.base.database.File` objects, with which all the models and T-Norm models are compared.
+
+.. note::
+   For a proper biometric recognition protocol, the identities of the models, the T-Norm models, and the Z-probes should be different.
+
+For some protocols, a single probe consists of several features; see :ref:`bob.bio.base.algorithms` for strategies on how to incorporate several probe files into one score.
+If your database should provide this functionality, please override:
+
+* ``uses_probe_file_sets(self)``: Returns ``True`` if the current protocol of the database provides multiple files for one probe.
+* ``probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of :py:class:`bob.bio.base.database.FileSet` objects.
+* ``z_probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of Z-probe :py:class:`bob.bio.base.database.FileSet` objects (only needed if the base class is :py:class:`bob.bio.base.database.DatabaseZT`).
+
+
+
+.. _bob.bio.base.configuration-files:
+
+Configuration Files
+-------------------
+
+One important aspect of the ``bob.bio`` packages is reproducibility.
+To be able to reproduce an experiment, it is required that all parameters of all tools are recorded.
+
+In ``bob.bio`` this is achieved by providing these parameters in configuration files.
+In these files, an *instance* of one of the tools is generated, and assigned to a variable with a specific name.
+These variable names are:
+
+* ``database`` for an instance of a (derivation of a) :py:class:`bob.bio.base.database.Database`
+* ``preprocessor`` for an instance of a (derivation of a) :py:class:`bob.bio.base.preprocessor.Preprocessor`
+* ``extractor`` for an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor`
+* ``algorithm`` for an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm`
+* ``grid`` for an instance of the :py:class:`bob.bio.base.grid.Grid`
+
+For example, the configuration file for a PCA algorithm, which keeps 80% of the variance and uses a cosine distance function, could look somewhat like:
+
+.. code-block:: py
+
+   import bob.bio.base
+   import scipy.spatial
+
+   algorithm = bob.bio.base.algorithm.PCA(subspace_dimension = 0.8, distance_function = scipy.spatial.distance.cosine, is_distance_function = True)
+
+Some default configuration files can be found in the ``bob/bio/*/config`` directories of all ``bob.bio`` packages, but you can create configuration files in any directory you like.
+In fact, since all tools have a different keyword, you can define a complete experiment in a single configuration file.
+
+
+.. _bob.bio.base.resources:
+
+Resources
+---------
+
+Finally, some of the configuration files, which sit in the ``bob/bio/*/config`` directories, are registered as *resources*.
+This means that a resource is nothing more than a short name for a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, which has a pre-defined set of parameters.
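+
+For illustration, a minimal ``setup.py`` of a hypothetical satellite package could register the PCA configuration from above under the short name ``my-pca`` (both the package name ``my.package`` and the resource name are made up for this sketch; the referenced module must define a variable called ``algorithm``):
+
+.. code-block:: python
+
+   from setuptools import setup, find_packages
+
+   setup(
+       name = 'my.package',
+       version = '1.0.0',
+       packages = find_packages(),
+
+       entry_points = {
+           # register my/package/config/my_pca.py as an algorithm resource
+           'bob.bio.algorithm': [
+               'my-pca = my.package.config.my_pca:algorithm',
+           ],
+       },
+   )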
+
+As this sketch already suggests, the process of registering a resource is relatively easy.
+We use the SetupTools_ mechanism of registering so-called entry points in the ``setup.py`` file of the corresponding ``bob.bio`` package.
+Particularly, we use a specific list of entry points, which are:
+
+* ``bob.bio.database`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.database.Database`
+* ``bob.bio.preprocessor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.preprocessor.Preprocessor`
+* ``bob.bio.extractor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor`
+* ``bob.bio.algorithm`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm`
+* ``bob.bio.grid`` to register an instance of the :py:class:`bob.bio.base.grid.Grid`
+
+For each of the tools, several resources are defined, which you can list with the ``./bin/resources.py`` script.
+
+When you want to register your own resource, make sure that your configuration file is importable (usually it is sufficient to have an empty ``__init__.py`` file in the same directory as your configuration file).
+Then, you can simply add a line inside the corresponding ``entry_points`` section of the ``setup.py`` file (you might need to create that section, just follow the example of the ``setup.py`` file that you can find online in the base directory of our `bob.bio.base GitHub page <http://github.com/bioidiap/bob.bio.base>`__).
+
+After re-running ``./bin/buildout``, your new resource should be listed in the output of ``./bin/resources.py``.
+
+
+.. include:: links.rst
diff --git a/doc/implemented.rst b/doc/implemented.rst
new file mode 100644
index 0000000000000000000000000000000000000000..6737d58008d04e68f8af7be834b4c355a02bf9b4
--- /dev/null
+++ b/doc/implemented.rst
@@ -0,0 +1,78 @@
+.. _bob.bio.base.implemented:
+
+=================================
+Tools implemented in bob.bio.base
+=================================
+
+Summary
+-------
+
+Base Classes
+~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.preprocessor.Preprocessor
+   bob.bio.base.extractor.Extractor
+   bob.bio.base.algorithm.Algorithm
+   bob.bio.base.database.Database
+   bob.bio.base.database.DatabaseZT
+   bob.bio.base.grid.Grid
+
+
+Implementations
+~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.extractor.Linearize
+   bob.bio.base.algorithm.Distance
+   bob.bio.base.algorithm.PCA
+   bob.bio.base.algorithm.LDA
+   bob.bio.base.algorithm.PLDA
+   bob.bio.base.algorithm.BIC
+   bob.bio.base.database.DatabaseBob
+   bob.bio.base.database.DatabaseBobZT
+   bob.bio.base.database.DatabaseFileList
+
+
+Preprocessors
+-------------
+
+.. automodule:: bob.bio.base.preprocessor
+
+Extractors
+----------
+
+.. automodule:: bob.bio.base.extractor
+
+Algorithms
+----------
+
+.. automodule:: bob.bio.base.algorithm
+
+Databases
+---------
+
+.. automodule:: bob.bio.base.database
+
+Grid Configuration
+------------------
+
+.. automodule:: bob.bio.base.grid
+
+
+.. data:: PREDEFINED_QUEUES
+
+   A dictionary of predefined queue keywords, which are adapted to the Idiap_ SGE.
+
+
+   .. adapted from http://stackoverflow.com/a/29789910/3301902 to get a nice dictionary content view
+
+   .. exec::
+      import json
+      from bob.bio.base.grid import PREDEFINED_QUEUES
+      json_obj = json.dumps(PREDEFINED_QUEUES, sort_keys=True, indent=2)
+      json_obj = json_obj.replace("\n", "\n ")
+      print ('.. code-block:: JavaScript\n\n PREDEFINED_QUEUES = %s\n\n' % json_obj)
+
+
+.. include:: links.rst
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000000000000000000000000000000000000..3b1c0b3d3ba0d747b3fe1d36dac151e4ed9ef52c
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,98 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+.. _bob.bio.base:
+
+===========================================
+ Running Biometric Recognition Experiments
+===========================================
+
+The ``bob.bio`` packages provide open source tools to run comparable and reproducible biometric recognition experiments.
+To design a biometric recognition experiment, one has to choose:
+
+* a database containing the original data, and a protocol that defines how to use the data,
+* a data preprocessing algorithm, e.g., face detection for face recognition experiments or voice activity detection for speaker recognition,
+* the type of features to extract from the preprocessed data,
+* the biometric recognition algorithm to employ,
+* the score fusion to combine outputs from different systems, and
+* the way to evaluate the results.
+
+For any of these parts, several different types are implemented in the ``bob.bio`` packages, and basically any combination of these parts can be executed.
+For each type, several meta-parameters can be tested.
+This results in a nearly infinite number of possible experiments that can be run using the current setup.
+But it is also possible to use your own database, preprocessor, feature extractor, or biometric recognition algorithm and test it against the baseline algorithms implemented in our packages.
+
+.. note::
+   The ``bob.bio`` packages are derived from the former `FaceRecLib <http://pypi.python.org/pypi/facereclib>`__, which is now deprecated.
+
+This package :py:mod:`bob.bio.base` includes the basic definition of a biometric recognition experiment, as well as a generic script, which can execute the full biometric experiment in a single command line.
+Changing the employed tools such as the database, protocol, preprocessor, feature extractor or recognition algorithm is as simple as changing a command line parameter.
+
+The implementation of (most of) the tools is separated into other packages in the ``bob.bio`` namespace.
+All these packages can be easily combined.
+Here is a growing list of derived packages:
+
+* :ref:`bob.bio.spear <bob.bio.spear>` Tools to run speaker recognition experiments, including voice activity detection, Cepstral feature extraction, and speaker databases
+* :ref:`bob.bio.face <bob.bio.face>` Tools to run face recognition experiments, such as face detection, facial feature extraction and comparison, and face image databases
+* :ref:`bob.bio.video <bob.bio.video>` An extension of face recognition algorithms to run on video data, and the corresponding video databases
+* :ref:`bob.bio.gmm <bob.bio.gmm>` Algorithms based on Gaussian Mixture Modeling (GMM) such as Inter-Session Variability modeling (ISV) or Total Variability modeling (TV, aka. I-Vector)
+* `bob.bio.csu <http://pypi.python.org/pypi/bob.bio.csu>`__ for wrapper classes of the `CSU Face Recognition Resources <http://www.cs.colostate.edu/facerec>`__ (see `Installation Instructions <http://pythonhosted.org/bob.bio.csu/installation.html>`__ of ``bob.bio.csu``).
+
+If you are interested, please continue reading:
+
+
+===========
+Users Guide
+===========
+
+.. toctree::
+   :maxdepth: 2
+
+   installation
+   experiments
+   implementation
+   more
+
+================
+Reference Manual
+================
+
+.. toctree::
+   :maxdepth: 2
+
+   implemented
+   py_api
+
+
+==========
+References
+==========
+
+.. [TP91] *M. Turk and A. Pentland*. **Eigenfaces for recognition**. Journal of Cognitive Neuroscience, 3(1):71-86, 1991.
+.. [ZKC+98] *W. Zhao, A. Krishnaswamy, R. Chellappa, D. Swets and J. Weng*. **Discriminant analysis of principal components for face recognition**, pages 73-85. Springer Verlag Berlin, 1998.
+.. [Pri07] *S. J. D. Prince*. **Probabilistic linear discriminant analysis for inferences about identity**. Proceedings of the International Conference on Computer Vision. 2007.
+.. [ESM+13] *L. El Shafey, Chris McCool, Roy Wallace and Sébastien Marcel*. **A scalable formulation of probabilistic linear discriminant analysis: applied to face recognition**. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(7):1788-1794, 7/2013.
+.. [MWP98] *B. Moghaddam, W. Wahid and A. Pentland*. **Beyond eigenfaces: probabilistic matching for face recognition**. IEEE International Conference on Automatic Face and Gesture Recognition, pages 30-35. 1998.
+.. [GW09] *M. Günther and R.P. Würtz*. **Face detection and recognition using maximum likelihood classifiers on Gabor graphs**. International Journal of Pattern Recognition and Artificial Intelligence, 23(3):433-461, 2009.
+
+
+=========
+ToDo-List
+=========
+
+This documentation is still under development.
+Here is a list of things that need to be done:
+
+.. todolist::
+
+
+==================
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
+.. include:: links.rst
diff --git a/doc/installation.rst b/doc/installation.rst
new file mode 100644
index 0000000000000000000000000000000000000000..f3811adc0a20cb8b205573663dc3be59b3f11bb6
--- /dev/null
+++ b/doc/installation.rst
@@ -0,0 +1,126 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+.. _bob.bio.base.installation:
+
+=========================
+Installation Instructions
+=========================
+
+As noted before, this package is part of the ``bob.bio`` packages, which in turn are part of the signal-processing and machine learning toolbox Bob_.
+To install `Packages of Bob <https://github.com/idiap/bob/wiki/Packages>`_, please read the `Installation Instructions <https://github.com/idiap/bob/wiki/Installation>`_.
+For Bob_ to be able to work properly, some dependent packages are required to be installed.
+Please make sure that you have read the `Dependencies <https://github.com/idiap/bob/wiki/Dependencies>`_ for your operating system.
+
+.. note::
+   Currently, running Bob_ under MS Windows is not yet supported.
+   However, we found that running Bob_ in a virtual Unix environment such as the one provided by VirtualBox_ is a good alternative.
+
+The simplest and most convenient way to use the ``bob.bio`` tools is to use a ``zc.buildout`` package, as explained in more detail `here <https://github.com/idiap/bob/wiki/Installation#using-zcbuildout-for-production>`__.
+There, in the ``eggs`` section of the ``buildout.cfg`` file, simply list the ``bob.bio`` packages that you want, like:
+
+.. code-block:: python
+
+   eggs = bob.bio.base
+          bob.bio.face
+          bob.bio.gmm
+          bob.bio.video
+          bob.db.youtube
+          gridtk
+
+in order to download and install all packages that are required for your experiments.
+In the example above, you might want to run a video face recognition experiment using the :py:class:`bob.bio.face.preprocessor.FaceDetector` and the :py:class:`bob.bio.face.extractor.DCTBlocks` feature extractor defined in :ref:`bob.bio.face <bob.bio.face>`, the :py:class:`bob.bio.gmm.algorithm.IVector` algorithm defined in :ref:`bob.bio.gmm <bob.bio.gmm>` and the video extensions defined in :ref:`bob.bio.video <bob.bio.video>`, using the YouTube faces database interface defined in :ref:`bob.db.youtube <bob.db.youtube>`.
+Running the simple command lines:
+
+.. code-block:: sh
+
+   $ python bootstrap-buildout.py
+   $ ./bin/buildout
+
+will then download and install all dependent packages locally (relative to your current working directory), and create a ``./bin`` directory containing all the necessary scripts to run the experiments.
+
+
+Databases
+~~~~~~~~~
+
+With ``bob.bio`` you will run biometric recognition experiments using some default biometric recognition databases.
+Though the verification protocols are implemented in ``bob.bio``, the original data are **not included**.
+To download the original data of the databases, please refer to the corresponding web pages.
+For a list of supported databases including their download URLs, please refer to the :ref:`verification_databases`.
+
+After downloading the original data for the databases, you will need to tell ``bob.bio`` where these databases can be found.
+For this purpose, we have decided to implement a special file, where you can set your directories.
+By default, this file is located in ``~/.bob_bio_databases.txt``, and it contains several lines, each line looking somewhat like:
+
+.. code-block:: text
+
+   [YOUR_ATNT_DIRECTORY] = /path/to/your/directory
+
+.. note::
+   If this file does not exist, feel free to create and populate it yourself.
+
+
+Please use ``./bin/databases.py`` for a list of known databases, where you can see the raw ``[YOUR_DATABASE_PATH]`` entries for all databases that you haven't updated, and the corrected paths for those you have.
+
+
+.. note::
+   If you have installed only ``bob.bio.base``, no databases are listed -- as all databases are included in other packages, such as :ref:`bob.bio.face <bob.bio.face>` or :ref:`bob.bio.spear <bob.bio.spear>`.
+
+
+Test your Installation
+~~~~~~~~~~~~~~~~~~~~~~
+
+Scripts to run the nose tests were generated during the bootstrap/buildout step.
+To verify your installation, you should run the nose tests for each of the ``bob.bio`` packages:
+
+.. code-block:: sh
+
+   $ ./bin/nosetests -vs bob.bio.base
+   $ ./bin/nosetests -vs bob.bio.gmm
+   ...
+
+Some of the tests that are run require the images of the `AT&T database`_.
+If the database is not found on your system, it will automatically be downloaded and extracted to a temporary directory, **which will not be erased**.
+
+To avoid downloading the database each time you run the nose tests, please:
+
+1. Download the `AT&T database`_ and extract it to the directory of your choice.
+2. Set the environment variable ``ATNT_DATABASE_DIRECTORY`` to the directory where you extracted the database.
+   For example, in ``bash`` you can call:
+
+.. code-block:: sh
+
+   $ export ATNT_DATABASE_DIRECTORY=/path/to/your/copy/of/atnt
+
+.. note::
+   To set the directory permanently, you can also change the ``atnt_default_directory`` in the file `bob/bio/base/test/utils.py <file:../bob/bio/base/test/utils.py>`_.
+   In this case, there is no need to set the environment variable any more.
+
+In case any of the tests fail for unexplainable reasons, please file a bug report through the `GitHub bug reporting system`_.
+
+.. note::
+   Usually, all tests should pass with the latest stable versions of the Bob_ packages.
+   In other versions, some of the tests may fail.
+
+
+Generate this documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Generally, the documentation of this package is `available online <http://pythonhosted.org/bob.bio.base>`__, and this should be your preferred resource.
+However, to generate this documentation locally, you can call:
+
+.. code-block:: sh
+
+   $ ./bin/sphinx-build doc sphinx
+
+Afterwards, the documentation is available and you can read it, e.g., by using:
+
+.. code-block:: sh
+
+   $ firefox sphinx/index.html
+
+
+.. _buildout.cfg: file:../buildout.cfg
+
+.. include:: links.rst
diff --git a/doc/links.rst b/doc/links.rst
new file mode 100644
index 0000000000000000000000000000000000000000..2a20a1bc4ff44ad46e0ac1d4cf700fd350d8df8a
--- /dev/null
+++ b/doc/links.rst
@@ -0,0 +1,23 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+.. This file contains all links we use for documentation in a centralized place
+
+.. _idiap: http://www.idiap.ch
+.. _github: http://www.github.com/idiap
+.. _bob: http://www.idiap.ch/software/bob
+.. _github bug reporting system: http://github.com/bioidiap/bob.bio.base/issues
+.. _idiap at github: http://www.github.com/bioidiap
+.. _at&t database: http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
+.. _bob's github page: http://idiap.github.com/bob
+.. _gridtk: http://github.com/idiap/gridtk
+.. _buildout: http://www.buildout.org
+.. _setuptools: http://trac.edgewall.org/wiki/setuptools
+.. _nist: http://www.nist.gov/itl/iad/ig/focs.cfm
+.. _pypi: http://pypi.python.org
+.. _sge: http://wiki.idiap.ch/linux/SunGridEngine
+.. _csu face recognition resources: http://www.cs.colostate.edu/facerec
+.. _xfacereclib.extension.csu: http://pypi.python.org/pypi/xfacereclib.extension.CSU
+.. _virtualbox: https://www.virtualbox.org
+.. _hdf5: http://www.hdfgroup.org/HDF5
diff --git a/doc/more.rst b/doc/more.rst
new file mode 100644
index 0000000000000000000000000000000000000000..c14172433cd6dd788966b8b2cf77197ff13ebb12
--- /dev/null
+++ b/doc/more.rst
@@ -0,0 +1,208 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+==============================
+More about Running Experiments
+==============================
+
+Now that we have learned the implementation details, we can have a closer look at how experiments can be parametrized.
+
+.. _running_part_2:
+
+Running Experiments (part II)
+-----------------------------
+
+As mentioned before, running biometric recognition experiments can be achieved using the ``./bin/verify.py`` command line.
+In section :ref:`running_part_1`, we have used registered resources to run an experiment.
+However, the command line interface of ``./bin/verify.py`` is more flexible, as there are three different ways of defining tools:
+
+1. Choose a resource (see ``./bin/resources.py`` or ``./bin/verify.py --help`` for a list of registered resources):
+
+   .. code-block:: sh
+
+      $ ./bin/verify.py --algorithm pca
+
+
+2. Use a configuration file. Make sure that your configuration file has the correct variable name:
+
+   .. code-block:: sh
+
+      $ ./bin/verify.py --algorithm bob/bio/base/config/algorithm/pca.py
+
+
+3. Instantiate a class on the command line. Usually, quotes ``"..."`` are required, and the ``--imports`` option needs to be specified:
+
+   .. code-block:: sh
+
+      $ ./bin/verify.py --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial
+
+Any of these three ways can be used for any of the five command line options: ``--database``, ``--preprocessor``, ``--extractor``, ``--algorithm`` and ``--grid``.
+You can even mix these three types freely in a single command line.
+
+
+Score Level Fusion of Different Algorithms on the Same Database
+---------------------------------------------------------------
+
+In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm.
+This is particularly true when the algorithms rely on different kinds of data, e.g., we have `fused face and speaker recognition systems on the MOBIO database <http://publications.idiap.ch/index.php/publications/show/2688>`__.
+As long as several algorithms are executed on the same database, we can simply generate a fusion system by using the ``./bin/fuse_scores.py`` script, which generates a new score file:
+
+.. code-block:: sh
+
+   $ ./bin/fuse_scores.py --dev
+
+This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems.
+Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``.
+If ``--eval-files`` are specified, the same fusion that is trained on the development set is applied to the evaluation set as well, and the ``--fused-eval-file`` is written.
+
+.. note::
+   When ``--eval-files`` are specified, they need to be in the same order as the ``--dev-files``, otherwise the result is undefined.
+
+The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``./bin/evaluate.py`` script.
+
+
+.. _grid-search:
+
+Finding the Optimal Configuration
+---------------------------------
+
+Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol.
+Additionally, configuration parameters depend on each other.
+``bob.bio`` provides a relatively simple setup that allows testing different configurations for the same task, and finding the best set of configurations.
+For this, the ``./bin/grid_search.py`` script can be employed.
+This script executes a configurable series of experiments, which reuse data as far as possible.
+Please check out ``./bin/grid_search.py --help`` for a list of command line options.
+
+The Configuration File
+~~~~~~~~~~~~~~~~~~~~~~
+The most important parameter to the ``./bin/grid_search.py`` is the ``--configuration-file``.
+This configuration file specifies which parameters of which parts of the algorithms will be tested.
+An example for a configuration file can be found in the test scripts: ``bob/bio/base/test/dummy/grid_search.py``.
+The configuration file is a regular Python file, which can contain certain variables:
+
+1. ``preprocessor =``
+2. ``extractor =``
+3. ``algorithm =``
+4. ``replace =``
+5. ``requirement =``
+6. ``imports =``
+
+The variables from 1. to 3. usually contain instantiations for classes of :ref:`bob.bio.base.preprocessors`, :ref:`bob.bio.base.extractors` and :ref:`bob.bio.base.algorithms`, but also registered :ref:`bob.bio.base.resources` can be used.
+For any of the parameters of the classes, a *placeholder* can be put.
+By default, these placeholders start with a ``#`` character, followed by a digit or character.
+The variables 1. to 3. can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``./bin/grid_search.py`` script.
+
+The ``replace`` variable has to be set as a dictionary.
+In it, you can define with which values your placeholder key should be filled, and in which step of the tool chain execution this should happen.
+The steps are ``'preprocess'``, ``'extract'``, ``'project'``, ``'enroll'`` and ``'score'``.
+For each of the steps, you can define which placeholder should be replaced by which values.
+To be able to differentiate the results later on, each of the replacement values is bound to a directory name.
+The final structure looks somewhat like this:
+
+.. code-block:: python
+
+   replace = {
+       step1 : {
+           '#a' : {
+               'Dir_a1' : 'Value_a1',
+               'Dir_a2' : 'Value_a2'
+           },
+
+           '#b' : {
+               'Dir_b1' : 'Value_b1',
+               'Dir_b2' : 'Value_b2'
+           }
+       },
+
+       step2 : {
+           '#c' : {
+               'Dir_c1' : 'Value_c1',
+               'Dir_c2' : 'Value_c2'
+           }
+       }
+   }
+
+
+Of course, more than two values can be selected.
+In the above example, the results of the experiments will be placed into a directory structure as ``results/[...]/Dir_a1/Dir_b1/Dir_c1/[...]``.
+
+.. note::
+   Please note that we are using a dictionary structure to define the replacements.
+   Hence, the order of the directories inside the same step might not be in the same order as written in the configuration file.
+   For the above example, a directory structure of ``results/[...]/Dir_b1/Dir_a1/Dir_c1/[...]`` might be possible as well.
+
+
+Additionally, tuples of placeholders can be defined, in which case the full tuple will always be replaced in one shot.
+Continuing the above example, it is possible to add:
+
+.. code-block:: python
+
+   ...
+   step3 : {
+       '(#d,#e)' : {
+           'Dir_de1' : ('Value_d1', 'Value_e1'),
+           'Dir_de2' : ('Value_d2', 'Value_e2')
+       }
+   }
+
+.. warning::
+   *All possible combinations* of the configuration parameters are tested, which might result in a *huge number of executed experiments*.
+
+Some combinations of parameters might not make any sense.
+In this case, a set of requirements on the parameters can be set, using the ``requirement`` variable.
+A requirement is any string containing placeholders that can be evaluated using Python's ``eval`` function:
+
+.. code-block:: python
+
+   requirement = ['#a > #b', '2*#c != #a', ...]
+
+Finally, when any of the classes or variables needs to import a certain Python module, it needs to be declared in the ``imports`` variable.
+If you, e.g., test which ``scipy.spatial`` distance function works best for your features, please add the imports (and don't forget the ``bob.bio.base`` and other ``bob.bio`` packages in case you use their tools):
+
+.. code-block:: python
+
+   imports = ['scipy', 'bob.bio.base', 'bob.bio.face']
+
+
+Further Command Line Options
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The ``./bin/grid_search.py`` script has a further set of command line options.
+
+- The ``--database`` and the ``--protocol`` define which database and (optionally) which protocol should be used.
+- The ``--sub-directory`` is similar to the one in ``./bin/verify.py``.
+- ``--result-directory`` and ``--temp-directory`` specify directories to write results and temporary files into. Defaults are ``./results/grid_search`` and ``./temp/grid_search`` in the current directory. Make sure that the ``--temp-directory`` can store a sufficient amount of data.
+- The ``--preprocessor``, ``--extractor`` and ``--algorithm`` can be used to override the ``preprocessor``, ``extractor`` and ``algorithm`` fields in the configuration file (in which case the configuration file does not need to contain these variables).
+- The ``--grid`` option can select the SGE_ configuration.
+- The ``--parallel`` option can run on the local machine using the given number of parallel threads.
+- The ``--preprocessed-directory`` can be used to select a directory of previously preprocessed data. This should not be used in combination with testing different preprocessor parameters.
+- The ``--gridtk-database-directory`` can be used to select another directory where the ``submitted.sql3`` files will be stored.
+- Sometimes, the gridtk databases grow too large to hold all experiments. Using the ``--gridtk-database-split-level``, databases can be split at the desired level.
+- The ``--write-commands`` directory can be selected to write the executed commands into (this is useful in case some experiments fail and need to be rerun).
+- The ``--dry-run`` flag should always be used before the final execution to see if the experiment definition works as expected.
+- The ``--skip-when-existent`` flag will only execute the experiments that have not yet finished (i.e., where the resulting score files are not produced yet).
+- With the ``--executable`` flag, you might select a different script rather than ``bob.bio.base.script.verify`` to run the experiments (such as the ``bob.bio.gmm.script.verify_gmm``).
+- Finally, additional options might be sent to the ``./bin/verify.py`` script directly. These options might be put after a ``--`` separation.
+
+
+Evaluation of Results
+~~~~~~~~~~~~~~~~~~~~~
+
+To evaluate a series of experiments, a special script iterates through all the results and computes the EER on the development set and the HTER on the evaluation set, for both the ``nonorm`` and the ``ztnorm`` directories.
+Simply call:
+
+.. code-block:: sh
+
+   $ ./bin/collect_results.py -vv --directory [result-base-directory] --sort
+
+This will iterate through all result files found in ``[result-base-directory]`` and sort the results according to the EER on the development set (the sorting criterion can be modified using the ``--criterion`` and the ``--sort-key`` command line options).
+Hence, to find the best results of your grid search experiments (with default directories), simply run:
+
+.. code-block:: sh
+
+   $ ./bin/collect_results.py -vv --directory results/grid_search --sort --criterion EER --sort-key nonorm-dev
+
+
+
+
+.. include:: links.rst
diff --git a/doc/py_api.rst b/doc/py_api.rst
new file mode 100644
index 0000000000000000000000000000000000000000..a43beead5e5c32c6ea92aefbea50a2232b2457df
--- /dev/null
+++ b/doc/py_api.rst
@@ -0,0 +1,110 @@
+
+===========================
+Python API for bob.bio.base
+===========================
+
+Generic functions
+-----------------
+
+IO-related functions
+~~~~~~~~~~~~~~~~~~~~
+
+.. 
autosummary:: + bob.bio.base.load + bob.bio.base.save + bob.bio.base.load_compressed + bob.bio.base.save_compressed + bob.bio.base.open_compressed + bob.bio.base.close_compressed + bob.bio.base.check_file + + +Functions dealing with resources +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + bob.bio.base.load_resource + bob.bio.base.read_config_file + bob.bio.base.resource_keys + bob.bio.base.extensions + bob.bio.base.valid_keywords + + +Miscellaneous functions +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + bob.bio.base.get_config + bob.bio.base.score_fusion_strategy + bob.bio.base.selected_elements + bob.bio.base.selected_indices + + +Tools to run recognition experiments +------------------------------------ + +Command line generation +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + bob.bio.base.tools.command_line_parser + bob.bio.base.tools.initialize + bob.bio.base.tools.command_line + bob.bio.base.tools.write_info + bob.bio.base.tools.FileSelector + +Controlling of elements +~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + bob.bio.base.tools.groups + bob.bio.base.tools.indices + +Preprocessing +~~~~~~~~~~~~~ + +.. autosummary:: + bob.bio.base.tools.preprocess + bob.bio.base.tools.read_preprocessed_data + +Feature Extraction +~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + bob.bio.base.tools.train_extractor + bob.bio.base.tools.extract + bob.bio.base.tools.read_features + +Algorithm +~~~~~~~~~ + +.. autosummary:: + bob.bio.base.tools.train_projector + bob.bio.base.tools.project + bob.bio.base.tools.train_enroller + bob.bio.base.tools.enroll + +Scoring +~~~~~~~ + +.. autosummary:: + bob.bio.base.tools.compute_scores + bob.bio.base.tools.concatenate + bob.bio.base.tools.calibrate + +Details +------- + +.. automodule:: bob.bio.base + + .. attribute:: valid_keywords + + Valid keywords, for which resources are defined, are ``('database', 'preprocessor', 'extractor', 'algorithm', 'grid')`` + + +.. automodule:: bob.bio.base.tools + + .. autoclass:: FileSelector + + +.. 
include:: links.rst diff --git a/requirements.txt b/requirements.txt index daa8ad84f13ef3e6b41b675b807983e81ff86a9b..a4abcbc1f465dc153b5b967cf52bba4adaf7c508 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,10 @@ +setuptools +numpy +bob.core bob.extension +bob.measure bob.learn.linear -bob.learn.activation +bob.learn.em bob.learn.mlp -bob.measure -numpy +bob.bio.base +matplotlib # for plotting diff --git a/run.sh b/run.sh new file mode 100755 index 0000000000000000000000000000000000000000..94c50fec6da45a7ace66e632f6395a18751f0ec0 --- /dev/null +++ b/run.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -ex + +INPUT1="-i /home/amir/idiap/remote/user/mobio-male/baselines/gmm/male/ztnorm/scores-dev /home/amir/idiap/remote/user/mobio-male/baselines/gabor-graph/male/ztnorm/scores-dev /home/amir/idiap/remote/user/mobio-male/voice/gmm/male/ztnorm/scores-dev /home/amir/idiap/remote/user/mobio-male/voice/isv/male/ztnorm/scores-dev" +INPUT2="-I /home/amir/idiap/remote/user/mobio-male/baselines/gmm/male/ztnorm/scores-eval /home/amir/idiap/remote/user/mobio-male/baselines/gabor-graph/male/ztnorm/scores-eval /home/amir/idiap/remote/user/mobio-male/voice/gmm/male/ztnorm/scores-eval /home/amir/idiap/remote/user/mobio-male/voice/isv/male/ztnorm/scores-eval" + +for HIDDEN_NODES in 5 10 25 50 100 200; do + ./bin/fuse.py -vvv $INPUT1 $INPUT2 -o "/home/amir/idiap/remote/user/mobio-male/face-voice/F-gmm-gabor-graph-S-gmm-isv_MLP_${HIDDEN_NODES}/male/ztnorm/scores-dev" -O "/home/amir/idiap/remote/user/mobio-male/face-voice/F-gmm-gabor-graph-S-gmm-isv_MLP_${HIDDEN_NODES}/male/ztnorm/scores-eval" -a "bob.fusion.base.algorithm.MLP(preprocessors=[(sklearn.preprocessing.RobustScaler(), False)], n_systems=4, hidden_layers=[${HIDDEN_NODES}], seed=0)" --force --imports='sklearn.preprocessing' 'bob.fusion.base' +done diff --git a/setup.py b/setup.py index 57aecdb75b99a3c0621d14cea4224da69f650955..5668c611fe8ffa0815744d3b0971a0b9b1491e88 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ #!/usr/bin/env python -# Amir Mohammadi <amir.mohammadi@idiap.ch> -# Mon 21 Mar 08:18:08 2016 CEST +# vim: set fileencoding=utf-8 : +# Andre Anjos <andre.anjos@idiap.ch> +# Mon 16 Apr 08:18:08 2012 CEST # # Copyright (C) Idiap Research Institute, Martigny, Switzerland # @@ -32,7 +33,6 @@ # allows you to test your package with new python dependencies w/o requiring # administrative interventions. - from setuptools import setup, dist dist.Distribution(dict(setup_requires=['bob.extension'])) @@ -47,13 +47,13 @@ setup( # information before releasing code publicly. name='bob.fusion.base', version=open("version.txt").read().rstrip(), - description='Basic fusion implementations', + description='Basic tools for running score fusion experiments', url='https://www.github.com/bioidiap/bob.fusion.base', license='GPLv3', author='Amir Mohammadi', - author_email='amir.mohammadi@idiap.ch', - keywords='bob, fusion', + author_email='183.amir@gmail.com', + keywords='bob, score fusion, evaluation', # If you have a better, long description of your package, place it on the # 'doc' directory and then hook it here @@ -97,60 +97,21 @@ setup( # # In this simple example we will create a single program that will print # the version of bob. 
- # entry_points={ - - # # scripts should be declared using this entry: - # 'console_scripts': [ - # 'verify.py = bob.fusion.base.script.verify:main', - # 'resources.py = bob.fusion.base.script.resources:resources', - # 'databases.py = bob.fusion.base.script.resources:databases', - # 'evaluate.py = bob.fusion.base.script.evaluate:main', - # 'collect_results.py = bob.fusion.base.script.collect_results:main', - # 'grid_search.py = bob.fusion.base.script.grid_search:main', - # 'preprocess.py = bob.fusion.base.script.preprocess:main', - # 'extract.py = bob.fusion.base.script.extract:main', - # 'enroll.py = bob.fusion.base.script.enroll:main', - # 'score.py = bob.fusion.base.script.score:main', - # 'fusion_llr.py = bob.fusion.base.script.fusion_llr:main', - # ], - - # 'bob.bio.database': [ - # # for test purposes only - # 'dummy = bob.fusion.base.test.dummy.database:database', - # ], - - # 'bob.bio.preprocessor': [ - # # for test purposes only - # 'dummy = bob.fusion.base.test.dummy.preprocessor:preprocessor', - # ], - - # 'bob.bio.extractor': [ - # # for test purposes only - # 'dummy = bob.fusion.base.test.dummy.extractor:extractor', - # 'linearize = bob.fusion.base.config.extractor.linearize:extractor', - # ], - - # 'bob.bio.algorithm': [ - # # for test purposes only - # 'dummy = bob.fusion.base.test.dummy.algorithm:algorithm', - # 'distance-euclidean = bob.fusion.base.config.algorithm.distance_euclidean:algorithm', - # 'distance-cosine = bob.fusion.base.config.algorithm.distance_cosine:algorithm', - # 'pca = bob.fusion.base.config.algorithm.pca:algorithm', - # 'lda = bob.fusion.base.config.algorithm.lda:algorithm', - # 'pca+lda = bob.fusion.base.config.algorithm.pca_lda:algorithm', - # 'plda = bob.fusion.base.config.algorithm.plda:algorithm', - # 'pca+plda = bob.fusion.base.config.algorithm.pca_plda:algorithm', - # 'bic = bob.fusion.base.config.algorithm.bic:algorithm', - # ], - - # 'bob.bio.grid': [ - # 'local-p4 = bob.fusion.base.config.grid.local:grid', - # 'local-p8 = bob.fusion.base.config.grid.local:grid_p8', - # 'local-p16 = bob.fusion.base.config.grid.local:grid_p16', - # 'grid = bob.fusion.base.config.grid.grid:grid', - # 'demanding = bob.fusion.base.config.grid.demanding:grid', - # ], - # }, + entry_points={ + + # scripts should be declared using this entry: + 'console_scripts': [ + 'fuse.py = bob.fusion.base.script.fuse:main', + ], + + 'bob.fusion.algorithm': [ + 'mean = bob.fusion.base.config.algorithm.mean:algorithm', + 'llr = bob.fusion.base.config.algorithm.llr:algorithm', + 'plr-2 = bob.fusion.base.config.algorithm.plr_2:algorithm', + 'mlp = bob.fusion.base.config.algorithm.mlp:algorithm', + ], + + }, # Classifiers are important if you plan to distribute this package through # PyPI. You can find the complete list of classifiers that are valid and diff --git a/version.txt b/version.txt index 8acdd82b765e8e0b8cd8787f7f18c7fe2ec52493..00658575dc28cb9136c007d50366158460e181d9 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -0.0.1 +2.0.0b0 \ No newline at end of file