diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..ea2740997c121e44d879e35b030b728e7e0ad92e --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +__pycache__/ +.ipynb_checkpoints +.vscode/ +submitted.sql3 +.nfs* \ No newline at end of file diff --git a/COPYING b/COPYING new file mode 100644 index 0000000000000000000000000000000000000000..f288702d2fa16d3cdf0035b15a9fcbc552cd88e7 --- /dev/null +++ b/COPYING @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. 
+ + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + <program> Copyright (C) <year> <name of author> + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>. diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..121f6f9710cb97140ad33819047a899e64356613 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,14 @@ +License - for Non-Commercial Research and Educational Use Only + +Copyright (c) 2021, Idiap research institute + +All rights reserved. + +Run, copy, study, change, improve and redistribute source and binary forms, with or without modification, are permitted for non-commercial research and educational use only provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +For any questions regarding this license agreement, please contact Idiap's Technology Transfer Office at tto@idiap.ch \ No newline at end of file diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000000000000000000000000000000000000..9664321dc37368a5718a37de743a714a2b98f2b0 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include README.rst COPYING environment.yml buildout.cfg LICENSE.txt version.txt requirements.txt diff --git a/README.md b/README.md deleted file mode 100644 index 0c1c57fbd42c49a7d63a15800e42900858b91c90..0000000000000000000000000000000000000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# bob.paper.icassp2022_morph_generate - diff --git a/README.rst b/README.rst new file mode 100644 index 0000000000000000000000000000000000000000..17a7de53ee90487ec1f591dda427094a27663d54 --- /dev/null +++ b/README.rst @@ -0,0 +1,102 @@ +==================================================== + Are GAN-based Morphs Threatening Face Recognition? +==================================================== + +This package contains the source code for generating the morphing attacks used in the experiments of the paper "Are GAN-based Morphs Threatening Face Recognition?":: + + @INPROCEEDINGS{Sarkar_ICASSP_2022, + author = {Sarkar, Eklavya and Korshunov, Pavel and Colbois, Laurent and Marcel, S{\'e}bastien}, + projects = {Idiap, Biometrics Center}, + title = {Are GAN-based Morphs Threatening Face Recognition?}, + booktitle = {International Conference on Acoustics, Speech, & Signal Processing (ICASSP 2022)}, + year = {2022}, + note = {Accepted for Publication in ICASSP2022}, + pdf = {http://publications.idiap.ch/attachments/papers/2022/Sarkar_ICASSP_2022.pdf} + } + +Installation +------------ +This package is part of the signal-processing and machine learning toolbox bob_. +Install conda_ before continuing. + +Download the source code of this paper and unpack it. +Then, you can create and activate the required conda environment with the following commands:: + + $ cd bob.paper.icassp2022_morph_generate + $ conda install -n base -c conda-forge mamba + $ mamba env create -f environment.yml -n bob.paper.icassp2022_morph_generate + +This will install all the required software to generate the morphing attacks. + + +Downloading Models +------------------ +The projection process relies on several pre-existing models: + + * **DLIB Face Landmark detector** for cropping and aligning the projected faces exactly as in FFHQ. (Example_). + * StyleGAN2_ as the main face synthesis network. (Original repository_). 
We are using Config-F, trained on FFHQ at resolution 1024 x 1024.
+ * A pretrained VGG16_ model, used to compute a perceptual loss between the projected and target images.
+
+In order to download those models, one must first specify the destination paths in the ``~/.bobrc`` file through the following commands::
+
+ $ bob config set sg2_morph.dlib_lmd_path /path/to/dlib/landmark/detector.dat
+ $ bob config set sg2_morph.sg2_path /path/to/stylegan2/pretrained/model.pkl
+ $ bob config set sg2_morph.vgg16_path /path/to/vgg16/pretrained/model.pkl
+
+Finally, all the models can be downloaded by running::
+
+ $ python download_models.py
+
+Generating Morphs
+------------------
+**Note**: StyleGAN2 requires custom GPU-only operations and at least 12 GB of GPU RAM. Therefore, a GPU is required to run the following examples and perform additional experiments.
+
+Morphs of the following varieties can be generated with ``gen_morphs.py``:
+
+ * OpenCV
+ * FaceMorpher
+ * StyleGAN2
+ * MIPGAN-II
+
+Typical usage::
+
+ $ conda activate bob.paper.icassp2022_morph_generate
+ $ python gen_morphs.py --opencv --facemorpher --stylegan2 --mipgan2 -s path/to/folder/of/images/ -l path/to/csv/of/pairs.csv -d path/to/destination/folder --latents path/to/latent/vectors --alphas 0.3 0.5 0.7
+
+The ``pairs.csv`` file should simply be a two-column ``.csv`` file **without a header**, containing only the filenames of the two images you want to morph:
+
+ * image1.png, image2.png
+ * image1.png, image3.png
+
+
+**Note**: Keep in mind that for the ``--stylegan2`` and ``--mipgan2`` arguments, it is necessary to have generated the latent vectors of all required images **beforehand**.
+
+This can be done with the ``gen_latents.py`` script. Typical usage::
+
+ $ python gen_latents.py -s path/to/folder/of/images/
+
+License
+-------
+
+This package is released under a custom `license <https://gitlab.idiap.ch/bob/bob.paper.icassp2022_morph_generate/-/blob/master/LICENSE.txt>`_. It uses some components from the `official release of the StyleGAN2 model <https://github.com/NVlabs/stylegan2>`_, which is itself released under the `Nvidia Source Code License-NC <https://gitlab.idiap.ch/bob/bob.paper.ijcb2021_synthetic_dataset/-/blob/master/bob/paper/ijcb2021_synthetic_dataset/stylegan2/LICENSE.txt>`_.
+
+
+Contact
+-------
+
+For questions or to report issues with this software package, please contact our
+development team by asking your question on `stackoverflow`_ with the tag *python-bob*, or alternatively contact the first author_.
+
+.. _author: eklavya.sarkar@idiap.ch
+
+Before doing that, check our documentation via the links at the top.
+
+.. Place your references here:
+.. _bob: https://www.idiap.ch/software/bob
+.. _installation: https://www.idiap.ch/software/bob/install
+.. _conda: https://conda.io
+.. _stackoverflow: https://stackoverflow.com/questions/tagged/python-bob
+.. _example: http://dlib.net/face_landmark_detection.py.html
+.. _StyleGAN2: https://arxiv.org/abs/1912.04958
+.. _repository: https://github.com/NVlabs/stylegan2
+.. _VGG16: https://arxiv.org/abs/1801.03924
\ No newline at end of file
diff --git a/dnnlib/__init__.py b/dnnlib/__init__.py
new file mode 100755
index 0000000000000000000000000000000000000000..e34112b628e3d526739681eac984c5c2db704814
--- /dev/null
+++ b/dnnlib/__init__.py
@@ -0,0 +1,21 @@
+# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
+#
+# This work is made available under the Nvidia Source Code License-NC.
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import submission + +from .submission.run_context import RunContext + +from .submission.submit import SubmitTarget +from .submission.submit import PathType +from .submission.submit import SubmitConfig +from .submission.submit import submit_run +from .submission.submit import get_path_from_template +from .submission.submit import convert_path +from .submission.submit import make_run_dir_path + +from .util import EasyDict + +submit_config: SubmitConfig = None # Package level variable for SubmitConfig which is only valid when inside the run function. diff --git a/dnnlib/submission/__init__.py b/dnnlib/submission/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..acf2fbee4b216cb9f2a0b73993fd1c7042e2248d --- /dev/null +++ b/dnnlib/submission/__init__.py @@ -0,0 +1,8 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import run_context +from . import submit diff --git a/dnnlib/submission/internal/__init__.py b/dnnlib/submission/internal/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..0f11279893d6056e8cb6f9e04e12aad07a776496 --- /dev/null +++ b/dnnlib/submission/internal/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import local diff --git a/dnnlib/submission/internal/local.py b/dnnlib/submission/internal/local.py new file mode 100755 index 0000000000000000000000000000000000000000..c03c79e93ca19704157782a0bae556a7752b775c --- /dev/null +++ b/dnnlib/submission/internal/local.py @@ -0,0 +1,22 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +class TargetOptions(): + def __init__(self): + self.do_not_copy_source_files = False + +class Target(): + def __init__(self): + pass + + def finalize_submit_config(self, submit_config, host_run_dir): + print ('Local submit ', end='', flush=True) + submit_config.run_dir = host_run_dir + + def submit(self, submit_config, host_run_dir): + from ..submit import run_wrapper, convert_path + print('- run_dir: %s' % convert_path(submit_config.run_dir), flush=True) + return run_wrapper(submit_config) diff --git a/dnnlib/submission/run_context.py b/dnnlib/submission/run_context.py new file mode 100755 index 0000000000000000000000000000000000000000..62fbb1afd86be9d5fa963a1958485a2fc6d1152a --- /dev/null +++ b/dnnlib/submission/run_context.py @@ -0,0 +1,110 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helpers for managing the run/training loop.""" + +import datetime +import json +import os +import pprint +import time +import types + +from typing import Any + +from . import submit + +# Singleton RunContext +_run_context = None + +class RunContext(object): + """Helper class for managing the run/training loop. 
+ + The context will hide the implementation details of a basic run/training loop. + It will set things up properly, tell if run should be stopped, and then cleans up. + User should call update periodically and use should_stop to determine if run should be stopped. + + Args: + submit_config: The SubmitConfig that is used for the current run. + config_module: (deprecated) The whole config module that is used for the current run. + """ + + def __init__(self, submit_config: submit.SubmitConfig, config_module: types.ModuleType = None): + global _run_context + # Only a single RunContext can be alive + assert _run_context is None + _run_context = self + self.submit_config = submit_config + self.should_stop_flag = False + self.has_closed = False + self.start_time = time.time() + self.last_update_time = time.time() + self.last_update_interval = 0.0 + self.progress_monitor_file_path = None + + # vestigial config_module support just prints a warning + if config_module is not None: + print("RunContext.config_module parameter support has been removed.") + + # write out details about the run to a text file + self.run_txt_data = {"task_name": submit_config.task_name, "host_name": submit_config.host_name, "start_time": datetime.datetime.now().isoformat(sep=" ")} + with open(os.path.join(submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + + def __enter__(self) -> "RunContext": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def update(self, loss: Any = 0, cur_epoch: Any = 0, max_epoch: Any = None) -> None: + """Do general housekeeping and keep the state of the context up-to-date. + Should be called often enough but not in a tight loop.""" + assert not self.has_closed + + self.last_update_interval = time.time() - self.last_update_time + self.last_update_time = time.time() + + if os.path.exists(os.path.join(self.submit_config.run_dir, "abort.txt")): + self.should_stop_flag = True + + def should_stop(self) -> bool: + """Tell whether a stopping condition has been triggered one way or another.""" + return self.should_stop_flag + + def get_time_since_start(self) -> float: + """How much time has passed since the creation of the context.""" + return time.time() - self.start_time + + def get_time_since_last_update(self) -> float: + """How much time has passed since the last call to update.""" + return time.time() - self.last_update_time + + def get_last_update_interval(self) -> float: + """How much time passed between the previous two calls to update.""" + return self.last_update_interval + + def close(self) -> None: + """Close the context and clean up. 
+ Should only be called once.""" + if not self.has_closed: + # update the run.txt with stopping time + self.run_txt_data["stop_time"] = datetime.datetime.now().isoformat(sep=" ") + with open(os.path.join(self.submit_config.run_dir, "run.txt"), "w") as f: + pprint.pprint(self.run_txt_data, stream=f, indent=4, width=200, compact=False) + self.has_closed = True + + # detach the global singleton + global _run_context + if _run_context is self: + _run_context = None + + @staticmethod + def get(): + import dnnlib + if _run_context is not None: + return _run_context + return RunContext(dnnlib.submit_config) diff --git a/dnnlib/submission/submit.py b/dnnlib/submission/submit.py new file mode 100755 index 0000000000000000000000000000000000000000..514647dd6a0585c7bd6864380a95b8059bcfba42 --- /dev/null +++ b/dnnlib/submission/submit.py @@ -0,0 +1,343 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Submit a function to be run either locally or in a computing cluster.""" + +import copy +import inspect +import os +import pathlib +import pickle +import platform +import pprint +import re +import shutil +import sys +import time +import traceback + +from enum import Enum + +from .. import util +from ..util import EasyDict + +from . import internal + +class SubmitTarget(Enum): + """The target where the function should be run. + + LOCAL: Run it locally. + """ + LOCAL = 1 + + +class PathType(Enum): + """Determines in which format should a path be formatted. + + WINDOWS: Format with Windows style. + LINUX: Format with Linux/Posix style. + AUTO: Use current OS type to select either WINDOWS or LINUX. + """ + WINDOWS = 1 + LINUX = 2 + AUTO = 3 + + +class PlatformExtras: + """A mixed bag of values used by dnnlib heuristics. + + Attributes: + + data_reader_buffer_size: Used by DataReader to size internal shared memory buffers. + data_reader_process_count: Number of worker processes to spawn (zero for single thread operation) + """ + def __init__(self): + self.data_reader_buffer_size = 1<<30 # 1 GB + self.data_reader_process_count = 0 # single threaded default + + +_user_name_override = None + +class SubmitConfig(util.EasyDict): + """Strongly typed config dict needed to submit runs. + + Attributes: + run_dir_root: Path to the run dir root. Can be optionally templated with tags. Needs to always be run through get_path_from_template. + run_desc: Description of the run. Will be used in the run dir and task name. + run_dir_ignore: List of file patterns used to ignore files when copying files to the run dir. + run_dir_extra_files: List of (abs_path, rel_path) tuples of file paths. rel_path root will be the src directory inside the run dir. + submit_target: Submit target enum value. Used to select where the run is actually launched. + num_gpus: Number of GPUs used/requested for the run. + print_info: Whether to print debug information when submitting. + local.do_not_copy_source_files: Do not copy source files from the working directory to the run dir. + run_id: Automatically populated value during submit. + run_name: Automatically populated value during submit. + run_dir: Automatically populated value during submit. + run_func_name: Automatically populated value during submit. + run_func_kwargs: Automatically populated value during submit. + user_name: Automatically populated value during submit. 
Can be set by the user which will then override the automatic value. + task_name: Automatically populated value during submit. + host_name: Automatically populated value during submit. + platform_extras: Automatically populated values during submit. Used by various dnnlib libraries such as the DataReader class. + """ + + def __init__(self): + super().__init__() + + # run (set these) + self.run_dir_root = "" # should always be passed through get_path_from_template + self.run_desc = "" + self.run_dir_ignore = ["__pycache__", "*.pyproj", "*.sln", "*.suo", ".cache", ".idea", ".vs", ".vscode", "_cudacache"] + self.run_dir_extra_files = [] + + # submit (set these) + self.submit_target = SubmitTarget.LOCAL + self.num_gpus = 1 + self.print_info = False + self.nvprof = False + self.local = internal.local.TargetOptions() + self.datasets = [] + + # (automatically populated) + self.run_id = None + self.run_name = None + self.run_dir = None + self.run_func_name = None + self.run_func_kwargs = None + self.user_name = None + self.task_name = None + self.host_name = "localhost" + self.platform_extras = PlatformExtras() + + +def get_path_from_template(path_template: str, path_type: PathType = PathType.AUTO) -> str: + """Replace tags in the given path template and return either Windows or Linux formatted path.""" + # automatically select path type depending on running OS + if path_type == PathType.AUTO: + if platform.system() == "Windows": + path_type = PathType.WINDOWS + elif platform.system() == "Linux": + path_type = PathType.LINUX + else: + raise RuntimeError("Unknown platform") + + path_template = path_template.replace("<USERNAME>", get_user_name()) + + # return correctly formatted path + if path_type == PathType.WINDOWS: + return str(pathlib.PureWindowsPath(path_template)) + elif path_type == PathType.LINUX: + return str(pathlib.PurePosixPath(path_template)) + else: + raise RuntimeError("Unknown platform") + + +def get_template_from_path(path: str) -> str: + """Convert a normal path back to its template representation.""" + path = path.replace("\\", "/") + return path + + +def convert_path(path: str, path_type: PathType = PathType.AUTO) -> str: + """Convert a normal path to template and the convert it back to a normal path with given path type.""" + path_template = get_template_from_path(path) + path = get_path_from_template(path_template, path_type) + return path + + +def set_user_name_override(name: str) -> None: + """Set the global username override value.""" + global _user_name_override + _user_name_override = name + + +def get_user_name(): + """Get the current user name.""" + if _user_name_override is not None: + return _user_name_override + elif platform.system() == "Windows": + return os.getlogin() + elif platform.system() == "Linux": + try: + import pwd + return pwd.getpwuid(os.geteuid()).pw_name + except: + return "unknown" + else: + raise RuntimeError("Unknown platform") + + +def make_run_dir_path(*paths): + """Make a path/filename that resides under the current submit run_dir. + + Args: + *paths: Path components to be passed to os.path.join + + Returns: + A file/dirname rooted at submit_config.run_dir. If there's no + submit_config or run_dir, the base directory is the current + working directory. 
+ + E.g., `os.path.join(dnnlib.submit_config.run_dir, "output.txt"))` + """ + import dnnlib + if (dnnlib.submit_config is None) or (dnnlib.submit_config.run_dir is None): + return os.path.join(os.getcwd(), *paths) + return os.path.join(dnnlib.submit_config.run_dir, *paths) + + +def _create_run_dir_local(submit_config: SubmitConfig) -> str: + """Create a new run dir with increasing ID number at the start.""" + run_dir_root = get_path_from_template(submit_config.run_dir_root, PathType.AUTO) + + if not os.path.exists(run_dir_root): + os.makedirs(run_dir_root) + + submit_config.run_id = _get_next_run_id_local(run_dir_root) + submit_config.run_name = "{0:05d}-{1}".format(submit_config.run_id, submit_config.run_desc) + run_dir = os.path.join(run_dir_root, submit_config.run_name) + + if os.path.exists(run_dir): + raise RuntimeError("The run dir already exists! ({0})".format(run_dir)) + + os.makedirs(run_dir) + + return run_dir + + +def _get_next_run_id_local(run_dir_root: str) -> int: + """Reads all directory names in a given directory (non-recursive) and returns the next (increasing) run id. Assumes IDs are numbers at the start of the directory names.""" + dir_names = [d for d in os.listdir(run_dir_root) if os.path.isdir(os.path.join(run_dir_root, d))] + r = re.compile("^\\d+") # match one or more digits at the start of the string + run_id = 0 + + for dir_name in dir_names: + m = r.match(dir_name) + + if m is not None: + i = int(m.group()) + run_id = max(run_id, i + 1) + + return run_id + + +def _populate_run_dir(submit_config: SubmitConfig, run_dir: str) -> None: + """Copy all necessary files into the run dir. Assumes that the dir exists, is local, and is writable.""" + pickle.dump(submit_config, open(os.path.join(run_dir, "submit_config.pkl"), "wb")) + with open(os.path.join(run_dir, "submit_config.txt"), "w") as f: + pprint.pprint(submit_config, stream=f, indent=4, width=200, compact=False) + + if (submit_config.submit_target == SubmitTarget.LOCAL) and submit_config.local.do_not_copy_source_files: + return + + files = [] + + run_func_module_dir_path = util.get_module_dir_by_obj_name(submit_config.run_func_name) + assert '.' 
in submit_config.run_func_name + for _idx in range(submit_config.run_func_name.count('.') - 1): + run_func_module_dir_path = os.path.dirname(run_func_module_dir_path) + files += util.list_dir_recursively_with_ignore(run_func_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=False) + + dnnlib_module_dir_path = util.get_module_dir_by_obj_name("dnnlib") + files += util.list_dir_recursively_with_ignore(dnnlib_module_dir_path, ignores=submit_config.run_dir_ignore, add_base_to_relative=True) + + files += submit_config.run_dir_extra_files + + files = [(f[0], os.path.join(run_dir, "src", f[1])) for f in files] + files += [(os.path.join(dnnlib_module_dir_path, "submission", "internal", "run.py"), os.path.join(run_dir, "run.py"))] + + util.copy_files_and_create_dirs(files) + + + +def run_wrapper(submit_config: SubmitConfig) -> None: + """Wrap the actual run function call for handling logging, exceptions, typing, etc.""" + is_local = submit_config.submit_target == SubmitTarget.LOCAL + + # when running locally, redirect stderr to stdout, log stdout to a file, and force flushing + if is_local: + logger = util.Logger(file_name=os.path.join(submit_config.run_dir, "log.txt"), file_mode="w", should_flush=True) + else: # when running in a cluster, redirect stderr to stdout, and just force flushing (log writing is handled by run.sh) + logger = util.Logger(file_name=None, should_flush=True) + + import dnnlib + dnnlib.submit_config = submit_config + + exit_with_errcode = False + try: + print("dnnlib: Running {0}() on {1}...".format(submit_config.run_func_name, submit_config.host_name)) + start_time = time.time() + + run_func_obj = util.get_obj_by_name(submit_config.run_func_name) + assert callable(run_func_obj) + sig = inspect.signature(run_func_obj) + if 'submit_config' in sig.parameters: + run_func_obj(submit_config=submit_config, **submit_config.run_func_kwargs) + else: + run_func_obj(**submit_config.run_func_kwargs) + + print("dnnlib: Finished {0}() in {1}.".format(submit_config.run_func_name, util.format_time(time.time() - start_time))) + except: + if is_local: + raise + else: + traceback.print_exc() + + log_src = os.path.join(submit_config.run_dir, "log.txt") + log_dst = os.path.join(get_path_from_template(submit_config.run_dir_root), "{0}-error.txt".format(submit_config.run_name)) + shutil.copyfile(log_src, log_dst) + + # Defer sys.exit(1) to happen after we close the logs and create a _finished.txt + exit_with_errcode = True + finally: + open(os.path.join(submit_config.run_dir, "_finished.txt"), "w").close() + + dnnlib.RunContext.get().close() + dnnlib.submit_config = None + logger.close() + + # If we hit an error, get out of the script now and signal the error + # to whatever process that started this script. + if exit_with_errcode: + sys.exit(1) + + return submit_config + + +def submit_run(submit_config: SubmitConfig, run_func_name: str, **run_func_kwargs) -> None: + """Create a run dir, gather files related to the run, copy files to the run dir, and launch the run in appropriate place.""" + submit_config = copy.deepcopy(submit_config) + + submit_target = submit_config.submit_target + farm = None + if submit_target == SubmitTarget.LOCAL: + farm = internal.local.Target() + assert farm is not None # unknown target + + # Disallow submitting jobs with zero num_gpus. 
+ if (submit_config.num_gpus is None) or (submit_config.num_gpus == 0): + raise RuntimeError("submit_config.num_gpus must be set to a non-zero value") + + if submit_config.user_name is None: + submit_config.user_name = get_user_name() + + submit_config.run_func_name = run_func_name + submit_config.run_func_kwargs = run_func_kwargs + + #-------------------------------------------------------------------- + # Prepare submission by populating the run dir + #-------------------------------------------------------------------- + host_run_dir = _create_run_dir_local(submit_config) + + submit_config.task_name = "{0}-{1:05d}-{2}".format(submit_config.user_name, submit_config.run_id, submit_config.run_desc) + docker_valid_name_regex = "^[a-zA-Z0-9][a-zA-Z0-9_.-]+$" + if not re.match(docker_valid_name_regex, submit_config.task_name): + raise RuntimeError("Invalid task name. Probable reason: unacceptable characters in your submit_config.run_desc. Task name must be accepted by the following regex: " + docker_valid_name_regex + ", got " + submit_config.task_name) + + # Farm specific preparations for a submit + farm.finalize_submit_config(submit_config, host_run_dir) + _populate_run_dir(submit_config, host_run_dir) + return farm.submit(submit_config, host_run_dir) diff --git a/dnnlib/tflib/__init__.py b/dnnlib/tflib/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..02c25173d3f2391c88b142cf80af02cd93b0b5a0 --- /dev/null +++ b/dnnlib/tflib/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +from . import autosummary +from . import network +from . import optimizer +from . import tfutil +from . import custom_ops + +from .tfutil import * +from .network import Network + +from .optimizer import Optimizer + +from .custom_ops import get_plugin diff --git a/dnnlib/tflib/autosummary.py b/dnnlib/tflib/autosummary.py new file mode 100755 index 0000000000000000000000000000000000000000..6b0d80b371620bedadf8164772b7d6f87806fc11 --- /dev/null +++ b/dnnlib/tflib/autosummary.py @@ -0,0 +1,191 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper for adding automatically tracked values to Tensorboard. + +Autosummary creates an identity op that internally keeps track of the input +values and automatically shows up in TensorBoard. The reported value +represents an average over input components. The average is accumulated +constantly over time and flushed when save_summaries() is called. + +Notes: +- The output tensor must be used as an input for something else in the + graph. Otherwise, the autosummary op will not get executed, and the average + value will not get accumulated. +- It is perfectly fine to include autosummaries with the same name in + several places throughout the graph, even if they are executed concurrently. +- It is ok to also pass in a python scalar or numpy array. In this case, it + is added to the average immediately. +""" + +from collections import OrderedDict +import numpy as np +import tensorflow as tf +from tensorboard import summary as summary_lib +from tensorboard.plugins.custom_scalar import layout_pb2 + +from . 
import tfutil +from .tfutil import TfExpression +from .tfutil import TfExpressionEx + +# Enable "Custom scalars" tab in TensorBoard for advanced formatting. +# Disabled by default to reduce tfevents file size. +enable_custom_scalars = False + +_dtype = tf.float64 +_vars = OrderedDict() # name => [var, ...] +_immediate = OrderedDict() # name => update_op, update_value +_finalized = False +_merge_op = None + + +def _create_var(name: str, value_expr: TfExpression) -> TfExpression: + """Internal helper for creating autosummary accumulators.""" + assert not _finalized + name_id = name.replace("/", "_") + v = tf.cast(value_expr, _dtype) + + if v.shape.is_fully_defined(): + size = np.prod(v.shape.as_list()) + size_expr = tf.constant(size, dtype=_dtype) + else: + size = None + size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype)) + + if size == 1: + if v.shape.ndims != 0: + v = tf.reshape(v, []) + v = [size_expr, v, tf.square(v)] + else: + v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))] + v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype)) + + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None): + var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)] + update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v)) + + if name in _vars: + _vars[name].append(var) + else: + _vars[name] = [var] + return update_op + + +def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx: + """Create a new autosummary. + + Args: + name: Name to use in TensorBoard + value: TensorFlow expression or python value to track + passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node. + + Example use of the passthru mechanism: + + n = autosummary('l2loss', loss, passthru=n) + + This is a shorthand for the following code: + + with tf.control_dependencies([autosummary('l2loss', loss)]): + n = tf.identity(n) + """ + tfutil.assert_tf_initialized() + name_id = name.replace("/", "_") + + if tfutil.is_tf_expression(value): + with tf.name_scope("summary_" + name_id), tf.device(value.device): + condition = tf.convert_to_tensor(condition, name='condition') + update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op) + with tf.control_dependencies([update_op]): + return tf.identity(value if passthru is None else passthru) + + else: # python scalar or numpy array + assert not tfutil.is_tf_expression(passthru) + assert not tfutil.is_tf_expression(condition) + if condition: + if name not in _immediate: + with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None): + update_value = tf.placeholder(_dtype) + update_op = _create_var(name, update_value) + _immediate[name] = update_op, update_value + update_op, update_value = _immediate[name] + tfutil.run(update_op, {update_value: value}) + return value if passthru is None else passthru + + +def finalize_autosummaries() -> None: + """Create the necessary ops to include autosummaries in TensorBoard report. + Note: This should be done only once per graph. + """ + global _finalized + tfutil.assert_tf_initialized() + + if _finalized: + return None + + _finalized = True + tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list]) + + # Create summary ops. 
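+    # Each accumulator variable holds the running moments [sum(1), sum(x), sum(x**2)].
+    # Dividing by moments[0] below yields [1, E[x], E[x**2]], from which the reported
+    # scalars are mean = E[x] and std = sqrt(E[x**2] - E[x]**2).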
+ with tf.device(None), tf.control_dependencies(None): + for name, vars_list in _vars.items(): + name_id = name.replace("/", "_") + with tfutil.absolute_name_scope("Autosummary/" + name_id): + moments = tf.add_n(vars_list) + moments /= moments[0] + with tf.control_dependencies([moments]): # read before resetting + reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list] + with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting + mean = moments[1] + std = tf.sqrt(moments[2] - tf.square(moments[1])) + tf.summary.scalar(name, mean) + if enable_custom_scalars: + tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std) + tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std) + + # Setup layout for custom scalars. + layout = None + if enable_custom_scalars: + cat_dict = OrderedDict() + for series_name in sorted(_vars.keys()): + p = series_name.split("/") + cat = p[0] if len(p) >= 2 else "" + chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1] + if cat not in cat_dict: + cat_dict[cat] = OrderedDict() + if chart not in cat_dict[cat]: + cat_dict[cat][chart] = [] + cat_dict[cat][chart].append(series_name) + categories = [] + for cat_name, chart_dict in cat_dict.items(): + charts = [] + for chart_name, series_names in chart_dict.items(): + series = [] + for series_name in series_names: + series.append(layout_pb2.MarginChartContent.Series( + value=series_name, + lower="xCustomScalars/" + series_name + "/margin_lo", + upper="xCustomScalars/" + series_name + "/margin_hi")) + margin = layout_pb2.MarginChartContent(series=series) + charts.append(layout_pb2.Chart(title=chart_name, margin=margin)) + categories.append(layout_pb2.Category(title=cat_name, chart=charts)) + layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories)) + return layout + +def save_summaries(file_writer, global_step=None): + """Call FileWriter.add_summary() with all summaries in the default graph, + automatically finalizing and merging them on the first call. + """ + global _merge_op + tfutil.assert_tf_initialized() + + if _merge_op is None: + layout = finalize_autosummaries() + if layout is not None: + file_writer.add_summary(layout) + with tf.device(None), tf.control_dependencies(None): + _merge_op = tf.summary.merge_all() + + file_writer.add_summary(_merge_op.eval(), global_step) diff --git a/dnnlib/tflib/custom_ops.py b/dnnlib/tflib/custom_ops.py new file mode 100755 index 0000000000000000000000000000000000000000..bc72a8fa588a5e78cac9f5512aa3213f7a64a26f --- /dev/null +++ b/dnnlib/tflib/custom_ops.py @@ -0,0 +1,169 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""TensorFlow custom ops builder. +""" + +import os +import re +import uuid +import hashlib +import tempfile +import shutil +import tensorflow as tf +from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module + +#---------------------------------------------------------------------------- +# Global options. + +cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache') +cuda_cache_version_tag = 'v1' +do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe! +verbose = True # Print status messages to stdout. 
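+
+# Typical usage, as done by the op wrappers in dnnlib/tflib/ops/ (e.g. fused_bias_act.py):
+#
+#   from dnnlib.tflib import custom_ops
+#   plugin = custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
+#   y = plugin.fused_bias_act(...)  # call an op exported by the compiled library
+#
+# get_plugin() compiles the .cu file with nvcc on first use, caches the resulting
+# binary under cuda_cache_path keyed by an MD5 hash of the source, included headers,
+# and build configuration, and returns the module loaded via tf.load_op_library().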
+ +compiler_bindir_search_path = [ + 'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.14.26428/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64', + 'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin', +] + +#---------------------------------------------------------------------------- +# Internal helper funcs. + +def _find_compiler_bindir(): + for compiler_path in compiler_bindir_search_path: + if os.path.isdir(compiler_path): + return compiler_path + return None + +def _get_compute_cap(device): + caps_str = device.physical_device_desc + m = re.search('compute capability: (\\d+).(\\d+)', caps_str) + major = m.group(1) + minor = m.group(2) + return (major, minor) + +def _get_cuda_gpu_arch_string(): + gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU'] + if len(gpus) == 0: + raise RuntimeError('No GPU devices found') + (major, minor) = _get_compute_cap(gpus[0]) + return 'sm_%s%s' % (major, minor) + +def _run_cmd(cmd): + with os.popen(cmd) as pipe: + output = pipe.read() + status = pipe.close() + if status is not None: + raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output)) + +def _prepare_nvcc_cli(opts): + cmd = 'nvcc ' + opts.strip() + cmd += ' --disable-warnings' + cmd += ' --include-path "%s"' % tf.sysconfig.get_include() + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl') + cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive') + + compiler_bindir = _find_compiler_bindir() + if compiler_bindir is None: + # Require that _find_compiler_bindir succeeds on Windows. Allow + # nvcc to use whatever is the default on Linux. + if os.name == 'nt': + raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__) + else: + cmd += ' --compiler-bindir "%s"' % compiler_bindir + cmd += ' 2>&1' + return cmd + +#---------------------------------------------------------------------------- +# Main entry point. + +_plugin_cache = dict() + +def get_plugin(cuda_file): + cuda_file_base = os.path.basename(cuda_file) + cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base) + + # Already in cache? + if cuda_file in _plugin_cache: + return _plugin_cache[cuda_file] + + # Setup plugin. + if verbose: + print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True) + try: + # Hash CUDA source. + md5 = hashlib.md5() + with open(cuda_file, 'rb') as f: + md5.update(f.read()) + md5.update(b'\n') + + # Hash headers included by the CUDA code by running it through the preprocessor. + if not do_not_hash_included_headers: + if verbose: + print('Preprocessing... 
', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext) + _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))) + with open(tmp_file, 'rb') as f: + bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros + good_file_str = ('"' + cuda_file_base + '"').encode('utf-8') + for ln in f: + if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas + ln = ln.replace(bad_file_str, good_file_str) + md5.update(ln) + md5.update(b'\n') + + # Select compiler options. + compile_opts = '' + if os.name == 'nt': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib') + elif os.name == 'posix': + compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so') + compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=1\'' + else: + assert False # not Windows or Linux, w00t? + compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string() + compile_opts += ' --use_fast_math' + nvcc_cmd = _prepare_nvcc_cli(compile_opts) + + # Hash build configuration. + md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n') + md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n') + md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n') + + # Compile if not already compiled. + bin_file_ext = '.dll' if os.name == 'nt' else '.so' + bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext) + if not os.path.isfile(bin_file): + if verbose: + print('Compiling... ', end='', flush=True) + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext) + _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)) + os.makedirs(cuda_cache_path, exist_ok=True) + intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext) + shutil.copyfile(tmp_file, intermediate_file) + os.rename(intermediate_file, bin_file) # atomic + + # Load. + if verbose: + print('Loading... ', end='', flush=True) + plugin = tf.load_op_library(bin_file) + + # Add to cache. + _plugin_cache[cuda_file] = plugin + if verbose: + print('Done.', flush=True) + return plugin + + except: + if verbose: + print('Failed!', flush=True) + raise + +#---------------------------------------------------------------------------- diff --git a/dnnlib/tflib/network.py b/dnnlib/tflib/network.py new file mode 100755 index 0000000000000000000000000000000000000000..253fe36584e65d84d195cba71aecab09feaf23d1 --- /dev/null +++ b/dnnlib/tflib/network.py @@ -0,0 +1,590 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper for managing networks.""" + +import types +import inspect +import re +import uuid +import sys +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import Any, List, Tuple, Union + +from . import tfutil +from .. 
import util + +from .tfutil import TfExpression, TfExpressionEx + +_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import. +_import_module_src = dict() # Source code for temporary modules created during pickle import. + + +def import_handler(handler_func): + """Function decorator for declaring custom import handlers.""" + _import_handlers.append(handler_func) + return handler_func + + +class Network: + """Generic network abstraction. + + Acts as a convenience wrapper for a parameterized network construction + function, providing several utility methods and convenient access to + the inputs/outputs/weights. + + Network objects can be safely pickled and unpickled for long-term + archival purposes. The pickling works reliably as long as the underlying + network construction function is defined in a standalone Python module + that has no side effects or application-specific imports. + + Args: + name: Network name. Used to select TensorFlow name and variable scopes. + func_name: Fully qualified name of the underlying network construction function, or a top-level function object. + static_kwargs: Keyword arguments to be passed in to the network construction function. + + Attributes: + name: User-specified name, defaults to build func name if None. + scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name. + static_kwargs: Arguments passed to the user-supplied build func. + components: Container for sub-networks. Passed to the build func, and retained between calls. + num_inputs: Number of input tensors. + num_outputs: Number of output tensors. + input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension. + output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension. + input_shape: Short-hand for input_shapes[0]. + output_shape: Short-hand for output_shapes[0]. + input_templates: Input placeholders in the template graph. + output_templates: Output tensors in the template graph. + input_names: Name string for each input. + output_names: Name string for each output. + own_vars: Variables defined by this network (local_name => var), excluding sub-networks. + vars: All variables (local_name => var). + trainables: All trainable variables (local_name => var). + var_global_to_local: Mapping from variable global names to local names. + """ + + def __init__(self, name: str = None, func_name: Any = None, **static_kwargs): + tfutil.assert_tf_initialized() + assert isinstance(name, str) or name is None + assert func_name is not None + assert isinstance(func_name, str) or util.is_top_level_function(func_name) + assert util.is_pickleable(static_kwargs) + + self._init_fields() + self.name = name + self.static_kwargs = util.EasyDict(static_kwargs) + + # Locate the user-specified network build function. + if util.is_top_level_function(func_name): + func_name = util.get_top_level_function_name(func_name) + module, self._build_func_name = util.get_module_from_obj_name(func_name) + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Dig up source code for the module containing the build function. + self._build_module_src = _import_module_src.get(module, None) + if self._build_module_src is None: + self._build_module_src = inspect.getsource(module) + + # Init TensorFlow graph. 
+ self._init_graph() + self.reset_own_vars() + + def _init_fields(self) -> None: + self.name = None + self.scope = None + self.static_kwargs = util.EasyDict() + self.components = util.EasyDict() + self.num_inputs = 0 + self.num_outputs = 0 + self.input_shapes = [[]] + self.output_shapes = [[]] + self.input_shape = [] + self.output_shape = [] + self.input_templates = [] + self.output_templates = [] + self.input_names = [] + self.output_names = [] + self.own_vars = OrderedDict() + self.vars = OrderedDict() + self.trainables = OrderedDict() + self.var_global_to_local = OrderedDict() + + self._build_func = None # User-supplied build function that constructs the network. + self._build_func_name = None # Name of the build function. + self._build_module_src = None # Full source code of the module containing the build function. + self._run_cache = dict() # Cached graph data for Network.run(). + + def _init_graph(self) -> None: + # Collect inputs. + self.input_names = [] + + for param in inspect.signature(self._build_func).parameters.values(): + if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: + self.input_names.append(param.name) + + self.num_inputs = len(self.input_names) + assert self.num_inputs >= 1 + + # Choose name and scope. + if self.name is None: + self.name = self._build_func_name + assert re.match("^[A-Za-z0-9_.\\-]*$", self.name) + with tf.name_scope(None): + self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs["is_template_graph"] = True + build_kwargs["components"] = self.components + + # Build template graph. + with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope): # ignore surrounding scopes + assert tf.get_variable_scope().name == self.scope + assert tf.get_default_graph().get_name_scope() == self.scope + with tf.control_dependencies(None): # ignore surrounding control dependencies + self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + out_expr = self._build_func(*self.input_templates, **build_kwargs) + + # Collect outputs. + assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + self.num_outputs = len(self.output_templates) + assert self.num_outputs >= 1 + assert all(tfutil.is_tf_expression(t) for t in self.output_templates) + + # Perform sanity checks. + if any(t.shape.ndims is None for t in self.input_templates): + raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.") + if any(t.shape.ndims is None for t in self.output_templates): + raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.") + if any(not isinstance(comp, Network) for comp in self.components.values()): + raise ValueError("Components of a Network must be Networks themselves.") + if len(self.components) != len(set(comp.name for comp in self.components.values())): + raise ValueError("Components of a Network must have unique names.") + + # List inputs and outputs. 
+ self.input_shapes = [t.shape.as_list() for t in self.input_templates] + self.output_shapes = [t.shape.as_list() for t in self.output_templates] + self.input_shape = self.input_shapes[0] + self.output_shape = self.output_shapes[0] + self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates] + + # List variables. + self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/")) + self.vars = OrderedDict(self.own_vars) + self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items()) + self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable) + self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items()) + + def reset_own_vars(self) -> None: + """Re-initialize all variables of this network, excluding sub-networks.""" + tfutil.run([var.initializer for var in self.own_vars.values()]) + + def reset_vars(self) -> None: + """Re-initialize all variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.vars.values()]) + + def reset_trainables(self) -> None: + """Re-initialize all trainable variables of this network, including sub-networks.""" + tfutil.run([var.initializer for var in self.trainables.values()]) + + def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]: + """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s).""" + assert len(in_expr) == self.num_inputs + assert not all(expr is None for expr in in_expr) + + # Finalize build func kwargs. + build_kwargs = dict(self.static_kwargs) + build_kwargs.update(dynamic_kwargs) + build_kwargs["is_template_graph"] = False + build_kwargs["components"] = self.components + + # Build TensorFlow graph to evaluate the network. + with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name): + assert tf.get_variable_scope().name == self.scope + valid_inputs = [expr for expr in in_expr if expr is not None] + final_inputs = [] + for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes): + if expr is not None: + expr = tf.identity(expr, name=name) + else: + expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name) + final_inputs.append(expr) + out_expr = self._build_func(*final_inputs, **build_kwargs) + + # Propagate input shapes back to the user-specified expressions. + for expr, final in zip(in_expr, final_inputs): + if isinstance(expr, tf.Tensor): + expr.set_shape(final.shape) + + # Express outputs in the desired format. 
+ assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple) + if return_as_list: + out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr) + return out_expr + + def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str: + """Get the local name of a given variable, without any surrounding name scopes.""" + assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str) + global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name + return self.var_global_to_local[global_name] + + def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression: + """Find variable by local or global name.""" + assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str) + return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name + + def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray: + """Get the value of a given variable as NumPy array. + Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.""" + return self.find_var(var_or_local_name).eval() + + def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None: + """Set the value of a given variable based on the given NumPy array. + Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.""" + tfutil.set_vars({self.find_var(var_or_local_name): new_value}) + + def __getstate__(self) -> dict: + """Pickle export.""" + state = dict() + state["version"] = 4 + state["name"] = self.name + state["static_kwargs"] = dict(self.static_kwargs) + state["components"] = dict(self.components) + state["build_module_src"] = self._build_module_src + state["build_func_name"] = self._build_func_name + state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values())))) + return state + + def __setstate__(self, state: dict) -> None: + """Pickle import.""" + # pylint: disable=attribute-defined-outside-init + tfutil.assert_tf_initialized() + self._init_fields() + + # Execute custom import handlers. + for handler in _import_handlers: + state = handler(state) + + # Set basic fields. + assert state["version"] in [2, 3, 4] + self.name = state["name"] + self.static_kwargs = util.EasyDict(state["static_kwargs"]) + self.components = util.EasyDict(state.get("components", {})) + self._build_module_src = state["build_module_src"] + self._build_func_name = state["build_func_name"] + + # Create temporary module from the imported source code. + module_name = "_tflib_network_import_" + uuid.uuid4().hex + module = types.ModuleType(module_name) + sys.modules[module_name] = module + _import_module_src[module] = self._build_module_src + exec(self._build_module_src, module.__dict__) # pylint: disable=exec-used + + # Locate network build function in the temporary module. + self._build_func = util.get_obj_from_module(module, self._build_func_name) + assert callable(self._build_func) + + # Init TensorFlow graph. 
+ self._init_graph() + self.reset_own_vars() + tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]}) + + def clone(self, name: str = None, **new_static_kwargs) -> "Network": + """Create a clone of this network with its own copy of the variables.""" + # pylint: disable=protected-access + net = object.__new__(Network) + net._init_fields() + net.name = name if name is not None else self.name + net.static_kwargs = util.EasyDict(self.static_kwargs) + net.static_kwargs.update(new_static_kwargs) + net._build_module_src = self._build_module_src + net._build_func_name = self._build_func_name + net._build_func = self._build_func + net._init_graph() + net.copy_vars_from(self) + return net + + def copy_own_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, excluding sub-networks.""" + names = [name for name in self.own_vars.keys() if name in src_net.own_vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_vars_from(self, src_net: "Network") -> None: + """Copy the values of all variables from the given network, including sub-networks.""" + names = [name for name in self.vars.keys() if name in src_net.vars] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def copy_trainables_from(self, src_net: "Network") -> None: + """Copy the values of all trainable variables from the given network, including sub-networks.""" + names = [name for name in self.trainables.keys() if name in src_net.trainables] + tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names})) + + def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network": + """Create new network with the given parameters, and copy all variables from this network.""" + if new_name is None: + new_name = self.name + static_kwargs = dict(self.static_kwargs) + static_kwargs.update(new_static_kwargs) + net = Network(name=new_name, func_name=new_func_name, **static_kwargs) + net.copy_vars_from(self) + return net + + def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation: + """Construct a TensorFlow op that updates the variables of this network + to be slightly closer to those of the given network.""" + with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"): + ops = [] + for name, var in self.vars.items(): + if name in src_net.vars: + cur_beta = beta if name in self.trainables else beta_nontrainable + new_value = tfutil.lerp(src_net.vars[name], var, cur_beta) + ops.append(var.assign(new_value)) + return tf.group(*ops) + + def run(self, + *in_arrays: Tuple[Union[np.ndarray, None], ...], + input_transform: dict = None, + output_transform: dict = None, + return_as_list: bool = False, + print_progress: bool = False, + minibatch_size: int = None, + num_gpus: int = 1, + assume_frozen: bool = False, + **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]: + """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). + + Args: + input_transform: A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the input + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. 
+ output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network. + The dict must contain a 'func' field that points to a top-level function. The function is called with the output + TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs. + return_as_list: True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. + print_progress: Print progress to the console? Useful for very large input arrays. + minibatch_size: Maximum minibatch size to use, None = disable batching. + num_gpus: Number of GPUs to use. + assume_frozen: Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls. + dynamic_kwargs: Additional keyword arguments to be passed into the network build function. + """ + assert len(in_arrays) == self.num_inputs + assert not all(arr is None for arr in in_arrays) + assert input_transform is None or util.is_top_level_function(input_transform["func"]) + assert output_transform is None or util.is_top_level_function(output_transform["func"]) + output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs) + num_items = in_arrays[0].shape[0] + if minibatch_size is None: + minibatch_size = num_items + + # Construct unique hash key from all arguments that affect the TensorFlow graph. + key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs) + def unwind_key(obj): + if isinstance(obj, dict): + return [(key, unwind_key(value)) for key, value in sorted(obj.items())] + if callable(obj): + return util.get_top_level_function_name(obj) + return obj + key = repr(unwind_key(key)) + + # Build graph. + if key not in self._run_cache: + with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None): + with tf.device("/cpu:0"): + in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names] + in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr])) + + out_split = [] + for gpu in range(num_gpus): + with tf.device("/gpu:%d" % gpu): + net_gpu = self.clone() if assume_frozen else self + in_gpu = in_split[gpu] + + if input_transform is not None: + in_kwargs = dict(input_transform) + in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs) + in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu) + + assert len(in_gpu) == self.num_inputs + out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs) + + if output_transform is not None: + out_kwargs = dict(output_transform) + out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs) + out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu) + + #assert len(out_gpu) == self.num_outputs + out_split.append(out_gpu) + + with tf.device("/cpu:0"): + out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)] + self._run_cache[key] = in_expr, out_expr + + # Run minibatches. 
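+        # The cached (in_expr, out_expr) graph splits each input across num_gpus,
+        # evaluates the network on every GPU, and concatenates the per-GPU outputs
+        # on the CPU; below it is fed minibatch_size items at a time.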
+ in_expr, out_expr = self._run_cache[key] + out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr] + + for mb_begin in range(0, num_items, minibatch_size): + if print_progress: + print("\r%d / %d" % (mb_begin, num_items), end="") + + mb_end = min(mb_begin + minibatch_size, num_items) + mb_num = mb_end - mb_begin + mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)] + mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in))) + + for dst, src in zip(out_arrays, mb_out): + dst[mb_begin: mb_end] = src + + # Done. + if print_progress: + print("\r%d / %d" % (num_items, num_items)) + + if not return_as_list: + out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays) + return out_arrays + + def list_ops(self) -> List[TfExpression]: + include_prefix = self.scope + "/" + exclude_prefix = include_prefix + "_" + ops = tf.get_default_graph().get_operations() + ops = [op for op in ops if op.name.startswith(include_prefix)] + ops = [op for op in ops if not op.name.startswith(exclude_prefix)] + return ops + + def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]: + """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to + individual layers of the network. Mainly intended to be used for reporting.""" + layers = [] + + def recurse(scope, parent_ops, parent_vars, level): + # Ignore specific patterns. + if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]): + return + + # Filter ops and vars by scope. + global_prefix = scope + "/" + local_prefix = global_prefix[len(self.scope) + 1:] + cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]] + cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]] + if not cur_ops and not cur_vars: + return + + # Filter out all ops related to variables. + for var in [op for op in cur_ops if op.type.startswith("Variable")]: + var_prefix = var.name + "/" + cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)] + + # Scope does not contain ops as immediate children => recurse deeper. + contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops) + if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1: + visited = set() + for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]: + token = rel_name.split("/")[0] + if token not in visited: + recurse(global_prefix + token, cur_ops, cur_vars, level + 1) + visited.add(token) + return + + # Report layer. 
+ layer_name = scope[len(self.scope) + 1:] + layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1] + layer_trainables = [var for _name, var in cur_vars if var.trainable] + layers.append((layer_name, layer_output, layer_trainables)) + + recurse(self.scope, self.list_ops(), list(self.vars.items()), 0) + return layers + + def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None: + """Print a summary table of the network structure.""" + rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]] + rows += [["---"] * 4] + total_params = 0 + + for layer_name, layer_output, layer_trainables in self.list_layers(): + num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables) + weights = [var for var in layer_trainables if var.name.endswith("/weight:0")] + weights.sort(key=lambda x: len(x.name)) + if len(weights) == 0 and len(layer_trainables) == 1: + weights = layer_trainables + total_params += num_params + + if not hide_layers_with_no_params or num_params != 0: + num_params_str = str(num_params) if num_params > 0 else "-" + output_shape_str = str(layer_output.shape) + weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-" + rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]] + + rows += [["---"] * 4] + rows += [["Total", str(total_params), "", ""]] + + widths = [max(len(cell) for cell in column) for column in zip(*rows)] + print() + for row in rows: + print(" ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths))) + print() + + def setup_weight_histograms(self, title: str = None) -> None: + """Construct summary ops to include histograms of all trainable parameters in TensorBoard.""" + if title is None: + title = self.name + + with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): + for local_name, var in self.trainables.items(): + if "/" in local_name: + p = local_name.split("/") + name = title + "_" + p[-1] + "/" + "_".join(p[:-1]) + else: + name = title + "_toplevel/" + local_name + + tf.summary.histogram(name, var) + +#---------------------------------------------------------------------------- +# Backwards-compatible emulation of legacy output transformation in Network.run(). 
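+#
+# For example, a legacy call such as
+#
+#   net.run(latents, out_mul=127.5, out_add=127.5, out_dtype=np.uint8)
+#
+# is rewritten by _handle_legacy_output_transforms() below into the equivalent
+#
+#   net.run(latents, output_transform=dict(func=_legacy_output_transform_func,
+#                                          out_mul=127.5, out_add=127.5, out_dtype=np.uint8))
+#
+# ('net' and 'latents' are placeholder names used only for illustration.)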
+ +_print_legacy_warning = True + +def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): + global _print_legacy_warning + legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] + if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): + return output_transform, dynamic_kwargs + + if _print_legacy_warning: + _print_legacy_warning = False + print() + print("WARNING: Old-style output transformations in Network.run() are deprecated.") + print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") + print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") + print() + assert output_transform is None + + new_kwargs = dict(dynamic_kwargs) + new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} + new_transform["func"] = _legacy_output_transform_func + return new_transform, new_kwargs + +def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): + if out_mul != 1.0: + expr = [x * out_mul for x in expr] + + if out_add != 0.0: + expr = [x + out_add for x in expr] + + if out_shrink > 1: + ksize = [1, 1, out_shrink, out_shrink] + expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] + + if out_dtype is not None: + if tf.as_dtype(out_dtype).is_integer: + expr = [tf.round(x) for x in expr] + expr = [tf.saturate_cast(x, out_dtype) for x in expr] + return expr diff --git a/dnnlib/tflib/ops/__init__.py b/dnnlib/tflib/ops/__init__.py new file mode 100755 index 0000000000000000000000000000000000000000..9ab9908efa3cb38af52e8d5bcaa8acffde5a8875 --- /dev/null +++ b/dnnlib/tflib/ops/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# empty diff --git a/dnnlib/tflib/ops/fused_bias_act.cu b/dnnlib/tflib/ops/fused_bias_act.cu new file mode 100755 index 0000000000000000000000000000000000000000..1102f624fadd0b803bdfb99fecfe145d7ec8abc4 --- /dev/null +++ b/dnnlib/tflib/ops/fused_bias_act.cu @@ -0,0 +1,188 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include <stdio.h> + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +//------------------------------------------------------------------------ +// CUDA kernel. 
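+//
+// The switch statement below dispatches on (act * 10 + grad): `act` selects the
+// activation (1=linear, 2=relu, 3=lrelu, 4=tanh, 5=sigmoid, 6=elu, 7=selu,
+// 8=softplus, 9=swish, matching cuda_idx in fused_bias_act.py), and `grad`
+// selects the forward value (0) or the first (1) / second (2) derivative factor
+// applied to the incoming gradient, with `ref` holding the forward-pass x or y.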
+ +template <class T> +struct FusedBiasActKernelParams +{ + const T* x; // [sizeX] + const T* b; // [sizeB] or NULL + const T* ref; // [sizeX] or NULL + T* y; // [sizeX] + + int grad; + int axis; + int act; + float alpha; + float gain; + + int sizeX; + int sizeB; + int stepB; + int loopX; +}; + +template <class T> +static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p) +{ + const float expRange = 80.0f; + const float halfExpRange = 40.0f; + const float seluScale = 1.0507009873554804934193349852946f; + const float seluAlpha = 1.6732632423543772848170429916717f; + + // Loop over elements. + int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x; + for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x) + { + // Load and apply bias. + float x = (float)p.x[xi]; + if (p.b) + x += (float)p.b[(xi / p.stepB) % p.sizeB]; + float ref = (p.ref) ? (float)p.ref[xi] : 0.0f; + if (p.gain != 0.0f & p.act != 9) + ref /= p.gain; + + // Evaluate activation func. + float y; + switch (p.act * 10 + p.grad) + { + // linear + default: + case 10: y = x; break; + case 11: y = x; break; + case 12: y = 0.0f; break; + + // relu + case 20: y = (x > 0.0f) ? x : 0.0f; break; + case 21: y = (ref > 0.0f) ? x : 0.0f; break; + case 22: y = 0.0f; break; + + // lrelu + case 30: y = (x > 0.0f) ? x : x * p.alpha; break; + case 31: y = (ref > 0.0f) ? x : x * p.alpha; break; + case 32: y = 0.0f; break; + + // tanh + case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break; + case 41: y = x * (1.0f - ref * ref); break; + case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break; + + // sigmoid + case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break; + case 51: y = x * ref * (1.0f - ref); break; + case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break; + + // elu + case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break; + case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break; + case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break; + + // selu + case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break; + case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break; + case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break; + + // softplus + case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break; + case 81: y = x * (1.0f - expf(-ref)); break; + case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break; + + // swish + case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break; + case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break; + case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break; + } + + // Apply gain and store. + p.y[xi] = (T)(y * p.gain); + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. 
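+//
+// FusedBiasActOp validates the attributes and input tensors, computes stepB (the
+// stride of the bias axis in the flattened layout), and launches FusedBiasActKernel
+// on the device stream with a grid sized so that each thread processes loopX elements.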
+ +template <class T> +struct FusedBiasActOp : public OpKernel +{ + FusedBiasActKernelParams<T> m_attribs; + + FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain)); + OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative")); + OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative")); + OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative")); + } + + void Compute(OpKernelContext* ctx) + { + FusedBiasActKernelParams<T> p = m_attribs; + cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream(); + + const Tensor& x = ctx->input(0); // [...] + const Tensor& b = ctx->input(1); // [sizeB] or [0] + const Tensor& ref = ctx->input(2); // x.shape or [0] + p.x = x.flat<T>().data(); + p.b = (b.NumElements()) ? b.flat<T>().data() : NULL; + p.ref = (ref.NumElements()) ? ref.flat<T>().data() : NULL; + OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds")); + OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1")); + OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements")); + OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large")); + + p.sizeX = (int)x.NumElements(); + p.sizeB = (int)b.NumElements(); + p.stepB = 1; + for (int i = m_attribs.axis + 1; i < x.dims(); i++) + p.stepB *= (int)x.dim_size(i); + + Tensor* y = NULL; // x.shape + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y)); + p.y = y->flat<T>().data(); + + p.loopX = 4; + int blockSize = 4 * 32; + int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1; + void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("FusedBiasAct") + .Input ("x: T") + .Input ("b: T") + .Input ("ref: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("grad: int = 0") + .Attr ("axis: int = 1") + .Attr ("act: int = 0") + .Attr ("alpha: float = 0.0") + .Attr ("gain: float = 1.0"); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"), FusedBiasActOp<float>); +REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), FusedBiasActOp<Eigen::half>); + +//------------------------------------------------------------------------ diff --git a/dnnlib/tflib/ops/fused_bias_act.py b/dnnlib/tflib/ops/fused_bias_act.py new file mode 100755 index 0000000000000000000000000000000000000000..52f6bfd77a4b0151103c1a76fa877e084831f7c4 --- /dev/null +++ b/dnnlib/tflib/ops/fused_bias_act.py @@ -0,0 +1,196 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Custom TensorFlow ops for efficient bias and activation.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops +from ...util import EasyDict + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +activation_funcs = { + 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), + 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), + 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), + 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), + 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), + 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), + 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), + 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), + 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), +} + +#---------------------------------------------------------------------------- + +def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'): + r"""Fused bias and activation function. + + Adds bias `b` to activation tensor `x`, evaluates activation function `act`, + and scales the result by `gain`. Each of the steps is optional. In most cases, + the fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports first and second order gradients, + but not third order gradients. + + Args: + x: Input activation tensor. Can have any shape, but if `b` is defined, the + dimension corresponding to `axis`, as well as the rank, must be known. + b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type + as `x`. The shape must be known, and it must match the dimension of `x` + corresponding to `axis`. + axis: The dimension in `x` corresponding to the elements of `b`. + The value of `axis` is ignored if `b` is not specified. + act: Name of the activation function to evaluate, or `"linear"` to disable. + Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. + See `activation_funcs` for a full list. `None` is not allowed. + alpha: Shape parameter for the activation function, or `None` to use the default. + gain: Scaling factor for the output tensor, or `None` to use default. + See `activation_funcs` for the default scaling of each activation function. + If unsure, consider specifying `1.0`. + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. 
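+
+    Example:
+        A minimal sketch; shapes, variable names, and the choice of `impl='ref'`
+        are illustrative only:
+
+            x = tf.placeholder(tf.float32, [None, 128, 4, 4])  # NCHW activations
+            b = tf.get_variable('bias', shape=[128], initializer=tf.zeros_initializer())
+            y = fused_bias_act(x, b=b, axis=1, act='lrelu', alpha=0.2, impl='ref')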
+ """ + + impl_dict = { + 'ref': _fused_bias_act_ref, + 'cuda': _fused_bias_act_cuda, + } + return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) + +#---------------------------------------------------------------------------- + +def _fused_bias_act_ref(x, b, axis, act, alpha, gain): + """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Add bias. + if b.shape[0] != 0: + x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) + + # Evaluate activation function. + x = act_spec.func(x, alpha=alpha) + + # Scale by gain. + if gain != 1: + x *= gain + return x + +#---------------------------------------------------------------------------- + +def _fused_bias_act_cuda(x, b, axis, act, alpha, gain): + """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" + + # Validate arguments. + x = tf.convert_to_tensor(x) + empty_tensor = tf.constant([], dtype=x.dtype) + b = tf.convert_to_tensor(b) if b is not None else empty_tensor + act_spec = activation_funcs[act] + assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) + assert b.shape[0] == 0 or 0 <= axis < x.shape.rank + if alpha is None: + alpha = act_spec.def_alpha + if gain is None: + gain = act_spec.def_gain + + # Special cases. + if act == 'linear' and b is None and gain == 1.0: + return x + if act_spec.cuda_idx is None: + return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) + + # CUDA kernel. + cuda_kernel = _get_plugin().fused_bias_act + cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain) + + # Forward pass: y = func(x, b). + def func_y(x, b): + y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs) + y.set_shape(x.shape) + return y + + # Backward pass: dx, db = grad(dy, x, y) + def grad_dx(dy, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs) + dx.set_shape(x.shape) + return dx + def grad_db(dx): + if b.shape[0] == 0: + return empty_tensor + db = dx + if axis < x.shape.rank - 1: + db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) + if axis > 0: + db = tf.reduce_sum(db, list(range(axis))) + db.set_shape(b.shape) + return db + + # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) + def grad2_d_dy(d_dx, d_db, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs) + d_dy.set_shape(x.shape) + return d_dy + def grad2_d_x(d_dx, d_db, x, y): + ref = {'x': x, 'y': y}[act_spec.ref] + d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs) + d_x.set_shape(x.shape) + return d_x + + # Fast version for piecewise-linear activation funcs. + @tf.custom_gradient + def func_zero_2nd_grad(x, b): + y = func_y(x, b) + @tf.custom_gradient + def grad(dy): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + return d_dy + return (dx, db), grad2 + return y, grad + + # Slow version for general activation funcs. 
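+    # For activation funcs whose gradient also depends on x (tanh, sigmoid, elu,
+    # softplus, swish, ...), the second-order pass must produce d_x in addition to
+    # d_dy, so the first-order gradient below is itself wrapped in tf.custom_gradient.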
+ @tf.custom_gradient + def func_nonzero_2nd_grad(x, b): + y = func_y(x, b) + def grad_wrap(dy): + @tf.custom_gradient + def grad_impl(dy, x): + dx = grad_dx(dy, x, y) + db = grad_db(dx) + def grad2(d_dx, d_db): + d_dy = grad2_d_dy(d_dx, d_db, x, y) + d_x = grad2_d_x(d_dx, d_db, x, y) + return d_dy, d_x + return (dx, db), grad2 + return grad_impl(dy, x) + return y, grad_wrap + + # Which version to use? + if act_spec.zero_2nd_grad: + return func_zero_2nd_grad(x, b) + return func_nonzero_2nd_grad(x, b) + +#---------------------------------------------------------------------------- diff --git a/dnnlib/tflib/ops/upfirdn_2d.cu b/dnnlib/tflib/ops/upfirdn_2d.cu new file mode 100755 index 0000000000000000000000000000000000000000..b97ef36c9e5ba46a92a380dbc687e275235a1ccf --- /dev/null +++ b/dnnlib/tflib/ops/upfirdn_2d.cu @@ -0,0 +1,326 @@ +// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +// +// This work is made available under the Nvidia Source Code License-NC. +// To view a copy of this license, visit +// https://nvlabs.github.io/stylegan2/license.html + +#define EIGEN_USE_GPU +#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__ +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include <stdio.h> + +using namespace tensorflow; +using namespace tensorflow::shape_inference; + +//------------------------------------------------------------------------ +// Helpers. + +#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false) + +static __host__ __device__ __forceinline__ int floorDiv(int a, int b) +{ + int c = a / b; + if (c * b > a) + c--; + return c; +} + +//------------------------------------------------------------------------ +// CUDA kernel params. + +template <class T> +struct UpFirDn2DKernelParams +{ + const T* x; // [majorDim, inH, inW, minorDim] + const T* k; // [kernelH, kernelW] + T* y; // [majorDim, outH, outW, minorDim] + + int upx; + int upy; + int downx; + int downy; + int padx0; + int padx1; + int pady0; + int pady1; + + int majorDim; + int inH; + int inW; + int minorDim; + int kernelH; + int kernelW; + int outH; + int outW; + int loopMajor; + int loopX; +}; + +//------------------------------------------------------------------------ +// General CUDA implementation for large filter kernels. + +template <class T> +static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams<T> p) +{ + // Calculate thread index. + int minorIdx = blockIdx.x * blockDim.x + threadIdx.x; + int outY = minorIdx / p.minorDim; + minorIdx -= outY * p.minorDim; + int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim) + return; + + // Setup Y receptive field. + int midY = outY * p.downy + p.upy - 1 - p.pady0; + int inY = min(max(floorDiv(midY, p.upy), 0), p.inH); + int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY; + int kernelY = midY + p.kernelH - (inY + 1) * p.upy; + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y) + { + // Setup X receptive field. 
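+        // Mirrors the Y setup above: midX is the position of this output tap in the
+        // upsampled, padded input grid; inX/w clamp the contributing input columns to
+        // the valid range, and kernelX is the corresponding offset into the filter.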
+ int midX = outX * p.downx + p.upx - 1 - p.padx0; + int inX = min(max(floorDiv(midX, p.upx), 0), p.inW); + int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX; + int kernelX = midX + p.kernelW - (inX + 1) * p.upx; + + // Initialize pointers. + const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + const T* kp = &p.k[kernelY * p.kernelW + kernelX]; + int xpx = p.minorDim; + int kpx = -p.upx; + int xpy = p.inW * p.minorDim; + int kpy = -p.upy * p.kernelW; + + // Inner loop. + float v = 0.0f; + for (int y = 0; y < h; y++) + { + for (int x = 0; x < w; x++) + { + v += (float)(*xp) * (float)(*kp); + xp += xpx; + kp += kpx; + } + xp += xpy - w * xpx; + kp += kpy - w * kpx; + } + + // Store result. + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } +} + +//------------------------------------------------------------------------ +// Specialized CUDA implementation for small filter kernels. + +template <class T, int upx, int upy, int downx, int downy, int kernelW, int kernelH, int tileOutW, int tileOutH> +static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams<T> p) +{ + //assert(kernelW % upx == 0); + //assert(kernelH % upy == 0); + const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1; + const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1; + __shared__ volatile float sk[kernelH][kernelW]; + __shared__ volatile float sx[tileInH][tileInW]; + + // Calculate tile index. + int minorIdx = blockIdx.x; + int tileOutY = minorIdx / p.minorDim; + minorIdx -= tileOutY * p.minorDim; + tileOutY *= tileOutH; + int tileOutXBase = blockIdx.y * p.loopX * tileOutW; + int majorIdxBase = blockIdx.z * p.loopMajor; + if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim) + return; + + // Load filter kernel (flipped). + for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x) + { + int ky = tapIdx / kernelW; + int kx = tapIdx - ky * kernelW; + float v = 0.0f; + if (kx < p.kernelW & ky < p.kernelH) + v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)]; + sk[ky][kx] = v; + } + + // Loop over majorDim and outX. + for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++) + for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW) + { + // Load input pixels. + int tileMidX = tileOutX * downx + upx - 1 - p.padx0; + int tileMidY = tileOutY * downy + upy - 1 - p.pady0; + int tileInX = floorDiv(tileMidX, upx); + int tileInY = floorDiv(tileMidY, upy); + __syncthreads(); + for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x) + { + int relInY = inIdx / tileInW; + int relInX = inIdx - relInY * tileInW; + int inX = relInX + tileInX; + int inY = relInY + tileInY; + float v = 0.0f; + if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH) + v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx]; + sx[relInY][relInX] = v; + } + + // Loop over output pixels. + __syncthreads(); + for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x) + { + int relOutY = outIdx / tileOutW; + int relOutX = outIdx - relOutY * tileOutW; + int outX = relOutX + tileOutX; + int outY = relOutY + tileOutY; + + // Setup receptive field. 
+ int midX = tileMidX + relOutX * downx; + int midY = tileMidY + relOutY * downy; + int inX = floorDiv(midX, upx); + int inY = floorDiv(midY, upy); + int relInX = inX - tileInX; + int relInY = inY - tileInY; + int kernelX = (inX + 1) * upx - midX - 1; // flipped + int kernelY = (inY + 1) * upy - midY - 1; // flipped + + // Inner loop. + float v = 0.0f; + #pragma unroll + for (int y = 0; y < kernelH / upy; y++) + #pragma unroll + for (int x = 0; x < kernelW / upx; x++) + v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx]; + + // Store result. + if (outX < p.outW & outY < p.outH) + p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v; + } + } +} + +//------------------------------------------------------------------------ +// TensorFlow op. + +template <class T> +struct UpFirDn2DOp : public OpKernel +{ + UpFirDn2DKernelParams<T> m_attribs; + + UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx) + { + memset(&m_attribs, 0, sizeof(m_attribs)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1)); + OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1")); + OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1")); + } + + void Compute(OpKernelContext* ctx) + { + UpFirDn2DKernelParams<T> p = m_attribs; + cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream(); + + const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim] + const Tensor& k = ctx->input(1); // [kernelH, kernelW] + p.x = x.flat<T>().data(); + p.k = k.flat<T>().data(); + OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4")); + OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2")); + OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large")); + OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large")); + + p.majorDim = (int)x.dim_size(0); + p.inH = (int)x.dim_size(1); + p.inW = (int)x.dim_size(2); + p.minorDim = (int)x.dim_size(3); + p.kernelH = (int)k.dim_size(0); + p.kernelW = (int)k.dim_size(1); + OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1")); + + p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx; + p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy; + OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1")); + + Tensor* y = NULL; // [majorDim, outH, outW, minorDim] + TensorShape ys; + ys.AddDim(p.majorDim); + ys.AddDim(p.outH); + ys.AddDim(p.outW); + ys.AddDim(p.minorDim); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y)); + p.y = y->flat<T>().data(); + OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large")); + + // Choose CUDA kernel to use. 
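+        // Dispatch note: if the up/down factors and filter size match one of the
+        // precompiled tile configurations below, a shared-memory UpFirDn2DKernel_small
+        // specialization is used; otherwise the general UpFirDn2DKernel_large kernel
+        // (the default set below) handles the request.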
+ void* cudaKernel = (void*)UpFirDn2DKernel_large<T>; + int tileOutW = -1; + int tileOutH = -1; + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 7,7, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 5,5, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 3,3, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 8,8, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 2,2, 64,16>; tileOutW = 64; tileOutH = 16; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 8,8, 32,8>; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 6,6, 32,8>; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 4,4, 32,8>; tileOutW = 32; tileOutH = 8; } + if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 2,2, 32,8>; tileOutW = 32; tileOutH = 8; } + + // Choose launch params. + dim3 blockSize; + dim3 gridSize; + if (tileOutW > 0 && tileOutH > 0) // small + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 1; + blockSize = dim3(32 * 8, 1, 1); + gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + else // large + { + p.loopMajor = (p.majorDim - 1) / 16384 + 1; + p.loopX = 4; + blockSize = dim3(4, 32, 1); + gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1); + } + + // Launch CUDA kernel. 
+ void* args[] = {&p}; + OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream)); + } +}; + +REGISTER_OP("UpFirDn2D") + .Input ("x: T") + .Input ("k: T") + .Output ("y: T") + .Attr ("T: {float, half}") + .Attr ("upx: int = 1") + .Attr ("upy: int = 1") + .Attr ("downx: int = 1") + .Attr ("downy: int = 1") + .Attr ("padx0: int = 0") + .Attr ("padx1: int = 0") + .Attr ("pady0: int = 0") + .Attr ("pady1: int = 0"); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<float>("T"), UpFirDn2DOp<float>); +REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), UpFirDn2DOp<Eigen::half>); + +//------------------------------------------------------------------------ diff --git a/dnnlib/tflib/ops/upfirdn_2d.py b/dnnlib/tflib/ops/upfirdn_2d.py new file mode 100755 index 0000000000000000000000000000000000000000..fd23777ebb87bc83e8728d6fe3904fbbfb5c524c --- /dev/null +++ b/dnnlib/tflib/ops/upfirdn_2d.py @@ -0,0 +1,364 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Custom TensorFlow ops for efficient resampling of 2D images.""" + +import os +import numpy as np +import tensorflow as tf +from .. import custom_ops + +def _get_plugin(): + return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') + +#---------------------------------------------------------------------------- + +def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'): + r"""Pad, upsample, FIR filter, and downsample a batch of 2D images. + + Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]` + and performs the following operations for each image, batched across + `majorDim` and `minorDim`: + + 1. Pad the image with zeros by the specified number of pixels on each side + (`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value + corresponds to cropping the image. + + 2. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`). + + 3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the + image so that the footprint of all output pixels lies within the input image. + + 4. Downsample the image by throwing away pixels (`downx`, `downy`). + + This sequence of operations bears close resemblance to scipy.signal.upfirdn(). + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[majorDim, inH, inW, minorDim]`. + k: 2D FIR filter of the shape `[firH, firW]`. + upx: Integer upsampling factor along the X-axis (default: 1). + upy: Integer upsampling factor along the Y-axis (default: 1). + downx: Integer downsampling factor along the X-axis (default: 1). + downy: Integer downsampling factor along the Y-axis (default: 1). + padx0: Number of pixels to pad on the left side (default: 0). + padx1: Number of pixels to pad on the right side (default: 0). + pady0: Number of pixels to pad on the top side (default: 0). + pady1: Number of pixels to pad on the bottom side (default: 0). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`. 
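+
+    Example:
+        An illustrative sketch (not part of the original docstring); performs a 2x
+        upsampling with a normalized 4-tap filter, matching the padding that
+        `upsample_2d()` computes internally (up to its gain factor):
+
+            x = tf.random_normal([1, 64, 64, 3])               # [majorDim, inH, inW, minorDim]
+            k = np.outer([1., 3., 3., 1.], [1., 3., 3., 1.])
+            k /= np.sum(k)
+            y = upfirdn_2d(x, k, upx=2, upy=2, padx0=2, padx1=1, pady0=2, pady1=1)
+            # y has shape [1, 128, 128, 3]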
+ """ + + impl_dict = { + 'ref': _upfirdn_2d_ref, + 'cuda': _upfirdn_2d_cuda, + } + return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + assert x.shape.rank == 4 + inH = x.shape[1].value + inW = x.shape[2].value + minorDim = _shape(x, 3) + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + # Upsample (insert zeros). + x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) + x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) + x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) + + # Pad (crop if negative). + x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) + x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] + + # Convolve with filter. + x = tf.transpose(x, [0, 3, 1, 2]) + x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) + w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) + x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') + x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) + x = tf.transpose(x, [0, 2, 3, 1]) + + # Downsample (throw away pixels). 
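+    # Strided slicing keeps every `downy`-th row and `downx`-th column, i.e. the
+    # top-left sample of each downsampling window.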
+ return x[:, ::downy, ::downx, :] + +#---------------------------------------------------------------------------- + +def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): + """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" + + x = tf.convert_to_tensor(x) + k = np.asarray(k, dtype=np.float32) + majorDim, inH, inW, minorDim = x.shape.as_list() + kernelH, kernelW = k.shape + assert inW >= 1 and inH >= 1 + assert kernelW >= 1 and kernelH >= 1 + assert isinstance(upx, int) and isinstance(upy, int) + assert isinstance(downx, int) and isinstance(downy, int) + assert isinstance(padx0, int) and isinstance(padx1, int) + assert isinstance(pady0, int) and isinstance(pady1, int) + + outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 + outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 + assert outW >= 1 and outH >= 1 + + kc = tf.constant(k, dtype=x.dtype) + gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) + gpadx0 = kernelW - padx0 - 1 + gpady0 = kernelH - pady0 - 1 + gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 + gpady1 = inH * upy - outH * downy + pady0 - upy + 1 + + @tf.custom_gradient + def func(x): + y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) + y.set_shape([majorDim, outH, outW, minorDim]) + @tf.custom_gradient + def grad(dy): + dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1) + dx.set_shape([majorDim, inH, inW, minorDim]) + return dx, func + return y, grad + return func(x) + +#---------------------------------------------------------------------------- + +def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'): + r"""Filter a batch of 2D images with the given FIR filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and filters each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the same shape and datatype as `x`. + """ + + k = _setup_kernel(k) * gain + p = k.shape[0] - 1 + return _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Upsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and upsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the upsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. 
+ factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + k = _setup_kernel(k) * (gain * (factor ** 2)) + p = k.shape[0] - factor + return _simple_upfirdn_2d(x, k, up=factor, pad0=(p+1)//2+factor-1, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Downsample a batch of 2D images with the given filter. + + Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` + and downsamples each image with the given filter. The filter is normalized so that + if the input pixels are constant, they will be scaled by the specified `gain`. + Pixels outside the image are assumed to be zero, and the filter is padded with + zeros so that its shape is a multiple of the downsampling factor. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + if k is None: + k = [1] * factor + k = _setup_kernel(k) * gain + p = k.shape[0] - factor + return _simple_upfirdn_2d(x, k, down=factor, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to nearest-neighbor + upsampling. + factor: Integer upsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H * factor, W * factor]` or + `[N, H * factor, W * factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + + # Check weight shape. 
+ w = tf.convert_to_tensor(w) + assert w.shape.rank == 4 + convH = w.shape[0].value + convW = w.shape[1].value + inC = _shape(w, 2) + outC = _shape(w, 3) + assert convW == convH + + # Setup filter kernel. + if k is None: + k = [1] * factor + k = _setup_kernel(k) * (gain * (factor ** 2)) + p = (k.shape[0] - factor) - (convW - 1) + + # Determine data dimensions. + if data_format == 'NCHW': + stride = [1, 1, factor, factor] + output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW] + num_groups = _shape(x, 1) // inC + else: + stride = [1, factor, factor, 1] + output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC] + num_groups = _shape(x, 3) // inC + + # Transpose weights. + w = tf.reshape(w, [convH, convW, inC, num_groups, -1]) + w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) + w = tf.reshape(w, [convH, convW, -1, num_groups * inC]) + + # Execute. + x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) + return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl) + +#---------------------------------------------------------------------------- + +def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): + r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. + + Padding is performed only once at the beginning, not between the operations. + The fused op is considerably more efficient than performing the same calculation + using standard TensorFlow ops. It supports gradients of arbitrary order. + + Args: + x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. + w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. + Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. + k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). + The default is `[1] * factor`, which corresponds to average pooling. + factor: Integer downsampling factor (default: 2). + gain: Scaling factor for signal magnitude (default: 1.0). + data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). + impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). + + Returns: + Tensor of the shape `[N, C, H // factor, W // factor]` or + `[N, H // factor, W // factor, C]`, and same datatype as `x`. + """ + + assert isinstance(factor, int) and factor >= 1 + w = tf.convert_to_tensor(w) + convH, convW, _inC, _outC = w.shape.as_list() + assert convW == convH + if k is None: + k = [1] * factor + k = _setup_kernel(k) * gain + p = (k.shape[0] - factor) + (convW - 1) + if data_format == 'NCHW': + s = [1, 1, factor, factor] + else: + s = [1, factor, factor, 1] + x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) + return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) + +#---------------------------------------------------------------------------- +# Internal helper funcs. 
+ +def _shape(tf_expr, dim_idx): + if tf_expr.shape.rank is not None: + dim = tf_expr.shape[dim_idx].value + if dim is not None: + return dim + return tf.shape(tf_expr)[dim_idx] + +def _setup_kernel(k): + k = np.asarray(k, dtype=np.float32) + if k.ndim == 1: + k = np.outer(k, k) + k /= np.sum(k) + assert k.ndim == 2 + assert k.shape[0] == k.shape[1] + return k + +def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'): + assert data_format in ['NCHW', 'NHWC'] + assert x.shape.rank == 4 + y = x + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) + y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) + if data_format == 'NCHW': + y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) + return y + +#---------------------------------------------------------------------------- diff --git a/dnnlib/tflib/optimizer.py b/dnnlib/tflib/optimizer.py new file mode 100755 index 0000000000000000000000000000000000000000..9a1b1b833e218902ef145c59a03128e2fba73baf --- /dev/null +++ b/dnnlib/tflib/optimizer.py @@ -0,0 +1,336 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Helper wrapper for a Tensorflow optimizer.""" + +import numpy as np +import tensorflow as tf + +from collections import OrderedDict +from typing import List, Union + +from . import autosummary +from . import tfutil +from .. import util + +from .tfutil import TfExpression, TfExpressionEx + +try: + # TensorFlow 1.13 + from tensorflow.python.ops import nccl_ops +except: + # Older TensorFlow versions + import tensorflow.contrib.nccl as nccl_ops + +class Optimizer: + """A Wrapper for tf.train.Optimizer. + + Automatically takes care of: + - Gradient averaging for multi-GPU training. + - Gradient accumulation for arbitrarily large minibatches. + - Dynamic loss scaling and typecasts for FP16 training. + - Ignoring corrupted gradients that contain NaNs/Infs. + - Reporting statistics. + - Well-chosen default settings. + """ + + def __init__(self, + name: str = "Train", # Name string that will appear in TensorFlow graph. + tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. + learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. + minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. + share: "Optimizer" = None, # Share internal state with a previously created optimizer? + use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? + loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. + loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. + loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. + report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? + **kwargs): + + # Public fields. 
+ self.name = name + self.learning_rate = learning_rate + self.minibatch_multiplier = minibatch_multiplier + self.id = self.name.replace("/", ".") + self.scope = tf.get_default_graph().unique_name(self.id) + self.optimizer_class = util.get_obj_by_name(tf_optimizer) + self.optimizer_kwargs = dict(kwargs) + self.use_loss_scaling = use_loss_scaling + self.loss_scaling_init = loss_scaling_init + self.loss_scaling_inc = loss_scaling_inc + self.loss_scaling_dec = loss_scaling_dec + + # Private fields. + self._updates_applied = False + self._devices = OrderedDict() # device_name => EasyDict() + self._shared_optimizers = OrderedDict() # device_name => optimizer_class + self._gradient_shapes = None # [shape, ...] + self._report_mem_usage = report_mem_usage + + # Validate arguments. + assert callable(self.optimizer_class) + + # Share internal state if requested. + if share is not None: + assert isinstance(share, Optimizer) + assert self.optimizer_class is share.optimizer_class + assert self.learning_rate is share.learning_rate + assert self.optimizer_kwargs == share.optimizer_kwargs + self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access + + def _get_device(self, device_name: str): + """Get internal state for the given TensorFlow device.""" + tfutil.assert_tf_initialized() + if device_name in self._devices: + return self._devices[device_name] + + # Initialize fields. + device = util.EasyDict() + device.name = device_name + device.optimizer = None # Underlying optimizer: optimizer_class + device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable + device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] + device.grad_clean = OrderedDict() # Clean gradients: var => grad + device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable + device.grad_acc_count = None # Accumulation counter: tf.Variable + device.grad_acc = OrderedDict() # Accumulated gradients: var => grad + + # Setup TensorFlow objects. + with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): + if device_name not in self._shared_optimizers: + optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) + self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) + device.optimizer = self._shared_optimizers[device_name] + if self.use_loss_scaling: + device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") + + # Register device. + self._devices[device_name] = device + return device + + def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: + """Register the gradients of the given loss function with respect to the given variables. + Intended to be called once per GPU.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + device = self._get_device(loss.device) + + # Validate trainables. + if isinstance(trainable_vars, dict): + trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars + assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 + assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) + assert all(var.device == device.name for var in trainable_vars) + + # Validate shapes. 
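+        # register_gradients() is called once per GPU; each call must pass the same
+        # variables with the same shapes so that the per-device gradients can later
+        # be matched up and summed in apply_updates().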
+ if self._gradient_shapes is None: + self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] + assert len(trainable_vars) == len(self._gradient_shapes) + assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) + + # Report memory usage if requested. + deps = [] + if self._report_mem_usage: + self._report_mem_usage = False + try: + with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): + deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) + except tf.errors.NotFoundError: + pass + + # Compute gradients. + with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): + loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) + gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage + grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) + + # Register gradients. + for grad, var in grad_list: + if var not in device.grad_raw: + device.grad_raw[var] = [] + device.grad_raw[var].append(grad) + + def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: + """Construct training op to update the registered variables based on their gradients.""" + tfutil.assert_tf_initialized() + assert not self._updates_applied + self._updates_applied = True + all_ops = [] + + # Check for no-op. + if allow_no_op and len(self._devices) == 0: + with tfutil.absolute_name_scope(self.scope): + return tf.no_op(name='TrainingOp') + + # Clean up gradients. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): + for var, grad in device.grad_raw.items(): + + # Filter out disconnected gradients and convert to float32. + grad = [g for g in grad if g is not None] + grad = [tf.cast(g, tf.float32) for g in grad] + + # Sum within the device. + if len(grad) == 0: + grad = tf.zeros(var.shape) # No gradients => zero. + elif len(grad) == 1: + grad = grad[0] # Single gradient => use as is. + else: + grad = tf.add_n(grad) # Multiple gradients => sum. + + # Scale as needed. + scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) + scale = tf.constant(scale, dtype=tf.float32, name="scale") + if self.minibatch_multiplier is not None: + scale /= tf.cast(self.minibatch_multiplier, tf.float32) + scale = self.undo_loss_scaling(scale) + device.grad_clean[var] = grad * scale + + # Sum gradients across devices. + if len(self._devices) > 1: + with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): + for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]): + if len(all_vars) > 0 and all(dim > 0 for dim in all_vars[0].shape.as_list()): # NCCL does not support zero-sized tensors. + all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)] + all_grads = nccl_ops.all_sum(all_grads) + for device, var, grad in zip(self._devices.values(), all_vars, all_grads): + device.grad_clean[var] = grad + + # Apply updates separately on each device. + for device_idx, device in enumerate(self._devices.values()): + with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): + # pylint: disable=cell-var-from-loop + + # Accumulate gradients over time. 
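+                # When minibatch_multiplier is set, gradients are summed into persistent
+                # accumulator variables over N consecutive minibatches; acc_ok becomes True
+                # on the N-th minibatch, at which point the accumulated gradient is applied
+                # (if finite) and the accumulators and counter are reset.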
+ if self.minibatch_multiplier is None: + acc_ok = tf.constant(True, name='acc_ok') + device.grad_acc = OrderedDict(device.grad_clean) + else: + # Create variables. + with tf.control_dependencies(None): + for var in device.grad_clean.keys(): + device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") + device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") + + # Track counter. + count_cur = device.grad_acc_count + 1.0 + count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) + count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) + acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) + all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) + + # Track gradients. + for var, grad in device.grad_clean.items(): + acc_var = device.grad_acc_vars[var] + acc_cur = acc_var + grad + device.grad_acc[var] = acc_cur + with tf.control_dependencies([acc_cur]): + acc_inc_op = lambda: tf.assign(acc_var, acc_cur) + acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) + all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) + + # No overflow => apply gradients. + all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) + apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) + all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) + + # Adjust loss scaling. + if self.use_loss_scaling: + ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) + ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) + ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) + all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) + + # Last device => report statistics. + if device_idx == len(self._devices) - 1: + all_ops.append(autosummary.autosummary(self.id + "/learning_rate", self.learning_rate)) + all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) + if self.use_loss_scaling: + all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) + + # Initialize variables. + self.reset_optimizer_state() + if self.use_loss_scaling: + tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) + if self.minibatch_multiplier is not None: + tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) + + # Group everything into a single op. 
+ with tfutil.absolute_name_scope(self.scope): + return tf.group(*all_ops, name="TrainingOp") + + def reset_optimizer_state(self) -> None: + """Reset internal state of the underlying optimizer.""" + tfutil.assert_tf_initialized() + tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()]) + + def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]: + """Get or create variable representing log2 of the current dynamic loss scaling factor.""" + return self._get_device(device).loss_scaling_var + + def apply_loss_scaling(self, value: TfExpression) -> TfExpression: + """Apply dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(self.get_loss_scaling_var(value.device)) + + def undo_loss_scaling(self, value: TfExpression) -> TfExpression: + """Undo the effect of dynamic loss scaling for the given expression.""" + assert tfutil.is_tf_expression(value) + if not self.use_loss_scaling: + return value + return value * tfutil.exp2(-self.get_loss_scaling_var(value.device)) # pylint: disable=invalid-unary-operand-type + + +class SimpleAdam: + """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer.""" + + def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8): + self.name = name + self.learning_rate = learning_rate + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.all_state_vars = [] + + def variables(self): + return self.all_state_vars + + def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE): + assert gate_gradients == tf.train.Optimizer.GATE_NONE + return list(zip(tf.gradients(loss, var_list), var_list)) + + def apply_gradients(self, grads_and_vars): + with tf.name_scope(self.name): + state_vars = [] + update_ops = [] + + # Adjust learning rate to deal with startup bias. + with tf.control_dependencies(None): + b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False) + state_vars += [b1pow_var, b2pow_var] + b1pow_new = b1pow_var * self.beta1 + b2pow_new = b2pow_var * self.beta2 + update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)] + lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new) + + # Construct ops to update each variable. + for grad, var in grads_and_vars: + with tf.control_dependencies(None): + m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False) + state_vars += [m_var, v_var] + m_new = self.beta1 * m_var + (1 - self.beta1) * grad + v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad) + var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon) + update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)] + + # Group everything together. + self.all_state_vars += state_vars + return tf.group(*update_ops) diff --git a/dnnlib/tflib/tfutil.py b/dnnlib/tflib/tfutil.py new file mode 100755 index 0000000000000000000000000000000000000000..1127c7beecfe526b459b3b99ee34e1c431e19e1c --- /dev/null +++ b/dnnlib/tflib/tfutil.py @@ -0,0 +1,252 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. 
+# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous helper utils for Tensorflow.""" + +import os +import numpy as np +import tensorflow as tf + +# Silence deprecation warnings from TensorFlow 1.13 onwards +import logging +logging.getLogger('tensorflow').setLevel(logging.ERROR) +import tensorflow.contrib # requires TensorFlow 1.x! +tf.contrib = tensorflow.contrib + +from typing import Any, Iterable, List, Union + +TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation] +"""A type that represents a valid Tensorflow expression.""" + +TfExpressionEx = Union[TfExpression, int, float, np.ndarray] +"""A type that can be converted to a valid Tensorflow expression.""" + + +def run(*args, **kwargs) -> Any: + """Run the specified ops in the default session.""" + assert_tf_initialized() + return tf.get_default_session().run(*args, **kwargs) + + +def is_tf_expression(x: Any) -> bool: + """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation.""" + return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation)) + + +def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]: + """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code.""" + return [dim.value for dim in shape] + + +def flatten(x: TfExpressionEx) -> TfExpression: + """Shortcut function for flattening a tensor.""" + with tf.name_scope("Flatten"): + return tf.reshape(x, [-1]) + + +def log2(x: TfExpressionEx) -> TfExpression: + """Logarithm in base 2.""" + with tf.name_scope("Log2"): + return tf.log(x) * np.float32(1.0 / np.log(2.0)) + + +def exp2(x: TfExpressionEx) -> TfExpression: + """Exponent in base 2.""" + with tf.name_scope("Exp2"): + return tf.exp(x * np.float32(np.log(2.0))) + + +def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx: + """Linear interpolation.""" + with tf.name_scope("Lerp"): + return a + (b - a) * t + + +def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression: + """Linear interpolation with clip.""" + with tf.name_scope("LerpClip"): + return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) + + +def absolute_name_scope(scope: str) -> tf.name_scope: + """Forcefully enter the specified name scope, ignoring any surrounding scopes.""" + return tf.name_scope(scope + "/") + + +def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope: + """Forcefully enter the specified variable scope, ignoring any surrounding scopes.""" + return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False) + + +def _sanitize_tf_config(config_dict: dict = None) -> dict: + # Defaults. + cfg = dict() + cfg["rnd.np_random_seed"] = None # Random seed for NumPy. None = keep as is. + cfg["rnd.tf_random_seed"] = "auto" # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is. + cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1" # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. + cfg["graph_options.place_pruned_graph"] = True # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. + cfg["gpu_options.allow_growth"] = True # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. 
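+
+    # Key convention: "rnd.*" entries control random seeding, "env.*" entries are
+    # exported as environment variables by init_tf(), and all remaining entries are
+    # applied to the corresponding tf.ConfigProto fields by create_session().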
+ + # Remove defaults for environment variables that are already set. + for key in list(cfg): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + if fields[1] in os.environ: + del cfg[key] + + # User overrides. + if config_dict is not None: + cfg.update(config_dict) + return cfg + + +def init_tf(config_dict: dict = None) -> None: + """Initialize TensorFlow session using good default settings.""" + # Skip if already initialized. + if tf.get_default_session() is not None: + return + + # Setup config dict and random seeds. + cfg = _sanitize_tf_config(config_dict) + np_random_seed = cfg["rnd.np_random_seed"] + if np_random_seed is not None: + np.random.seed(np_random_seed) + tf_random_seed = cfg["rnd.tf_random_seed"] + if tf_random_seed == "auto": + tf_random_seed = np.random.randint(1 << 31) + if tf_random_seed is not None: + tf.set_random_seed(tf_random_seed) + + # Setup environment variables. + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] == "env": + assert len(fields) == 2 + os.environ[fields[1]] = str(value) + + # Create default TensorFlow session. + create_session(cfg, force_as_default=True) + + +def assert_tf_initialized(): + """Check that TensorFlow session has been initialized.""" + if tf.get_default_session() is None: + raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().") + + +def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session: + """Create tf.Session based on config dict.""" + # Setup TensorFlow config proto. + cfg = _sanitize_tf_config(config_dict) + config_proto = tf.ConfigProto() + for key, value in cfg.items(): + fields = key.split(".") + if fields[0] not in ["rnd", "env"]: + obj = config_proto + for field in fields[:-1]: + obj = getattr(obj, field) + setattr(obj, fields[-1], value) + + # Create session. + session = tf.Session(config=config_proto) + if force_as_default: + # pylint: disable=protected-access + session._default_session = session.as_default() + session._default_session.enforce_nesting = False + session._default_session.__enter__() + return session + + +def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None: + """Initialize all tf.Variables that have not already been initialized. + + Equivalent to the following, but more efficient and does not bloat the tf graph: + tf.variables_initializer(tf.report_uninitialized_variables()).run() + """ + assert_tf_initialized() + if target_vars is None: + target_vars = tf.global_variables() + + test_vars = [] + test_ops = [] + + with tf.control_dependencies(None): # ignore surrounding control_dependencies + for var in target_vars: + assert is_tf_expression(var) + + try: + tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0")) + except KeyError: + # Op does not exist => variable may be uninitialized. + test_vars.append(var) + + with absolute_name_scope(var.name.split(":")[0]): + test_ops.append(tf.is_variable_initialized(var)) + + init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited] + run([var.initializer for var in init_vars]) + + +def set_vars(var_to_value_dict: dict) -> None: + """Set the values of given tf.Variables. 
+ + Equivalent to the following, but more efficient and does not bloat the tf graph: + tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] + """ + assert_tf_initialized() + ops = [] + feed_dict = {} + + for var, value in var_to_value_dict.items(): + assert is_tf_expression(var) + + try: + setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0")) # look for existing op + except KeyError: + with absolute_name_scope(var.name.split(":")[0]): + with tf.control_dependencies(None): # ignore surrounding control_dependencies + setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter") # create new setter + + ops.append(setter) + feed_dict[setter.op.inputs[1]] = value + + run(ops, feed_dict) + + +def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs): + """Create tf.Variable with large initial value without bloating the tf graph.""" + assert_tf_initialized() + assert isinstance(initial_value, np.ndarray) + zeros = tf.zeros(initial_value.shape, initial_value.dtype) + var = tf.Variable(zeros, *args, **kwargs) + set_vars({var: initial_value}) + return var + + +def convert_images_from_uint8(images, drange=[-1,1], nhwc_to_nchw=False): + """Convert a minibatch of images from uint8 to float32 with configurable dynamic range. + Can be used as an input transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if nhwc_to_nchw: + images = tf.transpose(images, [0, 3, 1, 2]) + return images * ((drange[1] - drange[0]) / 255) + drange[0] + + +def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): + """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. + Can be used as an output transformation for Network.run(). + """ + images = tf.cast(images, tf.float32) + if shrink > 1: + ksize = [1, 1, shrink, shrink] + images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") + if nchw_to_nhwc: + images = tf.transpose(images, [0, 2, 3, 1]) + scale = 255 / (drange[1] - drange[0]) + images = images * scale + (0.5 - drange[0] * scale) + return tf.saturate_cast(images, tf.uint8) diff --git a/dnnlib/util.py b/dnnlib/util.py new file mode 100755 index 0000000000000000000000000000000000000000..73c98d73c26e03800a39c62386c322bb518bbd18 --- /dev/null +++ b/dnnlib/util.py @@ -0,0 +1,410 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous utility classes and functions.""" + +import ctypes +import fnmatch +import importlib +import inspect +import numpy as np +import os +import shutil +import sys +import types +import io +import pickle +import re +import requests +import html +import hashlib +import glob +import uuid + +from distutils.util import strtobool +from typing import Any, List, Tuple, Union + + +# Util classes +# ------------------------------------------------------------------------------------------ + + +class EasyDict(dict): + """Convenience class that behaves like a dict but allows access with the attribute syntax.""" + + def __getattr__(self, name: str) -> Any: + try: + return self[name] + except KeyError: + raise AttributeError(name) + + def __setattr__(self, name: str, value: Any) -> None: + self[name] = value + + def __delattr__(self, name: str) -> None: + del self[name] + + +class Logger(object): + """Redirect stderr to stdout, optionally print stdout to a file, and optionally force flushing on both stdout and the file.""" + + def __init__(self, file_name: str = None, file_mode: str = "w", should_flush: bool = True): + self.file = None + + if file_name is not None: + self.file = open(file_name, file_mode) + + self.should_flush = should_flush + self.stdout = sys.stdout + self.stderr = sys.stderr + + sys.stdout = self + sys.stderr = self + + def __enter__(self) -> "Logger": + return self + + def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: + self.close() + + def write(self, text: str) -> None: + """Write text to stdout (and a file) and optionally flush.""" + if len(text) == 0: # workaround for a bug in VSCode debugger: sys.stdout.write(''); sys.stdout.flush() => crash + return + + if self.file is not None: + self.file.write(text) + + self.stdout.write(text) + + if self.should_flush: + self.flush() + + def flush(self) -> None: + """Flush written text to both stdout and a file, if open.""" + if self.file is not None: + self.file.flush() + + self.stdout.flush() + + def close(self) -> None: + """Flush, close possible files, and remove stdout/stderr mirroring.""" + self.flush() + + # if using multiple loggers, prevent closing in wrong order + if sys.stdout is self: + sys.stdout = self.stdout + if sys.stderr is self: + sys.stderr = self.stderr + + if self.file is not None: + self.file.close() + + +# Small util functions +# ------------------------------------------------------------------------------------------ + + +def format_time(seconds: Union[int, float]) -> str: + """Convert the seconds to human readable string with days, hours, minutes and seconds.""" + s = int(np.rint(seconds)) + + if s < 60: + return "{0}s".format(s) + elif s < 60 * 60: + return "{0}m {1:02}s".format(s // 60, s % 60) + elif s < 24 * 60 * 60: + return "{0}h {1:02}m {2:02}s".format(s // (60 * 60), (s // 60) % 60, s % 60) + else: + return "{0}d {1:02}h {2:02}m".format(s // (24 * 60 * 60), (s // (60 * 60)) % 24, (s // 60) % 60) + + +def ask_yes_no(question: str) -> bool: + """Ask the user the question until the user inputs a valid answer.""" + while True: + try: + print("{0} [y/n]".format(question)) + return strtobool(input().lower()) + except ValueError: + pass + + +def tuple_product(t: Tuple) -> Any: + """Calculate the product of the tuple elements.""" + result = 1 + + for v in t: + result *= v + + return result + + +_str_to_ctype = { + "uint8": ctypes.c_ubyte, + "uint16": ctypes.c_uint16, + "uint32": 
ctypes.c_uint32, + "uint64": ctypes.c_uint64, + "int8": ctypes.c_byte, + "int16": ctypes.c_int16, + "int32": ctypes.c_int32, + "int64": ctypes.c_int64, + "float32": ctypes.c_float, + "float64": ctypes.c_double +} + + +def get_dtype_and_ctype(type_obj: Any) -> Tuple[np.dtype, Any]: + """Given a type name string (or an object having a __name__ attribute), return matching Numpy and ctypes types that have the same size in bytes.""" + type_str = None + + if isinstance(type_obj, str): + type_str = type_obj + elif hasattr(type_obj, "__name__"): + type_str = type_obj.__name__ + elif hasattr(type_obj, "name"): + type_str = type_obj.name + else: + raise RuntimeError("Cannot infer type name from input") + + assert type_str in _str_to_ctype.keys() + + my_dtype = np.dtype(type_str) + my_ctype = _str_to_ctype[type_str] + + assert my_dtype.itemsize == ctypes.sizeof(my_ctype) + + return my_dtype, my_ctype + + +def is_pickleable(obj: Any) -> bool: + try: + with io.BytesIO() as stream: + pickle.dump(obj, stream) + return True + except: + return False + + +# Functionality to import modules/objects by name, and call functions by name +# ------------------------------------------------------------------------------------------ + +def get_module_from_obj_name(obj_name: str) -> Tuple[types.ModuleType, str]: + """Searches for the underlying module behind the name to some python object. + Returns the module and the object name (original name with module part removed).""" + + # allow convenience shorthands, substitute them by full names + obj_name = re.sub("^np.", "numpy.", obj_name) + obj_name = re.sub("^tf.", "tensorflow.", obj_name) + + # list alternatives for (module_name, local_obj_name) + parts = obj_name.split(".") + name_pairs = [(".".join(parts[:i]), ".".join(parts[i:])) for i in range(len(parts), 0, -1)] + + # try each alternative in turn + for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + return module, local_obj_name + except: + pass + + # maybe some of the modules themselves contain errors? + for module_name, _local_obj_name in name_pairs: + try: + importlib.import_module(module_name) # may raise ImportError + except ImportError: + if not str(sys.exc_info()[1]).startswith("No module named '" + module_name + "'"): + raise + + # maybe the requested attribute is missing? 
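For orientation, a small illustrative sketch of the helpers defined above (`EasyDict`, `format_time`, `get_dtype_and_ctype`); the values are arbitrary:

```python
from dnnlib.util import EasyDict, format_time, get_dtype_and_ctype

cfg = EasyDict(alpha=0.5, num_steps=150)
cfg.alpha = 0.3                          # attribute access writes straight into the dict
print(cfg["alpha"], cfg.num_steps)       # 0.3 150
print(format_time(3661))                 # 1h 01m 01s
print(get_dtype_and_ctype("float32"))    # (dtype('float32'), <class 'ctypes.c_float'>)
```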
+ for module_name, local_obj_name in name_pairs: + try: + module = importlib.import_module(module_name) # may raise ImportError + get_obj_from_module(module, local_obj_name) # may raise AttributeError + except ImportError: + pass + + # we are out of luck, but we have no idea why + raise ImportError(obj_name) + + +def get_obj_from_module(module: types.ModuleType, obj_name: str) -> Any: + """Traverses the object name and returns the last (rightmost) python object.""" + if obj_name == '': + return module + obj = module + for part in obj_name.split("."): + obj = getattr(obj, part) + return obj + + +def get_obj_by_name(name: str) -> Any: + """Finds the python object with the given name.""" + module, obj_name = get_module_from_obj_name(name) + return get_obj_from_module(module, obj_name) + + +def call_func_by_name(*args, func_name: str = None, **kwargs) -> Any: + """Finds the python object with the given name and calls it as a function.""" + assert func_name is not None + func_obj = get_obj_by_name(func_name) + assert callable(func_obj) + return func_obj(*args, **kwargs) + + +def get_module_dir_by_obj_name(obj_name: str) -> str: + """Get the directory path of the module containing the given object name.""" + module, _ = get_module_from_obj_name(obj_name) + return os.path.dirname(inspect.getfile(module)) + + +def is_top_level_function(obj: Any) -> bool: + """Determine whether the given object is a top-level function, i.e., defined at module scope using 'def'.""" + return callable(obj) and obj.__name__ in sys.modules[obj.__module__].__dict__ + + +def get_top_level_function_name(obj: Any) -> str: + """Return the fully-qualified name of a top-level function.""" + assert is_top_level_function(obj) + return obj.__module__ + "." + obj.__name__ + + +# File system helpers +# ------------------------------------------------------------------------------------------ + +def list_dir_recursively_with_ignore(dir_path: str, ignores: List[str] = None, add_base_to_relative: bool = False) -> List[Tuple[str, str]]: + """List all files recursively in a given directory while ignoring given file and directory names. + Returns list of tuples containing both absolute and relative paths.""" + assert os.path.isdir(dir_path) + base_name = os.path.basename(os.path.normpath(dir_path)) + + if ignores is None: + ignores = [] + + result = [] + + for root, dirs, files in os.walk(dir_path, topdown=True): + for ignore_ in ignores: + dirs_to_remove = [d for d in dirs if fnmatch.fnmatch(d, ignore_)] + + # dirs need to be edited in-place + for d in dirs_to_remove: + dirs.remove(d) + + files = [f for f in files if not fnmatch.fnmatch(f, ignore_)] + + absolute_paths = [os.path.join(root, f) for f in files] + relative_paths = [os.path.relpath(p, dir_path) for p in absolute_paths] + + if add_base_to_relative: + relative_paths = [os.path.join(base_name, p) for p in relative_paths] + + assert len(absolute_paths) == len(relative_paths) + result += zip(absolute_paths, relative_paths) + + return result + + +def copy_files_and_create_dirs(files: List[Tuple[str, str]]) -> None: + """Takes in a list of tuples of (src, dst) paths and copies files. 
+ Will create all necessary directories.""" + for file in files: + target_dir_name = os.path.dirname(file[1]) + + # will create all intermediate-level directories + if not os.path.exists(target_dir_name): + os.makedirs(target_dir_name) + + shutil.copyfile(file[0], file[1]) + + +# URL helpers +# ------------------------------------------------------------------------------------------ + +def is_url(obj: Any, allow_file_urls: bool = False) -> bool: + """Determine whether the given object is a valid URL string.""" + if not isinstance(obj, str) or not "://" in obj: + return False + if allow_file_urls and obj.startswith('file:///'): + return True + try: + res = requests.compat.urlparse(obj) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + res = requests.compat.urlparse(requests.compat.urljoin(obj, "/")) + if not res.scheme or not res.netloc or not "." in res.netloc: + return False + except: + return False + return True + + +def open_url(url: str, cache_dir: str = None, num_attempts: int = 10, verbose: bool = True) -> Any: + """Download the given URL and return a binary-mode file object to access the data.""" + assert is_url(url, allow_file_urls=True) + assert num_attempts >= 1 + + # Handle file URLs. + if url.startswith('file:///'): + return open(url[len('file:///'):], "rb") + + # Lookup from cache. + url_md5 = hashlib.md5(url.encode("utf-8")).hexdigest() + if cache_dir is not None: + cache_files = glob.glob(os.path.join(cache_dir, url_md5 + "_*")) + if len(cache_files) == 1: + return open(cache_files[0], "rb") + + # Download. + url_name = None + url_data = None + with requests.Session() as session: + if verbose: + print("Downloading %s ..." % url, end="", flush=True) + for attempts_left in reversed(range(num_attempts)): + try: + with session.get(url) as res: + res.raise_for_status() + if len(res.content) == 0: + raise IOError("No data received") + + if len(res.content) < 8192: + content_str = res.content.decode("utf-8") + if "download_warning" in res.headers.get("Set-Cookie", ""): + links = [html.unescape(link) for link in content_str.split('"') if "export=download" in link] + if len(links) == 1: + url = requests.compat.urljoin(url, links[0]) + raise IOError("Google Drive virus checker nag") + if "Google Drive - Quota exceeded" in content_str: + raise IOError("Google Drive download quota exceeded -- please try again later") + + match = re.search(r'filename="([^"]*)"', res.headers.get("Content-Disposition", "")) + url_name = match[1] if match else url + url_data = res.content + if verbose: + print(" done") + break + except: + if not attempts_left: + if verbose: + print(" failed") + raise + if verbose: + print(".", end="", flush=True) + + # Save to cache. + if cache_dir is not None: + safe_name = re.sub(r"[^0-9a-zA-Z-._]", "_", url_name) + cache_file = os.path.join(cache_dir, url_md5 + "_" + safe_name) + temp_file = os.path.join(cache_dir, "tmp_" + uuid.uuid4().hex + "_" + url_md5 + "_" + safe_name) + os.makedirs(cache_dir, exist_ok=True) + with open(temp_file, "wb") as f: + f.write(url_data) + os.replace(temp_file, cache_file) # atomic + + # Return data as file object. + return io.BytesIO(url_data) diff --git a/download_models.py b/download_models.py new file mode 100644 index 0000000000000000000000000000000000000000..712a9553456ed9afdb30c0fd833c6b510af78121 --- /dev/null +++ b/download_models.py @@ -0,0 +1,63 @@ +# Copyright (c) 2021, Idiap Research Institute. All rights reserved. 
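A short usage sketch of `open_url()`; the URL is only a placeholder, and the cache directory name mirrors the one passed by `modules/misc.py` further below:

```python
import dnnlib.util

url = "http://example.com/networks/example.pkl"   # placeholder, not a real model file
with dnnlib.util.open_url(url, cache_dir=".stylegan2-cache", num_attempts=3) as f:
    data = f.read()
print(len(data), "bytes (cached for subsequent calls)")
```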
+# +# This work is made available under a custom license, Non-Commercial Research and Educational Use Only +# To view a copy of this license, visit +# https://gitlab.idiap.ch/bob/bob.paper.icassp2022_morph_generate/-/blob/master/LICENSE.txt + + +import os +import urllib.request +import bz2 + +from bob.extension import rc + +DLIB_LMD_PATH = rc['sg2_morph.dlib_lmd_path'] +SG2_PATH = rc['sg2_morph.sg2_path'] +VGG16_PATH = rc['sg2_morph.vgg16_path'] + +def makedirs(path): + folder = os.path.dirname(path) + if not os.path.exists(folder): + os.makedirs(folder) + +def download_dlib_lmd(): + dlib_url = "http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2" + if not os.path.exists(DLIB_LMD_PATH): + makedirs(DLIB_LMD_PATH) + + print('Downloading dlib face landmarks detector...') + tmp_file, _ = urllib.request.urlretrieve(dlib_url) + with bz2.BZ2File(tmp_file, 'rb') as src, open(DLIB_LMD_PATH, 'wb') as dst: + dst.write(src.read()) + print("Success !") + else: + print('dlib landmark detector already downloaded in {}'.format(DLIB_LMD_PATH)) + +def download_stylegan2(): + stylegan2_url = 'http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-f.pkl' + if not os.path.exists(SG2_PATH): + makedirs(SG2_PATH) + print('Downloading pretrained StyleGAN2 (FFHQ-config-f)...') + dst_file, _ = urllib.request.urlretrieve(stylegan2_url, SG2_PATH) + + print("Success !") + else: + print('StyleGAN2 model already downloaded in {}'.format(SG2_PATH)) + +def download_vgg16(): + vgg16_url = "http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl" + if not os.path.exists(VGG16_PATH): + makedirs(VGG16_PATH) + print("Downloading pretrained VGG16...") + dst_file, _ = urllib.request.urlretrieve(vgg16_url, VGG16_PATH) + print("Success !") + else: + print("VGG16 model already downloaded in {}".format(VGG16_PATH)) + +def download_models(): + download_dlib_lmd() + download_stylegan2() + download_vgg16() + +if __name__ == "__main__": + download_models() diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..58c2aca6615c90187f141f526835f1fdb289a9e8 --- /dev/null +++ b/environment.yml @@ -0,0 +1,22 @@ +name: bob.paper.icassp2022_morph_generate +channels: + - https://www.idiap.ch/software/bob/conda/label/archive + - https://www.idiap.ch/software/bob/conda + - defaults + - conda-forge +dependencies: + - python=3 + - bob.io.image + - bob.ip.dlib + - pillow + - matplotlib + - scikit-image + - requests + - keras=2.2.4 + - tensorflow-gpu=1.15.0 + - docopt + - imutils + - pandas + - pip + - pip: + - keras-vggface \ No newline at end of file diff --git a/gen_latents.py b/gen_latents.py new file mode 100644 index 0000000000000000000000000000000000000000..93cbb1e695d292c797a7e45ed878d554bff950df --- /dev/null +++ b/gen_latents.py @@ -0,0 +1,91 @@ +# Copyright (c) 2021, Idiap Research Institute. All rights reserved. 
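`download_models.py` resolves its three target paths from the bob global configuration; those keys need to be set before it is run (typically with the `bob config set <key> <value>` command provided by `bob.extension`). A small sanity-check sketch:

```python
# Sketch: verify the rc keys consumed by download_models.py are configured.
from bob.extension import rc

for key in ("sg2_morph.dlib_lmd_path", "sg2_morph.sg2_path", "sg2_morph.vgg16_path"):
    value = rc.get(key)
    print("{:30s} -> {}".format(key, value if value else "NOT CONFIGURED"))
```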
+# +# This work is made available under a custom license, Non-Commercial Research and Educational Use Only +# To view a copy of this license, visit +# https://gitlab.idiap.ch/bob/bob.paper.icassp2022_morph_generate/-/blob/master/LICENSE.txt + + +import sys +import os +sys.path.append(os.getcwd()) +import numpy as np +import bob.io.image +import bob.io.base +import modules +import utils as sg_utils +import argparse +from gridtk.tools import get_array_job_slice + +def parse_arguments(): + '''Parses in CLI arguments''' + parser = argparse.ArgumentParser(description='Process command line arguments.') + parser.add_argument('-d', '--dst', default='latents', help='Provide a destination folder path for the results.') + requiredNamed = parser.add_argument_group('Required arguments') + requiredNamed.add_argument('-s', '--src', type=check_dir_path, help='Provide the folder path containing the source images.', required=True) + return parser.parse_args() + +def check_dir_path(path): + '''Checks if the given folder path as an argument exists.''' + if os.path.isdir(path) or path == 'results': + return path + else: + raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path.") + +def instantiate_sg2_modules(): + '''Instantiates the SG2 Modules''' + print('Setting up StyleGAN2 modules') + # Instantiate the three main modules + sg_utils.fix_randomness(seed=0) + cropper = modules.preprocessor.FFHQCropper() + generator = modules.generator.StyleGAN2() + projector = modules.projector.Projector(num_steps=1000) + projector.set_network(generator.network) + return cropper, projector + +def make_latents_path(LATENT_DIR): + '''Concatenates the `dst_path` and `alpha_val` to create directory to store results.''' + if not os.path.exists(LATENT_DIR): + print('Making new directory', LATENT_DIR) + os.makedirs(LATENT_DIR) + +def main(): + ''' + Creates latent vectors of all images given in the src directory. + ''' + # Parse arguments + args = parse_arguments() + + # Parameters + SRC_DIR = args.src + LATENT_DIR = args.dst + DST_SUFFIX = '.png' + VEC_SUFFIX = '.hdf5' + + # Create latents path with verification + make_latents_path(LATENT_DIR) + + # Don't overwrite existing latents + existing_files = os.listdir(LATENT_DIR) + + # Iterate through a single image - we use SGE_TASK_ID to parallelize + list_images = sorted(os.listdir(SRC_DIR)) + list_images = list_images[get_array_job_slice(len(list_images))] + + # Instantiate the three main modules + cropper, projector = instantiate_sg2_modules() + + # Loop + for img in list_images: + # Ignore data files eg: ._070_08.jpg + if not img.startswith('.') and img.split(DST_SUFFIX)[0]+VEC_SUFFIX not in existing_files: + print('Going through file:', img) + # Load images to convert to latent vector + ref_images = map(bob.io.image.load, [os.path.join(SRC_DIR, img)]) + # Crop & project images + crops = list(map(cropper, ref_images)) + results = [projector(crop, verbose=True) for crop in crops] + # Save generated latent vector as .hdf5 + bob.io.base.save(results[0].w_latent, os.path.join(LATENT_DIR, img.split(DST_SUFFIX)[0]+VEC_SUFFIX)) + +if __name__ == "__main__": + main() diff --git a/gen_morphs.py b/gen_morphs.py new file mode 100644 index 0000000000000000000000000000000000000000..b5582caede848fc4b8bcbea9a0df2f01da7d46ba --- /dev/null +++ b/gen_morphs.py @@ -0,0 +1,293 @@ +# Copyright (c) 2021, Idiap Research Institute. All rights reserved. 
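Condensed into a sketch, the per-image work done by `gen_latents.py` (without the CLI and `SGE_TASK_ID` plumbing; file names are placeholders) looks like this:

```python
import bob.io.image
import bob.io.base
import modules
import utils as sg_utils

sg_utils.fix_randomness(seed=0)
cropper = modules.preprocessor.FFHQCropper()
generator = modules.generator.StyleGAN2()
projector = modules.projector.Projector(num_steps=1000)
projector.set_network(generator.network)

crop = cropper(bob.io.image.load("images/subject_a.png"))    # placeholder path
result = projector(crop, verbose=True)                       # iterative projection to W space
bob.io.base.save(result.w_latent, "latents/subject_a.hdf5")  # same naming scheme as the script
```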
+# +# This work is made available under a custom license, Non-Commercial Research and Educational Use Only +# To view a copy of this license, visit +# https://gitlab.idiap.ch/bob/bob.paper.icassp2022_morph_generate/-/blob/master/LICENSE.txt + + +import os +import sys +sys.path.append(os.getcwd()) +from src.facemorpher import morpher as fcmorpher +import src.opencv.utils as cv_utils +from bob.extension import rc +from modules import morpher +import utils as sg_utils +from PIL import Image +import pandas as pd +import bob.io.image +import bob.io.base +import numpy as np +import cv2 as cv +import argparse +import modules +import dnnlib + +def parse_arguments(): + '''Parses in CLI arguments''' + parser = argparse.ArgumentParser(description='Process command line arguments.') + parser.add_argument('-d', '--dst', type=check_dir_path, default='results', help='Provide a destination folder path for the results.') + parser.add_argument('-a', '--alphas', nargs='+', type=check_float_range, default=[0.5], help="Provide the morphing's alpha values [0, 1] (default: 0.5). Example: --alphas 0.3 0.5 0.7") + parser.add_argument('--latents', type=check_dir_path, help='Provide the folder path for the latent vectors.') + requiredNamed = parser.add_argument_group('Required arguments') + requiredNamed.add_argument('--opencv', action='store_true', help='Morphs using the `opencv` algorithm.') + requiredNamed.add_argument('--facemorpher', action='store_true', help='Morphs using the `facemorpher` algorithm.') + requiredNamed.add_argument('--stylegan2', action='store_true', help='Morphs using the `stylegan2` algorithm.') + requiredNamed.add_argument('--mipgan2', action='store_true', help='Morphs using the `mipgan2` algorithm.') + requiredNamed.add_argument('-s', '--src', type=check_dir_path, help='Provide the folder path containing the source images.', required=True) + requiredNamed.add_argument('-l', '--lst', type=check_dir_file, help='Provide the file path of the `.csv` file containing the names of the pair of images to be morphed.', required=True) + return parser.parse_args() + +def check_float_range(arg, MIN_VAL=0.0, MAX_VAL=1.0): + '''Type function for argparse - a float within the predefined bounds.''' + try: + f = float(arg) + except ValueError: + raise argparse.ArgumentTypeError("Must be a floating point number.") + if f < MIN_VAL or f > MAX_VAL: + raise argparse.ArgumentTypeError("Argument must be < " + str(MAX_VAL) + "and > " + str(MIN_VAL)) + return f + +def check_dir_path(path): + '''Checks if the given folder path as an argument exists.''' + if os.path.isdir(path) or path == 'results': + return path + else: + raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path.") + +def check_dir_file(path): + '''Checks if the given file path as an argument exists.''' + if os.path.exists(path): + return path + else: + raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid file.") + +def make_dst_path(dst_path, type, alpha_val): + '''Concatenates the `dst_path` and `alpha_val` to create directory to store results.''' + full_dst_path = os.path.join(dst_path, type, str(alpha_val)) + if not os.path.exists(full_dst_path): + print('Making new directory', full_dst_path) + os.makedirs(full_dst_path) + return full_dst_path + +def make_opencv_morphs(PERMUTATIONS, SRC_DIR, dst_path, detector, predictor, fa, alpha): + '''Loops over all given permutations to generate the opencv morph images.''' + print('Generating OpenCV morphs with alpha', alpha) + # Loop + for f1, f2 in PERMUTATIONS: + 
print('Morphing files:', f1, f2) + # Read images + img1 = np.array(Image.open(os.path.join(SRC_DIR, f1))) + img2 = np.array(Image.open(os.path.join(SRC_DIR, f2))) + # Convert from BGR to RGB + img1 = cv.cvtColor(img1, cv.COLOR_BGR2RGB) + img2 = cv.cvtColor(img2, cv.COLOR_BGR2RGB) + # Get grayscale images + gray1 = cv.cvtColor(img1, cv.COLOR_RGB2GRAY) + gray2 = cv.cvtColor(img2, cv.COLOR_RGB2GRAY) + # Get rectangles + rects1 = detector(img1, 1) + rects2 = detector(img2, 1) + # Align images + img1 = fa.align(img1, gray1, rects1[0]) + img2 = fa.align(img2, gray2, rects2[0]) + # We need the landmarks again as we have changed the size + rects1 = detector(img1, 1) + rects2 = detector(img2, 1) + # Extract landmarks + points1 = predictor(img1, rects1[0]) + points2 = predictor(img2, rects2[0]) + points1 = cv_utils.face_utils.shape_to_np(points1) + points2 = cv_utils.face_utils.shape_to_np(points2) + points = [] + # Compute weighted average point coordinates + for i in range(0, len(points1)): + x = (1 - alpha) * points1[i][0] + alpha * points2[i][0] + y = (1 - alpha) * points1[i][1] + alpha * points2[i][1] + points.append((x, y)) + # Allocate space for final output + imgMorph = np.zeros(img1.shape, dtype=img1.dtype) + # Rectangle to be used with Subdiv2D + size = img1.shape + rect = (0, 0, size[1], size[0]) + # Create an instance of Subdiv2D + subdiv = cv.Subdiv2D(rect) + d_col = (255, 255, 255) + # Calculate and draw delaunay triangles + delaunayTri = cv_utils.calculateDelaunayTriangles( + rect, subdiv, points, img1, 'Delaunay Triangulation', d_col, draw=False) + # Morph by reading calculated triangles + for line in delaunayTri: + x, y, z = line + x = int(x) + y = int(y) + z = int(z) + t1 = [points1[x], points1[y], points1[z]] + t2 = [points2[x], points2[y], points2[z]] + t = [points[x], points[y], points[z]] + # Morph one triangle at a time. 
+ cv_utils.morphTriangle(img1, img2, imgMorph, t1, t2, t, alpha) + # Remove the black + for i in range(len(imgMorph)): + for j in range(len(imgMorph[i])): + if not np.any(imgMorph[i][j]): + imgMorph[i][j] = (1.0 - alpha) * \ + img1[i][j] + alpha * img2[i][j] + # Save morphed image + newname = os.path.join(dst_path, f1 + '_' + f2) + print(newname, imgMorph.shape) + cv.imwrite(newname, imgMorph) + +def make_facemorpher_morphs(PERMUTATIONS, SRC_DIR, WIDTH, HEIGHT, dst_path, alpha): + '''Loops over all given permutations to generate the facemorph morph images.''' + print('Generating FaceMorpher morphs with alpha', alpha) + # Loop + for f1, f2 in PERMUTATIONS: + print('Morphing files:', f1, f2) + fcmorpher.morpher(imgpaths=[os.path.join(SRC_DIR, f1), os.path.join(SRC_DIR, f2)], + width=WIDTH, + height=HEIGHT, + num_frames=12, + out_frames=dst_path, + background='average', + alpha=alpha) + +def fix_randomness(): + '''Fixes np and tf seed.''' + sg_utils.fix_randomness(seed=0) + +def instantiate_generator(): + '''Instantiates SG2 Generator''' + print('Setting up StyleGAN2 modules') + return modules.generator.StyleGAN2() + +def instantiate_cropper(): + '''Instantiates SG2 Cropper''' + return modules.preprocessor.FFHQCropper() + +def check_for_latents(PERMUTATIONS, LATENTS_DIR, vec_suffix='.hdf5'): + '''Checks if all required latent vectors are present in the `LATENTS_DIR` directory''' + print('Checking for all existing latents.') + latents_lst = os.listdir(LATENTS_DIR) + missing = False + for f1, f2 in PERMUTATIONS: + if f1[:-4]+vec_suffix not in latents_lst: + print('Missing latent:', f1) + missing = True + if f2[:-4]+vec_suffix not in latents_lst: + print('Missing latent:', f2) + missing = True + if missing: + sys.exit("Please generate all the necessary latent vectors of the images to morph before running the StyleGAN2 morphing algorithm.") + +def make_stylegan2_morphs(PERMUTATIONS, DST_SUFFIX, LATENTS_DIR, arg_dst_path, generator, ALPHA_LIST, vec_suffix='.hdf5'): + '''Loops over all given permutations to generate the stylegan2 morph images.''' + print('Generating StyleGAN2 morphs') + # Loop + for f1, f2 in PERMUTATIONS: + # Load projected images from existing .hdf5 files + latents_path = [os.path.join(LATENTS_DIR, f1[:-4] + vec_suffix), + os.path.join(LATENTS_DIR, f2[:-4] + vec_suffix)] + latents = list(map(bob.io.image.load, latents_path)) + # Interpolate + morph_lats = [] + for alpha in ALPHA_LIST: + morph_lats.append(latents[0] * (1-alpha) + latents[1] * alpha) + # Stack + w_latents = np.stack(morph_lats) + # Generated associated interpolated images + lerp_images = generator.run_from_W(w_latents) + # Save the morphed image + for i, img in enumerate(lerp_images): + dst_path = make_dst_path(arg_dst_path, 'stylegan2', ALPHA_LIST[i]) + newname = os.path.join(dst_path, f1 + '_' + f2) + bob.io.base.save(img, newname) + +def make_mipgan2_morphs(PERMUTATIONS, DST_SUFFIX, SRC_DIR, LATENTS_DIR, arg_dst_path, generator, cropper, alpha): + '''Loops over all given permutations to generate the mipgan2 morph images.''' + # Get Morpher class, and set network (different init to Projector class) + morph = morpher.Morpher(alpha=alpha) + morph.set_network(generator.network) + # Morph pair-by-pair + for f1, f2 in PERMUTATIONS: + # Create morph name + f1 = f1[:-4] + f2 = f2[:-4] + m_name = '_'.join((f1, f2)) + DST_SUFFIX + # Load, crop, and process images + x1 = cropper(bob.io.image.load(os.path.join(SRC_DIR, f1+DST_SUFFIX))) + x2 = cropper(bob.io.image.load(os.path.join(SRC_DIR, f2+DST_SUFFIX))) + pair_images = 
np.array([x1, x2]) + pair_images = modules.misc.adjust_dynamic_range(pair_images, [0, 255], [-1, 1]) + # Morph + morph.start(pair_images, LATENTS_DIR, (f1, f2)) + while morph.get_cur_step() < morph.num_steps: + morph.step() + dst_path = make_dst_path(arg_dst_path, 'mipgan2', alpha) + newname = os.path.join(dst_path, m_name) + modules.misc.save_image_grid(morph.get_images_interp(), newname, drange=[-1,1]) + #print('\r%-30s\r' % '', end='', flush=True) + +def main(): + ''' + Makes OpenCV morphs between selected images given in the `.csv` file. + ''' + # Parse arguments + args = parse_arguments() + + # Define variables + PERMUTATIONS = pd.read_csv(args.lst, header=None).values + DLIB_LMD_PATH = rc['sg2_morph.dlib_lmd_path'] + SRC_DIR = args.src + ALPHA_LIST = args.alphas + + if args.latents: + LATENTS_DIR = args.latents + + SRC_SUFFIX = '.png' + DST_SUFFIX = '.png' + WIDTH = 360 + HEIGHT = 480 + + # Fix seed + if args.stylegan2 or args.mipgan2: + fix_randomness() + + # Instantiate dlib detector and predictors + print('Instantiating modules.') + detector = cv_utils.dlib.get_frontal_face_detector() + predictor = cv_utils.dlib.shape_predictor(DLIB_LMD_PATH) + fa = cv_utils.FaceAligner(predictor, desiredFaceWidth=WIDTH, desiredFaceHeight=HEIGHT) + + # OpenCV Morphs + if args.opencv: + for alpha in ALPHA_LIST: + dst_path = make_dst_path(args.dst, 'opencv', alpha) + make_opencv_morphs(PERMUTATIONS, SRC_DIR, dst_path, detector, predictor, fa, alpha) + + # FaceMorpher Morphs + if args.facemorpher: + for alpha in ALPHA_LIST: + dst_path = make_dst_path(args.dst, 'facemorpher', alpha) + make_facemorpher_morphs(PERMUTATIONS, SRC_DIR, WIDTH, HEIGHT, dst_path, alpha) + + # StyleGAN2 Morphs - we can one shot for all alphas + if args.stylegan2: + check_for_latents(PERMUTATIONS, LATENTS_DIR) + generator = instantiate_generator() + make_stylegan2_morphs(PERMUTATIONS, DST_SUFFIX, LATENTS_DIR, args.dst, generator, ALPHA_LIST) + + # MIPGAN-II Morphs + if args.mipgan2: + check_for_latents(PERMUTATIONS, LATENTS_DIR) + cropper = instantiate_cropper() + if not args.stylegan2: + generator = instantiate_generator() + for alpha in ALPHA_LIST: + make_mipgan2_morphs(PERMUTATIONS, DST_SUFFIX, SRC_DIR, LATENTS_DIR, args.dst, generator, cropper, alpha) + + # Finish + print('Job completed !') + +if __name__ == "__main__": + main() diff --git a/modules/LICENSE.txt b/modules/LICENSE.txt new file mode 100644 index 0000000000000000000000000000000000000000..d7e85075defbd96f9e7a9d756aa7db0e7e30ccf7 --- /dev/null +++ b/modules/LICENSE.txt @@ -0,0 +1,101 @@ +Copyright (c) 2019, NVIDIA Corporation. All rights reserved. + + +Nvidia Source Code License-NC + +======================================================================= + +1. Definitions + +"Licensor" means any person or entity that distributes its Work. + +"Software" means the original work of authorship made available under +this License. + +"Work" means the Software and any additions to or derivative works of +the Software that are made available under this License. + +"Nvidia Processors" means any central processing unit (CPU), graphics +processing unit (GPU), field-programmable gate array (FPGA), +application-specific integrated circuit (ASIC) or any combination +thereof designed, made, sold, or provided by Nvidia or its affiliates. + +The terms "reproduce," "reproduction," "derivative works," and +"distribution" have the meaning as provided under U.S. 
copyright law; +provided, however, that for the purposes of this License, derivative +works shall not include works that remain separable from, or merely +link (or bind by name) to the interfaces of, the Work. + +Works, including the Software, are "made available" under this License +by including in or with the Work either (a) a copyright notice +referencing the applicability of this License to the Work, or (b) a +copy of this License. + +2. License Grants + + 2.1 Copyright Grant. Subject to the terms and conditions of this + License, each Licensor grants to you a perpetual, worldwide, + non-exclusive, royalty-free, copyright license to reproduce, + prepare derivative works of, publicly display, publicly perform, + sublicense and distribute its Work and any resulting derivative + works in any form. + +3. Limitations + + 3.1 Redistribution. You may reproduce or distribute the Work only + if (a) you do so under this License, (b) you include a complete + copy of this License with your distribution, and (c) you retain + without modification any copyright, patent, trademark, or + attribution notices that are present in the Work. + + 3.2 Derivative Works. You may specify that additional or different + terms apply to the use, reproduction, and distribution of your + derivative works of the Work ("Your Terms") only if (a) Your Terms + provide that the use limitation in Section 3.3 applies to your + derivative works, and (b) you identify the specific derivative + works that are subject to Your Terms. Notwithstanding Your Terms, + this License (including the redistribution requirements in Section + 3.1) will continue to apply to the Work itself. + + 3.3 Use Limitation. The Work and any derivative works thereof only + may be used or intended for use non-commercially. The Work or + derivative works thereof may be used or intended for use by Nvidia + or its affiliates commercially or non-commercially. As used herein, + "non-commercially" means for research or evaluation purposes only. + + 3.4 Patent Claims. If you bring or threaten to bring a patent claim + against any Licensor (including any claim, cross-claim or + counterclaim in a lawsuit) to enforce any patents that you allege + are infringed by any Work, then your rights under this License from + such Licensor (including the grants in Sections 2.1 and 2.2) will + terminate immediately. + + 3.5 Trademarks. This License does not grant any rights to use any + Licensor's or its affiliates' names, logos, or trademarks, except + as necessary to reproduce the notices described in this License. + + 3.6 Termination. If you violate any term of this License, then your + rights under this License (including the grants in Sections 2.1 and + 2.2) will terminate immediately. + +4. Disclaimer of Warranty. + +THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR +NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER +THIS LICENSE. + +5. Limitation of Liability. 
+ +EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL +THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE +SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, +INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF +OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK +(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, +LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER +COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF +THE POSSIBILITY OF SUCH DAMAGES. + +======================================================================= diff --git a/modules/README.md b/modules/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7f8d46bb85b8476d494e47cd658427a753d92be4 --- /dev/null +++ b/modules/README.md @@ -0,0 +1,220 @@ +## StyleGAN2 — Official TensorFlow Implementation + + + +**Analyzing and Improving the Image Quality of StyleGAN**<br> +Tero Karras, Samuli Laine, Miika Aittala, Janne Hellsten, Jaakko Lehtinen, Timo Aila<br> + +Paper: http://arxiv.org/abs/1912.04958<br> +Video: https://youtu.be/c-NJtV9Jvp0<br> + +Abstract: *The style-based GAN architecture (StyleGAN) yields state-of-the-art results in data-driven unconditional generative image modeling. We expose and analyze several of its characteristic artifacts, and propose changes in both model architecture and training methods to address them. In particular, we redesign generator normalization, revisit progressive growing, and regularize the generator to encourage good conditioning in the mapping from latent vectors to images. In addition to improving image quality, this path length regularizer yields the additional benefit that the generator becomes significantly easier to invert. This makes it possible to reliably detect if an image is generated by a particular network. We furthermore visualize how well the generator utilizes its output resolution, and identify a capacity problem, motivating us to train larger models for additional quality improvements. 
Overall, our improved model redefines the state of the art in unconditional image modeling, both in terms of existing distribution quality metrics as well as perceived image quality.* + +For business inquiries, please contact [researchinquiries@nvidia.com](mailto:researchinquiries@nvidia.com)<br> +For press and other inquiries, please contact Hector Marinez at [hmarinez@nvidia.com](mailto:hmarinez@nvidia.com)<br> + +| Additional material | +| :--- | :---------- +| [StyleGAN2](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7) | Main Google Drive folder +| ├ [stylegan2-paper.pdf](https://drive.google.com/open?id=1fnF-QsiQeKaxF-HbvFiGtzHF_Bf3CzJu) | High-quality version of the paper +| ├ [stylegan2-video.mp4](https://drive.google.com/open?id=1f_gbKW6FUUHKkUxciJ_lQx29mCq_fSBy) | High-quality version of the video +| ├ [images](https://drive.google.com/open?id=1Sak157_DLX84ytqHHqZaH_59HoEWzfB7) | Example images produced using our method +| │ ├ [curated-images](https://drive.google.com/open?id=1ydWb8xCHzDKMTW9kQ7sL-B1R0zATHVHp) | Hand-picked images showcasing our results +| │ └ [100k-generated-images](https://drive.google.com/open?id=1BA2OZ1GshdfFZGYZPob5QWOGBuJCdu5q) | Random images with and without truncation +| ├ [videos](https://drive.google.com/open?id=1yXDV96SFXoUiZKU7AyE6DyKgDpIk4wUZ) | Individual clips of the video as high-quality MP4 +| └ [networks](https://drive.google.com/open?id=1yanUI9m4b4PWzR0eurKNq6JR1Bbfbh6L) | Pre-trained networks +|    ├ [stylegan2-ffhq-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-f.pkl) | StyleGAN2 for <span style="font-variant:small-caps">FFHQ</span> dataset at 1024×1024 +|    ├ [stylegan2-car-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-car-config-f.pkl) | StyleGAN2 for <span style="font-variant:small-caps">LSUN Car</span> dataset at 512×384 +|    ├ [stylegan2-cat-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-cat-config-f.pkl) | StyleGAN2 for <span style="font-variant:small-caps">LSUN Cat</span> dataset at 256×256 +|    ├ [stylegan2-church-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-church-config-f.pkl) | StyleGAN2 for <span style="font-variant:small-caps">LSUN Church</span> dataset at 256×256 +|    ├ [stylegan2-horse-config-f.pkl](http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-horse-config-f.pkl) | StyleGAN2 for <span style="font-variant:small-caps">LSUN Horse</span> dataset at 256×256 +|    └ ⋯ | Other training configurations used in the paper + +## Requirements + +* Both Linux and Windows are supported. Linux is recommended for performance and compatibility reasons. +* 64-bit Python 3.6 installation. We recommend Anaconda3 with numpy 1.14.3 or newer. +* TensorFlow 1.14 or 1.15 with GPU support. The code does not support TensorFlow 2.0. +* On Windows, you need to use TensorFlow 1.14 — TensorFlow 1.15 will not work. +* One or more high-end NVIDIA GPUs, NVIDIA drivers, CUDA 10.0 toolkit and cuDNN 7.5. To reproduce the results reported in the paper, you need an NVIDIA GPU with at least 16 GB of DRAM. +* Docker users: use the [provided Dockerfile](./Dockerfile) to build an image with the required library dependencies. + +StyleGAN2 relies on custom TensorFlow ops that are compiled on the fly using [NVCC](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html). 
To test that your NVCC installation is working correctly, run: + +```.bash +nvcc test_nvcc.cu -o test_nvcc -run +| CPU says hello. +| GPU says hello. +``` + +On Windows, the compilation requires Microsoft Visual Studio to be in `PATH`. We recommend installing [Visual Studio Community Edition](https://visualstudio.microsoft.com/vs/) and adding into `PATH` using `"C:\Program Files (x86)\Microsoft Visual Studio\2019\Community\VC\Auxiliary\Build\vcvars64.bat"`. + +## Using pre-trained networks + +Pre-trained networks are stored as `*.pkl` files on the [StyleGAN2 Google Drive folder](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7). Below, you can either reference them directly using the syntax `gdrive:networks/<filename>.pkl`, or download them manually and reference by filename. + +```.bash +# Generate uncurated ffhq images (matches paper Figure 12) +python run_generator.py generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --seeds=6600-6625 --truncation-psi=0.5 + +# Generate curated ffhq images (matches paper Figure 11) +python run_generator.py generate-images --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --seeds=66,230,389,1518 --truncation-psi=1.0 + +# Generate uncurated car images +python run_generator.py generate-images --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --seeds=6000-6025 --truncation-psi=0.5 + +# Example of style mixing (matches the corresponding video clip) +python run_generator.py style-mixing-example --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --row-seeds=85,100,75,458,1500 --col-seeds=55,821,1789,293 --truncation-psi=1.0 +``` + +The results are placed in `results/<RUNNING_ID>/*.png`. You can change the location with `--result-dir`. For example, `--result-dir=~/my-stylegan2-results`. + +You can import the networks in your own Python code using `pickle.load()`. For this to work, you need to include the `dnnlib` source directory in `PYTHONPATH` and create a default TensorFlow session by calling `dnnlib.tflib.init_tf()`. See [run_generator.py](./run_generator.py) and [pretrained_networks.py](./pretrained_networks.py) for examples. + +## Preparing datasets + +Datasets are stored as multi-resolution TFRecords, similar to the [original StyleGAN](https://github.com/NVlabs/stylegan). Each dataset consists of multiple `*.tfrecords` files stored under a common directory, e.g., `~/datasets/ffhq/ffhq-r*.tfrecords`. In the following sections, the datasets are referenced using a combination of `--dataset` and `--data-dir` arguments, e.g., `--dataset=ffhq --data-dir=~/datasets`. + +**FFHQ**. To download the [Flickr-Faces-HQ](https://github.com/NVlabs/ffhq-dataset) dataset as multi-resolution TFRecords, run: + +```.bash +pushd ~ +git clone https://github.com/NVlabs/ffhq-dataset.git +cd ffhq-dataset +python download_ffhq.py --tfrecords +popd +python dataset_tool.py display ~/ffhq-dataset/tfrecords/ffhq +``` + +**LSUN**. Download the desired LSUN categories in LMDB format from the [LSUN project page](https://www.yf.io/p/lsun). To convert the data to multi-resolution TFRecords, run: + +```.bash +python dataset_tool.py create_lsun_wide ~/datasets/car ~/lsun/car_lmdb --width=512 --height=384 +python dataset_tool.py create_lsun ~/datasets/cat ~/lsun/cat_lmdb --resolution=256 +python dataset_tool.py create_lsun ~/datasets/church ~/lsun/church_outdoor_train_lmdb --resolution=256 +python dataset_tool.py create_lsun ~/datasets/horse ~/lsun/horse_lmdb --resolution=256 +``` + +**Custom**. 
Create custom datasets by placing all training images under a single directory. The images must be square-shaped and they must all have the same power-of-two dimensions. To convert the images to multi-resolution TFRecords, run: + +```.bash +python dataset_tool.py create_from_images ~/datasets/my-custom-dataset ~/my-custom-images +python dataset_tool.py display ~/datasets/my-custom-dataset +``` + +## Projecting images to latent space + +To find the matching latent vectors for a set of images, run: + +```.bash +# Project generated images +python run_projector.py project-generated-images --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --seeds=0,1,5 + +# Project real images +python run_projector.py project-real-images --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --dataset=car --data-dir=~/datasets +``` + +## Training networks + +To reproduce the training runs for config F in Tables 1 and 3, run: + +```.bash +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=ffhq --mirror-augment=true +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=car --total-kimg=57000 +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=cat --total-kimg=88000 +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=church --total-kimg 88000 --gamma=100 +python run_training.py --num-gpus=8 --data-dir=~/datasets --config=config-f \ + --dataset=horse --total-kimg 100000 --gamma=100 +``` + +For other configurations, see `python run_training.py --help`. + +We have verified that the results match the paper when training with 1, 2, 4, or 8 GPUs. Note that training FFHQ at 1024×1024 resolution requires GPU(s) with at least 16 GB of memory. 
The following table lists typical training times using NVIDIA DGX-1 with 8 Tesla V100 GPUs: + +| Configuration | Resolution | Total kimg | 1 GPU | 2 GPUs | 4 GPUs | 8 GPUs | GPU mem | +| :------------ | :-------------: | :--------: | :-----: | :-----: | :-----: | :----: | :-----: | +| `config-f` | 1024×1024 | 25000 | 69d 23h | 36d 4h | 18d 14h | 9d 18h | 13.3 GB | +| `config-f` | 1024×1024 | 10000 | 27d 23h | 14d 11h | 7d 10h | 3d 22h | 13.3 GB | +| `config-e` | 1024×1024 | 25000 | 35d 11h | 18d 15h | 9d 15h | 5d 6h | 8.6 GB | +| `config-e` | 1024×1024 | 10000 | 14d 4h | 7d 11h | 3d 20h | 2d 3h | 8.6 GB | +| `config-f` | 256×256 | 25000 | 32d 13h | 16d 23h | 8d 21h | 4d 18h | 6.4 GB | +| `config-f` | 256×256 | 10000 | 13d 0h | 6d 19h | 3d 13h | 1d 22h | 6.4 GB | + +Training curves for FFHQ config F (StyleGAN2) compared to original StyleGAN using 8 GPUs: + + + +After training, the resulting networks can be used the same way as the official pre-trained networks: + +```.bash +# Generate 1000 random images without truncation +python run_generator.py generate-images --seeds=0-999 --truncation-psi=1.0 \ + --network=results/00006-stylegan2-ffhq-8gpu-config-f/networks-final.pkl +``` + +## Evaluation metrics + +To reproduce the numbers for config F in Tables 1 and 3, run: + +```.bash +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-ffhq-config-f.pkl \ + --metrics=fid50k,ppl_wend --dataset=ffhq --mirror-augment=true +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-car-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=car +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-cat-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=cat +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-church-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=church +python run_metrics.py --data-dir=~/datasets --network=gdrive:networks/stylegan2-horse-config-f.pkl \ + --metrics=fid50k,ppl2_wend --dataset=horse +``` + +For other configurations, see the [StyleGAN2 Google Drive folder](https://drive.google.com/open?id=1QHc-yF5C3DChRwSdZKcx1w6K8JvSxQi7). + +Note that the metrics are evaluated using a different random seed each time, so the results will vary between runs. In the paper, we reported the average result of running each metric 10 times. 
The following table lists the available metrics along with their expected runtimes and random variation: + +| Metric | FFHQ config F | 1 GPU | 2 GPUs | 4 GPUs | Description | +| :---------- | :------------: | :----: | :-----: | :----: | :---------- | +| `fid50k` | 2.84 ± 0.03 | 22 min | 14 min | 10 min | [Fréchet Inception Distance](https://arxiv.org/abs/1706.08500) +| `is50k` | 5.13 ± 0.02 | 23 min | 14 min | 8 min | [Inception Score](https://arxiv.org/abs/1606.03498) +| `ppl_zfull` | 348.0 ± 3.8 | 41 min | 22 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in Z, full paths +| `ppl_wfull` | 126.9 ± 0.2 | 42 min | 22 min | 13 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in W, full paths +| `ppl_zend` | 348.6 ± 3.0 | 41 min | 22 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in Z, path endpoints +| `ppl_wend` | 129.4 ± 0.8 | 40 min | 23 min | 13 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) in W, path endpoints +| `ppl2_wend` | 145.0 ± 0.5 | 41 min | 23 min | 14 min | [Perceptual Path Length](https://arxiv.org/abs/1812.04948) without center crop +| `ls` | 154.2 / 4.27 | 10 hrs | 6 hrs | 4 hrs | [Linear Separability](https://arxiv.org/abs/1812.04948) +| `pr50k3` | 0.689 / 0.492 | 26 min | 17 min | 12 min | [Precision and Recall](https://arxiv.org/abs/1904.06991) + +Note that some of the metrics cache dataset-specific data on the disk, and they will take somewhat longer when run for the first time. + +## License + +Copyright © 2019, NVIDIA Corporation. All rights reserved. + +This work is made available under the Nvidia Source Code License-NC. To view a copy of this license, visit https://nvlabs.github.io/stylegan2/license.html + +## Citation + +``` +@article{Karras2019stylegan2, + title = {Analyzing and Improving the Image Quality of {StyleGAN}}, + author = {Tero Karras and Samuli Laine and Miika Aittala and Janne Hellsten and Jaakko Lehtinen and Timo Aila}, + journal = {CoRR}, + volume = {abs/1912.04958}, + year = {2019}, +} +``` + +## Acknowledgements + +We thank Ming-Yu Liu for an early review, Timo Viitanen for his help with code release, and Tero Kuosmanen for compute infrastructure. diff --git a/modules/__init__.py b/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..5a51a12b30f6205041e652f32c6f08b86292effb --- /dev/null +++ b/modules/__init__.py @@ -0,0 +1 @@ +from . import preprocessor, generator, projector \ No newline at end of file diff --git a/modules/generator.py b/modules/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..46b38ff8714942f8c1e8edf562df2b543f9c2843 --- /dev/null +++ b/modules/generator.py @@ -0,0 +1,94 @@ +# Copyright (c) 2021, Idiap Research Institute. All rights reserved. +# +# This work is made available under a custom license, Non-Commercial Research and Educational Use Only +# To view a copy of this license, visit +# https://gitlab.idiap.ch/bob/bob.paper.ijcb2021_synthetic_dataset/-/blob/master/LICENSE.txt + + +import pickle +import dnnlib +from dnnlib import tflib +import numpy as np +from bob.extension import rc +import utils + +class StyleGAN2(object): + def __init__(self, sg2_path=rc['sg2_morph.sg2_path'], randomize_noise=False): + """ + Instanciate the StyleGAN2 generator network. Cf. + T. Karras, *et al.*, \"Analyzing and improving the quality of StyleGAN\", in *Proc. 
+ IEEE/CVF Conference on Computer Vision and Patter Recognition, + 2020.* + Repository : https://github.com/NVlabs/stylegan2 + + :param sg2_path: Path to the pickled pretrained network + :param randomize_noise: Whether to randomly sample the noise inputs of the synthetizer, or fix them once + and for all at initialization. + """ + with open(sg2_path, 'rb') as pkl_file: + _G, _D, Gs = pickle.load(pkl_file) + self.network = Gs + self.latent_dim = Gs.input_shape[-1] + self.run_kwargs = {'randomize_noise': randomize_noise} + + if not randomize_noise: + noise_vars = [var for name, var in self.network.components.synthesis.vars.items() if name.startswith('noise')] + tflib.set_vars({var: np.random.randn(*var.shape.as_list()) for var in noise_vars}) # [height, width] + + def run_from_Z(self, latents, truncation_psi=0.5, return_w_latents=False, **kwargs): + """ + Run the generator from the input latent space Z + + Inputs: + :param latents: batch of latent vectors, of the shape [batch_size, latent_dim]. + :param truncation_psi: (float in [0,1]) value of psi when applying the truncation trick (cf. T. Karras, *et al.*, \"A style-based + generator architecture for generative adversarial networks\", in *Proc. CVPR*, 2018). + A value of 0 applies complete truncation (meaning one can only generate the mean face), + while a value of 1 applies no truncation. + :param return_w_latents: whether to return the W-space latents as additional output + :param **kwargs: other parameters that will be feeded to the Network.run method + + Outputs: + + :return images: Batch of generated images in bob format : tensor with shape [batch_size, 3, 1024, 1024] of uint8 + values in the range [0, 255] + :return w_latents: Batch of W-space latents vector, of the shape [batch_size, latent_dim]. + Only returned if `return_w_latents` is True. + """ + run_kwargs = self.run_kwargs + run_kwargs['truncation_psi'] = truncation_psi + run_kwargs['return_dlatents'] = return_w_latents + run_kwargs.update(kwargs) + + result = self.network.run(latents, None, **run_kwargs) + if return_w_latents: + images, dlatents = result + return self._postprocess(images), dlatents[:, 0, :] + else: + images = result + return self._postprocess(images) + + def run_from_W(self, w_latents, **kwargs): + """ + Run the generator from the intermediate latent space W + + Inputs: + :param latents: batch of W-space latent vectors, of the shape [batch_size, latent_dim]. + :param **kwargs: other parameters that will be feeded to the Network.run method of the synthesis network + + Outputs: + + :return images: Batch of generated images in bob format : tensor with shape [batch_size, 3, 1024, 1024] of uint8 + values in the range [0, 255] + """ + # Repeat the input latent for each style input + dlatents = np.tile(w_latents[:, np.newaxis, :], [1, 18, 1]) + run_kwargs = self.run_kwargs + run_kwargs.update(kwargs) + return self._postprocess(self.network.components['synthesis'].run(dlatents, **run_kwargs)) + + def _postprocess(self, images): + return np.stack([utils.adjust_dynamic_range(img, [0,255], 'uint8') for img in images]) + + + \ No newline at end of file diff --git a/modules/misc.py b/modules/misc.py new file mode 100755 index 0000000000000000000000000000000000000000..9b3444e85c70d9fe742bd2e8055a42210d857f8b --- /dev/null +++ b/modules/misc.py @@ -0,0 +1,145 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. 
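A hypothetical usage sketch of the `StyleGAN2` wrapper above (it assumes the pretrained network has been downloaded and the rc paths configured; `utils.fix_randomness()` is called first, as the scripts in this repository do, so that a default TensorFlow session exists):

```python
import numpy as np
import utils as sg_utils
from modules.generator import StyleGAN2

sg_utils.fix_randomness(seed=0)
gen = StyleGAN2()                              # loads the pickled FFHQ generator from the rc path
z = np.random.randn(2, gen.latent_dim)         # two Z-space latents
images, w = gen.run_from_Z(z, truncation_psi=0.7, return_w_latents=True)
print(images.shape, images.dtype)              # (2, 3, 1024, 1024), uint8 per the docstring

w_morph = 0.5 * w[0] + 0.5 * w[1]              # simple linear morph in W space
morph_image = gen.run_from_W(w_morph[None, :]) # decode through the synthesis network
```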
+# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +"""Miscellaneous utility functions.""" + +import os +import pickle +import numpy as np +import PIL.Image +import PIL.ImageFont +import dnnlib + +#---------------------------------------------------------------------------- +# Convenience wrappers for pickle that are able to load data produced by +# older versions of the code, and from external URLs. + +def open_file_or_url(file_or_url): + if dnnlib.util.is_url(file_or_url): + return dnnlib.util.open_url(file_or_url, cache_dir='.stylegan2-cache') + return open(file_or_url, 'rb') + +def load_pkl(file_or_url): + with open_file_or_url(file_or_url) as file: + return pickle.load(file, encoding='latin1') + +def save_pkl(obj, filename): + with open(filename, 'wb') as file: + pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL) + +#---------------------------------------------------------------------------- +# Image utils. + +def adjust_dynamic_range(data, drange_in, drange_out): + if drange_in != drange_out: + scale = (np.float32(drange_out[1]) - np.float32(drange_out[0])) / (np.float32(drange_in[1]) - np.float32(drange_in[0])) + bias = (np.float32(drange_out[0]) - np.float32(drange_in[0]) * scale) + data = data * scale + bias + return data + +def create_image_grid(images, grid_size=None): + assert images.ndim == 3 or images.ndim == 4 + num, img_w, img_h = images.shape[0], images.shape[-1], images.shape[-2] + + if grid_size is not None: + grid_w, grid_h = tuple(grid_size) + else: + grid_w = max(int(np.ceil(np.sqrt(num))), 1) + grid_h = max((num - 1) // grid_w + 1, 1) + + grid = np.zeros(list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w], dtype=images.dtype) + for idx in range(num): + x = (idx % grid_w) * img_w + y = (idx // grid_w) * img_h + grid[..., y : y + img_h, x : x + img_w] = images[idx] + return grid + +def convert_to_pil_image(image, drange=[0,1]): + assert image.ndim == 2 or image.ndim == 3 + if image.ndim == 3: + if image.shape[0] == 1: + image = image[0] # grayscale CHW => HW + else: + image = image.transpose(1, 2, 0) # CHW -> HWC + + image = adjust_dynamic_range(image, drange, [0,255]) + image = np.rint(image).clip(0, 255).astype(np.uint8) + fmt = 'RGB' if image.ndim == 3 else 'L' + return PIL.Image.fromarray(image, fmt) + +def save_image_grid(images, filename, drange=[0,1], grid_size=None): + convert_to_pil_image(create_image_grid(images, grid_size), drange).save(filename) + +def apply_mirror_augment(minibatch): + mask = np.random.rand(minibatch.shape[0]) < 0.5 + minibatch = np.array(minibatch) + minibatch[mask] = minibatch[mask, :, :, ::-1] + return minibatch + +#---------------------------------------------------------------------------- +# Loading data from previous training runs. + +def parse_config_for_previous_run(run_dir): + with open(os.path.join(run_dir, 'submit_config.pkl'), 'rb') as f: + data = pickle.load(f) + data = data.get('run_func_kwargs', {}) + return dict(train=data, dataset=data.get('dataset_args', {})) + +#---------------------------------------------------------------------------- +# Size and contents of the image snapshot grids that are exported +# periodically during training. + +def setup_snapshot_image_grid(training_set, + size = '1080p', # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display. + layout = 'random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label. + + # Select size. 
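A quick illustration (a sketch, with random data) of the `drange` convention used by the image helpers above: a float batch in [-1, 1] is remapped to [0, 255] before being written out as a grid.

```python
import numpy as np
from modules import misc

fake_batch = np.random.uniform(-1.0, 1.0, size=(4, 3, 64, 64)).astype(np.float32)
misc.save_image_grid(fake_batch, "grid_example.png", drange=[-1, 1], grid_size=(2, 2))
```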
+ gw = 1; gh = 1 + if size == '1080p': + gw = np.clip(1920 // training_set.shape[2], 3, 32) + gh = np.clip(1080 // training_set.shape[1], 2, 32) + if size == '4k': + gw = np.clip(3840 // training_set.shape[2], 7, 32) + gh = np.clip(2160 // training_set.shape[1], 4, 32) + if size == '8k': + gw = np.clip(7680 // training_set.shape[2], 7, 32) + gh = np.clip(4320 // training_set.shape[1], 4, 32) + + # Initialize data arrays. + reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) + labels = np.zeros([gw * gh, training_set.label_size], dtype=training_set.label_dtype) + + # Random layout. + if layout == 'random': + reals[:], labels[:] = training_set.get_minibatch_np(gw * gh) + + # Class-conditional layouts. + class_layouts = dict(row_per_class=[gw,1], col_per_class=[1,gh], class4x4=[4,4]) + if layout in class_layouts: + bw, bh = class_layouts[layout] + nw = (gw - 1) // bw + 1 + nh = (gh - 1) // bh + 1 + blocks = [[] for _i in range(nw * nh)] + for _iter in range(1000000): + real, label = training_set.get_minibatch_np(1) + idx = np.argmax(label[0]) + while idx < len(blocks) and len(blocks[idx]) >= bw * bh: + idx += training_set.label_size + if idx < len(blocks): + blocks[idx].append((real, label)) + if all(len(block) >= bw * bh for block in blocks): + break + for i, block in enumerate(blocks): + for j, (real, label) in enumerate(block): + x = (i % nw) * bw + j % bw + y = (i // nw) * bh + j // bw + if x < gw and y < gh: + reals[x + y * gw] = real[0] + labels[x + y * gw] = label[0] + + return (gw, gh), reals, labels + +#---------------------------------------------------------------------------- diff --git a/modules/morpher.py b/modules/morpher.py new file mode 100755 index 0000000000000000000000000000000000000000..db530aeb6f63125f04eff98e5568e1dd06fdd264 --- /dev/null +++ b/modules/morpher.py @@ -0,0 +1,714 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. +# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# This file is a modification of the following file: https://github.com/NVlabs/stylegan2/blob/master/projector.py. +# Adjustements made by Eklavya Sarkar (Idiap Research Institute, Biometrics Security and Privacy), Jan-Feb 2021, +# -> Perceptual loss: mipgan_perceptual_loss() +# -> Identity loss: mipgan_identity_loss() +# -> ID-Difference loss: mipgan_id_diff_loss() +# -> MS-SSIM loss: mipgan_ms_ssim_loss() +# -> Other misc initialization and variable changes required for this implementation. + +# For the purpose of generating morphing attacks as described in the paper: + +# Sarkar, E., Korshunov, P., Colbois, L. and Marcel, S., Are GAN-based Morphs Threatening Face Recognition?, 2022. +# International Conference on Acoustics, Speech, & Signal Processing (ICASSP). + +# The implementation is a modified version of the one described in: + +# Zhang, H., Venkatesh, S., Ramachandra, R., Raja, K., Damer, N. and Busch, C., 2021. +# Mipgan—generating strong and high quality morphing attacks using identity prior driven gan. +# IEEE Transactions on Biometrics, Behavior, and Identity Science, 3(3), pp.365-383. 
+# URL: https://arxiv.org/abs/2009.01729 + +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib +from keras_vggface.vggface import VGGFace +from keras_vggface import utils +from keras.engine import Model +from modules import misc +import cv2 +import h5py +import os + +#---------------------------------------------------------------------------- + + +class Morpher(object): + def __init__(self, alpha): + self.num_steps = 150 + self.dlatent_avg_samples = 10000 + self.initial_learning_rate = 0.03 + self.initial_noise_factor = 0.05 + self.lr_rampdown_length = 0.25 + self.lr_rampup_length = 0.05 + self.noise_ramp_length = 0.75 + self.regularize_noise_weight = 1e5 + self.verbose = False + self.clone_net = True + + self._Gs = None + self._minibatch_size = None + self._dlatent_avg = None + self._dlatent_std = None + self._noise_vars = None + self._noise_init_op = None + self._noise_normalize_op = None + self._dlatents_var = None + self._noise_in = None + self._dlatents_expr = None + self._images_expr = None + self._target_images_var = None + self._lpips = None + self._dist = None + self._loss = None + self._reg_sizes = None + self._lrate_in = None + self._opt = None + self._opt_step = None + self._cur_step = None + + self._dlatent_rand = None + self._dlatent_avg_tf = None + self._dlatent_interp_tf = None + self.dlatent_interp_tf = None + self._dlatent_rand_tf = None + self._image_rand_tf = None + self._feature_layer = 'flatten_1' + self._model = 'resnet50' + self._x_1 = None + self._x_2 = None + self._B = None + + self._alpha = alpha + self.proc_images_expr = None + self.proc_images_interp = None + + self._B_proc_i_m_v2 = None + self.i_m = None + self._x_1_proc_v2 = None + self._x_2_proc_v2 = None + + self.proc_i_m = None + self._B_x_1_proc_v2 = None + self._B_x_2_proc_v2 = None + + self._loss_perceptual = None + self._loss_identity = None + self._loss_id_diff = None + self._loss_ms_ssim = None + + def _info(self, *args): + if self.verbose: + print(*args) + + def set_network(self, Gs, minibatch_size=1): + + assert minibatch_size == 1 + self._Gs = Gs + self._minibatch_size = minibatch_size + if self._Gs is None: + return + if self.clone_net: + self._Gs = self._Gs.clone() + + # Find dlatent stats. + self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples) + latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:]) # (10000,512) + dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512] # N = 10000 + image_rand, self._dlatent_rand = self._Gs.run(latent_samples[1][None,:], None, return_dlatents=True) + self._dlatent_rand = self._dlatent_rand[:, :1, :] # [1, 1, 512] + self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512] + self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5 + self._info('std = %g' % self._dlatent_std) + + # Find noise inputs. 
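+        # Descriptive note (added): the synthesis network exposes its per-layer noise maps as
+        # variables named 'G_synthesis/noise<k>'. They are collected below so they can be
+        # re-initialized before optimization and re-normalized (zero mean, unit std) after each step.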
+ self._info('Setting up noise inputs...') + self._noise_vars = [] + noise_init_ops = [] + noise_normalize_ops = [] + while True: + n = 'G_synthesis/noise%d' % len(self._noise_vars) + if not n in self._Gs.vars: + break + v = self._Gs.vars[n] + self._noise_vars.append(v) + noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32))) + noise_mean = tf.reduce_mean(v) + noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5 + noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std)) + self._info(n, v) + self._noise_init_op = tf.group(*noise_init_ops) + self._noise_normalize_op = tf.group(*noise_normalize_ops) + + # Image output graph + self._info('Building image output graph...') + self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var') # (1,1,512) + self._noise_in = tf.placeholder(tf.float32, [], name='noise_in') # () + + # Set VGGFace biometric network and extract embeddings + self._B = self.return_vggface_network() + + # Initialize + self.dlatent_interp_tf = tf.Variable(tf.zeros([1, 1, 512]), name='dlatent_interp_tf') # (1, 1, 512) + self.dlatent_interp_expr = tf.tile(self.dlatent_interp_tf, [1, self._Gs.components.synthesis.input_shape[1], 1]) # (1, 18, 512) + self.i_m = self._Gs.components.synthesis.get_output_for(self.dlatent_interp_expr, randomize_noise=False) # (1, 3, 1024, 1024) + + # Loss graph + self._info('Building loss graph...') + self._x_1 = tf.Variable(tf.zeros([self._minibatch_size, 256, 256, 3]), name='x_1') # (1, 256, 256, 3) + self._x_2 = tf.Variable(tf.zeros([self._minibatch_size, 256, 256, 3]), name='x_2') # (1, 256, 256, 3) + + # Preprocess input images + self._x_1_proc_v2 = self.preprocess_input(self._x_1, version=2) + self._x_2_proc_v2 = self.preprocess_input(self._x_2, version=2) + + # Extract Embeddings + self._B_x_1_proc_v2 = self._B(self._x_1_proc_v2) + self._B_x_2_proc_v2 = self._B(self._x_2_proc_v2) + + # Downsize, preprocess, extract + self.proc_i_m = self.downsize_img_tf(self.i_m) # (1, 256, 256, 3) + self.proc_i_m_v2 = self.preprocess_input(self.proc_i_m, version=2) # (1, 256, 256, 3) + self._B_proc_i_m_v2 = self._B(self.proc_i_m_v2) # (1, 2048) + + # Choose loss + self._loss = self.mipgan_loss(self._B_x_1_proc_v2, + self._B_x_2_proc_v2, + self.proc_i_m, + self._B_proc_i_m_v2, + self._x_1, + self._x_2) + + # Noise regularization graph. + self._info('Building noise regularization graph...') + reg_loss = 0.0 + for v in self._noise_vars: + sz = v.shape[2] + while True: + reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2 + if sz <= 8: + break # Small enough already + v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale + v = tf.reduce_mean(v, axis=[3, 5]) + sz = sz // 2 + self._loss += reg_loss * self.regularize_noise_weight + + # Optimizer. + self._info('Setting up optimizer...') + self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in') + self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in) + self._opt.register_gradients(self._loss, [self.dlatent_interp_tf] + self._noise_vars) + self._opt_step = self._opt.apply_updates() + + def mipgan_perceptual_loss(self, proc_i_m, x_1, x_2): + ''' + Calculates and returns the scalar of MIPGAN's first loss: the perceptual loss. 
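+        In the implementation below, for each selected VGG16 layer l and each contributing image
+        x_k (k = 1, 2), the term sqrt(||F_l(x_k) - F_l(x_m)||_2) / n_l is computed, where F_l is the
+        layer activation, x_m is the current morph, and n_l is the size of the last dimension of the
+        activation (its number of channels). The per-layer terms are summed, and the two identities
+        are combined with weights (1 - alpha) and alpha.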
+ ''' + # Preprocess (version 1 for VGG16) + proc_i_m_v1 = self.preprocess_input(proc_i_m, version=1) # (1, 256, 256, 3) + proc_x_1 = self.preprocess_input(x_1, version=1) # (1, 256, 256, 3) + proc_x_2 = self.preprocess_input(x_2, version=1) # (1, 256, 256, 3) + # Variables + selected_layers = ['conv1_1', 'conv1_2', 'conv2_2', 'conv3_3'] + vgg_16 = VGGFace(model='vgg16', include_top=False, input_shape=(256, 256, 3), pooling='avg') + sum_1 = 0 + sum_2 = 0 + # Loop through the layers + for layer in selected_layers: + # Make get layer in question + out = vgg_16.get_layer(layer).output + vgg_16_model = Model(vgg_16.input, out) + # Get embeddings + vgg_embed_i_m = vgg_16_model(proc_i_m_v1) + vgg_embed_x_1 = vgg_16_model(proc_x_1) + vgg_embed_x_2 = vgg_16_model(proc_x_2) + # Differences + vgg_embed_diff_1 = tf.math.subtract(vgg_embed_x_1, vgg_embed_i_m, name='vgg_embeds_diff_1') + vgg_embed_diff_2 = tf.math.subtract(vgg_embed_x_2, vgg_embed_i_m, name='vgg_embeds_diff_2') + # Squared L2-Norms + vgg_sqrt_l2_norms_1 = tf.math.sqrt(tf.norm(vgg_embed_diff_1, ord=2), name='squared_l2_norm_1') + vgg_sqrt_l2_norms_2 = tf.math.sqrt(tf.norm(vgg_embed_diff_2, ord=2), name='squared_l2_norm_2') + # Divisions + ratio = 1 / vgg_embed_i_m.get_shape().as_list()[-1] # Check if this actually the "number of features in layer i" value + # Update + sum_1 += tf.math.multiply(vgg_sqrt_l2_norms_1, ratio) + sum_2 += tf.math.multiply(vgg_sqrt_l2_norms_2, ratio) + # Take half of both sums + loss = ((1-self._alpha) * sum_1) + (self._alpha * sum_2) + # Return + return loss + + def get_cosine_distances(self, B_x_1_norm, B_x_2_norm, B_i_m_norm): + ''' + Calculates and returns the two cosine distances, both required for the id_diff and identity losses. + ''' + # First term + numerator_first_term = tf.reduce_sum(tf.math.multiply(B_x_1_norm, B_i_m_norm), name='numerator_first_term') + denominator_first_term = tf.math.multiply(tf.norm(B_x_1_norm, ord=2), tf.norm(B_i_m_norm, ord=2), name='denominator_first_term') + cos_similarity_first_term = tf.math.divide(numerator_first_term, denominator_first_term) + cosine_dist_1 = 1 - cos_similarity_first_term + # Second term + numerator_second_term = tf.reduce_sum(tf.math.multiply(B_x_2_norm, B_i_m_norm, name='numerator_second_term')) + denominator_second_term = tf.math.multiply(tf.norm(B_x_2_norm, ord=2), tf.norm(B_i_m_norm, ord=2), name='denominator_second_term') + cos_similarity_second_term = tf.math.divide(numerator_second_term, denominator_second_term) + cosine_dist_2 = 1 - cos_similarity_second_term + return cosine_dist_1, cosine_dist_2 + + def mipgan_identity_loss(self, cosine_dist_1, cosine_dist_2): + ''' + Calculates and returns the scalar of MIPGAN's second loss: the identity loss. + ''' + # Sum & Division + weighted_cosine_dist_1 = cosine_dist_1 * (1-self._alpha) + weighted_cosine_dist_2 = cosine_dist_2 * self._alpha + return tf.math.add_n([weighted_cosine_dist_1, weighted_cosine_dist_2], name='sum_cos_dists') + + def mipgan_id_diff_loss(self, cosine_dist_1, cosine_dist_2): + ''' + Calculates and returns the scalar of MIPGAN's third loss: the id_diff loss. 
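+        With d_k = 1 - cos(B(x_k), B(x_m)) the cosine distances computed in get_cosine_distances,
+        this term is |(1 - alpha) * d_1 - alpha * d_2|, i.e. it penalizes an unbalanced contribution
+        of the two identities to the morph.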
+        '''
+        # Subtraction & L1 norm
+        weighted_cosine_dist_1 = cosine_dist_1 * (1-self._alpha)
+        weighted_cosine_dist_2 = cosine_dist_2 * self._alpha
+        cosine_dist_diff = tf.math.subtract(weighted_cosine_dist_1, weighted_cosine_dist_2, name='substract_cos_dist')
+        return tf.norm(cosine_dist_diff, ord=1)
+
+    def mipgan_ms_ssim_loss(self, proc_i_m, x_1, x_2):
+        '''
+        Calculates and returns the scalar of MIPGAN's fourth loss: the ms_ssim loss.
+        Note that MS-SSIM is a similarity to be maximized, hence the negative sign below:
+        minimizing this term maximizes the structural similarity between the morph and both inputs.
+        '''
+        loss_1 = tf.image.ssim_multiscale(x_1, proc_i_m, max_val=255)
+        loss_2 = tf.image.ssim_multiscale(x_2, proc_i_m, max_val=255)
+        return - ((loss_1*(1-self._alpha)) + (loss_2*self._alpha))
+
+    def mipgan_loss(self, B_x_1_proc_v2, B_x_2_proc_v2, proc_i_m, B_proc_i_m_v2, x_1, x_2,
+                    lambda_perceptual=0.0002, lambda_identity=10, lambda_id_diff=1, lambda_ms_ssim=1):
+        '''
+        Adapted equation from the paper 'MIPGAN - Generating Strong and High Quality Morphing Attacks Using Identity Prior Driven GAN'
+        '''
+        # L2 Normalize vectors, so their length (norm) is 1
+        B_x_1_norm = tf.nn.l2_normalize(B_x_1_proc_v2, name='norm_B_x_1') # tf.norm(B_x_1_norm)=1
+        B_x_2_norm = tf.nn.l2_normalize(B_x_2_proc_v2, name='norm_B_x_2') # tf.norm(B_x_2_norm)=1
+        B_i_m_norm = tf.nn.l2_normalize(B_proc_i_m_v2, name='norm_B_i_m') # tf.norm(B_i_m_norm)=1
+        # Get cosine distances
+        cosine_dist_1, cosine_dist_2 = self.get_cosine_distances(B_x_1_norm, B_x_2_norm, B_i_m_norm)
+        # Get losses
+        self._loss_perceptual = self.mipgan_perceptual_loss(proc_i_m, x_1, x_2) * lambda_perceptual
+        self._loss_identity = self.mipgan_identity_loss(cosine_dist_1, cosine_dist_2) * lambda_identity
+        self._loss_id_diff = self.mipgan_id_diff_loss(cosine_dist_1, cosine_dist_2) * lambda_id_diff
+        self._loss_ms_ssim = self.mipgan_ms_ssim_loss(proc_i_m, x_1, x_2) * lambda_ms_ssim
+        # Final loss
+        return self._loss_perceptual + self._loss_identity + self._loss_id_diff + self._loss_ms_ssim
+
+    def get_middle_morph_latent(self, latents_dir, m_name):
+
+        '''
+        Reads the latent vectors of the two projected images (previously saved as hdf5) and returns
+        their middle (0.5 / 0.5) interpolation in W space, of shape (1, 1, 512). This interpolated
+        latent is later fed to the synthesis network as the starting point of the optimization.
+        '''
+        # Get morph file names
+        f1, f2 = m_name
+        suffix = '.hdf5'
+        f1 = f1 + suffix # Modify according to hdf5 file names
+        f2 = f2 + suffix # Modify according to hdf5 file names
+
+        # Read existing latent vectors of each input image (previously saved as hdf5)
+        latents_path = [os.path.join(latents_dir, f1), os.path.join(latents_dir, f2)]
+
+        # Convert hdf5 file to numpy
+        latent_x_1 = np.array(h5py.File(latents_path[0], 'r')['array'])[None, None, :] # (1, 1, 512)
+        latent_x_2 = np.array(h5py.File(latents_path[1], 'r')['array'])[None, None, :] # (1, 1, 512)
+
+        # Get the middle interpolation: 0.5*x1 + (1-0.5)*x2
+        scaled_x_1 = 0.5 * latent_x_1
+        scaled_x_2 = 0.5 * latent_x_2
+        _dlatent_interp = scaled_x_1 + scaled_x_2
+
+        # Return
+        return _dlatent_interp # (1, 1, 512)
+
+    def downsize_img_tf(self, images_expr, size=256):
+        '''
+        Downsizes the tensor of an image, and puts the channels last.
+ @param _images_expr: an image tensor with the channels first, eg: (1, 3, 1024, 1024) + ''' + # Downsize + proc_images_expr = (images_expr + 1) * (255 / 2) # (1, 3, 1024, 1024) + sh = proc_images_expr.shape.as_list() + if sh[2] > size: + factor = sh[2] // size + proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor , factor, sh[2] // factor, factor]), axis=[3,5]) + # Channels last + sh = proc_images_expr.shape.as_list() + proc_images_expr = tf.transpose(proc_images_expr, (0, 2, 3, 1), name='channels_last') # (1, 256, 256, 3) + # Return + return proc_images_expr + + def return_vggface_network(self): + ''' + Creates and returns a Keras VGGFace network. + ''' + # Biometric Network B + # B_VGG = VGGFace(model=self._model) + # out = B_VGG.get_layer(self._feature_layer).output + # return Model(B_VGG.input, out) + return VGGFace(model=self._model, include_top=False, input_shape=(256, 256, 3), pooling='avg') + + def _preprocess_numpy_input(self, x, version, data_format, mode, **kwargs): + """Preprocesses a Numpy array encoding a batch of images. + # Arguments + x: Input array, 3D or 4D. + data_format: Data format of the image array. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + # Returns + Preprocessed Numpy array. + """ + backend = tf.keras.backend + if not issubclass(x.dtype.type, np.floating): + x = x.astype(backend.floatx(), copy=False) + + if mode == 'tf': + x /= 127.5 + x -= 1. + return x + + if mode == 'torch': + x /= 255. + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + else: + if data_format == 'channels_first': + # 'RGB'->'BGR' + if x.ndim == 3: + x = x[::-1, ...] + else: + x = x[:, ::-1, ...] + else: + # 'RGB'->'BGR' + x = x[..., ::-1] + # mean = [103.939, 116.779, 123.68] + if version == 1: + mean = [93.5940, 104.7624, 129.1863] + elif version == 2: + mean = [91.4953, 103.8827, 131.0912] + std = None + + # Zero-center by mean pixel + if data_format == 'channels_first': + if x.ndim == 3: + x[0, :, :] -= mean[0] + x[1, :, :] -= mean[1] + x[2, :, :] -= mean[2] + if std is not None: + x[0, :, :] /= std[0] + x[1, :, :] /= std[1] + x[2, :, :] /= std[2] + else: + x[:, 0, :, :] -= mean[0] + x[:, 1, :, :] -= mean[1] + x[:, 2, :, :] -= mean[2] + if std is not None: + x[:, 0, :, :] /= std[0] + x[:, 1, :, :] /= std[1] + x[:, 2, :, :] /= std[2] + else: + x[..., 0] -= mean[0] + x[..., 1] -= mean[1] + x[..., 2] -= mean[2] + if std is not None: + x[..., 0] /= std[0] + x[..., 1] /= std[1] + x[..., 2] /= std[2] + return x + + def get_submodules_from_kwargs(self, kwargs): + backend = kwargs.get('backend', _KERAS_BACKEND) + layers = kwargs.get('layers', _KERAS_LAYERS) + models = kwargs.get('models', _KERAS_MODELS) + utils = kwargs.get('utils', _KERAS_UTILS) + for key in kwargs.keys(): + if key not in ['backend', 'layers', 'models', 'utils']: + raise TypeError('Invalid keyword argument: %s', key) + return backend, layers, models, utils + + def _preprocess_symbolic_input(self, x, version, data_format, mode, **kwargs): + """Preprocesses a tensor encoding a batch of images. + # Arguments + x: Input tensor, 3D or 4D. + data_format: Data format of the image tensor. + mode: One of "caffe", "tf" or "torch". 
+ - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + # Returns + Preprocessed tensor. + """ + _IMAGENET_MEAN = None + + #backend, _, _, _ = self.get_submodules_from_kwargs(kwargs) + backend = tf.keras.backend + + if mode == 'tf': + x /= 127.5 + x -= 1. + return x + + if mode == 'torch': + x /= 255. + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + else: + if data_format == 'channels_first': + # 'RGB'->'BGR' + if backend.ndim(x) == 3: + x = x[::-1, ...] + else: + x = x[:, ::-1, ...] + else: + # 'RGB'->'BGR' + x = x[..., ::-1] + #mean = [103.939, 116.779, 123.68] + if version == 1: + mean = [93.5940, 104.7624, 129.1863] + elif version == 2: + mean = [91.4953, 103.8827, 131.0912] + std = None + + if _IMAGENET_MEAN is None: + _IMAGENET_MEAN = backend.constant(-np.array(mean)) + + # Zero-center by mean pixel + if backend.dtype(x) != backend.dtype(_IMAGENET_MEAN): + x = backend.bias_add( + x, backend.cast(_IMAGENET_MEAN, backend.dtype(x)), + data_format=data_format) + else: + x = backend.bias_add(x, _IMAGENET_MEAN, data_format) + if std is not None: + x /= std + return x + + def preprocess_input(self, x, version, data_format=None, mode='caffe', **kwargs): + """Preprocesses a tensor or Numpy array encoding a batch of images. + # Arguments + x: Input Numpy or symbolic tensor, 3D or 4D. + The preprocessed data is written over the input data + if the data types are compatible. To avoid this + behaviour, `numpy.copy(x)` can be used. + data_format: Data format of the image tensor/array. + mode: One of "caffe", "tf" or "torch". + - caffe: will convert the images from RGB to BGR, + then will zero-center each color channel with + respect to the ImageNet dataset, + without scaling. + - tf: will scale pixels between -1 and 1, + sample-wise. + - torch: will scale pixels between 0 and 1 and then + will normalize each channel with respect to the + ImageNet dataset. + # Returns + Preprocessed tensor or Numpy array. + # Raises + ValueError: In case of unknown `data_format` argument. + """ + #backend, _, _, _ = self.get_submodules_from_kwargs(kwargs) + backend = tf.keras.backend + + if data_format is None: + data_format = backend.image_data_format() + if data_format not in {'channels_first', 'channels_last'}: + raise ValueError('Unknown data_format ' + str(data_format)) + + if isinstance(x, np.ndarray): + return self._preprocess_numpy_input(x, version, data_format=data_format, mode=mode, **kwargs) + else: + return self._preprocess_symbolic_input(x, version, data_format=data_format, mode=mode, **kwargs) + + def preprocess_input_tf(self, x, data_format=None, version=1): + # x_temp = tf.identity(x) + x_temp = x + if data_format is None: + data_format = tf.keras.backend.image_data_format() + assert data_format in {'channels_last', 'channels_first'} + + if version == 1: + if data_format == 'channels_first': + x_temp = x_temp[:, ::-1, ...] + x_temp[:, 0, :, :] -= 93.5940 + x_temp[:, 1, :, :] -= 104.7624 + x_temp[:, 2, :, :] -= 129.1863 + else: + x_temp = x_temp[..., ::-1] + x_temp[:, :, :, 0] -= 93.5940 + x_temp[:, :, :, 1] -= 104.7624 + x_temp[:, :, :, 2] -= 129.1863 + + elif version == 2: + if data_format == 'channels_first': + x_temp = x_temp[:, ::-1, ...] 
+ x_temp[:, 0, :, :] -= 91.4953 + x_temp[:, 1, :, :] -= 103.8827 + x_temp[:, 2, :, :] -= 131.0912 + else: + tf_var = tf.Variable(tf.zeros(shape=x_temp.shape)) + x_temp = x_temp[..., ::-1] + tf_var[:, :, :, 0] = x_temp[:, :, :, 0] - 91.4953 + tf_var[:, :, :, 1] = x_temp[:, :, :, 1] - 103.8827 + tf_var[:, :, :, 2] = x_temp[:, :, :, 2] - 131.0912 + + x_temp[:, :, :, 0] -= 91.4953 + x_temp[:, :, :, 1] -= 103.8827 + x_temp[:, :, :, 2] -= 131.0912 + else: + raise NotImplementedError + + return tf_var + + def restore_image(self, x, data_format=None): + x_temp = np.copy(x) + # if data_format is None: + data_format = tf.keras.backend.image_data_format() + assert data_format in {'channels_last', 'channels_first'} + mean = [93.5940, 104.7624, 129.1863] + + # Zero-center by mean pixel + if data_format == 'channels_first': + if x_temp.ndim == 3: + x_temp[0, :, :] += mean[0] + x_temp[1, :, :] += mean[1] + x_temp[2, :, :] += mean[2] + else: + x_temp[:, 0, :, :] += mean[0] + x_temp[:, 1, :, :] += mean[1] + x_temp[:, 2, :, :] += mean[2] + else: + x_temp[..., 0] += mean[0] + x_temp[..., 1] += mean[1] + x_temp[..., 2] += mean[2] + + if data_format == 'channels_first': + # 'BGR'->'RGB' + if x_temp.ndim == 3: + x_temp = x_temp[::-1, ...] + else: + x_temp = x_temp[:, ::-1, ...] + else: + # 'BGR'->'RGB' + x_temp = x_temp[..., ::-1] + # Return + return x_temp + + def run(self, target_images): + # Run to completion. + self.start(target_images) + while self._cur_step < self.num_steps: + self.step() + + # Collect results. + pres = dnnlib.EasyDict() + + # For Perceptual or (Euclidean) Biometric loss + # pres.dlatents = self.get_dlatents() + # pres.noises = self.get_noises() + # pres.images = self.get_images() + + # For MIPGAN loss + pres.dlatents = self.get_dlatent_interp() + pres.noises = self.get_noises() + pres.images = self.get_images_interp() + + return pres + + def start(self, target_images, latents_dir, m_name): + assert self._Gs is not None + + # Prepare target images. + self._info('Preparing target images...') + target_images = np.asarray(target_images, dtype='float32') + target_images = (target_images + 1) * (255 / 2) + sh = target_images.shape # (2, 3, 1024, 1024) + + if sh[2] > self._x_1.shape[2]: + factor = sh[2] // self._x_1.shape[2] + target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5)) + sh = target_images.shape # (2, 3, 256, 256) + target_images = np.transpose(target_images, (0, 2, 3, 1)) # (2, 256, 256, 3) + + # Initialize optimization state. + self._info('Initializing optimization state...') + tflib.set_vars({self._x_1: target_images[0][None, :, :, :], # (1, 256, 256, 3) + self._x_2: target_images[1][None, :, :, :], # (1, 256, 256, 3) + self.dlatent_interp_tf: self.get_middle_morph_latent(latents_dir, m_name) + }) + tflib.run(self._noise_init_op) + self._opt.reset_optimizer_state() + self._cur_step = 0 + + def step(self): + assert self._cur_step is not None + if self._cur_step >= self.num_steps: + return + + # Hyperparameters. + t = self._cur_step / self.num_steps + noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2 + lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length) + lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi) + lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length) + learning_rate = self.initial_learning_rate * lr_ramp + + # Train. 
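+        # Descriptive note (added): a single run below performs one optimizer update on the
+        # interpolated W latent and the noise maps, and also fetches the four individual
+        # MIPGAN loss terms so they can be logged.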
+ feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate} + run_list = [self._opt_step, self._loss_perceptual, self._loss_identity, self._loss_id_diff, self._loss_ms_ssim, self._loss] + _, loss_perceptual_value, loss_identity_value, loss_id_diff_value, loss_ms_ssim_value, loss_value = tflib.run(run_list, feed_dict) + tflib.run(self._noise_normalize_op) + + # Print status. + self._cur_step += 1 + #if self._cur_step == self.num_steps or self._cur_step % 10 == 0: + self._info('%-8d%-12g%-12g%-12g%-12g%-12g' % (self._cur_step, loss_perceptual_value, loss_identity_value, loss_id_diff_value, loss_ms_ssim_value, loss_value)) + if self._cur_step == self.num_steps: + self._info('Done.') + + def get_cur_step(self): + return self._cur_step + + def get_dlatents(self): + return tflib.run(self._dlatents_expr, {self._noise_in: 0}) # (1,18,512) + + def get_noises(self): + return tflib.run(self._noise_vars) + + def get_images(self): + return tflib.run(self._images_expr, {self._noise_in: 0}) # (1,3,1024,1024) + # + def get_dlatent_interp(self): + return tflib.run(self._dlatent_interp_tf) # (1,18,512) + + def get_images_interp(self): + return tflib.run(self.i_m, {self._noise_in: 0}) # (1,3,1024,1024) + +#---------------------------------------------------------------------------- diff --git a/modules/preprocessor.py b/modules/preprocessor.py new file mode 100644 index 0000000000000000000000000000000000000000..fc22323330b886ca1e3d8159244ac17792dcf971 --- /dev/null +++ b/modules/preprocessor.py @@ -0,0 +1,126 @@ +import dlib +import PIL.Image +import bob.io.image +import scipy.ndimage +import numpy as np +from bob.extension import rc + +class FFHQCropper(object): + def __init__(self, dlib_lmd_path=rc['sg2_morph.dlib_lmd_path']): + """ + Instanciate a face cropper that behaves similarly to the one used to preprocess the FFHQ database. + + :param dlib_lmd_path: Path to the dlib landmark detector model + """ + self.detector = dlib.get_frontal_face_detector() + self.predictor = dlib.shape_predictor(dlib_lmd_path) + + def __call__(self, image): + """ + Run the cropper on the input image. + + :param image: input image in bob format (channels first) + :return : cropped image in bob format, with shape [3, 1024, 1024] + """ + # Assuming input image in bob format + channels_last_img = bob.io.image.to_matplotlib(image) + landmarks = self.detect_landmarks(channels_last_img) + cropped = self.crop_and_resize(channels_last_img, lm=landmarks) + return bob.io.image.to_bob(cropped) + + def detect_landmarks(self, image): + """ + Run the dlib landmark detector on the input image to build a list of landmarks. + + :param image: input image in matplotlib format (channels last) + :return: list of 68 landmarks in tuple format : [(x1, y1), (x2, y2), ...] + """ + detection = self.detector(image, 1)[0] + shape = self.predictor(image, detection) + return [(item.x, item.y) for item in shape.parts()] + + def crop_and_resize(self, img, lm, + output_size=1024, + transform_size=4096, + enable_padding=True): + """ + Crop, resize and align the image based on the provided landmarks (lm), + in the same way FFHQ has been preprocessed for training StyleGAN2 + + This code was entirely borrowed from + https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py (recreate_aligned_images() function), + with a few adaptations. 
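+        :param img: input image as a numpy array in matplotlib format (channels last, RGB)
+        :param lm: array-like of 68 (x, y) dlib landmarks, as returned by detect_landmarks
+        :param output_size: side length in pixels of the final square crop (1024, as used for FFHQ)
+        :param transform_size: intermediate resolution used for the quad transform before the final resize
+        :param enable_padding: whether to reflect-pad and blur the borders when the crop extends outside the image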
+ + """ + lm = np.array(lm) + lm_chin = lm[0 : 17] # left-right + lm_eyebrow_left = lm[17 : 22] # left-right + lm_eyebrow_right = lm[22 : 27] # left-right + lm_nose = lm[27 : 31] # top-down + lm_nostrils = lm[31 : 36] # top-down + lm_eye_left = lm[36 : 42] # left-clockwise + lm_eye_right = lm[42 : 48] # left-clockwise + lm_mouth_outer = lm[48 : 60] # left-clockwise + lm_mouth_inner = lm[60 : 68] # left-clockwise + + # Calculate auxiliary vectors. + eye_left = np.mean(lm_eye_left, axis=0) + eye_right = np.mean(lm_eye_right, axis=0) + eye_avg = (eye_left + eye_right) * 0.5 + eye_to_eye = eye_right - eye_left + mouth_left = lm_mouth_outer[0] + mouth_right = lm_mouth_outer[6] + mouth_avg = (mouth_left + mouth_right) * 0.5 + eye_to_mouth = mouth_avg - eye_avg + + # Choose oriented crop rectangle. + x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1] + x /= np.hypot(*x) + x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) + y = np.flipud(x) * [-1, 1] + c = eye_avg + eye_to_mouth * 0.1 + quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) + qsize = np.hypot(*x) * 2 + + # Convert to PIL (original code used PIL all the way through) + img = PIL.Image.fromarray(img) + + # Shrink. + shrink = int(np.floor(qsize / output_size * 0.5)) + if shrink > 1: + rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink))) + img = img.resize(rsize, PIL.Image.ANTIALIAS) + quad /= shrink + qsize /= shrink + + # Crop. + border = max(int(np.rint(qsize * 0.1)), 3) + crop = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1])))) + crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1])) + if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: + img = img.crop(crop) + quad -= crop[0:2] + + # Pad. + pad = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1])))) + pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0)) + if enable_padding and max(pad) > border - 4: + pad = np.maximum(pad, int(np.rint(qsize * 0.3))) + img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') + h, w, _ = img.shape + y, x, _ = np.ogrid[:h, :w, :1] + mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w-1-x) / pad[2]), 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h-1-y) / pad[3])) + blur = qsize * 0.02 + img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) + img += (np.median(img, axis=(0,1)) - img) * np.clip(mask, 0.0, 1.0) + img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB') + quad += pad[:2] + + # Transform. + img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR) + if output_size < transform_size: + img = img.resize((output_size, output_size), PIL.Image.ANTIALIAS) + + # Done ! Back to numpy format + img = np.array(img) + return img \ No newline at end of file diff --git a/modules/projector.py b/modules/projector.py new file mode 100755 index 0000000000000000000000000000000000000000..c09c3d6608dd83f504fecfeec9e2cf74065e555f --- /dev/null +++ b/modules/projector.py @@ -0,0 +1,265 @@ +# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. 
+# +# This work is made available under the Nvidia Source Code License-NC. +# To view a copy of this license, visit +# https://nvlabs.github.io/stylegan2/license.html + +# 14.09.2020 : Adjustements by Laurent Colbois (Idiap, Biometrics Security and Privacy) +# -> Adding two parameters to the __init__ method. +# -> Adding verbosity option to the run method +# -> Image postprocessing (adjust dynamic range) + + +from bob.extension import rc +import utils +import numpy as np +import tensorflow as tf +import dnnlib +import dnnlib.tflib as tflib + +import pickle + +#---------------------------------------------------------------------------- + +class Projector(object): + def __init__(self, vgg16_path=rc['sg2_morph.vgg16_path'], num_steps=1000): + """ + Instanciate an image projector into the W space of a StyleGAN network. + This class's code was borrowed from + https://github.com/NVlabs/stylegan2/blob/master/projector.py, + with a few adaptations. + + :param vgg16_path: path to the pickle file of the pretrained VGG16 network used for + computing the perceptual loss between images. + :param num_steps: Number of optimization steps to project a single image. + """ + self.num_steps = num_steps + with open(vgg16_path, 'rb') as pkl_file: + self._lpips = pickle.load(pkl_file) + + self.dlatent_avg_samples = 10000 + self.initial_learning_rate = 0.1 + self.initial_noise_factor = 0.05 + self.lr_rampdown_length = 0.25 + self.lr_rampup_length = 0.05 + self.noise_ramp_length = 0.75 + self.regularize_noise_weight = 1e5 + self.verbose = False + self.clone_net = True + self.proc_images_size = 256 + + self._Gs = None + self._minibatch_size = None + self._dlatent_avg = None + self._dlatent_std = None + self._noise_vars = None + self._noise_init_op = None + self._noise_normalize_op = None + self._dlatents_var = None + self._noise_in = None + self._dlatents_expr = None + self._images_expr = None + self._target_images_var = None + self._dist = None + self._loss = None + self._reg_sizes = None + self._lrate_in = None + self._opt = None + self._opt_step = None + self._cur_step = None + + def _info(self, *args): + if self.verbose: + print('Projector:', *args) + + def set_network(self, Gs): + """ + Assign the generator network Gs to the projector. + """ + self._Gs = Gs + self._minibatch_size = 1 + if self._Gs is None: + return + if self.clone_net: + self._Gs = self._Gs.clone() + + # Find dlatent stats. + self._info('Finding W midpoint and stddev using %d samples...' % self.dlatent_avg_samples) + latent_samples = np.random.RandomState(123).randn(self.dlatent_avg_samples, *self._Gs.input_shapes[0][1:]) + dlatent_samples = self._Gs.components.mapping.run(latent_samples, None)[:, :1, :] # [N, 1, 512] + self._dlatent_avg = np.mean(dlatent_samples, axis=0, keepdims=True) # [1, 1, 512] + self._dlatent_std = (np.sum((dlatent_samples - self._dlatent_avg) ** 2) / self.dlatent_avg_samples) ** 0.5 + self._info('std = %g' % self._dlatent_std) + + # Find noise inputs. 
+ self._info('Setting up noise inputs...') + self._noise_vars = [] + noise_init_ops = [] + noise_normalize_ops = [] + while True: + n = 'G_synthesis/noise%d' % len(self._noise_vars) + if not n in self._Gs.vars: + break + v = self._Gs.vars[n] + self._noise_vars.append(v) + noise_init_ops.append(tf.assign(v, tf.random_normal(tf.shape(v), dtype=tf.float32))) + noise_mean = tf.reduce_mean(v) + noise_std = tf.reduce_mean((v - noise_mean)**2)**0.5 + noise_normalize_ops.append(tf.assign(v, (v - noise_mean) / noise_std)) + self._info(n, v) + self._noise_init_op = tf.group(*noise_init_ops) + self._noise_normalize_op = tf.group(*noise_normalize_ops) + + # Image output graph. + self._info('Building image output graph...') + self._dlatents_var = tf.Variable(tf.zeros([self._minibatch_size] + list(self._dlatent_avg.shape[1:])), name='dlatents_var') + self._noise_in = tf.placeholder(tf.float32, [], name='noise_in') + dlatents_noise = tf.random.normal(shape=self._dlatents_var.shape) * self._noise_in + self._dlatents_expr = tf.tile(self._dlatents_var + dlatents_noise, [1, self._Gs.components.synthesis.input_shape[1], 1]) + self._images_expr = self._Gs.components.synthesis.get_output_for(self._dlatents_expr, randomize_noise=False) + + # Downsample image to 256x256 if it's larger than that. VGG was built for 224x224 images. + proc_images_expr = (self._images_expr + 1) * (255 / 2) + sh = proc_images_expr.shape.as_list() + if sh[2] > self.proc_images_size: + factor = sh[2] // self.proc_images_size + proc_images_expr = tf.reduce_mean(tf.reshape(proc_images_expr, [-1, sh[1], sh[2] // factor, factor, sh[2] // factor, factor]), axis=[3,5]) + + # Loss graph. + self._info('Building loss graph...') + self._target_images_var = tf.Variable(tf.zeros(proc_images_expr.shape), name='target_images_var') + if self._lpips is None: + self._lpips = misc.load_pkl('http://d36zk2xti64re0.cloudfront.net/stylegan1/networks/metrics/vgg16_zhang_perceptual.pkl') + self._dist = self._lpips.get_output_for(proc_images_expr, self._target_images_var) + self._loss = tf.reduce_sum(self._dist) + + # Noise regularization graph. + self._info('Building noise regularization graph...') + reg_loss = 0.0 + for v in self._noise_vars: + sz = v.shape[2] + while True: + reg_loss += tf.reduce_mean(v * tf.roll(v, shift=1, axis=3))**2 + tf.reduce_mean(v * tf.roll(v, shift=1, axis=2))**2 + if sz <= 8: + break # Small enough already + v = tf.reshape(v, [1, 1, sz//2, 2, sz//2, 2]) # Downscale + v = tf.reduce_mean(v, axis=[3, 5]) + sz = sz // 2 + self._loss += reg_loss * self.regularize_noise_weight + + # Optimizer. + self._info('Setting up optimizer...') + self._lrate_in = tf.placeholder(tf.float32, [], name='lrate_in') + self._opt = dnnlib.tflib.Optimizer(learning_rate=self._lrate_in) + self._opt.register_gradients(self._loss, [self._dlatents_var] + self._noise_vars) + self._opt_step = self._opt.apply_updates() + + def __call__(self, target_image, verbose=False): + """ + Project the provided image + + :param target_image: Image to project in bob format. Tensor of shape [3, 1024, 1024] + of uint8 values in [0, 255] + :param verbose: Display progression + + :return: Dictionary containing + 'image': projection of the target image, in bob format. Tensor of shape [3, 1024, 1024] + of uint8 values in [0, 255] + 'w_latent': W-space latent vector corresponding to the projection. Shape [latent_dim] + 'd_latents': repetition of the w_latent as many time as the synthesis network has style inputs. 
Shape [num_style_inputs, latent_dim] + 'noises': optimized noise inputs for the projected image + + """ + return self.run(target_image, verbose) + + def run(self, target_image, verbose=False): + """ + Project the provided image + + :param target_image: Image to project in bob format. Tensor of shape [3, 1024, 1024] + of uint8 values in [0, 255] + :param verbose: Display progression + + :return: Dictionary containing + 'image': projection of the target image, in bob format. Tensor of shape [3, 1024, 1024] + of uint8 values in [0, 255] + 'w_latent': W-space latent vector corresponding to the projection. Shape [latent_dim] + 'd_latents': repetition of the w_latent as many time as the synthesis network has style inputs. Shape [num_style_inputs, latent_dim] + 'noises': optimized noise inputs for the projected image + + """ + # Run to completion. + target_images = np.stack([target_image]) + self.start(target_images) + while self._cur_step < self.num_steps: + if verbose: + print('Step {}/{}'.format(self._cur_step + 1, self.num_steps)) + self.step() + + # Collect results. + pres = dnnlib.EasyDict() + pres.dlatents = self.get_dlatents()[0] + pres.w_latent = pres.dlatents[0, :] + pres.noises = self.get_noises() + pres.image = utils.adjust_dynamic_range(self.get_images()[0], [0, 255], 'uint8') + return pres + + def start(self, target_images): + assert self._Gs is not None + + # Prepare target images. + self._info('Preparing target images...') + target_images = np.asarray(target_images, dtype='float32') + sh = target_images.shape + assert sh[0] == self._minibatch_size + if sh[2] > self._target_images_var.shape[2]: + factor = sh[2] // self._target_images_var.shape[2] + target_images = np.reshape(target_images, [-1, sh[1], sh[2] // factor, factor, sh[3] // factor, factor]).mean((3, 5)) + + # Initialize optimization state. + self._info('Initializing optimization state...') + tflib.set_vars({self._target_images_var: target_images, self._dlatents_var: np.tile(self._dlatent_avg, [self._minibatch_size, 1, 1])}) + tflib.run(self._noise_init_op) + self._opt.reset_optimizer_state() + self._cur_step = 0 + + def step(self): + assert self._cur_step is not None + if self._cur_step >= self.num_steps: + return + if self._cur_step == 0: + self._info('Running...') + + # Hyperparameters. + t = self._cur_step / self.num_steps + noise_strength = self._dlatent_std * self.initial_noise_factor * max(0.0, 1.0 - t / self.noise_ramp_length) ** 2 + lr_ramp = min(1.0, (1.0 - t) / self.lr_rampdown_length) + lr_ramp = 0.5 - 0.5 * np.cos(lr_ramp * np.pi) + lr_ramp = lr_ramp * min(1.0, t / self.lr_rampup_length) + learning_rate = self.initial_learning_rate * lr_ramp + + # Train. + feed_dict = {self._noise_in: noise_strength, self._lrate_in: learning_rate} + _, dist_value, loss_value = tflib.run([self._opt_step, self._dist, self._loss], feed_dict) + tflib.run(self._noise_normalize_op) + + # Print status. 
+ self._cur_step += 1 + if self._cur_step == self.num_steps or self._cur_step % 10 == 0: + self._info('%-8d%-12g%-12g' % (self._cur_step, dist_value, loss_value)) + if self._cur_step == self.num_steps: + self._info('Done.') + + def get_cur_step(self): + return self._cur_step + + def get_dlatents(self): + return tflib.run(self._dlatents_expr, {self._noise_in: 0}) + + def get_noises(self): + return tflib.run(self._noise_vars) + + def get_images(self): + return tflib.run(self._images_expr, {self._noise_in: 0}) + +#---------------------------------------------------------------------------- diff --git a/src/facemorpher/__init__.py b/src/facemorpher/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1eef49e87a744d1ec122370e5554d813747faa88 --- /dev/null +++ b/src/facemorpher/__init__.py @@ -0,0 +1 @@ +from . import morpher \ No newline at end of file diff --git a/src/facemorpher/aligner.py b/src/facemorpher/aligner.py new file mode 100644 index 0000000000000000000000000000000000000000..64c44589e69df62626ac87ca575b1e123b0447ca --- /dev/null +++ b/src/facemorpher/aligner.py @@ -0,0 +1,107 @@ +# This original code is by Alyssa Quek, cloned from the face_morpher repository. +# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/aligner.py + + +""" +Align face and image sizes +""" +import cv2 +import numpy as np + +def positive_cap(num): + """ Cap a number to ensure positivity + + :param num: positive or negative number + :returns: (overflow, capped_number) + """ + if num < 0: + return 0, abs(num) + else: + return num, 0 + +def roi_coordinates(rect, size, scale): + """ Align the rectangle into the center and return the top-left coordinates + within the new size. If rect is smaller, we add borders. + + :param rect: (x, y, w, h) bounding rectangle of the face + :param size: (width, height) are the desired dimensions + :param scale: scaling factor of the rectangle to be resized + :returns: 4 numbers. Top-left coordinates of the aligned ROI. + (x, y, border_x, border_y). All values are > 0. 
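+  Example (illustrative): with rect=(100, 100, 200, 200), size=(600, 500) and scale=1,
+  the rectangle centre is (200, 200) and the function returns (0, 0, 50, 100).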
+ """ + rectx, recty, rectw, recth = rect + new_height, new_width = size + mid_x = int((rectx + rectw/2) * scale) + mid_y = int((recty + recth/2) * scale) + roi_x = mid_x - int(new_width/2) + roi_y = mid_y - int(new_height/2) + + roi_x, border_x = positive_cap(roi_x) + roi_y, border_y = positive_cap(roi_y) + return roi_x, roi_y, border_x, border_y + +def scaling_factor(rect, size): + """ Calculate the scaling factor for the current image to be + resized to the new dimensions + + :param rect: (x, y, w, h) bounding rectangle of the face + :param size: (width, height) are the desired dimensions + :returns: floating point scaling factor + """ + new_height, new_width = size + rect_h, rect_w = rect[2:] + height_ratio = rect_h / new_height + width_ratio = rect_w / new_width + scale = 1 + if height_ratio > width_ratio: + new_recth = 0.8 * new_height + scale = new_recth / rect_h + else: + new_rectw = 0.8 * new_width + scale = new_rectw / rect_w + return scale + +def resize_image(img, scale): + """ Resize image with the provided scaling factor + + :param img: image to be resized + :param scale: scaling factor for resizing the image + """ + cur_height, cur_width = img.shape[:2] + new_scaled_height = int(scale * cur_height) + new_scaled_width = int(scale * cur_width) + + return cv2.resize(img, (new_scaled_width, new_scaled_height)) + +def resize_align(img, points, size): + """ Resize image and associated points, align face to the center + and crop to the desired size + + :param img: image to be resized + :param points: *m* x 2 array of points + :param size: (height, width) tuple of new desired size + """ + new_height, new_width = size + + # Resize image based on bounding rectangle + rect = cv2.boundingRect(np.array([points], np.int32)) + scale = scaling_factor(rect, size) + img = resize_image(img, scale) + + # Align bounding rect to center + cur_height, cur_width = img.shape[:2] + roi_x, roi_y, border_x, border_y = roi_coordinates(rect, size, scale) + roi_h = np.min([new_height-border_y, cur_height-roi_y]) + roi_w = np.min([new_width-border_x, cur_width-roi_x]) + + # Crop to supplied size + crop = np.zeros((new_height, new_width, 3), img.dtype) + crop[border_y:border_y+roi_h, border_x:border_x+roi_w] = ( + img[roi_y:roi_y+roi_h, roi_x:roi_x+roi_w]) + + # Scale and align face points to the crop + points[:, 0] = (points[:, 0] * scale) + (border_x - roi_x) + points[:, 1] = (points[:, 1] * scale) + (border_y - roi_y) + + return (crop, points) diff --git a/src/facemorpher/blender.py b/src/facemorpher/blender.py new file mode 100644 index 0000000000000000000000000000000000000000..f8b8a892f59f762422f4fc6ebc81cba1f8e335aa --- /dev/null +++ b/src/facemorpher/blender.py @@ -0,0 +1,138 @@ +# This original code is by Alyssa Quek, cloned from the face_morpher repository. 
+# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/blender.py + + +import cv2 +import numpy as np +import scipy.sparse + +def mask_from_points(size, points): + """ Create a mask of supplied size from supplied points + :param size: tuple of output mask size + :param points: array of [x, y] points + :returns: mask of values 0 and 255 where + 255 indicates the convex hull containing the points + """ + radius = 10 # kernel size + kernel = np.ones((radius, radius), np.uint8) + + mask = np.zeros(size, np.uint8) + cv2.fillConvexPoly(mask, cv2.convexHull(points), 255) + mask = cv2.erode(mask, kernel) + + return mask + +def overlay_image(foreground_image, mask, background_image): + """ Overlay foreground image onto the background given a mask + :param foreground_image: foreground image points + :param mask: [0-255] values in mask + :param background_image: background image points + :returns: image with foreground where mask > 0 overlaid on background image + """ + foreground_pixels = mask > 0 + background_image[..., :3][foreground_pixels] = foreground_image[..., :3][foreground_pixels] + return background_image + +def apply_mask(img, mask): + """ Apply mask to supplied image + :param img: max 3 channel image + :param mask: [0-255] values in mask + :returns: new image with mask applied + """ + masked_img = np.copy(img) + num_channels = 3 + for c in range(num_channels): + masked_img[..., c] = img[..., c] * (mask / 255) + + return masked_img + +def weighted_average(img1, img2, percent=0.5): + if percent <= 0: + return img2 + elif percent >= 1: + return img1 + else: + return cv2.addWeighted(img1, percent, img2, 1-percent, 0) + +def alpha_feathering(src_img, dest_img, img_mask, blur_radius=15): + mask = cv2.blur(img_mask, (blur_radius, blur_radius)) + mask = mask / 255.0 + + result_img = np.empty(src_img.shape, np.uint8) + for i in range(3): + result_img[..., i] = src_img[..., i] * mask + dest_img[..., i] * (1-mask) + + return result_img + +def poisson_blend(img_source, dest_img, img_mask, offset=(0, 0)): + # http://opencv.jp/opencv2-x-samples/poisson-blending + img_target = np.copy(dest_img) + import pyamg + # compute regions to be blended + region_source = ( + max(-offset[0], 0), + max(-offset[1], 0), + min(img_target.shape[0] - offset[0], img_source.shape[0]), + min(img_target.shape[1] - offset[1], img_source.shape[1])) + region_target = ( + max(offset[0], 0), + max(offset[1], 0), + min(img_target.shape[0], img_source.shape[0] + offset[0]), + min(img_target.shape[1], img_source.shape[1] + offset[1])) + region_size = (region_source[2] - region_source[0], + region_source[3] - region_source[1]) + + # clip and normalize mask image + img_mask = img_mask[region_source[0]:region_source[2], + region_source[1]:region_source[3]] + + # create coefficient matrix + coff_mat = scipy.sparse.identity(np.prod(region_size), format='lil') + for y in range(region_size[0]): + for x in range(region_size[1]): + if img_mask[y, x]: + index = x + y * region_size[1] + coff_mat[index, index] = 4 + if index + 1 < np.prod(region_size): + coff_mat[index, index + 1] = -1 + if index - 1 >= 0: + coff_mat[index, index - 1] = -1 + if index + region_size[1] < np.prod(region_size): + coff_mat[index, index + region_size[1]] = -1 + if index - region_size[1] >= 0: + coff_mat[index, index - region_size[1]] = -1 + coff_mat = coff_mat.tocsr() + + # create poisson matrix for b + poisson_mat = pyamg.gallery.poisson(img_mask.shape) + # for each layer (ex. 
RGB) + for num_layer in range(img_target.shape[2]): + # get subimages + t = img_target[region_target[0]:region_target[2], + region_target[1]:region_target[3], num_layer] + s = img_source[region_source[0]:region_source[2], + region_source[1]:region_source[3], num_layer] + t = t.flatten() + s = s.flatten() + + # create b + b = poisson_mat * s + for y in range(region_size[0]): + for x in range(region_size[1]): + if not img_mask[y, x]: + index = x + y * region_size[1] + b[index] = t[index] + + # solve Ax = b + x = pyamg.solve(coff_mat, b, verb=False, tol=1e-10) + + # assign x to target image + x = np.reshape(x, region_size) + x[x > 255] = 255 + x[x < 0] = 0 + x = np.array(x, img_target.dtype) + img_target[region_target[0]:region_target[2], + region_target[1]:region_target[3], num_layer] = x + + return img_target diff --git a/src/facemorpher/locator.py b/src/facemorpher/locator.py new file mode 100644 index 0000000000000000000000000000000000000000..b13344f72eeb13502a51775d7b594bd1c0a7020d --- /dev/null +++ b/src/facemorpher/locator.py @@ -0,0 +1,115 @@ +# This original code is by Alyssa Quek, cloned from the face_morpher repository. +# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/locator.py + + +""" +Locate face points +""" + +import cv2 +import numpy as np +import os.path as path +import dlib +import os + + +dlib_detector = dlib.get_frontal_face_detector() + # The following line has been modified to remove the DLIB_DATA_DIR +dlib_predictor = dlib.shape_predictor( + '/idiap/home/esarkar/temp/Experiments/FaceMorph/bob/project/morph/gen/morphs/shape_predictor_68_face_landmarks.dat') + +def boundary_points(points, width_percent=0.1, height_percent=0.1): + """ Produce additional boundary points + :param points: *m* x 2 array of x,y points + :param width_percent: [-1, 1] percentage of width to taper inwards. Negative for opposite direction + :param height_percent: [-1, 1] percentage of height to taper downwards. Negative for opposite direction + :returns: 2 additional points at the top corners + """ + x, y, w, h = cv2.boundingRect(np.array([points], np.int32)) + spacerw = int(w * width_percent) + spacerh = int(h * height_percent) + return [[x+spacerw, y+spacerh], + [x+w-spacerw, y+spacerh]] + + +def face_points(img, add_boundary_points=True): + return face_points_dlib(img, add_boundary_points) + +def face_points_dlib(img, add_boundary_points=True): + """ Locates 68 face points using dlib (http://dlib.net) + Requires shape_predictor_68_face_landmarks.dat to be in face_morpher/data + Download at: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 + :param img: an image array + :param add_boundary_points: bool to add additional boundary points + :returns: Array of x,y face points. 
Empty array if no face found + """ + try: + points = [] + rgbimg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + rects = dlib_detector(rgbimg, 1) + + if rects and len(rects) > 0: + # We only take the first found face + shapes = dlib_predictor(rgbimg, rects[0]) + points = np.array([(shapes.part(i).x, shapes.part(i).y) for i in range(68)], np.int32) + + if add_boundary_points: + # Add more points inwards and upwards as dlib only detects up to eyebrows + points = np.vstack([ + points, + boundary_points(points, 0.1, -0.03), + boundary_points(points, 0.13, -0.05), + boundary_points(points, 0.15, -0.08), + boundary_points(points, 0.33, -0.12)]) + + return points + except Exception as e: + print(e) + return [] + +def face_points_stasm(img, add_boundary_points=True): + import stasm + """ Locates 77 face points using stasm (http://www.milbo.users.sonic.net/stasm) + + :param img: an image array + :param add_boundary_points: bool to add 2 additional points + :returns: Array of x,y face points. Empty array if no face found + """ + try: + points = stasm.search_single(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)) + except Exception as e: + print('Failed finding face points: ', e) + return [] + + points = points.astype(np.int32) + if len(points) == 0: + return points + + if add_boundary_points: + return np.vstack([points, boundary_points(points)]) + + return points + +def average_points(point_set): + """ Averages a set of face points from images + + :param point_set: *n* x *m* x 2 array of face points. \\ + *n* = number of images. *m* = number of face points per image + """ + return np.mean(point_set, 0).astype(np.int32) + +def weighted_average_points(start_points, end_points, percent=0.5): + """ Weighted average of two sets of supplied points + + :param start_points: *m* x 2 array of start face points. + :param end_points: *m* x 2 array of end face points. + :param percent: [0, 1] percentage weight on start_points + :returns: *m* x 2 array of weighted average points + """ + if percent <= 0: + return end_points + elif percent >= 1: + return start_points + else: + return np.asarray(start_points*percent + end_points*(1-percent), np.int32) diff --git a/src/facemorpher/morpher.py b/src/facemorpher/morpher.py new file mode 100644 index 0000000000000000000000000000000000000000..bb1ebace40261cc2ef55ca94e45c3e80499dcce8 --- /dev/null +++ b/src/facemorpher/morpher.py @@ -0,0 +1,165 @@ +# This code is based on the original one by Alyssa Quek, cloned from the face_morpher repository. +# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/morpher.py + +# This file is a modification. +# Adjustements made by Eklavya Sarkar (Idiap Research Institute, Biometrics Security and Privacy), Jan-Feb 2021, +# Save image at different alphas -> morph() + +""" +:: + + Morph from source to destination face or + Morph through all images in a folder + + Usage: + morpher.py (--src=<src_path> --dest=<dest_path> | --images=<folder>) + [--width=<width>] [--height=<height>] + [--num=<num_frames>] [--fps=<frames_per_second>] + [--out_frames=<folder>] [--out_video=<filename>] + [--plot] [--background=(black|transparent|average)] + + Options: + -h, --help Show this screen. 
+ --src=<src_imgpath> Filepath to source image (.jpg, .jpeg, .png) + --dest=<dest_imgpath> Filepath to destination image (.jpg, .jpeg, .png) + --images=<folder> Folderpath to images + --width=<width> Custom width of the images/video [default: 500] + --height=<height> Custom height of the images/video [default: 600] + --num=<num_frames> Number of morph frames [default: 20] + --fps=<fps> Number frames per second for the video [default: 10] + --out_frames=<folder> Folder path to save all image frames + --out_video=<filename> Filename to save a video + --plot Flag to plot images to result.png [default: False] + --background=<bg> Background of images to be one of (black|transparent|average) [default: black] + --version Show version. +""" +from docopt import docopt +import os +import numpy as np +import cv2 +from . import locator +from . import aligner +from . import warper +from . import blender +from . import plotter +from . import videoer + +def verify_args(args): + if args['--images'] is None: + valid = os.path.isfile(args['--src']) & os.path.isfile(args['--dest']) + if not valid: + print('--src=%s or --dest=%s file does not exist. Double check the supplied paths' % ( + args['--src'], args['--dest'])) + exit(1) + else: + valid = os.path.isdir(args['--images']) + if not valid: + print('--images=%s is not a valid directory' % args['--images']) + exit(1) + +def load_image_points(path, size): + img = cv2.imread(path) + points = locator.face_points(img) + + if len(points) == 0: + print('No face in %s' % path) + return None, None + else: + return aligner.resize_align(img, points, size) + +def load_valid_image_points(imgpaths, size): + for path in imgpaths: + img, points = load_image_points(path, size) + if img is not None: + #print(path) + yield (img, points) + +def list_imgpaths(images_folder=None, src_image=None, dest_image=None): + if images_folder is None: + yield src_image + yield dest_image + else: + for fname in os.listdir(images_folder): + if (fname.lower().endswith('.jpg') or + fname.lower().endswith('.png') or + fname.lower().endswith('.jpeg')): + yield os.path.join(images_folder, fname) + +def morph(imgpaths, src_img, src_points, dest_img, dest_points, + video, width=500, height=600, num_frames=20, fps=10, + out_frames=None, out_video=None, plot=False, background='black', alpha=0.5): + """ + Create a morph sequence from source to destination image + + :param src_img: ndarray source image + :param src_points: source image array of x,y face points + :param dest_img: ndarray destination image + :param dest_points: destination image array of x,y face points + :param video: facemorpher.videoer.Video object + """ + size = (height, width) + stall_frames = np.clip(int(fps*0.15), 1, fps) # Show first & last longer + plt = plotter.Plotter(plot, num_images=num_frames, out_folder=out_frames) + num_frames -= (stall_frames * 2) # No need to process src and dest image + + plt.plot_one(src_img) + video.write(src_img, 1) + + morph_fname = '_'.join([img.split('/')[-1] for img in imgpaths]) + + # Produce morph frames! 
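+  # Each intermediate frame takes a weighted average of the two landmark sets,
+  # warps both faces onto those points and blends them, so the sequence runs
+  # from the source (percent = 1) toward the destination (percent = 0).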
+ for i, percent in enumerate(np.linspace(1, 0, num=num_frames)): + points = locator.weighted_average_points(src_points, dest_points, percent) + src_face = warper.warp_image(src_img, src_points, points, size) + end_face = warper.warp_image(dest_img, dest_points, points, size) + average_face = blender.weighted_average(src_face, end_face, percent) + + if background in ('transparent', 'average'): + mask = blender.mask_from_points(average_face.shape[:2], points) + average_face = np.dstack((average_face, mask)) + + if background == 'average': + average_background = blender.weighted_average(src_img, dest_img, percent) + average_face = blender.overlay_image(average_face, mask, average_background) + + if i == num_frames//10*(10*alpha): # Yes, this is correct + plt.save_one(average_face, filename=morph_fname) + + #plt.save(average_face) + plt.plot_one(average_face) + video.write(average_face) + + plt.plot_one(dest_img) + video.write(dest_img, stall_frames) + plt.show() + +def morpher(imgpaths, width=500, height=600, num_frames=20, fps=10, + out_frames=None, out_video=None, plot=False, background='black', alpha=0.5): + """ + Create a morph sequence from multiple images in imgpaths + + :param imgpaths: array or generator of image paths + """ + video = videoer.Video(out_video, fps, width, height) + images_points_gen = load_valid_image_points(imgpaths, (height, width)) + src_img, src_points = next(images_points_gen) + for dest_img, dest_points in images_points_gen: + morph(imgpaths, src_img, src_points, dest_img, dest_points, video, + width, height, num_frames, fps, out_frames, out_video, plot, background, alpha) + src_img, src_points = dest_img, dest_points + video.end() + +def main(): + args = docopt(__doc__, version='Face Morpher 1.0') + verify_args(args) + + morpher(list_imgpaths(args['--images'], args['--src'], args['--dest']), + int(args['--width']), int(args['--height']), + int(args['--num']), int(args['--fps']), + args['--out_frames'], args['--out_video'], + args['--plot'], args['--background']) + + +if __name__ == "__main__": + main() diff --git a/src/facemorpher/plotter.py b/src/facemorpher/plotter.py new file mode 100644 index 0000000000000000000000000000000000000000..c868c74302e7b0aea631c2c840a729ba8c27e3ff --- /dev/null +++ b/src/facemorpher/plotter.py @@ -0,0 +1,99 @@ +# This original code is by Alyssa Quek, cloned from the face_morpher repository. 
+# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/plotter.py + +""" +Plot and save images +""" + +import matplotlib.pyplot as plt +import matplotlib.image as mpimg +import os.path +import numpy as np +import cv2 + +def bgr2rgb(img): + # OpenCV's BGR to RGB + rgb = np.copy(img) + rgb[..., 0], rgb[..., 2] = img[..., 2], img[..., 0] + return rgb + +def check_do_plot(func): + def inner(self, *args, **kwargs): + if self.do_plot: + func(self, *args, **kwargs) + + return inner + +def check_do_save(func): + def inner(self, *args, **kwargs): + if self.do_save: + func(self, *args, **kwargs) + + return inner + +class Plotter(object): + def __init__(self, plot=True, rows=0, cols=0, num_images=0, out_folder=None, out_filename=None): + self.out_folder = out_folder + self.save_counter = 1 + self.plot_counter = 1 + self.do_plot = plot + self.do_save = out_filename is not None + self.out_filename = out_filename + self.set_filepath(out_folder) + + if (rows + cols) == 0 and num_images > 0: + # Auto-calculate the number of rows and cols for the figure + self.rows = np.ceil(np.sqrt(num_images / 2.0)) + self.cols = np.ceil(num_images / self.rows) + else: + self.rows = rows + self.cols = cols + + def set_filepath(self, folder): + if folder is None: + self.filepath = None + return + + if not os.path.exists(folder): + os.makedirs(folder) + self.filepath = os.path.join(folder, 'frame{0:03d}.png') + self.do_save = True + + @check_do_save + def save(self, img, filename=None): + + if self.filepath: + filename = self.filepath.format(self.save_counter) + self.save_counter += 1 + elif filename is None: + filename = self.out_filename + mpimg.imsave(filename, bgr2rgb(img)) + print(filename + ' saved') + + def save_one(self, img, filename=None): + mpimg.imsave(os.path.join(self.out_folder, filename), bgr2rgb(img), format='png') + #print(os.path.join(self.out_folder,filename) + ' saved') + + @check_do_plot + def plot_one(self, img): + p = plt.subplot(self.rows, self.cols, self.plot_counter) + p.axes.get_xaxis().set_visible(False) + p.axes.get_yaxis().set_visible(False) + plt.imshow(bgr2rgb(img)) + self.plot_counter += 1 + + @check_do_plot + def show(self): + plt.gcf().subplots_adjust(hspace=0.05, wspace=0, + left=0, bottom=0, right=1, top=0.98) + plt.axis('off') + #plt.show() + plt.savefig('result.png') + + @check_do_plot + def plot_mesh(self, points, tri, color='k'): + """ plot triangles """ + for tri_indices in tri.simplices: + t_ext = [tri_indices[0], tri_indices[1], tri_indices[2], tri_indices[0]] + plt.plot(points[t_ext, 0], points[t_ext, 1], color) diff --git a/src/facemorpher/videoer.py b/src/facemorpher/videoer.py new file mode 100644 index 0000000000000000000000000000000000000000..b9ff6c7bcaf884b2f8e255180b5523ca6b386841 --- /dev/null +++ b/src/facemorpher/videoer.py @@ -0,0 +1,40 @@ +# This original code is by Alyssa Quek, cloned from the face_morpher repository. 
+# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/videoer.py + +""" +Create a video with image frames +""" + +import cv2 +import numpy as np + + +def check_write_video(func): + def inner(self, *args, **kwargs): + if self.video: + return func(self, *args, **kwargs) + else: + pass + return inner + + +class Video(object): + def __init__(self, filename, fps, w, h): + self.filename = filename + + if filename is None: + self.video = None + else: + fourcc = cv2.VideoWriter_fourcc(*'MJPG') + self.video = cv2.VideoWriter(filename, fourcc, fps, (w, h), True) + + @check_write_video + def write(self, img, num_times=1): + for i in range(num_times): + self.video.write(img[..., :3]) + + @check_write_video + def end(self): + print(self.filename + ' saved') + self.video.release() diff --git a/src/facemorpher/warper.py b/src/facemorpher/warper.py new file mode 100644 index 0000000000000000000000000000000000000000..1e355f5c1952045f72640c48d96b4b4cda6bcf6f --- /dev/null +++ b/src/facemorpher/warper.py @@ -0,0 +1,143 @@ +# This original code is by Alyssa Quek, cloned from the face_morpher repository. +# To view the source repository of this code, visit: +# https://github.com/alyssaq/face_morpher/blob/dlib/facemorpher/warper.py + +import numpy as np +import scipy.spatial as spatial + +def bilinear_interpolate(img, coords): + """ Interpolates over every image channel + http://en.wikipedia.org/wiki/Bilinear_interpolation + + :param img: max 3 channel image + :param coords: 2 x _m_ array. 1st row = xcoords, 2nd row = ycoords + :returns: array of interpolated pixels with same shape as coords + """ + int_coords = np.int32(coords) + x0, y0 = int_coords + dx, dy = coords - int_coords + + # 4 Neighour pixels + q11 = img[y0, x0] + q21 = img[y0, x0+1] + q12 = img[y0+1, x0] + q22 = img[y0+1, x0+1] + + btm = q21.T * dx + q11.T * (1 - dx) + top = q22.T * dx + q12.T * (1 - dx) + inter_pixel = top * dy + btm * (1 - dy) + + return inter_pixel.T + +def grid_coordinates(points): + """ x,y grid coordinates within the ROI of supplied points + + :param points: points to generate grid coordinates + :returns: array of (x, y) coordinates + """ + xmin = np.min(points[:, 0]) + xmax = np.max(points[:, 0]) + 1 + ymin = np.min(points[:, 1]) + ymax = np.max(points[:, 1]) + 1 + return np.asarray([(x, y) for y in range(ymin, ymax) + for x in range(xmin, xmax)], np.uint32) + +def process_warp(src_img, result_img, tri_affines, dst_points, delaunay): + """ + Warp each triangle from the src_image only within the + ROI of the destination image (points in dst_points). + """ + roi_coords = grid_coordinates(dst_points) + # indices to vertices. 
-1 if pixel is not in any triangle + roi_tri_indices = delaunay.find_simplex(roi_coords) + + for simplex_index in range(len(delaunay.simplices)): + coords = roi_coords[roi_tri_indices == simplex_index] + num_coords = len(coords) + out_coords = np.dot(tri_affines[simplex_index], + np.vstack((coords.T, np.ones(num_coords)))) + x, y = coords.T + result_img[y, x] = bilinear_interpolate(src_img, out_coords) + + return None + +def triangular_affine_matrices(vertices, src_points, dest_points): + """ + Calculate the affine transformation matrix for each + triangle (x,y) vertex from dest_points to src_points + + :param vertices: array of triplet indices to corners of triangle + :param src_points: array of [x, y] points to landmarks for source image + :param dest_points: array of [x, y] points to landmarks for destination image + :returns: 2 x 3 affine matrix transformation for a triangle + """ + ones = [1, 1, 1] + for tri_indices in vertices: + src_tri = np.vstack((src_points[tri_indices, :].T, ones)) + dst_tri = np.vstack((dest_points[tri_indices, :].T, ones)) + mat = np.dot(src_tri, np.linalg.inv(dst_tri))[:2, :] + yield mat + +def warp_image(src_img, src_points, dest_points, dest_shape, dtype=np.uint8): + # Resultant image will not have an alpha channel + num_chans = 3 + src_img = src_img[:, :, :3] + + rows, cols = dest_shape[:2] + result_img = np.zeros((rows, cols, num_chans), dtype) + + delaunay = spatial.Delaunay(dest_points) + tri_affines = np.asarray(list(triangular_affine_matrices( + delaunay.simplices, src_points, dest_points))) + + process_warp(src_img, result_img, tri_affines, dest_points, delaunay) + + return result_img + +def test_local(): + from functools import partial + import cv2 + import scipy.misc + import locator + import aligner + from matplotlib import pyplot as plt + + # Load source image + face_points_func = partial(locator.face_points, '../data') + base_path = '../females/Screenshot 2015-03-04 17.11.12.png' + src_path = '../females/BlDmB5QCYAAY8iw.jpg' + src_img = cv2.imread(src_path) + + # Define control points for warps + src_points = face_points_func(src_path) + base_img = cv2.imread(base_path) + base_points = face_points_func(base_path) + + size = (600, 500) + src_img, src_points = aligner.resize_align(src_img, src_points, size) + base_img, base_points = aligner.resize_align(base_img, base_points, size) + result_points = locator.weighted_average_points(src_points, base_points, 0.2) + + # Perform transform + dst_img1 = warp_image(src_img, src_points, result_points, size) + dst_img2 = warp_image(base_img, base_points, result_points, size) + + import blender + ave = blender.weighted_average(dst_img1, dst_img2, 0.6) + mask = blender.mask_from_points(size, result_points) + blended_img = blender.poisson_blend(dst_img1, dst_img2, mask) + + plt.subplot(2, 2, 1) + plt.imshow(ave) + plt.subplot(2, 2, 2) + plt.imshow(dst_img1) + plt.subplot(2, 2, 3) + plt.imshow(dst_img2) + plt.subplot(2, 2, 4) + + plt.imshow(blended_img) + plt.show() + + +if __name__ == "__main__": + test_local() diff --git a/src/opencv/utils.py b/src/opencv/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..dc05dc18189038edcc96130dfd2c8f9a5b3ac460 --- /dev/null +++ b/src/opencv/utils.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python + +# Copyright (c) 2016 Satya Mallick <spmallick@learnopencv.com> +# All rights reserved. No warranty, explicit or implicit, provided. 
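+# Helper routines for triangle-based face morphing: reading landmark points
+# from .tem files, affine-warping and alpha-blending corresponding Delaunay
+# triangles of an image pair, and drawing Delaunay/Voronoi overlays for debugging.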
+ +from imutils.face_utils import FaceAligner +from imutils.face_utils import rect_to_bb +from imutils import face_utils +import pandas as pd +import numpy as np +import cv2 as cv +import random +import sys +import dlib +import imutils + +def readPoints(path): + '''Read points from .tem file''' + # Create an array of points. + points = [] + # Read points + with open(path) as file: + no_lines = int(file.readline()) + for i, line in enumerate(file): + if 0 <= i < no_lines: + x, y = line.split() + points.append((int(float(x)), int(float(y)))) + + return points + + +def applyAffineTransform(src, srcTri, dstTri, size): + '''Apply affine transform calculated using srcTri and dstTri to src and output an image of size.''' + # Given a pair of triangles, find the affine transform. + warpMat = cv.getAffineTransform(np.float32(srcTri), np.float32(dstTri)) + + # Apply the Affine Transform just found to the src image + dst = cv.warpAffine(src, warpMat, (size[0], size[1]), None, + flags=cv.INTER_LINEAR, borderMode=cv.BORDER_REFLECT_101) + + return dst + + +def morphTriangle(img1, img2, img, t1, t2, t, alpha): + '''Warps and alpha blends triangular regions from img1 and img2 to img''' + # Find bounding rectangle for each triangle + r1 = cv.boundingRect(np.float32([t1])) + r2 = cv.boundingRect(np.float32([t2])) + r = cv.boundingRect(np.float32([t])) + + # Offset points by left top corner of the respective rectangles + t1Rect = [] + t2Rect = [] + tRect = [] + + for i in range(0, 3): + tRect.append(((t[i][0] - r[0]), (t[i][1] - r[1]))) + t1Rect.append(((t1[i][0] - r1[0]), (t1[i][1] - r1[1]))) + t2Rect.append(((t2[i][0] - r2[0]), (t2[i][1] - r2[1]))) + + # Get mask by filling triangle + mask = np.zeros((r[3], r[2], 3), dtype=np.float32) + cv.fillConvexPoly(mask, np.int32(tRect), (1.0, 1.0, 1.0), 16, 0) + + # Apply warpImage to small rectangular patches + img1Rect = img1[r1[1]:r1[1] + r1[3], r1[0]:r1[0] + r1[2]] + img2Rect = img2[r2[1]:r2[1] + r2[3], r2[0]:r2[0] + r2[2]] + + size = (r[2], r[3]) + warpImage1 = applyAffineTransform(img1Rect, t1Rect, tRect, size) + warpImage2 = applyAffineTransform(img2Rect, t2Rect, tRect, size) + + # Alpha blend rectangular patches + imgRect = (1.0 - alpha) * warpImage1 + alpha * warpImage2 + + # Copy triangular region of the rectangular patch to the output image + img[r[1]:r[1]+r[3], r[0]:r[0]+r[2]] = img[r[1]:r[1] + + r[3], r[0]:r[0]+r[2]] * (1 - mask) + imgRect * mask + + +def rect_contains(rect, point): + '''Check if a point is inside a rectangle''' + if point[0] < rect[0]: + return False + elif point[1] < rect[1]: + return False + elif point[0] > rect[2]: + return False + elif point[1] > rect[3]: + return False + return True + + +def draw_point(img, p, color): + '''Draw a point''' + cv.circle(img, p, 2, color, cv.FILLED, cv.LINE_AA, 0) + + +def draw_voronoi(img, subdiv): + '''Draw voronoi diagram''' + (facets, centers) = subdiv.getVoronoiFacetList([]) + + for i in range(0, len(facets)): + ifacet_arr = [] + for f in facets[i]: + ifacet_arr.append(f) + + ifacet = np.array(ifacet_arr, np.int) + color = (random.randint(0, 255), random.randint( + 0, 255), random.randint(0, 255)) + + cv.fillConvexPoly(img, ifacet, color, cv.LINE_AA, 0) + ifacets = np.array([ifacet]) + cv.polylines(img, ifacets, True, (0, 0, 0), 1, cv.LINE_AA, 0) + cv.circle(img, (centers[i][0], centers[i][1]), + 3, (0, 0, 0), cv.FILLED, cv.LINE_AA, 0) + + +def draw_delaunay(img, subdiv, delaunay_color): + '''Draw delaunay triangles''' + triangleList = subdiv.getTriangleList() + size = img.shape + r = (0, 0, 
size[1], size[0]) + + for t in triangleList: + pt1 = (t[0], t[1]) + pt2 = (t[2], t[3]) + pt3 = (t[4], t[5]) + + if rect_contains(r, pt1) and rect_contains(r, pt2) and rect_contains(r, pt3): + cv.line(img, pt1, pt2, delaunay_color, 1, cv.LINE_AA, 0) + cv.line(img, pt2, pt3, delaunay_color, 1, cv.LINE_AA, 0) + cv.line(img, pt3, pt1, delaunay_color, 1, cv.LINE_AA, 0) + + +def calculateDelaunayTriangles(rect, subdiv, points, img, win_delaunay, delaunay_color, draw=False): + '''Calculate delanauy triangle''' + + # Insert points into subdiv + for p in points: + subdiv.insert((p[0], p[1])) + + # List of triangles. Each triangle is a list of 3 points (6 numbers) + triangleList = subdiv.getTriangleList() + + # Find the indices of triangles in the points array + delaunayTri = [] + + for t in triangleList: + pt = [] + pt.append((t[0], t[1])) + pt.append((t[2], t[3])) + pt.append((t[4], t[5])) + + pt1 = (t[0], t[1]) + pt2 = (t[2], t[3]) + pt3 = (t[4], t[5]) + + if rect_contains(rect, pt1) and rect_contains(rect, pt2) and rect_contains(rect, pt3): + ind = [] + for j in range(0, 3): + for k in range(0, len(points)): + if(abs(pt[j][0] - points[k][0]) < 0.1 and abs(pt[j][1] - points[k][1]) < 0.5): + ind.append(k) + + if len(ind) == 3: + delaunayTri.append((ind[0], ind[1], ind[2])) + + # Draw lines + if draw: + cv.line(img, pt1, pt2, delaunay_color, 1, cv.LINE_AA, 0) + cv.line(img, pt2, pt3, delaunay_color, 1, cv.LINE_AA, 0) + cv.line(img, pt3, pt1, delaunay_color, 1, cv.LINE_AA, 0) + imgS = cv.resize(img, (413, 531)) + + return delaunayTri + +def drawLanmarks(rect, points, img, col): + (x, y, w, h) = face_utils.rect_to_bb(rect) + for (x, y) in points: + cv.circle(img, (x, y), 2, col, -1) + +def readPermutations(file, header, footer): + data = pd.read_csv(file) + return data.apply(lambda x: header + x + footer).values + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/utils.py b/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a06e2ce6ba70fe32ddef4675bb7bc5e5e7401563 --- /dev/null +++ b/utils.py @@ -0,0 +1,66 @@ +# Copyright (c) 2021, Idiap Research Institute. All rights reserved. +# +# This work is made available under a custom license, Non-Commercial Research and Educational Use Only +# To view a copy of this license, visit +# https://gitlab.idiap.ch/bob/bob.paper.icassp2022_morph_generate/-/blob/master/LICENSE.txt + + +import numpy as np +import bob.io.image +import matplotlib.pyplot as plt +from dnnlib import tflib + +def fix_randomness(seed=None): + config = {'rnd.np_random_seed': seed, + 'rnd.tf_random_seed': 'auto'} + tflib.init_tf(config) + +def adjust_dynamic_range(image, target_range, dtype): + """ + Update the dynamic range of the input image to lie in the required range. + Example : + adjust_dynamic_range(image, target_range=[0, 255], dtype='uint8') + maps the image to the [0, 255] interval and casts it as a uint8. + """ + minval = np.min(image) + maxval = np.max(image) + return ((target_range[0]*(maxval - image) + target_range[1]*(image-minval))/(maxval - minval)).astype(dtype) + +def lerp(p0, p1, n, start=0.0, end=1.0): + """ + Linear interpolation between two points + Inputs: + p0, p1: Rank-1 numpy vectors with same dimension D + n: int, total number of points to return + start, end : control the range of the interpolation, i.e. the range of interpolation parameter t. + Interpolated points are computed as (1-t)*p0 + t*p1 where t can takes linearly spaced values + in the range [start, end] (included). 
+ + Returns: + p : Numpy vector of shape (n, D) containing all interpolated points + """ + t = np.linspace(start, end, n)[:, np.newaxis] + p = (1-t) * p0[np.newaxis, :] + t * p1[np.newaxis, :] + return p + +def facegrid(images, nrows, ncols, figsize=None, labels=None): + """ + Produces an image grid of size (nrows, ncols) showing each image + contained in the input `images` list. + An optional `labels` list can also be provided, in which case the `labels` will be + used as title for each subplot in the grid. + """ + if figsize is None: + figsize = (2*ncols, 2*nrows) + if labels is None: + labels = [None]*len(images) + fig, ax = plt.subplots(nrows=nrows, ncols=ncols, squeeze=False, figsize=figsize, tight_layout=True) + for i, (face, label) in enumerate(zip(images, labels)): + currax = ax[i//ncols, i%ncols] + if face is not None: + currax.imshow(bob.io.image.to_matplotlib(face)) + currax.axis('off') + if label is not None: + currax.set_title(label) + + return fig, ax diff --git a/version.txt b/version.txt new file mode 100644 index 0000000000000000000000000000000000000000..afaf360d37fb71bcfa8cc082882f910ac2628bda --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ +1.0.0 \ No newline at end of file
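A minimal usage sketch of how the utils.py helpers compose, assuming the repository root is on the Python path with its dependencies (bob, dnnlib) installed, and substituting random arrays for generated face images; the latent vectors p0, p1 and the 64x64 stand-in images are illustrative only:

import numpy as np
from utils import lerp, adjust_dynamic_range, facegrid

# Two stand-in latent vectors and five linearly interpolated points between them
p0, p1 = np.random.randn(512), np.random.randn(512)
latents = lerp(p0, p1, n=5)  # shape (5, 512), t in [0, 1]

# Stand-in "generator outputs" in bob's (channels, height, width) layout,
# mapped to uint8 [0, 255] with adjust_dynamic_range before display
raw = [np.random.rand(3, 64, 64) for _ in range(len(latents))]
faces = [adjust_dynamic_range(img, target_range=[0, 255], dtype='uint8') for img in raw]

# One-row grid labelled with the interpolation parameter t of each frame
fig, ax = facegrid(faces, nrows=1, ncols=5,
                   labels=['t=%.2f' % t for t in np.linspace(0, 1, 5)])
fig.savefig('interpolation_preview.png')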