From 95ac3ba0763dea4bfad045589bfe3ce85752ccf5 Mon Sep 17 00:00:00 2001 From: Jerome Pansanel Date: Tue, 20 Jan 2015 12:43:47 +0100 Subject: [PATCH] Import from the previous Subversion repository --- AUTHORS | 3 + CMakeLists.txt | 257 ++++ COPYING | 339 ++++++ ChangeLog | 388 ++++++ Doxyfile | 1517 +++++++++++++++++++++++ INSTALL | 61 + NEWS | 48 + THANKS | 7 + TODO | 14 + cmake/modules/FindMySQL.cmake | 178 +++ cmake/modules/FindOpenBabel2.cmake | 97 ++ cmake/modules/MacroEnsureVersion.cmake | 116 ++ include/CMakeLists.txt | 2 + include/mychem/CMakeLists.txt | 5 + include/mychem/config.h.cmake | 54 + include/mychem/dlhandler.h | 44 + include/mychem/serialization.h | 52 + include/mychem/structures.h | 57 + scripts/mychemdb-manager.py | 491 ++++++++ src/CMakeLists.txt | 60 + src/conversion.c | 1526 ++++++++++++++++++++++++ src/conversion.h | 598 ++++++++++ src/conversion_wrapper.cpp | 340 ++++++ src/conversion_wrapper.h | 105 ++ src/descriptors/groupcontrib.cpp | 199 +++ src/dlhandler_unix.cpp | 66 + src/dlhandler_win32.cpp | 48 + src/fingerprints/finger2.cpp | 278 +++++ src/fingerprints/finger3.cpp | 318 +++++ src/helper.c | 182 +++ src/helper.h | 137 +++ src/helper_wrapper.cpp | 66 + src/helper_wrapper.h | 54 + src/modification.c | 255 ++++ src/modification.h | 138 +++ src/modification_wrapper.cpp | 130 ++ src/modification_wrapper.h | 65 + src/molmatch.c | 579 +++++++++ src/molmatch.h | 247 ++++ src/molmatch_wrapper.cpp | 187 +++ src/molmatch_wrapper.h | 70 ++ src/mychem.def | 156 +++ src/mychemdb.sql | 170 +++ src/mychemdb_win32.sql | 170 +++ src/property.c | 881 ++++++++++++++ src/property.h | 518 ++++++++ src/property_wrapper.cpp | 498 ++++++++ src/property_wrapper.h | 165 +++ src/serialization.cpp | 291 +++++ tests/CMakeLists.txt | 94 ++ tests/conversion_test.cpp | 719 +++++++++++ tests/data/1CRN.molecule | 669 +++++++++++ tests/data/1CRN.pdb | 426 +++++++ tests/data/1H-indole.bin | Bin 0 -> 128 bytes tests/data/1H-indole.mol | 38 + 
tests/data/1H-indole.molecule | 38 + tests/data/1H-indole.smi | 1 + tests/data/L-tryptophan.bin | Bin 0 -> 128 bytes tests/data/L-tryptophan.can | 1 + tests/data/L-tryptophan.mol | 60 + tests/data/L-tryptophan.molecule | 60 + tests/data/glycine.V3000 | 21 + tests/data/glycine.cml | 15 + tests/data/glycine.inchi | 1 + tests/data/glycine.inchi102 | 1 + tests/data/glycine.mol | 14 + tests/data/glycine.mol2 | 17 + tests/data/glycine.molecule | 14 + tests/data/glycine.smi | 1 + tests/data/glycine_sodium_salt.mol | 16 + tests/data/glycine_wo_sodium.mol | 15 + tests/data/hydrogenated_glycine.mol | 24 + tests/helper_test.cpp | 175 +++ tests/modification_test.cpp | 226 ++++ tests/molmatch_test.cpp | 384 ++++++ tests/mysql_handler.cpp | 134 +++ tests/mysql_handler.h | 66 + tests/property_test.cpp | 673 +++++++++++ tests/test_functions.cpp | 404 +++++++ tests/test_functions.h | 47 + 80 files changed, 16581 insertions(+) create mode 100644 AUTHORS create mode 100644 CMakeLists.txt create mode 100644 COPYING create mode 100644 ChangeLog create mode 100644 Doxyfile create mode 100644 INSTALL create mode 100644 NEWS create mode 100644 THANKS create mode 100644 TODO create mode 100755 cmake/modules/FindMySQL.cmake create mode 100644 cmake/modules/FindOpenBabel2.cmake create mode 100644 cmake/modules/MacroEnsureVersion.cmake create mode 100644 include/CMakeLists.txt create mode 100644 include/mychem/CMakeLists.txt create mode 100644 include/mychem/config.h.cmake create mode 100644 include/mychem/dlhandler.h create mode 100644 include/mychem/serialization.h create mode 100644 include/mychem/structures.h create mode 100644 scripts/mychemdb-manager.py create mode 100644 src/CMakeLists.txt create mode 100644 src/conversion.c create mode 100644 src/conversion.h create mode 100644 src/conversion_wrapper.cpp create mode 100644 src/conversion_wrapper.h create mode 100644 src/descriptors/groupcontrib.cpp create mode 100644 src/dlhandler_unix.cpp create mode 100644 src/dlhandler_win32.cpp 
create mode 100644 src/fingerprints/finger2.cpp create mode 100644 src/fingerprints/finger3.cpp create mode 100644 src/helper.c create mode 100644 src/helper.h create mode 100644 src/helper_wrapper.cpp create mode 100644 src/helper_wrapper.h create mode 100644 src/modification.c create mode 100644 src/modification.h create mode 100644 src/modification_wrapper.cpp create mode 100644 src/modification_wrapper.h create mode 100644 src/molmatch.c create mode 100644 src/molmatch.h create mode 100644 src/molmatch_wrapper.cpp create mode 100644 src/molmatch_wrapper.h create mode 100644 src/mychem.def create mode 100644 src/mychemdb.sql create mode 100644 src/mychemdb_win32.sql create mode 100644 src/property.c create mode 100644 src/property.h create mode 100644 src/property_wrapper.cpp create mode 100644 src/property_wrapper.h create mode 100644 src/serialization.cpp create mode 100644 tests/CMakeLists.txt create mode 100644 tests/conversion_test.cpp create mode 100644 tests/data/1CRN.molecule create mode 100644 tests/data/1CRN.pdb create mode 100644 tests/data/1H-indole.bin create mode 100644 tests/data/1H-indole.mol create mode 100644 tests/data/1H-indole.molecule create mode 100644 tests/data/1H-indole.smi create mode 100644 tests/data/L-tryptophan.bin create mode 100644 tests/data/L-tryptophan.can create mode 100644 tests/data/L-tryptophan.mol create mode 100644 tests/data/L-tryptophan.molecule create mode 100644 tests/data/glycine.V3000 create mode 100644 tests/data/glycine.cml create mode 100644 tests/data/glycine.inchi create mode 100644 tests/data/glycine.inchi102 create mode 100644 tests/data/glycine.mol create mode 100644 tests/data/glycine.mol2 create mode 100644 tests/data/glycine.molecule create mode 100644 tests/data/glycine.smi create mode 100644 tests/data/glycine_sodium_salt.mol create mode 100644 tests/data/glycine_wo_sodium.mol create mode 100644 tests/data/hydrogenated_glycine.mol create mode 100644 tests/helper_test.cpp create mode 100644 
tests/modification_test.cpp create mode 100644 tests/molmatch_test.cpp create mode 100644 tests/mysql_handler.cpp create mode 100644 tests/mysql_handler.h create mode 100644 tests/property_test.cpp create mode 100644 tests/test_functions.cpp create mode 100644 tests/test_functions.h diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..301d4e4 --- /dev/null +++ b/AUTHORS @@ -0,0 +1,3 @@ +Jerome Pansanel +Aurelie De Luca +Bjoern Gruening diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..17e53db --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,257 @@ +project(mychem) + +set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules) + +cmake_minimum_required(VERSION 2.6.0) + +if(COMMAND cmake_policy) + cmake_policy(SET CMP0003 NEW) +endif(COMMAND cmake_policy) + +# this is adapted from KDE's FindKDE4Internal.cmake : default the build type to +# "release with debug info". +# +# We will define two other build types: Debug and Release. +# These names are case-insensitive i.e. you can do -DCMAKE_BUILD_TYPE=debug +if (NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE RelWithDebInfo) +endif (NOT CMAKE_BUILD_TYPE) + +message(STATUS "The build type is ${CMAKE_BUILD_TYPE}") + +include (CheckCXXCompilerFlag) +include (MacroEnsureVersion) + +# Mychem version number. 
+set(MYCHEM_VERSION_MAJOR 0) +set(MYCHEM_VERSION_MINOR 9) +set(MYCHEM_VERSION_RELEASE 2) +set(MYCHEM_VERSION "${MYCHEM_VERSION_MAJOR}.${MYCHEM_VERSION_MINOR}.${MYCHEM_VERSION_RELEASE}") + +#----------------------------------------------------------------------------- +# Define test environment +#----------------------------------------------------------------------------- + +if (NOT MY_DB) + set (MY_DB "mysql") +endif (NOT MY_DB) + +if (MY_HOST AND MY_USER AND MY_PASSWD) + set(MY_ARGS -h ${MY_HOST} -u ${MY_USER} -p ${MY_PASSWD} -b ${MY_DB}) + enable_testing() + message( STATUS "Test module enabled" ) +else (MY_HOST AND MY_USER AND MY_PASSWD) + if (MY_HOST AND MY_USER) + set(MY_ARGS -h ${MY_HOST} -u ${MY_USER} -b ${MY_DB}) + enable_testing() + message( STATUS "Test module enabled" ) + else (MY_HOST AND MY_USER) + message( STATUS "Test module disabled" ) + endif (MY_HOST AND MY_USER) +endif (MY_HOST AND MY_USER AND MY_PASSWD) + +option(BUILD_SHARED "enable static build support" ON) + + +#----------------------------------------------------------------------------- +# Software dependency +#----------------------------------------------------------------------------- + +include( FindPkgConfig ) + +# Open Babel dependency +find_package( OpenBabel2 REQUIRED ) + +# MySQL +find_package( MySQL REQUIRED ) + + +#----------------------------------------------------------------------------- +# Installation directory +# Set the install path. 
Check the difference between 32bits and 64 bits +#----------------------------------------------------------------------------- + +if (NOT LIB_INSTALL_DIR) + if ("${MySQL_PLUGIN_DIR}" STREQUAL "") + set( LIB_INSTALL_DIR "${CMAKE_INSTALL_PREFIX}/lib${LIB_SUFFIX}" ) + else ("${MySQL_PLUGIN_DIR}" STREQUAL "") + set( LIB_INSTALL_DIR "${MySQL_PLUGIN_DIR}" ) + endif ("${MySQL_PLUGIN_DIR}" STREQUAL "") +endif (NOT LIB_INSTALL_DIR) +message(STATUS "Mychem installation directory: ${LIB_INSTALL_DIR}") + + +#----------------------------------------------------------------------------- +# Set platform specific stuff +#----------------------------------------------------------------------------- + +include(CheckIncludeFile) +include(CheckIncludeFileCXX) +include(CheckSymbolExists) +include(CheckTypeSize) +include(CheckCSourceCompiles) + +check_include_file(stdlib.h HAVE_STLID_H) +check_include_file(stdio.h HAVE_STDIO_H) +check_include_file(string.h HAVE_STRING_H) +check_include_file_cxx(sstream HAVE_SSTREAM) + +if (WIN32) + # Set some generic values + add_definitions( -DWIN32 -D__WIN__ ) + # add definitions for OB in WIN32 + add_definitions( -DUSING_OBDLL -DUSING_DYNAMIC_LIBS) + + if (CYGWIN) + add_definitions( -D__CYGWIN__) + endif (CYGWIN) + + if (MSVC) + # check if it's better to use set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS or + # add_definitions + # 'class1' : inherits 'class2::member' via dominance + add_definitions(-wd4250) + # 'identifier' : class 'type' needs to have dll-interface to be used by clients of class 'type2' + add_definitions(-wd4251) + # non DLL-interface classkey 'identifier' used as base for DLL-interface classkey 'identifier' + add_definitions(-wd4275) + # 'conversion' : truncation of constant value + add_definitions(-wd4309) + # 'identifier' : 'function' the inline specifier cannot be used when a friend declaration refers to a specialization of a function template + add_definitions(-wd4396) + # decorated name length exceeded + add_definitions(-wd4503) + # 
identifier was truncated to '255' characters in the debug information + add_definitions(-wd4786) + + if (MINGW) + add_definitions(-D__MINGW32__) + endif (MINGW) + + if (CMAKE_COMPILER_2005) + # Disable deprecation warnings for standard C and STL functions in + # VS2005 and later + add_definitions(-D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE) + add_definitions(-D_SCL_SECURE_NO_DEPRECATE) + # 'identifier' : no suitable definition provided for explicit template instantiation request + add_definitions(-wd4661) + endif (CMAKE_COMPILER_2005) + endif (MSVC) +endif (WIN32) + +if (UNIX) + if (APPLE) + set(CMAKE_INSTALL_NAME_DIR ${LIB_INSTALL_DIR}) + endif (APPLE) +endif (UNIX) + +if (APPLE) + set (CMAKE_SHARED_LINKER_FLAGS "-single_module -multiply_defined suppress ${CMAKE_SHARED_LINKER_FLAGS}") + set (CMAKE_MODULE_LINKER_FLAGS "-multiply_defined suppress ${CMAKE_MODULE_LINKER_FLAGS}") + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-common") +endif (APPLE) + +if (CMAKE_SYSTEM_NAME MATCHES Linux) + # Fix a bug with dynamic plugin + add_definitions( -D__CYGWIN__) + + if (CMAKE_COMPILER_IS_GNUCXX) + set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--fatal-warnings -Wl,--no-undefined -lc ${CMAKE_SHARED_LINKER_FLAGS}") + set (CMAKE_MODULE_LINKER_FLAGS "-Wl,--fatal-warnings -Wl,--no-undefined -lc ${CMAKE_MODULE_LINKER_FLAGS}") + + set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--enable-new-dtags ${CMAKE_SHARED_LINKER_FLAGS}") + set (CMAKE_MODULE_LINKER_FLAGS "-Wl,--enable-new-dtags ${CMAKE_MODULE_LINKER_FLAGS}") + set (CMAKE_EXE_LINKER_FLAGS "-Wl,--enable-new-dtags ${CMAKE_EXE_LINKER_FLAGS}") + endif (CMAKE_COMPILER_IS_GNUCXX) + + if (CMAKE_C_COMPILER MATCHES "icc") + set ( CMAKE_SHARED_LINKER_FLAGS "-Wl,--fatal-warnings -Wl,--no-undefined -lc ${CMAKE_SHARED_LINKER_FLAGS}") + set ( CMAKE_MODULE_LINKER_FLAGS "-Wl,--fatal-warnings -Wl,--no-undefined -lc ${CMAKE_MODULE_LINKER_FLAGS}") + endif (CMAKE_C_COMPILER MATCHES "icc") + +endif 
(CMAKE_SYSTEM_NAME MATCHES Linux) + + +#----------------------------------------------------------------------------- +# Compiler specific settings +#----------------------------------------------------------------------------- + +if (MSVC) +endif (MSVC) + +if (CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") + set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG -DQT_NO_DEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "-g -O2 -fno-reorder-blocks -fno-schedule-insns -fno-inline") + set(CMAKE_CXX_FLAGS_DEBUGFULL "-g3 -fno-inline") + set(CMAKE_CXX_FLAGS_PROFILE "-g3 -fno-inline -ftest-coverage -fprofile-arcs") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g -std=gnu89") + set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG -DQT_NO_DEBUG") + set(CMAKE_C_FLAGS_DEBUG "-g -O2 -fno-reorder-blocks -fno-schedule-insns -fno-inline") + set(CMAKE_C_FLAGS_DEBUGFULL "-g3 -fno-inline") + set(CMAKE_C_FLAGS_PROFILE "-g3 -fno-inline -ftest-coverage -fprofile-arcs") + + if (CMAKE_SYSTEM_NAME MATCHES Linux) + set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-long-long -std=iso9899:1990 -Wundef -Wcast-align -Werror-implicit-function-declaration -Wchar-subscripts -Wall -W -Wpointer-arith -Wwrite-strings -Wformat-security -Wmissing-format-attribute -fno-common") + set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wnon-virtual-dtor -Wno-long-long -ansi -Wundef -Wcast-align -Wchar-subscripts -Wall -W -Wpointer-arith -Wformat-security -fno-check-new -fno-common") + endif (CMAKE_SYSTEM_NAME MATCHES Linux) + + # gcc under windows + if (MINGW) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--export-all-symbols -Wl,--disable-auto-import") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -Wl,--export-all-symbols -Wl,--disable-auto-import") + endif (MINGW) + + # get the gcc version: take the first major.minor.patch triple of the --version output (any major, multi-digit components allowed, e.g. 4.8.5 or 13.3.0) + exec_program(${CMAKE_C_COMPILER} ARGS --version OUTPUT_VARIABLE _gcc_version_info) + + string (REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+" _gcc_version "${_gcc_version_info}") + # gcc on mac just reports: "gcc (GCC) 3.3 
20030304 ..." without the patch level, handle this here: + if (NOT _gcc_version) + string (REGEX REPLACE ".*\\(GCC\\).* ([34]\\.[0-9]) .*" "\\1.0" _gcc_version "${_gcc_version_info}") + endif (NOT _gcc_version) + + macro_ensure_version("4.1.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_1) + macro_ensure_version("4.2.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_2) + macro_ensure_version("4.3.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_3) + + # save a little by making local statics not threadsafe + # ### do not enable it for older compilers, see + # ### http://gcc.gnu.org/bugzilla/show_bug.cgi?id=31806 + if (GCC_IS_NEWER_THAN_4_3) + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-threadsafe-statics") + endif (GCC_IS_NEWER_THAN_4_3) + + set(_GCC_COMPILED_WITH_BAD_ALLOCATOR FALSE) + if (GCC_IS_NEWER_THAN_4_1) + exec_program(${CMAKE_C_COMPILER} ARGS -v OUTPUT_VARIABLE _gcc_alloc_info) + string(REGEX MATCH "(--enable-libstdcxx-allocator=mt)" _GCC_COMPILED_WITH_BAD_ALLOCATOR "${_gcc_alloc_info}") + endif (GCC_IS_NEWER_THAN_4_1) + +endif (CMAKE_COMPILER_IS_GNUCXX) + +if (CMAKE_C_COMPILER MATCHES "icc") + + # Select flags. 
+ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") + set(CMAKE_CXX_FLAGS_RELEASE "-O2 -DNDEBUG -DQT_NO_DEBUG") + set(CMAKE_CXX_FLAGS_DEBUG "-O2 -g -fno-inline -noalign") + set(CMAKE_CXX_FLAGS_DEBUGFULL "-g -fno-inline -noalign") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g") + set(CMAKE_C_FLAGS_RELEASE "-O2 -DNDEBUG -DQT_NO_DEBUG") + set(CMAKE_C_FLAGS_DEBUG "-O2 -g -fno-inline -noalign") + set(CMAKE_C_FLAGS_DEBUGFULL "-g -fno-inline -noalign") + + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -ansi -Wall -w1 -Wpointer-arith -fno-common") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ansi -Wall -w1 -Wpointer-arith -fno-exceptions -fno-common") +endif (CMAKE_C_COMPILER MATCHES "icc") + + +#----------------------------------------------------------------------------- +# Subdirectories +#----------------------------------------------------------------------------- + +add_subdirectory( include ) +add_subdirectory( src ) +add_subdirectory( tests ) diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..d511905 --- /dev/null +++ b/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. 
+ + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..1faf08f --- /dev/null +++ b/ChangeLog @@ -0,0 +1,388 @@ +2014-09-12 Jerome Pansanel + + * Release 0.9.2 + + * Add possibility to select database in unitary tests + + * Fix argument testing for functions running with binary data + +2012-02-13 Jerome Pansanel + + * Release 0.9.1 + + * Fix stereochemistry issue + +2011-12-09 Jerome Pansanel + + * Release 0.9.0 + + * Update the documentation + + * Fix a bug when loading Open Babel plugins + + * Add support for Open Babel 2.3.0 + +2011-04-04 Jerome Pansanel + + * Release 0.8.2 + + * Fix a bug related to the fingerprint generation + + * Fix a bug related to OBMol object serialization + +2010-09-07 Jerome Pansanel + + * Release 0.8.1 + + * Fix URLs in Mychem documentation + + * Fix an important bug in the serialized function + + * Upgrade the test for molmatch functions + +2010-07-06 Jerome Pansanel + + * Release 0.8.0 + +2010-07-06 Jerome Pansanel + + * Fix molmatch and property tests + +2010-07-06 Jerome Pansanel + + * The documentation has been updated + +2010-06-29 Jerome Pansanel + + * The mychemdb-manager.py script has been added + +2010-06-10 Jerome Pansanel + + * MySQL 5.1 support added + +2009-11-24 Jerome Pansanel + + * Build fix + +2009-09-30 Jerome Pansanel + + * Release 0.7.0 + +2009-09-29 Jerome Pansanel + + * The documentation has been updated (new section about file formats) + + * Compilation under Microsoft Windows XP has been successfully tested + +2009-09-15 Jerome Pansanel + + * The Mychem software has been completely rewritten + + * src/conversion.c, src/conversion.h: + - mol2_to_molecule function added + - molecule_to_mol2 function added + + * src/helper.c, src/helper.h, src/helper_wrapper.h, + src/helper_wrapper.cpp + - inchi_version function added + + * Many bug fixes + + * If a function fails, it returns NULL + + * The Mychem3 documentation has been updated + + * Mychem2 is no longer supported + +2009-06-03 Jerome Pansanel + + *
src/conversion/conversion.c, src/conversion/conversion.h: + - pdb_to_molecule function added + +2009-06-02 Jerome Pansanel + + * src/serialization.cpp, src/include/serialization.h: + - Serialization of OBMol object added + + * src/conversion/conversion.c, src/conversion/conversion.h: + - molecule_to_serializedOBMol function added + + * src/molmatch/molmatch_wrapper.cpp: + - structure_match is now using serialized OBMol object + +2008-12-30 Jerome Pansanel + + * src/dlhandler_unix.cpp: + - Fix the dlhandler for Mac OS X + + * Documentation updated + + * Release 0.6.0 + +2008-12-10 Jerome Pansanel + + * Documentation updated + +2008-12-05 Jerome Pansanel + + * Test code updated (switch from C to C++) + +2008-12-02 Jerome Pansanel + + * Fix many code lines for an easy Win32 compilation + +2008-11-21 Jerome Pansanel + + * src/molmatch/molmatch.c: + - bit_fp_or function added + - bit_fp_count function added + +2008-11-20 Jerome Pansanel + + * src/molmatch/molmatch.c: + - bit_fp_and function added + +2008-11-19 Jerome Pansanel + + * Default molecule type is now MOL + + * Mychem3 is fully working.
+ + * The Mychem3 documentation has been updated + + * Several bug fixes + +2008-11-12 Jerome Pansanel + + * Default molecule type is now CML + + * src/conversion/conversion.c, src/conversion/conversion.h: + - molecule_to_inchi function added + - inchi_to_molecule function added + +2008-11-11 Jerome Pansanel + + * CMakeLists.txt: + - The file has been updated to better handle the compiler flags + + * cmake/modules/MacroEnsureVersion.cmake: + - new file + +2008-11-10 Jerome Pansanel + + * Add wrapper for the helper module (fix the include of babelconfig.h) + + * conversion, modification, molmatch and property modules: + - fix an important issue with the arguments given by MySQL + +2008-06-19 Jerome Pansanel + + * Release 0.5.9 + + * Bug fixes + +2008-06-18 Jerome Pansanel + + * src/conversion/conversion.c, src/conversion/conversion.h: + - fingerprint3 function added + - fingerprint4 function added + + * doc/mychem/installation.xml: + - Description of the windows installation added + - Code formatting fixed + +2008-06-16 Jerome Pansanel + + * Windows version of Mychem is working + +2008-06-10 Jerome Pansanel + + * src/molmatch/molmatch.c, src/molmatch/molmatch.h, + src/molmatch/molmatch_wrapper.cpp, + src/molmatch/molmatch_wrapper.h, + src/molmatch/molmatch.sql: + - substruct_atom_ids function added + +2008-04-08 Jerome Pansanel + + * Release 0.5.5 + + * Fix some spelling in the documentation + +2008-04-04 Jerome Pansanel + + * src/modification/modification.c, src/modification/modification.h, + src/modification/modification_wrapper.cpp, + src/modification/modification_wrapper.h, + src/modification/modification.sql + - strip_salts function added + + * src/conversion/conversion.c, src/conversion/conversion.h, + src/conversion/conversion_wrapper.cpp, + src/conversion/conversion_wrapper.h, + src/conversion/conversion.sql + - molecule_to_canonical_smiles function added + + * doc/mychem/command-reference.xml + - Add documentation for the new functions + - Update
documentation for some functions + +2008-03-29 Jerome Pansanel + + * src/property/property.c, src/property/property.h, + src/property/property_wrapper.cpp, src/property/property_wrapper.h + - is_2D function added + - is_3D function added + - is_chiral function added + - number_of_rings function added + +2008-03-28 Jerome Pansanel + + * src/property/property.c, src/property/property.h, + src/property/property_wrapper.cpp, src/property/property_wrapper.h + - number_of_acceptors function added + - number_of_donors function added + - molpsa function added + - molmr function added + - mollogp function added + + * Updating doc/mychem/command-reference.xml + +2008-03-19 Jerome Pansanel + + * Release 0.5.2 + + * Updating doc and ChangeLog + +2008-03-18 Jerome Pansanel + + * src/conversion/conversion.c, src/helper/helper.c, + src/modification/modification.c, src/property/property.c + - Fixing important bug; thanks to Bjoern Gruening ! + +2008-03-07 Jerome Pansanel + + * src/molmatch, doc/mychem/command-reference.xml + - Move match to match_substruct + - Add substruct_count + +2008-03-05 Jerome Pansanel + + * Release 0.5.1 + + * src/molmatch/, doc/mychem/command-reference.xml + - The FIND_SIMILAR() function has been removed + + * src/{module}/ + - Split .c files in .c and .h + - Fix some spelling for _wrapper.cpp + - Add const and __attribute__((unused)) to some function definitions + (no warning left when compiling) + +2008-02-29 Jerome Pansanel + + * Release 0.5.0 + + * tests: + conversion_test.c tests molfile_to_molecule(), molecule_to_molfile(), + V3000_to_molecule(), molecule_to_V3000(), smiles_to_molecule(), + molecule_to_smiles(), cml_to_molecule(), molecule_to_cml(), + fingerprint() + + * src/conversion: + - conversion_wrapper.cpp has been modified to replace '\n' by '\0' + +2008-02-27 Jerome Pansanel + + * tests: + property_test.c tests molweight(), exactmass(), number_of_atoms(), + number_of_heavy_atoms(), number_of_bonds(), number_of_rotable_bonds(), + 
molformula() + test_functions.{h,c} contains two functions useful for all modules. + +2008-02-25 Jerome Pansanel + + * tests: + - CMakeLists.txt is working + - helper_test tests mychem_version() and openbabel_version() + + * doc/mychem: + - installation.xml has been updated to introduce the test process + +2008-02-19 Jerome Pansanel + + * tests: + - Creation of the directory. The CMakeLists.txt, conversion_test.c, + helper_test.c, modification.c, molmatch.c and property_test.c files + have been added. + +2008-02-18 Jerome Pansanel + + * Release 0.4.5 + + * src: + - New directories (conversion, helper, modification, molmatch and + property) containing each a main module + + * src: + - New directory 'include'. It contains mychem/mychem_config.h. + + * doc: + - The documentation has been updated with a new troubleshooting + appendix and the description of 5 new functions for the property + module. + + * src/conversion: + - Some bug fixes + + * src/property: + - Five functions have been added (number_of_atoms, + number_of_heavy_atoms, number_of_bonds, number_of_rotable_bonds and + total_charge). 
+ +2007-10-17 Jerome Pansanel + + * src/conversion.c, src/conversion_wrapper.h, + - src/conversion_wrapper.cpp: + - Extending conversion functions + + * src/modification.c, src/modification_wrapper.h, + - src/modification_wrapper.cpp: + - Adding modification functions + + * src/helper.c: + - Common helper functions + + * doc/mychem: + - Update the documentation + + * script/mychemadmin: + - Fix some bugs + - Adding distutils script for the installation + + * Doxyfile: + - Update API documentation and generation options + + * Release 0.4 + +2007-08-17 Jerome Pansanel + + * src/conversion.c, src/conversion_wrapper.h, + - src/conversion_wrapper.cpp: + - Basic conversion functions are working + + * src/property.c, src/property_wrapper.h, src/property_wrapper.cpp: + - Basic property functions are working + + * script/mychemadmin: + - MychemAdmin, a GUI for database loading is available + + * Release 0.2 + +2007-06-28 Jerome Pansanel + + * Project initialization + + * first API draft + + * Release 0.1 diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..757a3f1 --- /dev/null +++ b/Doxyfile @@ -0,0 +1,1517 @@ +# Doxyfile 1.6.1 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag.
Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or a sequence of words surrounded +# by quotes) that should identify the project. + +PROJECT_NAME = Mychem + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = 0.9.2 + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = doc/api + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = NO + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = \return + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. 
+ +ALWAYS_DETAILED_SEC = YES + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = YES + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = NO + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. + +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful is your file systems +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. 
If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. 
+ +ALIASES = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = NO + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it parses. +# With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this tag. +# The format is ext=language, where ext is a file extension, and language is one of +# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, +# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat +# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), +# use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If you use STL classes (i.e. std::string, std::vector, etc.) 
but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also make the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = YES + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen to replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. 
+ +SUBGROUPING = YES + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penality. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will rougly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols + +SYMBOL_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. 
+# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = YES + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = NO + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespace are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. 
+ +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = YES + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. 
If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = NO + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. 
This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or define consists of for it to appear in +# the documentation. If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and defines in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# If the sources in your project are distributed over multiple directories +# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy +# in the documentation. The default is NO. + +SHOW_DIRECTORIES = NO + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. +# This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. 
+ +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by +# doxygen. The layout file controls the global structure of the generated output files +# in an output format independent way. To create the layout file that represents +# doxygen's defaults, run doxygen with the -l option. You can optionally specify a +# file name after the option, if omitted DoxygenLayout.xml will be used as the name +# of the layout file. + +LAYOUT_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = YES + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled.
+ +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# This WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = src include/mychem + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding.
Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx +# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 + +FILE_PATTERNS = *.h \ + *.cpp \ + *.c + +# The RECURSIVE tag can be used to specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = NO + +# The EXCLUDE tag can be used to specify files and/or directories that should +# be excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. + +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix filesystem feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring.
Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain images that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. +# If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. +# Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. +# The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER +# is applied to all files.
+ +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = NO + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C and C++ comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = YES + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = YES + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. +# Otherwise they will link to the documentation. 
+ +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. 
+ +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# stylesheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, +# files or namespaces will be aligned in HTML using tables. If set to +# NO a bullet list will be used. + +HTML_ALIGN_MEMBERS = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. For this to work a browser that supports +# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox +# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). 
+ +HTML_DYNAMIC_SECTIONS = NO + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). 
If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. + +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER +# are set, an additional index file will be generated that can be used as input for +# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated +# HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. 
+# For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's +# filter section matches. +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index at +# top of each HTML page. The value NO (the default) enables the index and +# the value YES disables it. + +DISABLE_INDEX = NO + +# This tag can be used to set the number of enum values (range [1..20]) +# that doxygen will group on one line in the generated HTML documentation. + +ENUM_VALUES_PER_LINE = 4 + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. + +GENERATE_TREEVIEW = NO + +# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, +# and Class Hierarchy pages using a tree view instead of an ordered list. + +USE_INLINE_TREES = NO + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown.
+ +TREEVIEW_WIDTH = 250 + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# When the SEARCHENGINE tag is enabled, doxygen will generate a search box for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP) +# there is already a search function so this one should typically +# be disabled. + +SEARCHENGINE = YES + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = NO + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general.
+ +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, a4wide, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4wide + +# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = NO + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
+ +LATEX_SOURCE_CODE = NO + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load stylesheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = NO + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = NO + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. 
+ +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. + +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. +# This is useful +# if you want to understand what is going on. 
+# On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = YES + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = YES + +# If the SEARCH_INCLUDES tag is set to YES (the default) the include files +# in the INCLUDE_PATH (see below) will be searched if a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used.
+ +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = OBAPI:= \ + EXTERN:= + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition. + +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all function-like macros that are alone +# on a line, have an all uppercase name, and do not end with a semicolon. Such +# function macros are typically used for boiler-plate code, and will confuse +# the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. +# Optionally an initial location of the external documentation +# can be added for each tagfile. The format of a tag file without +# this location is as follows: +# +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths or +# URLs. If a location is present for each tag, the installdox tool +# does not have to be run to correct the links. 
+# Note that each tag file must have a unique name +# (where the name does NOT include the path) +# If a tag file is not located in the directory in which doxygen +# is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option is superseded by the HAVE_DOT option below. This is only a +# fallback. It is recommended to install and use dot, since it yields more +# powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. 
+ +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = YES + +# By default doxygen will write a font called FreeSans.ttf to the output +# directory and reference it in all dot files that doxygen generates. This +# font does not include all possible unicode characters however, so when you need +# these (or just want a differently looking font) you can specify the font name +# using DOT_FONTNAME. You need need to make sure dot is able to find the font, +# which can be done by putting it in a standard location or by setting the +# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory +# containing the font. + +DOT_FONTNAME = FreeSans + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the output directory to look for the +# FreeSans.ttf font (which doxygen will put there itself). If you specify a +# different font using DOT_FONTNAME you can set the path where dot +# can find it using this tag. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# the CLASS_DIAGRAMS tag to NO. 
+ +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = NO + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. + +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. 
Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = NO + +# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = NO + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are png, jpg, or gif +# If left blank png will be used. + +DOT_IMAGE_FORMAT = png + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. 
A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = YES + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/INSTALL b/INSTALL new file mode 100644 index 0000000..88d7dd1 --- /dev/null +++ b/INSTALL @@ -0,0 +1,61 @@ +Mychem Installation +------------------- + +== Requirements == +CMake v2.4.5 or higher +MySQL-dev v4.0 or higher +OpenBabel-dev v2.3.0 or higher + +Note: Mychem should work with MySQL v4.0, but has not been tested. + + +== Quick Install == +This section describes a simple way to compile and install Mychem. 
+ +=== Source Extraction === +If you are using a command line interface, follow these instructions: +- For the tar gzipped archive: +# tar -xzvf mychem-0.9.2.tar.gz +- For the zip archive: +# unzip mychem-0.9.2.zip + +=== Compilation and Installation === +CMake can build the libraries and executables into any directory. If the +directory contains the source, the build is called in source. In other cases, +it’s called out of source. CMake strongly recommends and promotes building +out of source. +- In source build: +# cd mychem +# cmake . +# make +# make install + +- Build out-of-source (recommended): +# cd mychem +# mkdir build +# cd build +# cmake .. +# make +# make install +# cd .. + +Note: You may need some root privileges to run make install. +Once the libraries are installed, you have to create the SQL functions. + +For Unix-like systems (Linux, BSD, Mac OS X), use the following command: +# mysql -u user -p < src/mychemdb.sql + +For Microsoft Windows, use the following command: +# mysql -u user -p < src/mychemdb_win32.sql + +== Custom Install == + +=== Setting the installation path === +You can customize the installation prefix by using the following command: +# cmake -DCMAKE_INSTALL_PREFIX=/convenient/path + +=== Other tuning === +Many build and installation parameters can be customized. Look at the CMake +documentation for further information. +http://www.cmake.org/Wiki/CMake + diff --git a/NEWS b/NEWS new file mode 100644 index 0000000..1575eb7 --- /dev/null +++ b/NEWS @@ -0,0 +1,48 @@ +2014-09-14 +v0.9.2 is released; see ChangeLog for more details. + +2012-02-13 +v0.9.1 is released; see ChangeLog for more details. + +2011-12-09 +v0.9.0 is released; See ChangeLog for more details. + +2011-04-04 +v0.8.2 is released; See ChangeLog for more details. + +2010-09-07 +v0.8.1 is released; See ChangeLog for more details. + +2010-07-06 +v0.8.0 is released; See ChangeLog for more details. + +2009-09-30 +v0.7.0 is released; See ChangeLog for more details.
+ +2008-12-30 +v0.6.0 is released; See ChangeLog for more details. + +2008-06-19 +v0.5.9 is released; See ChangeLog for more details. + +2008-04-08 +v0.5.5 is released; See ChangeLog for more details. + +2008-03-19 +v0.5.2 is released; See ChangeLog for more details. + +2008-03-05 +v0.5.1 is released; See ChangeLog for more details. + +2008-02-29 +v0.5.0 is released; See ChangeLog for more details. + +2008-02-18 +v0.4.5 is released; See ChangeLog for more details. + +2007-10-17 +v0.4 is released; See ChangeLog for more details. + +2007-08-17 +v0.2 is released; See ChangeLog for more details. + diff --git a/THANKS b/THANKS new file mode 100644 index 0000000..5b50bae --- /dev/null +++ b/THANKS @@ -0,0 +1,7 @@ +Mychem THANKS +------------- + +The Mychem development team would like to thank the following contributors: + +Fredrik Wallner +Chris Morley diff --git a/TODO b/TODO new file mode 100644 index 0000000..3ae768f --- /dev/null +++ b/TODO @@ -0,0 +1,14 @@ +=============================== OPTIMISATION ================================ +v1.0 Add support for build type (Debug, Release, ...) + +================================= FUNCTIONS ================================== +v1.0 Add a switch for the addhydrogen interface +v1.0 Add a switch for stereochemistry search in molmatch module +v1.0 Enhance search performance of tanimoto scoring + +=============================== DOCUMENTATION ================================ +v1.0 Add a more explicit documentation with the -DOPENBABEL2_LIBRARIES=/path/to/library .. 
option +v1.0 Synchronize bug reports with the troubleshooting section + +=================================== MISC ==================================== +v1.0 Add 2D and 3D functions diff --git a/cmake/modules/FindMySQL.cmake b/cmake/modules/FindMySQL.cmake new file mode 100755 index 0000000..380b4c9 --- /dev/null +++ b/cmake/modules/FindMySQL.cmake @@ -0,0 +1,178 @@ +############################################################################## +# Try to find MySQL include dirs and libraries +############################################################################## +# +# Use this module as follows: +# +# find_package( MySQL ) +# if(MySQL_FOUND) +# include_directories(${MySQL_INCLUDE_DIR}) +# add_executable(foo foo.cc) +# endif() +# +# +############################################################################## +# +# Variables used by this module, they can change the default behaviour and +# need to be set before calling find_package: +# +# MYSQL_INCLUDEDIR Set this to the include directory of MySQL. +# MYSQL_LIBRARYDIR Set this to the lib directory of MySQL. +# +# +############################################################################## +# +# Variables defined by this module. +# +# MySQL_FOUND System has MySQL, this means the include dir +# was found as well as the library. +# MySQL_INCLUDE_DIR MySQL include directory. +# MySQL_LIBRARIES Link to this to use the MySQL library. +# MySQL_MAJOR_VERSION Major version number of MySQL. +# MySQL_MINOR_VERSION Minor version number of MySQL. +# MySQL_PLUGIN_DIR Plugin directory. +# MySQL_VERSION The version number of MySQL. +# +# +############################################################################## +# +# Copyright (c) 2006, Jaroslaw Staniek, +# Copyright (c) 2010, CNRS +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+# +############################################################################## + + +if(UNIX) + set(MYSQL_CONFIG_PREFER_PATH "$ENV{MYSQL_HOME}/bin" CACHE FILEPATH + "preferred path to MySQL (mysql_config)") + find_program(MYSQL_CONFIG mysql_config + ${MYSQL_CONFIG_PREFER_PATH} + /usr/local/mysql/bin/ + /usr/local/bin/ + /usr/bin/ + ) + + if(MYSQL_CONFIG) + message(STATUS "Using mysql-config: ${MYSQL_CONFIG}") + + # set INCLUDE_DIR + exec_program(${MYSQL_CONFIG} + ARGS --include + OUTPUT_VARIABLE MY_TMP) + string(REGEX REPLACE "-I([^ ]+)( .*)?" "\\1" MY_TMP "${MY_TMP}") + set(MYSQL_ADD_INCLUDE_DIR ${MY_TMP} CACHE FILEPATH INTERNAL) + + # set LIBRARY_DIR + exec_program(${MYSQL_CONFIG} + ARGS --libs + OUTPUT_VARIABLE MY_TMP) + set(MYSQL_ADD_LIBRARIES "") + string(REGEX MATCHALL "(^| )-l[^ ]+" MYSQL_LIB_LIST "${MY_TMP}") + foreach(LIB ${MYSQL_LIB_LIST}) + string(REGEX REPLACE "[ ]*-l([^ ]*)" "\\1" LIB "${LIB}") + list(APPEND MYSQL_ADD_LIBRARIES "${LIB}") + endforeach(LIB ${MYSQL_LIBS}) + + # Add mysqlclient library + set(MYSQL_ADD_LIBRARY_PATH "") + string(REGEX MATCHALL "-L[^ ]+" MYSQL_LIBDIR_LIST "${MY_TMP}") + foreach(LIB ${MYSQL_LIBDIR_LIST}) + string(REGEX REPLACE "[ ]*-L([^ ]*)" "\\1" LIB "${LIB}") + list(APPEND MYSQL_ADD_LIBRARY_PATH "${LIB}") + endforeach(LIB ${MYSQL_LIBS}) + + # Set MYSQL_VERSION + exec_program(${MYSQL_CONFIG} + ARGS --version + OUTPUT_VARIABLE MY_TMP) + set(MySQL_VERSION "") + set(MySQL_VERSION ${MY_TMP}) + + else(MYSQL_CONFIG) + set(MYSQL_ADD_LIBRARIES "") + list(APPEND MYSQL_ADD_LIBRARIES "mysqlclient") + endif(MYSQL_CONFIG) + +else(UNIX) + if (WIN32) + set(MYSQL_ADD_LIBRARIES "") + list(APPEND MYSQL_ADD_LIBRARIES "mysql") + endif (WIN32) + set(MYSQL_ADD_INCLUDE_DIR "c:/msys/local/include" CACHE FILEPATH INTERNAL) + set(MYSQL_ADD_LIBRARY_PATH "c:/msys/local/lib" CACHE FILEPATH INTERNAL) +ENDIF(UNIX) + +find_path(MySQL_INCLUDE_DIR mysql.h + /usr/local/include + /usr/local/include/mysql + /usr/local/mysql/include + 
/usr/local/mysql/include/mysql + /opt/mysql/mysql/include + /opt/mysql/mysql/include/mysql + /usr/include + /usr/include/mysql + ${MYSQL_INCLUDEDIR} +) + +set(TMP_MYSQL_LIBRARIES "") + +foreach(LIB ${MYSQL_ADD_LIBRARIES}) + find_library("MYSQL_LIBRARIES_${LIB}" NAMES ${LIB} + PATHS + ${MYSQL_LIBRARYDIR} + /usr/lib64/mysql + /usr/lib/mysql + /usr/local/lib64 + /usr/local/lib + /usr/local/lib64/mysql + /usr/local/lib/mysql + /usr/local/mysql64/lib + /usr/local/mysql/lib + ) + list(APPEND TMP_MYSQL_LIBRARIES "${MYSQL_LIBRARIES_${LIB}}") +endforeach(LIB ${MYSQL_ADD_LIBRARIES}) + +set(MySQL_LIBRARIES ${TMP_MYSQL_LIBRARIES} CACHE FILEPATH INTERNAL) + +if(MySQL_VERSION) + STRING(REGEX REPLACE ".*([456])\\.[0-9]\\..*" "\\1" MySQL_MAJOR_VERSION "${MySQL_VERSION}") + STRING(REGEX REPLACE ".*[456]\\.([0-9])\\..*" "\\1" MySQL_MINOR_VERSION "${MySQL_VERSION}") +else(MySQL_VERSION) + if(MySQL_INCLUDE_DIR) + FILE(READ "${MySQL_INCLUDE_DIR}/mysql_version.h" _MYSQL_VERSION_H_CONTENTS) + STRING(REGEX REPLACE "^.*#define MYSQL_SERVER_VERSION.*\"([456]\\.[0-9]\\.[0-9]+).*\".*$" "\\1" MySQL_VERSION ${_MYSQL_VERSION_H_CONTENTS}) + STRING(REGEX REPLACE ".*([456])\\.[0-9]\\..*" "\\1" MYSQL_MAJOR_VERSION "${MySQL_VERSION}") + STRING(REGEX REPLACE ".*[456]\\.([0-9])\\..*" "\\1" MYSQL_MINOR_VERSION "${MySQL_VERSION}") + endif(MySQL_INCLUDE_DIR) +endif(MySQL_VERSION) + +set(MYSQL_DIRECTORIES + ${MYSQL_LIBRARYDIR} + /usr/lib64/mysql + /usr/lib/mysql + /usr/local/lib64/mysql + /usr/local/lib/mysql +) +message(STATUS "MySQL Version: ${MySQL_VERSION}") + +set ( ${MySQL_PLUGIN_DIR} "") +if (${MySQL_VERSION} MATCHES "^5\\.[15]|^6\\.") + foreach (MYSQL_DIR ${MYSQL_DIRECTORIES}) + if (IS_DIRECTORY "${MYSQL_DIR}/plugin") + set (MySQL_PLUGIN_DIR "${MYSQL_DIR}/plugin") + endif (IS_DIRECTORY "${MYSQL_DIR}/plugin") + endforeach (MYSQL_DIR MYSQL_DIRECTORIES) +endif (${MySQL_VERSION} MATCHES "^5\\.[15]|^6\\.") + +message(STATUS "MySQL Plugin Dir: ${MySQL_PLUGIN_DIR}") +if(MySQL_INCLUDE_DIR AND 
MySQL_LIBRARIES) + set(MySQL_FOUND TRUE CACHE INTERNAL "MySQL found") + message(STATUS "Found MySQL ${MySQL_VERSION}: ${MySQL_INCLUDE_DIR}, ${MySQL_LIBRARIES}") +else(MySQL_INCLUDE_DIR AND MySQL_LIBRARIES) + set(MySQL_FOUND FALSE CACHE INTERNAL "MySQL found") + message(STATUS "MySQL not found.") +endif(MySQL_INCLUDE_DIR AND MySQL_LIBRARIES) +mark_as_advanced(MySQL_INCLUDE_DIR MySQL_LIBRARIES) diff --git a/cmake/modules/FindOpenBabel2.cmake b/cmake/modules/FindOpenBabel2.cmake new file mode 100644 index 0000000..833b305 --- /dev/null +++ b/cmake/modules/FindOpenBabel2.cmake @@ -0,0 +1,97 @@ +# - Try to find OpenBabel2 +# Once done this will define +# +# OPENBABEL2_FOUND - system has OpenBabel2 +# OPENBABEL2_INCLUDE_DIR - the OpenBabel2 include directory +# OPENBABEL2_LIBRARIES - Link these to use OpenBabel2 + +# Copyright (c) 2006, Carsten Niehaus, +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
+ +# +# Search for Open Babel2 libraries and includes +# +if (OPENBABEL2_INCLUDE_DIR AND OPENBABEL2_LIBRARIES) + + # in cache already + set(OPENBABEL2_FOUND TRUE) + +else (OPENBABEL2_INCLUDE_DIR AND OPENBABEL2_LIBRARIES) +if(NOT WIN32) + include(UsePkgConfig) + + pkgconfig(openbabel-2.0 _obIncDir _obLinkDir _obLinkFlags _obCflags) + + # query pkg-config asking for a openbabel >= 2.1.0 + exec_program(${PKGCONFIG_EXECUTABLE} ARGS --atleast-version=2.1.0 openbabel-2.0 RETURN_VALUE _return_VALUE OUTPUT_VARIABLE _pkgconfigDevNull ) + if(_return_VALUE STREQUAL "0") + set(OPENBABEL_MINI_FOUND TRUE) + endif(_return_VALUE STREQUAL "0") + message(STATUS "OPENBABEL_MINI_FOUND <${OPENBABEL_MINI_FOUND}>") + + exec_program(${PKGCONFIG_EXECUTABLE} ARGS --variable=pkgincludedir openbabel-2.0 RETURN_VALUE _return_VALUE OUTPUT_VARIABLE _obPkgIncDir ) + if (_obPkgIncDir) + set(_obIncDir "${_obPkgIncDir}") + endif (_obPkgIncDir) +endif(NOT WIN32) + find_path(OPENBABEL2_INCLUDE_DIR openbabel/obconversion.h + ${_obIncDir} + /usr/local/include + /usr/include + ${GNUWIN32_DIR}/include + $ENV{OPENBABEL2_INCLUDE_DIR} + ) + + find_library(OPENBABEL2_LIBRARIES NAMES openbabel + PATHS + ${_obLinkDir} + /usr/lib + /usr/local/lib + ${GNUWIN32_DIR}/lib + $ENV{OPENBABEL2_LIBRARIES} + ) + + if(OPENBABEL2_INCLUDE_DIR AND OPENBABEL2_LIBRARIES AND OPENBABEL_MINI_FOUND) + set(OPENBABEL2_FOUND TRUE) + endif(OPENBABEL2_INCLUDE_DIR AND OPENBABEL2_LIBRARIES AND OPENBABEL_MINI_FOUND) + + if (OPENBABEL2_FOUND) + if (NOT OPENBABEL2_FIND_QUIETLY) + message(STATUS "Found OpenBabel2: ${OPENBABEL2_LIBRARIES}") + endif (NOT OPENBABEL2_FIND_QUIETLY) + else (OPENBABEL2_FOUND) + if (OPENBABEL2_FIND_REQUIRED) + message(FATAL_ERROR "Could NOT find OpenBabel2") + endif (OPENBABEL2_FIND_REQUIRED) + endif (OPENBABEL2_FOUND) + + mark_as_advanced(OPENBABEL2_INCLUDE_DIR OPENBABEL2_LIBRARIES) + +endif (OPENBABEL2_INCLUDE_DIR AND OPENBABEL2_LIBRARIES) + +# +# Search for Open Babel2 executable +# +IF( OPENBABEL2_EXECUTABLE ) + 
# in cache already + SET( OPENBABEL2_EXECUTABLE_FOUND TRUE ) + +ELSE( OPENBABEL2_EXECUTABLE ) + FIND_PROGRAM(OPENBABEL2_EXECUTABLE + NAMES babel + PATHS + [HKEY_CURRENT_USER\\SOFTWARE\\OpenBabel\ 2.0.2] + $ENV{OPENBABEL2_EXECUTABLE} + ) + + SET(OPENBABEL2_EXECUTABLE_FOUND) + IF(OPENBABEL2_EXECUTABLE) + SET(OPENBABEL2_EXECUTABLE_FOUND ON) + + ENDIF(OPENBABEL2_EXECUTABLE) + +MESSAGE( STATUS "Open Babel Exe: ${OPENBABEL2_EXECUTABLE}" ) + +ENDIF( OPENBABEL2_EXECUTABLE ) diff --git a/cmake/modules/MacroEnsureVersion.cmake b/cmake/modules/MacroEnsureVersion.cmake new file mode 100644 index 0000000..34b0c89 --- /dev/null +++ b/cmake/modules/MacroEnsureVersion.cmake @@ -0,0 +1,116 @@ +# This file defines the following macros for developers to use in ensuring +# that installed software is of the right version: +# +# MACRO_ENSURE_VERSION - test that a version number is greater than +# or equal to some minimum +# MACRO_ENSURE_VERSION_RANGE - test that a version number is greater than +# or equal to some minimum and less than some +# maximum +# MACRO_ENSURE_VERSION2 - deprecated, do not use in new code +# + +# MACRO_ENSURE_VERSION +# This macro compares version numbers of the form "x.y.z" or "x.y" +# MACRO_ENSURE_VERSION( FOO_MIN_VERSION FOO_VERSION_FOUND FOO_VERSION_OK) +# will set FOO_VERSION_OK to true if FOO_VERSION_FOUND >= FOO_MIN_VERSION +# Leading and trailing text is ok, e.g. +# MACRO_ENSURE_VERSION( "2.5.31" "flex 2.5.4a" VERSION_OK) +# which means 2.5.31 is required and "flex 2.5.4a" is what was found on the system + +# Copyright (c) 2006, David Faure, +# Copyright (c) 2007, Will Stephenson +# +# Redistribution and use is allowed according to the terms of the BSD license. + +# MACRO_ENSURE_VERSION_RANGE +# This macro ensures that a version number of the form +# "x.y.z" or "x.y" falls within a range defined by +# min_version <= found_version < max_version. 
+# If this expression holds, FOO_VERSION_OK will be set TRUE +# +# Example: MACRO_ENSURE_VERSION_RANGE( "0.1.0" ${FOOCODE_VERSION} "0.7.0" FOO_VERSION_OK ) +# +# This macro will break silently if y or z is 100 or greater. +# +# Copyright (c) 2007, Will Stephenson +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +# NORMALIZE_VERSION +# Helper macro to convert version numbers of the form "x.y.z" +# to an integer equal to 10^4 * x + 10^2 * y + z +# +# This macro will break silently if y or z is 100 or greater. +# +# Copyright (c) 2006, David Faure, +# Copyright (c) 2007, Will Stephenson +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file. + +# MACRO_CHECK_RANGE_INCLUSIVE_LOWER +# Helper macro to check whether x <= y < z +# +# Copyright (c) 2007, Will Stephenson +# +# Redistribution and use is allowed according to the terms of the BSD license. +# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
+ + +MACRO(NORMALIZE_VERSION _requested_version _normalized_version) + STRING(REGEX MATCH "[^0-9]*[0-9]+\\.[0-9]+\\.[0-9]+.*" _threePartMatch "${_requested_version}") + if (_threePartMatch) + # parse the parts of the version string + STRING(REGEX REPLACE "[^0-9]*([0-9]+)\\.[0-9]+\\.[0-9]+.*" "\\1" _major_vers "${_requested_version}") + STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.([0-9]+)\\.[0-9]+.*" "\\1" _minor_vers "${_requested_version}") + STRING(REGEX REPLACE "[^0-9]*[0-9]+\\.[0-9]+\\.([0-9]+).*" "\\1" _patch_vers "${_requested_version}") + else (_threePartMatch) + STRING(REGEX REPLACE "([0-9]+)\\.[0-9]+" "\\1" _major_vers "${_requested_version}") + STRING(REGEX REPLACE "[0-9]+\\.([0-9]+)" "\\1" _minor_vers "${_requested_version}") + set(_patch_vers "0") + endif (_threePartMatch) + + # compute an overall version number which can be compared at once + MATH(EXPR ${_normalized_version} "${_major_vers}*10000 + ${_minor_vers}*100 + ${_patch_vers}") +ENDMACRO(NORMALIZE_VERSION) + +MACRO(MACRO_CHECK_RANGE_INCLUSIVE_LOWER _lower_limit _value _upper_limit _ok) + if (${_value} LESS ${_lower_limit}) + set( ${_ok} FALSE ) + elseif (${_value} EQUAL ${_lower_limit}) + set( ${_ok} TRUE ) + elseif (${_value} EQUAL ${_upper_limit}) + set( ${_ok} FALSE ) + elseif (${_value} GREATER ${_upper_limit}) + set( ${_ok} FALSE ) + else (${_value} LESS ${_lower_limit}) + set( ${_ok} TRUE ) + endif (${_value} LESS ${_lower_limit}) +ENDMACRO(MACRO_CHECK_RANGE_INCLUSIVE_LOWER) + +MACRO(MACRO_ENSURE_VERSION requested_version found_version var_too_old) + NORMALIZE_VERSION( ${requested_version} req_vers_num ) + NORMALIZE_VERSION( ${found_version} found_vers_num ) + + if (found_vers_num LESS req_vers_num) + set( ${var_too_old} FALSE ) + else (found_vers_num LESS req_vers_num) + set( ${var_too_old} TRUE ) + endif (found_vers_num LESS req_vers_num) + +ENDMACRO(MACRO_ENSURE_VERSION) + +MACRO(MACRO_ENSURE_VERSION2 requested_version2 found_version2 var_too_old2) + MACRO_ENSURE_VERSION( 
${requested_version2} ${found_version2} ${var_too_old2}) +ENDMACRO(MACRO_ENSURE_VERSION2) + +MACRO(MACRO_ENSURE_VERSION_RANGE min_version found_version max_version var_ok) + NORMALIZE_VERSION( ${min_version} req_vers_num ) + NORMALIZE_VERSION( ${found_version} found_vers_num ) + NORMALIZE_VERSION( ${max_version} max_vers_num ) + + MACRO_CHECK_RANGE_INCLUSIVE_LOWER( ${req_vers_num} ${found_vers_num} ${max_vers_num} ${var_ok}) +ENDMACRO(MACRO_ENSURE_VERSION_RANGE) + + diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt new file mode 100644 index 0000000..c93ba58 --- /dev/null +++ b/include/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory( mychem ) + diff --git a/include/mychem/CMakeLists.txt b/include/mychem/CMakeLists.txt new file mode 100644 index 0000000..fd11df2 --- /dev/null +++ b/include/mychem/CMakeLists.txt @@ -0,0 +1,5 @@ +configure_file( + ${CMAKE_CURRENT_SOURCE_DIR}/config.h.cmake + ${CMAKE_CURRENT_BINARY_DIR}/config.h +) + diff --git a/include/mychem/config.h.cmake b/include/mychem/config.h.cmake new file mode 100644 index 0000000..02f63a2 --- /dev/null +++ b/include/mychem/config.h.cmake @@ -0,0 +1,54 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It defines default values used + * by Mychem. + * @file config.h + * @short Mychem config file. + * @author Jerome Pansanel + */ + +#ifndef __MYCHEM_CONFIG_H +#define __MYCHEM_CONFIG_H + +/* Default molecule type */ +#define MOLECULE_TYPE "MOL" + +/* Max length for varchar */ +#define MAX_NAME_LENGTH 256 + +/* Max length for a text or a blob */ +#define MAX_VALUE_LENGTH 65536 + +/* Mychem major version */ +#define MYCHEM_VERSION_MAJOR "${MYCHEM_VERSION_MAJOR}" + +/* Mychem minor version */ +#define MYCHEM_VERSION_MINOR "${MYCHEM_VERSION_MINOR}" + +/* Mychem release version */ +#define MYCHEM_VERSION_RELEASE "${MYCHEM_VERSION_RELEASE}" + +/* Mychem version */ +#define MYCHEM_VERSION "${MYCHEM_VERSION}" + +#endif /* __MYCHEM_CONFIG_H */ + diff --git a/include/mychem/dlhandler.h b/include/mychem/dlhandler.h new file mode 100644 index 0000000..0f05f5a --- /dev/null +++ b/include/mychem/dlhandler.h @@ -0,0 +1,44 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * used by Mychem and related to dynamic library management. + * @file dlhandler.h + * @short Dynamic library management. + * @author Jerome Pansanel + */ + +#ifndef __LIB_HANDLER_H +#define __LIB_HANDLER_H + +class LibHandler +{ + private: + static void * lib_handler; + + public: + LibHandler(); + ~LibHandler(); + bool isLoaded(); +}; + +#endif /* __LIB_HANDLER_H */ + diff --git a/include/mychem/serialization.h b/include/mychem/serialization.h new file mode 100644 index 0000000..be442d1 --- /dev/null +++ b/include/mychem/serialization.h @@ -0,0 +1,52 @@ +/*************************************************************************** + * Copyright (C) 2009-2012 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA.
/**
 * @short Serialize an OBMol object.
 * @param mol The OBMol object to serialize.
 * @return the serialized object as a char buffer.
 *         NOTE(review): this header does not say who owns the returned
 *         buffer or how its length is encoded -- confirm in the
 *         implementation before freeing or copying it.
 */
char *serializeOBMol(OpenBabel::OBMol &mol);

/**
 * @short Unserialize an OBMol object.
 * @param pOb The OBBase object to initialize.
 * @param serializedInput The serialized OBMol data to read.
 * @param stereoEnabled true if stereo information is needed.
 * @return true if the unserialization is successful.
 */
bool unserializeOBMol(OpenBabel::OBBase* pOb, const char *serializedInput, bool stereoEnabled);
/* Byte buffer paired with an explicit size field. */
typedef struct {
    unsigned long int size;   /* presumably the number of bytes in str -- confirm */
    char * str;               /* buffer contents; ownership is not stated here */
} binary_string;

/*
 * Compact per-atom record.
 * NOTE(review): field names suggest this mirrors the atom data kept when a
 * molecule is serialized -- confirm against the serialization code.
 */
typedef struct
{
    unsigned int idx;                /* atom index */
    int hybridization;
    unsigned char atomicnum;         /* atomic number */
    unsigned short isotope;
    char formalcharge;               /* plain char: signedness is platform-dependent */
    unsigned char spinmultiplicity;
    unsigned char aromatic;          /* presumably a 0/1 flag -- confirm */
} _ATOM;

/* Compact per-bond record; the two *idx fields presumably hold atom indices. */
typedef struct
{
    unsigned int beginidx;
    unsigned int endidx;
    unsigned char order;             /* bond order */
    unsigned char aromatic;          /* presumably a 0/1 flag -- confirm */
} _BOND;
#-----------------------------------------------------------------------------
# Functions
#-----------------------------------------------------------------------------

def logger(message, logfile=None, mode=0):
    """Write `message` to the log file and/or a standard stream.

    mode 2: log file (if any) and stderr.
    mode 1: log file (if any) and stdout.
    other : log file when there is one, stdout otherwise.
    """
    if logfile:
        logfile.write(message)

    if mode == 2:
        sys.stderr.write(message)
    elif mode == 1 or not logfile:
        sys.stdout.write(message)


#-----------------------------------------------------------------------------
# Options
#-----------------------------------------------------------------------------

usage = "Usage: %prog [options] FILE"

version = "%prog 0.8"

description = ("%prog load a file in MDL SDF format into a MySQL database"
               " and creates a chemical cartridge with Mychem.")
parser = OptionParser(usage=usage, version=version, description=description)

# One row per switch, in the order they appear in the help output:
# (flag, optparse action, destination attribute, help text, default).
_optionTable = (
    ("-H", "store", "host",
     "connect to host [default: %default]", "localhost"),
    ("-U", "store", "user",
     "user for login to MySQL [default: %default]", "mychem"),
    ("-P", "store_true", "pwd",
     "use a password to connect to MySQL (it will be asked from the tty)", False),
    ("-D", "store", "db",
     "database to use [default: %default]", "mychem"),
    ("-n", "store", "nameTag",
     "name tag to use in the MDL SDF file", ""),
    ("-l", "store", "logfile",
     "log file name", ""),
    ("-t", "store", "tablePrefix",
     "table prefix", ""),
    ("-a", "store_true", "append",
     "append data if the compound table exists", False),
    ("-r", "store_true", "replace",
     "replace data if the compound table exists", False),
    ("-u", "store_true", "update",
     "update data if the compound table exists", False),
    ("-v", "store_true", "verbose",
     "verbose mode", False),
)

for _flag, _action, _dest, _help, _default in _optionTable:
    _keywords = dict(action=_action, dest=_dest, help=_help, default=_default)
    if _action == "store":
        # Value-carrying options are all plain strings.
        _keywords["type"] = "string"
    parser.add_option(_flag, **_keywords)

# TODO: add a Filename option
options, args = parser.parse_args()

# Exactly one positional argument (the SD file) is expected.
if len(args) != 1:
    sys.stdout.write(parser.format_help())
    sys.exit(0)


#-----------------------------------------------------------------------------
# Logfile
#-----------------------------------------------------------------------------

verbose = options.verbose
logFile = None          # stays None when no -l option was given
if options.logfile:
    if verbose:
        logger("Opening log file '%s'\n" % (options.logfile))
    try:
        logFile = open(options.logfile, 'w')
    except IOError as e:
        sys.stderr.write("Error: Could not open log file '%s': " % options.logfile)
        sys.stderr.write("%s\n" % (e.args[1]))
        sys.exit(1)
#-----------------------------------------------------------------------------
# Check incompatible options
#-----------------------------------------------------------------------------

# -a, -r and -u are mutually exclusive load modes (booleans sum as integers).
if options.append + options.replace + options.update > 1:
    # The trailing space keeps the two halves of the sentence apart.
    message = "Error: Options [a]ppend, [r]eplace and [u]pdate can not be set " \
            + "simultanously\n"
    logger(message, logFile, 2)
    # logFile is None when no -l option was given: only close a real file.
    if logFile:
        logFile.close()
    sys.exit(1)


#-----------------------------------------------------------------------------
# Database connection
# Connect to the database. The table layout is checked further down: existing
# tables are dropped and recreated when the user asks for a replacement,
# reused otherwise, and created when they do not exist yet.
#-----------------------------------------------------------------------------

host = options.host
user = options.user
db = options.db
if options.pwd:
    # -P only requests a prompt; the password never appears on the command line.
    passwd = getpass.getpass(prompt="Enter MySQL password: ")
else:
    passwd = ""

try:
    link = MySQLdb.connect(host = host, user = user, passwd = passwd, db = db)
except MySQLdb.Error as e:
    sys.stderr.write("Error: %d: %s\n" % (e.args[0], e.args[1]))
    sys.exit(1)

if verbose:
    message = "Connection to MySQL successufull\n"
    logger(message, logFile)

cursor = link.cursor()


#-----------------------------------------------------------------------------
# Table settings
#-----------------------------------------------------------------------------

# The four table names share an optional "<prefix>_" given with -t.
if options.tablePrefix:
    compoundTable = options.tablePrefix + "_compounds"
    structure1DTable = options.tablePrefix + "_1D_structures"
    structure3DTable = options.tablePrefix + "_3D_structures"
    structureBinTable = options.tablePrefix + "_bin_structures"
else:
    compoundTable = "compounds"
    structure1DTable = "1D_structures"
    structure3DTable = "3D_structures"
    structureBinTable = "bin_structures"
# Logical role -> actual table name (the name depends on the -t prefix).
mychemTables = { 'compoundsTable' : compoundTable,
                 'structure1DTable' : structure1DTable,
                 'structure3DTable' : structure3DTable,
                 'structureBinTable' : structureBinTable }

# Roles (dictionary keys) whose table is already present in the database.
existingTables = []

for table in mychemTables:
    query = "SHOW TABLES LIKE '%s'" % (mychemTables[table])
    result_len = cursor.execute(query)
    if result_len > 0:
        existingTables.append(table)
        if verbose:
            # Report the real table name, not the internal role key.
            message = "Table '%s' already exists.\n" % (mychemTables[table])
            logger(message, logFile)


def _askAction(question, allowed):
    """Prompt until the answer starts with one of the letters in `allowed`
    (case-insensitive) and return that letter in lower case."""
    while 1:
        answer = raw_input(question).strip()
        if answer and answer[0].lower() in allowed:
            return answer[0].lower()


def _cancelLoading():
    """Release the connection and the log file, then exit successfully."""
    link.close()
    logger("Database loading cancelled\n", logFile)
    if logFile:
        logFile.close()
    sys.exit(0)


action = None
if (len(existingTables) == 4) and not (options.append or options.replace or options.update):
    # Complete table set but no load mode on the command line: ask for one.
    sys.stdout.write("A complet set of tables alread exists. Which action should "
                     "be performed:\n")
    action = _askAction("[A]ppend, [C]ancel, [R]eplace or [U]pdate?\n", "acru")
elif len(existingTables) in [1,2,3]:
    # A partial table set cannot be appended to or updated.
    sys.stdout.write("Some tables (but not all) already exist. Which action should "
                     "be performed:\n")
    action = _askAction("[C]ancel or [R]eplace?\n", "cr")

if action == 'c':
    _cancelLoading()
elif action == 'a':
    options.append = True
elif action == 'u':
    options.update = True
elif action == 'r':
    # An interactive Replace overrides any -a/-u given on the command line,
    # so the mode reported below matches what is actually done.
    options.replace = True
    options.append = False
    options.update = False

if verbose and (options.append or options.replace or options.update):
    if not existingTables:
        message = "The tables do not exist. A new table set will be " \
                + "created.\n"
    elif options.append:
        message = "As requested, data will be appended to existing tables\n"
    elif options.replace:
        message = "As requested, old tables will be dropped and a new table " \
                + "set will be created.\n"
    else:
        message = "As requested, old data will be updated.\n"
    logger(message, logFile)
if options.replace:
    # Drop in a fixed order: compounds first, then the 1D, 3D and binary
    # structure tables. Only tables that were detected earlier are dropped.
    for _role, _tableName in (('compoundsTable', compoundTable),
                              ('structure1DTable', structure1DTable),
                              ('structure3DTable', structure3DTable),
                              ('structureBinTable', structureBinTable)):
        if _role not in existingTables:
            continue
        cursor.execute("DROP TABLE IF EXISTS `%s`" % (_tableName))
        link.commit()
        if verbose:
            logger("The '%s' table has been dropped.\n" % (_tableName), logFile)
    existingTables = []
    if verbose:
        logger("All existing tables have been dropped.\n", logFile)

#-----------------------------------------------------------------------------
# Table creation
#-----------------------------------------------------------------------------

if not existingTables:
    # (table name, DDL template) in creation order; each template takes the
    # table name as its only substitution.
    _tableDefinitions = (
        # Compound table
        (compoundTable, """CREATE TABLE IF NOT EXISTS `%s` (
  `id` int(11) unsigned NOT NULL AUTO_INCREMENT,
  `name` varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,
  `created` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  `modified` timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',
  PRIMARY KEY id (`id`),
  KEY name (`name`) )
  ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
  COMMENT='Compound Library';
"""),
        # 1D_structures table
        (structure1DTable, """CREATE TABLE IF NOT EXISTS `%s` (
  `compound_id` int(11) unsigned NOT NULL,
  `inchi` text NOT NULL,
  `smiles` text NOT NULL,
  PRIMARY KEY compound_id (`compound_id`) )
  ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
  COMMENT='Compound 1D Structures';
"""),
        # 3D_structures table
        (structure3DTable, """CREATE TABLE IF NOT EXISTS `%s` (
  `compound_id` int(11) unsigned NOT NULL,
  `molfile` text NOT NULL,
  PRIMARY KEY compound_id (`compound_id`) )
  ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
  COMMENT='Compound 3D Structures';
"""),
        # bin_structures table
        (structureBinTable, """
CREATE TABLE IF NOT EXISTS `%s` (
  `compound_id` int(11) unsigned NOT NULL,
  `fp2` blob NULL,
  `obserialized` blob NULL,
  PRIMARY KEY compound_id (`compound_id`) )
  ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin
  COMMENT='Compound Binary Structures';
"""),
    )

    for _tableName, _ddl in _tableDefinitions:
        cursor.execute(_ddl % (_tableName))
        link.commit()
        if verbose:
            logger("The '%s' table has been created\n" % (_tableName), logFile)
#-----------------------------------------------------------------------------
# load_structure() function
#-----------------------------------------------------------------------------

def load_structure(name, molfile):
    """Store one molecule and its derived representations.

    In update mode (-u) an existing compound with the same name has its
    molfile, InChI, SMILES, fingerprint and serialized form refreshed in
    place. In every other case -- including update mode when the name is
    unknown -- a new compound is inserted.

    Table names are spliced into the SQL text (identifiers cannot be bound),
    while every value read from the SD file is passed to the driver as a
    bound parameter instead of being escaped by hand.

    Always returns True.
    """
    if options.update:
        query = "SELECT `id` FROM `%s` WHERE name=%%s" % (compoundTable)
        if cursor.execute(query, (name,)) > 0:
            compound_id = cursor.fetchone()[0]

            # Refresh the stored molfile and the modification time stamp.
            query = ("UPDATE `%s`,`%s` SET `modified`=CURRENT_TIMESTAMP(),`molfile`=%%s"
                     " WHERE `id`=%%s AND `compound_id`=`id`"
                     ) % (compoundTable, structure3DTable)
            cursor.execute(query, (molfile, compound_id))

            # Recompute the 1D representations from the new molfile.
            query = ("UPDATE `%s`,`%s` SET"
                     " `inchi`=MOLECULE_TO_INCHI(`molfile`),"
                     " `smiles`=MOLECULE_TO_SMILES(`molfile`)"
                     " WHERE `%s`.`compound_id`=%%s"
                     " AND `%s`.`compound_id`=%%s"
                     ) % (structure1DTable, structure3DTable,
                          structure1DTable, structure3DTable)
            cursor.execute(query, (compound_id, compound_id))

            # Recompute the binary representations from the new molfile.
            query = ("UPDATE `%s`,`%s` SET"
                     " `fp2`=FINGERPRINT2(`molfile`),"
                     " `obserialized`=MOLECULE_TO_SERIALIZEDOBMOL(`molfile`)"
                     " WHERE `%s`.`compound_id`=%%s"
                     " AND `%s`.`compound_id`=%%s"
                     ) % (structureBinTable, structure3DTable,
                          structureBinTable, structure3DTable)
            cursor.execute(query, (compound_id, compound_id))

            return True

    # New compound: create the master row first; its id keys the other tables.
    query = ("INSERT INTO `%s` (`name`,`created`,`modified`)"
             " VALUES (%%s,CURRENT_TIMESTAMP(),CURRENT_TIMESTAMP())"
             ) % (compoundTable)
    cursor.execute(query, (name,))
    compound_id = cursor.lastrowid

    query = ("INSERT INTO `%s` (`compound_id`, `molfile`) VALUES (%%s, %%s)"
             ) % (structure3DTable)
    cursor.execute(query, (compound_id, molfile))

    # The derived columns are computed server-side from the stored molfile.
    query = ("INSERT INTO `%s` (`compound_id`, `inchi`, `smiles`)"
             " SELECT `compound_id`, MOLECULE_TO_INCHI(`molfile`),"
             " MOLECULE_TO_SMILES(`molfile`) FROM `%s` WHERE `compound_id`=%%s"
             ) % (structure1DTable, structure3DTable)
    cursor.execute(query, (compound_id,))

    query = ("INSERT INTO `%s` (`compound_id`, `fp2`, `obserialized`)"
             " SELECT `compound_id`, FINGERPRINT2(`molfile`),"
             " MOLECULE_TO_SERIALIZEDOBMOL(`molfile`) FROM `%s` WHERE `compound_id`=%%s"
             ) % (structureBinTable, structure3DTable)
    cursor.execute(query, (compound_id,))
    return True
#-----------------------------------------------------------------------------
# Parse the file and load the structures
#-----------------------------------------------------------------------------

dbFileName = args[0]

try:
    dbFile = open(dbFileName, 'r')
except IOError as e:
    sys.stderr.write("Error: Could not open MDL Sdfile '%s': " % (dbFileName))
    sys.stderr.write("%s\n" % (e.args[1]))
    sys.exit(1)

molfile = ""
name = ""          # title line of the current record; "" until one is read
lineCount = 0      # line number inside the current record
molCount = 0       # index of the current record (used for default names)
loadedCount = 0    # records actually handed to load_structure()
ctEnd = False      # True once the connection table of the record is complete

for line in dbFile:
    lineCount += 1
    if line[0:4] == "$$$$":
        # End of record: store what has been collected so far.
        if name == "":
            name = "Mol" + str(molCount)
        if len(name) > 250:
            sys.stderr.write("Error: molecule name is too long for %s\n" % (name))
        else:
            load_structure(name, molfile)
            loadedCount += 1
        # Reset the per-record state, including the name, so nothing leaks
        # into the next record.
        molfile = ""
        name = ""
        lineCount = 0
        molCount += 1
        ctEnd = False
    elif not ctEnd:
        # Only the connection table is kept; data items after "M  END" are
        # skipped.
        molfile += line
        if lineCount == 1:
            name = line.strip()
        if line[0:6] == "M  END":
            ctEnd = True


#-----------------------------------------------------------------------------
# Close the handlers and print a summary
#-----------------------------------------------------------------------------

dbFile.close()
link.commit()
cursor.close()
link.close()

if verbose:
    message = "%i structures have been loaded.\n" % (loadedCount)
    logger(message, logFile)

message = "The MDL SDF file has been successfully loaded.\n"
logger(message, logFile, 1)

if logFile:
    logFile.close()
${CMAKE_BINARY_DIR}/include + ${CMAKE_SOURCE_DIR}/include +) + +if (BUILD_SHARED) + message(STATUS "Defining dlhandler source files") + if (WIN32) + set(dlhandler_SRCS + ${CMAKE_SOURCE_DIR}/src/dlhandler_win32.cpp + ) + else (WIN32) + set(dlhandler_SRCS + ${CMAKE_SOURCE_DIR}/src/dlhandler_unix.cpp + ) + endif (WIN32) +endif (BUILD_SHARED) + +set(serialization_SRCS + ${CMAKE_SOURCE_DIR}/src/serialization.cpp +) + +if (NOT WIN32) + set(MYCHEM_LIBRARIES + dl + ) +endif (NOT WIN32) + +add_definitions( -DHAVE_DLOPEN ) + +set( libmychem_SRCS + conversion.c + conversion_wrapper.cpp + helper.c + helper_wrapper.cpp + modification.c + modification_wrapper.cpp + molmatch.c + molmatch_wrapper.cpp + property.c + property_wrapper.cpp + serialization.cpp + mychem.def + ${dlhandler_SRCS} +) + +add_library( mychem-lib SHARED ${libmychem_SRCS} ) +set_target_properties( mychem-lib PROPERTIES OUTPUT_NAME mychem ) +set_target_properties( mychem-lib PROPERTIES VERSION ${MYCHEM_VERSION} SOVERSION ${MYCHEM_VERSION_MAJOR} ) + +target_link_libraries( mychem-lib + ${OPENBABEL2_LIBRARIES} + ${MySQL_LIBRARIES} + ${MYCHEM_LIBRARIES} +) + +#### install files #### +install(TARGETS mychem-lib DESTINATION ${LIB_INSTALL_DIR}) diff --git a/src/conversion.c b/src/conversion.c new file mode 100644 index 0000000..cac921a --- /dev/null +++ b/src/conversion.c @@ -0,0 +1,1526 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions used by + * MySQL and related to chemical file conversion. + * @file conversion.c + * @short Function definitions of the Mychem conversion module. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "conversion.h" +#include "conversion_wrapper.h" +#include +#include + +#ifdef HAVE_DLOPEN + +my_bool molfile_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLFILE_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLFILE_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molfile_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molfile_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + const char *inputFormat = "MOL"; + const char *outputFormat = 
MOLECULE_TYPE; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + if (outputMol == NULL) { + *error = 1; + *is_null = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_molfile_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_MOLFILE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_MOLFILE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_molfile_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_molfile(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + const char *inputFormat = MOLECULE_TYPE; + const char *outputFormat = "MOL"; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result 
= 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool V3000_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: V3000_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: V3000_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void V3000_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *V3000_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = V3000conversion(inputMol); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return 
initid->ptr; +} + +my_bool molecule_to_V3000_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_V3000() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_V3000() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_V3000_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_V3000(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversionV3000(inputMol); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool smiles_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: SMILES_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: SMILES_TO_MOLECULE() requires a STRING"); + return 1; 
+ } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void smiles_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *smiles_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + const char *inputFormat = "SMI"; + const char *outputFormat = MOLECULE_TYPE; + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_smiles_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_SMILES() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_SMILES() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void 
molecule_to_smiles_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_smiles(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + const char *inputFormat = MOLECULE_TYPE; + const char *outputFormat = "SMI"; + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char 
issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + const char *inputFormat = "INCHI"; + const char *outputFormat = MOLECULE_TYPE; + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool cml_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: CML_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: CML_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void cml_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *cml_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], 
args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = CMLconversion(inputMol); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_cml_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_CML() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_CML() requires a STRING"); + return 1; + } + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_cml_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_cml(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversionCML(inputMol); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + 
strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool fingerprint_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 2) { + strcpy(message, "Wrong number of arguments: FINGERPRINT() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + strcpy(message,"Wrong argument type: FINGERPRINT() requires a STRING and a STRING"); + return 1; + } + + if ((strncmp(args->args[1],"FP2",3) && strncmp(args->args[1],"FP3",3) && strncmp(args->args[1],"FP4",3))) { + strcpy(message,"Wrong type of fingerprint: valid types are FP2, FP3 and FP4"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void fingerprint_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *fingerprint(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *fpType = (char *) malloc(sizeof(char)*4); + char *serializedOutput = NULL; + unsigned long int *intptr = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + strncpy(fpType, args->args[1], 3); + fpType[3] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + serializedOutput = conversionFPT(inputMol, fpType); + + /* Return NULL if the fingerprint is empty */ + if (serializedOutput == NULL) { + *is_null = 1; + *error = 1; + free(fpType); + free(inputMol); + + return 
NULL; + } + + intptr = (unsigned long int *) serializedOutput; + *length = intptr[0] + sizeof(unsigned long int); + memcpy(initid->ptr, serializedOutput, *length); + + free(inputMol); + free(fpType); + free(serializedOutput); + + return initid->ptr; +} + +my_bool fingerprint2_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: FINGERPRINT2() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: FINGERPRINT2() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void fingerprint2_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *fingerprint2(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *serializedOutput = NULL; + unsigned long int *intptr = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + serializedOutput = conversionFPT(inputMol, "FP2"); + + /* Return NULL if the fingerprint is empty */ + if (serializedOutput == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + intptr = (unsigned long int *) serializedOutput; + *length = intptr[0] + sizeof(unsigned long int); + memcpy(initid->ptr, serializedOutput, *length); + + free(inputMol); + free(serializedOutput); + + return initid->ptr; +} + 
+my_bool fingerprint3_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: FINGERPRINT3() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: FINGERPRINT3() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void fingerprint3_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *fingerprint3(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *serializedOutput = NULL; + unsigned long int *intptr = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + serializedOutput = conversionFPT(inputMol, "FP3"); + + /* Return NULL if the fingerprint is empty */ + if (serializedOutput == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + intptr = (unsigned long int *) serializedOutput; + *length = intptr[0] + sizeof(unsigned long int); + memcpy(initid->ptr, serializedOutput, *length); + + free(inputMol); + free(serializedOutput); + + return initid->ptr; +} + +my_bool fingerprint4_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: FINGERPRINT2() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { 
+ strcpy(message,"Wrong argument type: FINGERPRINT2() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void fingerprint4_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *fingerprint4(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *serializedOutput = NULL; + unsigned long int *intptr = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + serializedOutput = conversionFPT(inputMol, "FP4"); + + /* Return NULL if the fingerprint is empty */ + if (serializedOutput == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + intptr = (unsigned long int *) serializedOutput; + *length = intptr[0] + sizeof(unsigned long int); + memcpy(initid->ptr, serializedOutput, *length); + + free(inputMol); + free(serializedOutput); + + return initid->ptr; +} + +my_bool molecule_to_canonical_smiles_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_CANONICAL_SMILES() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_CANONICAL_SMILES() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) 
malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_canonical_smiles_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_canonical_smiles(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversionCAN(inputMol); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool inchi_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: INCHI_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: INCHI_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void inchi_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *inchi_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + 
/* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + const char *inputFormat = "INCHI"; + const char *outputFormat = MOLECULE_TYPE; + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_inchi_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_INCHI() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_INCHI() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_inchi_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_inchi(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + const char *inputFormat = MOLECULE_TYPE; + const char *outputFormat = "INCHI"; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + 
free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_serializedOBMol_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_BINARY() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_BINARY() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_serializedOBMol_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_serializedOBMol(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + unsigned int *serializedOBMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + serializedOBMol = (unsigned int*) serializeMolecule(inputMol); + + /* 
Return NULL if the serializedOBMol is empty */ + if (serializedOBMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = serializedOBMol[0] + sizeof(unsigned int); + memcpy(initid->ptr, serializedOBMol, *length); + + free(inputMol); + free(serializedOBMol); + + return initid->ptr; +} + +my_bool pdb_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: PDB_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: PDB_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void pdb_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *pdb_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + const char *inputFormat = "PDB"; + const char *outputFormat = MOLECULE_TYPE; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + 
free(outputMol); + + return initid->ptr; +} + +my_bool mol2_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOL2_TO_MOLECULE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOL2_TO_MOLECULE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void mol2_to_molecule_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *mol2_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + const char *inputFormat = "MOL2"; + const char *outputFormat = MOLECULE_TYPE; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool molecule_to_mol2_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLECULE_TO_MOL2() requires one argument"); + return 1; + } + if 
(args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLECULE_TO_MOL2() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char*) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molecule_to_mol2_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *molecule_to_mol2(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + const char *inputFormat = MOLECULE_TYPE; + const char *outputFormat = "MOL2"; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = conversion(inputMol, inputFormat, outputFormat); + + /* Return NULL if the outputMol is empty */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +#endif /* HAVE_DLOPEN */ + diff --git a/src/conversion.h b/src/conversion.h new file mode 100644 index 0000000..54d9390 --- /dev/null +++ b/src/conversion.h @@ -0,0 +1,598 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License 
as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * used by MySQL and related to chemical file conversion. + * @file conversion.h + * @short Function declarations of the Mychem conversion module. + * @author Jerome Pansanel + */ + +#ifndef __CONVERSION_H +#define __CONVERSION_H + +#ifdef STANDARD +/* STANDARD is defined, don't use any mysql functions */ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifdef __WIN__ +typedef unsigned __int64 ulonglong; /* Microsoft's 64-bit types */ +typedef __int64 longlong; +#else +typedef unsigned long long ulonglong; +typedef long long longlong; +#endif /*__WIN__*/ +#else +#include <my_global.h> +#include <my_sys.h> +#endif /* STANDARD */ +#include <mysql.h> +#include <m_ctype.h> +#include <m_string.h> /* To get strmov() */ + +/* static pthread_mutex_t obmtx; */ + +#ifdef HAVE_DLOPEN + +/** + * @short Initializes the molfile_to_molecule function.
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molfile_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molfile_to_molecule_init(). + * @param initid The structure filled by molfile_to_molecule_init() + */ +void molfile_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MDL Molfile format to a molecule in MOLECULE_TYPE format. + * @param initid A structure filled by molfile_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molfile_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_molfile function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_molfile_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_molfile_init(). + * @param initid The structure filled by molecule_to_molfile_init() + */ +void molecule_to_molfile_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a molecule in MDL Molfile format. 
+ * @param initid A structure filled by molecule_to_molfile_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_molfile(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the V3000_to_molecule function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool V3000_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by V3000_to_molecule_init(). + * @param initid the structure filled by V3000_to_molecule_init() + */ +void V3000_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MDL V3000 format to a molecule in MOLECULE_TYPE format. + * @param initid A structure filled by V3000_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *V3000_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_V3000 function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_V3000_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_V3000_init(). + * @param initid The structure filled by molecule_to_V3000_init() + */ +void molecule_to_V3000_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a molecule in MDL V3000 format. + * @param initid A structure filled by molecule_to_V3000_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_V3000(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the smiles_to_molecule function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool smiles_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by smiles_to_molecule_init(). + * @param initid The structure filled by smiles_to_molecule_init() + */ +void smiles_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in SMILES format to a molecule in MOLECULE_TYPE format. 
+ * @param initid A structure filled by smiles_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *smiles_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_smiles function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_smiles_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_smiles_init(). + * @param initid The structure filled by molecule_to_smiles_init() + */ +void molecule_to_smiles_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a molecule in SMILES format. + * @param initid A structure filled by molecule_to_smiles_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_smiles(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_molecule function.
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_molecule_init(). + * @param initid The structure filled by molecule_to_molecule_init() + */ +void molecule_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in OLD_TYPE format to a molecule in MOLECULE_TYPE format. + * @param initid A structure filled by molecule_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the cml_to_molecule function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool cml_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by cml_to_molecule_init(). + * @param initid the structure filled by cml_to_molecule_init() + */ +void cml_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in cml format to a molecule in MOLECULE_TYPE format. 
+ * @param initid A structure filled by cml_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *cml_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_cml function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_cml_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_cml_init(). + * @param initid The structure filled by molecule_to_cml_init() + */ +void molecule_to_cml_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a molecule in cml format. + * @param initid A structure filled by molecule_to_cml_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_cml(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the fingerprint function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool fingerprint_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by fingerprint_init(). + * @param initid The structure filled by fingerprint_init() + */ +void fingerprint_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a fingerprint. + * @param initid A structure filled by fingerprint_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The fingerprint + */ +char *fingerprint(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the fingerprint2 function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool fingerprint2_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by fingerprint2_init(). + * @param initid The structure filled by fingerprint2_init() + */ +void fingerprint2_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a fingerprint, type FP2. 
+ * @param initid A structure filled by fingerprint2_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The FP2 fingerprint + */ +char *fingerprint2(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the fingerprint3 function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool fingerprint3_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by fingerprint3_init(). + * @param initid The structure filled by fingerprint3_init() + */ +void fingerprint3_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a fingerprint, type FP3. + * @param initid A structure filled by fingerprint3_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The FP3 fingerprint + */ +char *fingerprint3(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the fingerprint4 function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool fingerprint4_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by fingerprint4_init(). + * @param initid The structure filled by fingerprint4_init() + */ +void fingerprint4_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a fingerprint, type FP4. + * @param initid A structure filled by fingerprint4_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The FP4 fingerprint + */ +char *fingerprint4(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_canonical_smiles function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_canonical_smiles_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_canonical_smiles_init(). + * @param initid The structure filled by molecule_to_canonical_smiles_init() + */ +void molecule_to_canonical_smiles_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a canonical SMILES string. 
+ * @param initid A structure filled by molecule_to_canonical_smiles_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_canonical_smiles(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the inchi_to_molecule function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool inchi_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by inchi_to_molecule_init(). + * @param initid The structure filled by inchi_to_molecule_init() + */ +void inchi_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts an InChI string to a molecule in MOLECULE_TYPE format. + * @param initid A structure filled by inchi_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *inchi_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_inchi function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_inchi_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_inchi_init(). + * @param initid The structure filled by molecule_to_inchi_init() + */ +void molecule_to_inchi_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to an InChI string. + * @param initid A structure filled by molecule_to_inchi_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_inchi(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the molecule_to_serializedOBMol function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_serializedOBMol_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_serializedOBMol_init(). + * @param initid The structure filled by molecule_to_serializedOBMol_init() + */ +void molecule_to_serializedOBMol_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a serialized OBMol string. 
+ * @param initid A structure filled by molecule_to_serializedOBMol() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_serializedOBMol(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the pdb_to_molecule function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool pdb_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by pdb_to_molecule_init(). + * @param initid The structure filled by pdb_to_molecule_init() + */ +void pdb_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in Protein Data Bank format data to a molecule in MOLECULE_TYPE format. + * @param initid A structure filled by pdb_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *pdb_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the mol2_to_molecule function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool mol2_to_molecule_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by mol2_to_molecule_init(). + * @param initid The structure filled by mol2_to_molecule_init() + */ +void mol2_to_molecule_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in mol2 format to a molecule in MOLECULE_TYPE format. + * @param initid A structure filled by mol2_to_molecule_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *mol2_to_molecule(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); +/** + * @short Initializes the molecule_to_mol2 function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molecule_to_mol2_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molecule_to_mol2_init(). + * @param initid The structure filled by molecule_to_mol2_init() + */ +void molecule_to_mol2_deinit(UDF_INIT *initid); + +/** + * @short Converts a molecule in MOLECULE_TYPE format to a molecule in mol2 format. 
+ * @param initid A structure filled by molecule_to_mol2_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The converted molecule + */ +char *molecule_to_mol2(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +#endif /* HAVE_DLOPEN */ + +#endif /* __CONVERSION_H */ + diff --git a/src/conversion_wrapper.cpp b/src/conversion_wrapper.cpp new file mode 100644 index 0000000..fab217e --- /dev/null +++ b/src/conversion_wrapper.cpp @@ -0,0 +1,340 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions related + * to chemical file conversion. 
+ * @file conversion_wrapper.cpp + * @short Mychem Conversion Wrapper. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "conversion_wrapper.h" +#include +#include +#include + +#include +#include +#include +#include +#include "fingerprints/finger2.cpp" +#include "fingerprints/finger3.cpp" + +#include + +using namespace std; +using namespace OpenBabel; + +#if defined(__CYGWIN__) || defined(__MINGW32__) + // macro to implement static OBPlugin::PluginMapType& Map() + PLUGIN_CPP_FILE(OBFingerprint) + PLUGIN_CPP_FILE(OBFormat) +#endif
+
+/**
+ * @short Removes one trailing line feed, if there is one.
+ * @param text The string to trim in place; an empty string is left alone
+ */
+static void stripTrailingNewline(string &text)
+{
+  if (!text.empty() && text[text.length()-1] == '\n') {
+    text.erase(text.length()-1);
+  }
+}
+
+/**
+ * @short Tells whether a string ends with the given suffix.
+ * @param text The string to inspect
+ * @param suffix The expected tail
+ * @return True if text is at least as long as suffix and ends with it
+ */
+static bool endsWith(const string &text, const char *suffix)
+{
+  const size_t suffixLength = strlen(suffix);
+
+  return text.length() >= suffixLength
+    && text.compare(text.length()-suffixLength, suffixLength, suffix) == 0;
+}
+
+/**
+ * @short Runs one stream-to-stream conversion and collects the raw output.
+ * @param molecule The input text; NULL is rejected
+ * @param inType Input format identifier
+ * @param outType Output format identifier
+ * @param optionKey Conversion option to set, or NULL for none
+ * @param optionType The option set optionKey belongs to
+ * @param output Receives the converter output, untrimmed
+ * @return True if the library handler is loaded, both formats are accepted
+ *         and no exception was raised
+ */
+static bool runConversion(const char *molecule, const char *inType, const char *outType,
+                          const char *optionKey, OBConversion::Option_type optionType,
+                          string &output)
+{
+  if (molecule == NULL || inType == NULL || outType == NULL) {
+    return false;
+  }
+
+  LibHandler ob_lib;
+
+  if (!ob_lib.isLoaded()) {
+    return false;
+  }
+
+  // The public functions below are declared extern "C" in
+  // conversion_wrapper.h, so no exception may leave them.
+  try {
+    const string instring(molecule);
+    istringstream inStream(instring);
+    ostringstream outStream;
+    OBConversion conv(&inStream, &outStream);
+
+    if (!conv.SetInAndOutFormats(inType, outType)) {
+      return false;
+    }
+
+    if (optionKey != NULL) {
+      conv.AddOption(optionKey, optionType);
+    }
+
+    conv.Convert();
+    output = outStream.str();
+  }
+  catch (...) {
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * @short Converts, drops one trailing line feed and duplicates the result.
+ * @return A string allocated with strdup(), or NULL if the conversion
+ *         could not be set up
+ */
+static char *convertTrimmed(const char *molecule, const char *inType, const char *outType,
+                            const char *optionKey, OBConversion::Option_type optionType)
+{
+  string outstring;
+
+  if (!runConversion(molecule, inType, outType, optionKey, optionType, outstring)) {
+    return NULL;
+  }
+
+  stripTrailingNewline(outstring);
+
+  return strdup(outstring.c_str());
+}
+
+/**
+ * Converts a molecule between two formats. The molecule name is left out
+ * of the output only when the output format is exactly "SMI".
+ */
+char *conversion(const char *molecule, const char *inType, const char *outType)
+{
+  const bool smilesOutput = (outType != NULL && strcmp(outType, "SMI") == 0);
+
+  return convertTrimmed(molecule, inType, outType, smilesOutput ? "n" : NULL,
+                        OBConversion::OUTOPTIONS);
+}
+
+/**
+ * Converts a molecule in MOLECULE_TYPE format to a V3000 molfile.
+ */
+char *conversionV3000(const char *molecule)
+{
+  string outstring;
+
+  if (!runConversion(molecule, MOLECULE_TYPE, "MOL", "3", OBConversion::OUTOPTIONS, outstring)) {
+    return NULL;
+  }
+
+  // Remove the trailing $$$$ record separator, but only when it really
+  // is the tail of the output.
+  if (endsWith(outstring, "$$$$\r\n")) {
+    outstring.erase(outstring.length()-6);
+  }
+  else if (endsWith(outstring, "$$$$\n")) {
+    outstring.erase(outstring.length()-5);
+  }
+
+  stripTrailingNewline(outstring);
+
+  return strdup(outstring.c_str());
+}
+
+/**
+ * Converts a V3000 molfile to a molecule in MOLECULE_TYPE format.
+ */
+char *V3000conversion(const char *V3000)
+{
+  return convertTrimmed(V3000, "MOL", MOLECULE_TYPE, "3", OBConversion::INOPTIONS);
+}
+
+/**
+ * Converts a CML document to a molecule in MOLECULE_TYPE format.
+ */
+char *CMLconversion(const char *cml)
+{
+  return convertTrimmed(cml, "CML", MOLECULE_TYPE, NULL, OBConversion::OUTOPTIONS);
+}
+
+/**
+ * Converts a molecule in MOLECULE_TYPE format to CML, with the "x"
+ * output option set.
+ */
+char *conversionCML(const char *molecule)
+{
+  return convertTrimmed(molecule, MOLECULE_TYPE, "CML", "x", OBConversion::OUTOPTIONS);
+}
+
+/**
+ * Computes a fingerprint of the requested type for a molecule in
+ * MOLECULE_TYPE format.
+ * @return One malloc() block holding an unsigned long with the payload size
+ *         in bytes, immediately followed by the fingerprint words; NULL if
+ *         the library handler is not loaded, the input format or the
+ *         fingerprint type is unavailable, or memory runs out
+ */
+char *conversionFPT(const char *molecule, const char *type)
+{
+  if (molecule == NULL || type == NULL) {
+    return NULL;
+  }
+
+  LibHandler ob_lib;
+
+  if (!ob_lib.isLoaded()) {
+    return NULL;
+  }
+
+  char *fpStruct = NULL;
+
+  try {
+    const string instring(molecule);
+    istringstream inStream(instring);
+    OBConversion conv(&inStream);
+    OBMol mol;
+
+    if (!conv.SetInFormat(MOLECULE_TYPE)) {
+      return NULL;
+    }
+
+    conv.Read(&mol);
+
+    // Check that fingerprint type is available
+    OBFingerprint *pFingerPrint = OBFingerprint::FindFingerprint(type);
+
+    if (!pFingerPrint) {
+      return NULL;
+    }
+
+    vector<unsigned int> fp;
+
+    pFingerPrint->GetFingerprint(&mol, fp);
+
+    const unsigned long int fpSize = fp.size()*sizeof(unsigned int);
+
+    // Nothing after the allocation can throw, so the catch block below
+    // never has to release it.
+    fpStruct = static_cast<char *>(malloc(sizeof(fpSize) + fpSize));
+
+    if (fpStruct == NULL) {
+      return NULL;
+    }
+
+    memcpy(fpStruct, &fpSize, sizeof(fpSize));
+
+    if (!fp.empty()) {
+      memcpy(fpStruct + sizeof(fpSize), &fp[0], fpSize);
+    }
+  }
+  catch (...) {
+    return NULL;
+  }
+
+  return fpStruct;
+}
+
+/**
+ * Converts a molecule in MOLECULE_TYPE format to a canonical SMILES
+ * without the molecule name.
+ */
+char *conversionCAN(const char *molecule)
+{
+  return convertTrimmed(molecule, MOLECULE_TYPE, "CAN", "n", OBConversion::OUTOPTIONS);
+}
+
+/**
+ * Reads a molecule in MOLECULE_TYPE format and hands it to serializeOBMol().
+ * @return The value returned by serializeOBMol(), or NULL if the library
+ *         handler is not loaded, the input format is unavailable or an
+ *         exception was raised
+ */
+char *serializeMolecule(const char *molecule)
+{
+  if (molecule == NULL) {
+    return NULL;
+  }
+
+  LibHandler ob_lib;
+
+  if (!ob_lib.isLoaded()) {
+    return NULL;
+  }
+
+  try {
+    const string instring(molecule);
+    istringstream inStream(instring);
+    OBMol mol;
+    OBConversion conv;
+
+    if (!conv.SetInFormat(MOLECULE_TYPE)) {
+      return NULL;
+    }
+
+    conv.Read(&mol, &inStream);
+
+    return serializeOBMol(mol);
+  }
+  catch (...) {
+    return NULL;
+  }
+}
+
diff --git a/src/conversion_wrapper.h b/src/conversion_wrapper.h new file mode 100644 index 0000000..0856509 --- /dev/null +++ b/src/conversion_wrapper.h @@ -0,0 +1,105 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * related to chemical file conversion. + * @file conversion_wrapper.h + * @short Wrapper for the Mychem conversion module. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#ifndef __CONVERSION_WRAPPER_H +#define __CONVERSION_WRAPPER_H + +#include + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @short Converts a molecule from one file format to another. + * @param molecule The molecule to convert. + * @param in_type Input format. + * @param out_type Output format. + * @return The converted molecule in a string allocated with strdup(), or NULL if the library handler is not loaded or the formats are rejected. + */ + char *conversion(const char *molecule, const char *in_type, const char *out_type); + + /** + * @short Converts a V3000 molecule to the default molecule type. + * @param V3000 The V3000 molecule to convert. + * @return The converted molecule (allocated with strdup()), or NULL if the conversion cannot be set up. + */ + char *V3000conversion(const char *V3000); + + /** + * @short Converts a molecule to a V3000 molfile. + * @param molecule The molecule to convert. + * @return The V3000 molecule (allocated with strdup()), or NULL if the conversion cannot be set up. + */ + char *conversionV3000(const char *molecule); + + /** + * @short Converts a cml molecule to the default molecule type. + * @param cml The cml molecule to convert. + * @return The converted molecule (allocated with strdup()), or NULL if the conversion cannot be set up. 
+ */ + char *CMLconversion(const char *cml); + + /** + * @short Converts a molecule to a cml molecule. + * @param molecule The molecule to convert. + * @return The cml molecule (allocated with strdup()), or NULL if the conversion cannot be set up. + */ + char *conversionCML(const char *molecule); + + /** + * @short Converts a molecule to a fingerprint. + * @param molecule The molecule to convert. + * @param type The type of fingerprint (FP2, FP3 or FP4). + * @return The fingerprint as one malloc() block: an unsigned long holding the payload size in bytes, followed by the fingerprint words. NULL if the library handler is not loaded, the input format is unavailable or the fingerprint type is unknown. 
+ */ + char *conversionFPT(const char *molecule, const char *type); + + /** + * @short Converts a molecule to a canonical SMILES. + * @param molecule The molecule to convert. + * @return The canonical SMILES (allocated with strdup()), or NULL if the conversion cannot be set up. + */ + char *conversionCAN(const char *molecule); + + /** + * @short Converts a molecule to a 1D serialized OBMol object. + * @param molecule The molecule to convert. + * @return The serialized OBMol object, or NULL if the library handler is not loaded. + */ + char *serializeMolecule(const char *molecule); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __CONVERSION_WRAPPER_H */ + diff --git a/src/descriptors/groupcontrib.cpp b/src/descriptors/groupcontrib.cpp new file mode 100644 index 0000000..dcd0a67 --- /dev/null +++ b/src/descriptors/groupcontrib.cpp @@ -0,0 +1,199 @@ +/********************************************************************** +groupcontrib.cpp - Handle logP, PSA, MR, and other group-based predictions + +Copyright (C) 2007 by Tim Vandermeersch + 2001-2007 by Stephen Jelfs + 2001-2007 by Joerg Kurt Wegner, me@cheminformatics.eu + 2007 by Chris Morley + +Original version: JOELib2, http://joelib.sf.net + +This file is part of the Open Babel project. +For more information, see + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+***********************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +namespace OpenBabel +{ + + const char* OBGroupContrib::Description() + { + //Adds name of datafile containing SMARTS strings to the description + static string txt; + txt = _descr; + txt += "\n Datafile: "; + txt += _filename; + txt += "\nOBGroupContrib is definable"; + return txt.c_str(); + } + + bool OBGroupContrib::ParseFile() + { + OBSmartsPattern *sp; + + // open data file + ifstream ifs; + + if (OpenDatafile(ifs, _filename).length() == 0) { + obErrorLog.ThrowError(__FUNCTION__, " Could not find contribution data file.", obError); + return false; + } + + // Set the locale for number parsing to avoid locale issues: PR#1785463 + obLocale.SetLocale(); + + vector vs; + bool heavy = false; + string ln; + while(getline(ifs,ln)){ + if(ln[0]=='#') continue; + if(ln.find(";heavy")!=string::npos) + heavy=true; + if(ln[0]==';') continue; + tokenize(vs, ln); + + if (vs.size() < 2) + continue; + + sp = new OBSmartsPattern;//causes non-serious memory leak. 
+ // Could be cured by copying OBSmartsPattern rather than a pointer in vectors + if (sp->Init(vs[0])) + { + if (heavy) + _contribsHeavy.push_back(pair (sp, atof(vs[1].c_str()))); + else + _contribsHydrogen.push_back(pair (sp, atof(vs[1].c_str()))); + } + else + { + delete sp; + sp = NULL; + obErrorLog.ThrowError(__FUNCTION__, " Could not parse SMARTS from contribution data file", obInfo); + + // return the locale to the original one + obLocale.RestoreLocale(); + + return false; + } + } + + // return the locale to the original one + obLocale.RestoreLocale(); + return true; + } + + + double OBGroupContrib::Predict(OBBase* pOb, string* param) + { + OBMol* pmol = dynamic_cast(pOb); + if(!pmol) + return 0.0; + + //Need to add hydrogens, so do this to a copy to leave original unchanged + OBMol mol(*pmol); + mol.AddHydrogens(false, false); + + //Read in data, unless it has already been done. + if(_contribsHeavy.empty() && _contribsHydrogen.empty()) + ParseFile(); + + vector > _mlist; // match list for atom typing + vector >::iterator j; + vector >::iterator i; + + vector atomValues(mol.NumAtoms(), 0.0); + + OBMol tmpmol; + tmpmol = mol; + + tmpmol.ConvertDativeBonds(); + + // atom contributions + //cout << "atom contributions:" << endl; + for (i = _contribsHeavy.begin();i != _contribsHeavy.end();++i) { + if (i->first->Match(tmpmol)) { + _mlist = i->first->GetMapList(); + for (j = _mlist.begin();j != _mlist.end();++j) { + atomValues[(*j)[0] - 1] = i->second; + //cout << (*j)[0] << " = " << i->first->GetSMARTS() << " : " << i->second << endl; + } + } + } + + vector hydrogenValues(tmpmol.NumAtoms(), 0.0); + //hydrogenValues.resize(tmpmol.NumAtoms()); + + // hydrogen contributions + //cout << "hydrogen contributions:" << endl; + for (i = _contribsHydrogen.begin();i != _contribsHydrogen.end();++i) { + if (i->first->Match(tmpmol)) { + _mlist = i->first->GetMapList(); + for (j = _mlist.begin();j != _mlist.end();++j) { + int Hcount = tmpmol.GetAtom((*j)[0])->GetValence() - 
tmpmol.GetAtom((*j)[0])->GetHvyValence(); + hydrogenValues[(*j)[0] - 1] = i->second * Hcount; + //cout << (*j)[0] << " = " << i->first->GetSMARTS() << " : " << i->second << endl; + } + } + } + + // total atomic and hydrogen contribution + double total = 0.0; + + for (int index = 0; index < tmpmol.NumAtoms(); index++) { + if (tmpmol.GetAtom(index+1)->IsHydrogen()) + continue; + + total += atomValues[index]; + total += hydrogenValues[index]; + } + + /* + FOR_ATOMS_OF_MOL (a, tmpmol) + cout << "hydrogens on atom " << a->GetIdx() << ": " << a->GetValence() - a->GetHvyValence() << endl; + for (int index = 0; index < tmpmol.NumAtoms(); index++) + cout << "atom " << index << ": " << atomValues[index] << endl; + for (int index = 0; index < tmpmol.NumAtoms(); index++) + cout << "hydrogen " << index << ": " << hydrogenValues[index] << endl; + */ + + return total; + } + + //****************************************************** + // Make global instances for descriptors which are all calculated + // from group contributions in the same way but with different data. + + // LogP (octanol/water partition coefficient) + OBGroupContrib thelogP("logP", "logp.txt", + "octanol/water partition coefficient"); + + // TPSA (topological polar surface area) + OBGroupContrib theTPSA("TPSA", "psa.txt", + "topological polar surface area"); + + // MR (molar refractivity) + OBGroupContrib theMR("MR", "mr.txt", + "molar refractivity"); + +//! \file groupcontrib.cpp +//! \brief Handle logP, PSA and other group-based prediction algorithms.
+ + }//namespace diff --git a/src/dlhandler_unix.cpp b/src/dlhandler_unix.cpp new file mode 100644 index 0000000..c0a03f8 --- /dev/null +++ b/src/dlhandler_unix.cpp @@ -0,0 +1,66 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains the function definitions + * used by Mychem for dynamic library management. + * @file dlhandler_unix.cpp + * @short Dynamic library management.
+ * @author Jerome Pansanel + */ + +#include + +#include +#include + +#ifdef __APPLE__ +#define OBLIBRARY "libopenbabel.4.dylib" +#else +#define OBLIBRARY "libopenbabel.so.4" +#endif + +using namespace std; + +void *LibHandler::lib_handler = NULL; + +LibHandler::LibHandler() +{ + if (!lib_handler) { + lib_handler = dlopen(OBLIBRARY, RTLD_LAZY | RTLD_GLOBAL); + } +} + +LibHandler::~LibHandler() +{ + if (lib_handler) { + dlclose(lib_handler); + } +} + +bool LibHandler::isLoaded() { + if (lib_handler) { + return true; + } + else { + return false; + } +} + diff --git a/src/dlhandler_win32.cpp b/src/dlhandler_win32.cpp new file mode 100644 index 0000000..d5d4960 --- /dev/null +++ b/src/dlhandler_win32.cpp @@ -0,0 +1,48 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions declaration + * used by Mychem and related to dynamic library management. + * @file dlhandler_win32.cpp + * @short Dynamic library management. 
+ * @author Jerome Pansanel + */ + +#include + +#include + +using namespace std; + +void *LibHandler::lib_handler = NULL; + +LibHandler::LibHandler() +{ +} + +LibHandler::~LibHandler() +{ +} + +bool LibHandler::isLoaded() { + return true; +} + diff --git a/src/fingerprints/finger2.cpp b/src/fingerprints/finger2.cpp new file mode 100644 index 0000000..9c4087d --- /dev/null +++ b/src/fingerprints/finger2.cpp @@ -0,0 +1,278 @@ +/********************************************************************** +finger2.cpp: fingerprint2 definition and implementation. + +Copyright (C) 2005 Chris Morley + +This file is part of the Open Babel project. +For more information, see + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. +***********************************************************************/ + +#include +#include +#include +#include +#include + +using namespace std; +namespace OpenBabel +{ +/// \brief Fingerprint based on linear fragments up to 7 atoms ID="FP2" +class fingerprint2 : public OBFingerprint +{ +public: + fingerprint2(const char* ID, bool IsDefault=false) + : OBFingerprint(ID, IsDefault), _flags(0){}; + + virtual const char* Description() + { return "Indexes linear fragments up to 7 atoms.";}; + + //Calculates the fingerprint + virtual bool GetFingerprint(OBBase* pOb, vector&fp, int nbits=0); + + /// \returns fragment info unless SetFlags(OBFingerprint::FPT_NOINFO) has been called before GetFingerprint() called. 
+ /** Structure of a fragment (vector) + For a complete ring: last atom bonded to first atom + bo(0)(n), atno(1), bo(1)(2), atno(2), bo(2)(3),...atno(n) + For the rest, even when stopped by encountering atoms already visited + 0 , atno(1), bo(1)(2), atno(2), bo(2)(3),...atno(n) + **/ +virtual std::string DescribeBits(const std:: vector fp, bool bSet=true) + { return _ss.str(); } + + virtual unsigned int Flags() { return _flags;}; + virtual void SetFlags(unsigned int f){ _flags=f; } + +private: + typedef std::set > Fset; + typedef std::set >::iterator SetItr; + + void getFragments(std::vector levels, std::vector curfrag, + int level, OBAtom* patom, OBBond* pbond); + void DoReverses(); + void DoRings(); + + unsigned int CalcHash(const std::vector& frag); + void PrintFpt(const std::vector& f, int hash=0); + + Fset fragset; + Fset ringset; + stringstream _ss; + unsigned int _flags; + +}; + +//*********************************************** +//Make a global instance +fingerprint2 thefingerprint2("FP2",true); +//*********************************************** + +/*! class fingerprint2 +Similar to Fabien Fontain's fingerprint class, with a slightly improved +algorithm, but re-written using STL which makes it shorter. + +A molecule structure is analysed to identify linear fragments of length +from one to Max_Fragment_Size = 7 atoms but single atom fragments of C,N,and O +are ignored. A fragment is terminated when the atoms form a ring. + +For each of these fragments the atoms, bonding and whether +they constitute a complete ring is recorded and saved in a std::set +so that there is only one of each fragment type. Chemically identical versions, +i.e. ones with the atoms listed in reverse order and rings listed starting at +different atoms, are identified and only a single canonical fragment is retained. 
+ +Each remaining fragment is assigned a hash number from 0 to 1020 which is used +to set a bit in a 1024 bit vector +*/ + +bool fingerprint2::GetFingerprint(OBBase* pOb, vector&fp, int nbits) +{ + OBMol* pmol = dynamic_cast(pOb); + if(!pmol) return false; + fp.resize(1024/Getbitsperint()); + fragset.clear();//needed because now only one instance of fp class + ringset.clear(); + + //identify fragments starting at every atom + OBAtom *patom; + vector::iterator i; + for (patom = pmol->BeginAtom(i);patom;patom = pmol->NextAtom(i)) + { + if(patom->IsHydrogen()) continue; + vector curfrag; + vector levels(pmol->NumAtoms()); + getFragments(levels, curfrag, 1, patom, NULL); + } + +// TRACE("%s %d frags before; ",pmol->GetTitle(),fragset.size()); + + //Ensure that each chemically identical fragment is present only in a single + DoRings(); + DoReverses(); + + SetItr itr; + _ss.str(""); + for(itr=fragset.begin();itr!=fragset.end();++itr) + { + //Use hash of fragment to set a bit in the fingerprint + int hash = CalcHash(*itr); + SetBit(fp,hash); + if(!(Flags() & FPT_NOINFO)) + PrintFpt(*itr,hash); + } + if(nbits) + Fold(fp, nbits); + +// TRACE("%d after\n",fragset.size()); + return true; +} + +////////////////////////////////////////////////////////// +void fingerprint2::getFragments(vector levels, vector curfrag, + int level, OBAtom* patom, OBBond* pbond) +{ + //Recursive routine to analyse schemical structure and populate fragset and ringset + //Hydrogens,charges(except dative bonds), spinMultiplicity ignored + const int Max_Fragment_Size = 7; + int bo=0; + if(pbond) + { + bo = pbond->IsAromatic() ? 
5 : pbond->GetBO(); + +// OBAtom* pprevat = pbond->GetNbrAtom(patom); +// if(patom->GetFormalCharge() && (patom->GetFormalCharge() == -pprevat->GetFormalCharge())) +// ++bo; //coordinate (dative) bond eg C[N+]([O-])=O is seen as CN(=O)=O + } + curfrag.push_back(bo); + curfrag.push_back(patom->GetAtomicNum()); + levels[patom->GetIdx()-1] = level; + + vector::iterator itr; + OBBond *pnewbond; +// PrintFpt(curfrag,(int)patom); + for (pnewbond = patom->BeginBond(itr);pnewbond;pnewbond = patom->NextBond(itr)) + { + if(pnewbond==pbond) continue; //don't retrace steps + OBAtom* pnxtat = pnewbond->GetNbrAtom(patom); + if(pnxtat->IsHydrogen()) continue; + + int atlevel = levels[pnxtat->GetIdx()-1]; + if(atlevel) //ring + { + if(atlevel==1) + { + //If complete ring (last bond is back to starting atom) add bond at front + //and save in ringset + curfrag[0] = bo; + ringset.insert(curfrag); + } + } + else //no ring + { + if(level1 || patom->GetAtomicNum()>8 || patom->GetAtomicNum()<6)) + { + fragset.insert(curfrag); //curfrag ignored if an identical fragment already present +// PrintFpt(curfrag,level); + } +} + +/////////////////////////////////////////////////// +void fingerprint2::DoReverses() +{ + SetItr itr; + for(itr=fragset.begin();itr!=fragset.end();) + { + //Reverse the order of the atoms, add the smallest fragment and remove the larger + SetItr titr = itr++; //Ensure have valid next iterator in case current one is erased + vector t1(*titr); //temporary copy + reverse(t1.begin()+1, t1.end()); //(leave 0 at front alone) + if(t1!=*titr) + { + //Add the larger fragment and delete the smaller + if(t1>*titr) + { + fragset.erase(titr); + fragset.insert(t1); + } + else + fragset.erase(t1); + } + } +} +/////////////////////////////////////////////////// +void fingerprint2::DoRings() +{ + //For each complete ring fragment, find its largest chemically identical representation + //by rotating and reversing, and insert into the main set of fragments + SetItr itr; + 
for(itr=ringset.begin();itr!=ringset.end();++itr) + { + vector t1(*itr); //temporary copy + vector maxring(*itr); //the current largest vector + unsigned int i; + for(i=0;imaxring) + maxring=t1; + + //Add the non-ring form of all ring rotations + int tmp = t1[0]; + t1[0] = 0; + fragset.insert(t1); + t1[0] = tmp; + + //reverse the direction around ring + vector t2(t1); + reverse(t2.begin()+1, t2.end()); + if(t2>maxring) + maxring=t2; + } + fragset.insert(maxring); + //PrintFpt(maxring,0); + } +} + +////////////////////////////////////////////////////////// +unsigned int fingerprint2::CalcHash(const vector& frag) +{ + //Something like... whole of fragment treated as a binary number modulus 1021 + const int MODINT = 108; //2^32 % 1021 + unsigned int hash=0; + for(unsigned i=0;i& f, int hash) +{ + unsigned int i; + for(i=0;i" << endl; +} + +} //namespace OpenBabel + +//! \file finger2.cpp +//! \brief fingerprint2 definition and implementation diff --git a/src/fingerprints/finger3.cpp b/src/fingerprints/finger3.cpp new file mode 100644 index 0000000..bd27079 --- /dev/null +++ b/src/fingerprints/finger3.cpp @@ -0,0 +1,318 @@ +/********************************************************************** +finger3.cpp: Fingerprints based on list of SMARTS patterns +Copyright (C) 2005 Chris Morley + +This file is part of the Open Babel project. +For more information, see + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation version 2 of the License. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+***********************************************************************/ + +#include +#include + +#include +#include +#include +#include + +#include + +using namespace std; +namespace OpenBabel +{ +/// \brief Fingerprint based on list of SMARTS patterns +class PatternFP : public OBFingerprint +{ +private: + struct pattern + { + string smartsstring; + OBSmartsPattern obsmarts; + string description; + int numbits; + int numoccurrences; + int bitindex; + }; + vector _pats; + int _bitcount; + +protected: + string _patternsfile; + +public: + PatternFP(const char* ID, const char* filename=NULL, + bool IsDefault=false) : OBFingerprint(ID, IsDefault) + { + if(filename==NULL) + _patternsfile="patterns.txt"; + else + _patternsfile = filename; + } + +///////////////////////////////////////////////////////////////////////////// + virtual const char* Description() + { + static string desc; + desc = "SMARTS patterns specified in the file " + _patternsfile + + "\nPatternFP is definable"; + return (desc.c_str()); + } + +////////////////////////////////////////////////////////////////////////////// + //Each bit represents a single substructure; no need for confirmation when substructure searching + virtual unsigned int Flags() { return FPT_UNIQUEBITS;}; + +/////////////////////////////////////////////////////////////////////////////// + virtual PatternFP* MakeInstance(const std::vector& textlines) + { + return new PatternFP(textlines[1].c_str(),textlines[2].c_str()); + } + +//////////////////////////////////////////////////////////////////////////////// + virtual bool GetFingerprint(OBBase* pOb, vector&fp, int foldbits) + { + OBMol* pmol = dynamic_cast(pOb); + if(!pmol) + return false; + + unsigned int n; + //Read patterns file if it has not been done already + if(_pats.empty()) + ReadPatternFile(); + + //Make fp size the smallest power of two to contain the patterns + n=Getbitsperint(); + while(n < _bitcount) + n*=2; + fp.resize(n/Getbitsperint()); + + n=0; //bit position + 
vector::iterator ppat; + for(ppat=_pats.begin();ppat!=_pats.end();++ppat) + { + if(ppat->numbits //ignore pattern if numbits==0 + && ppat->obsmarts.Match(*pmol, ppat->numoccurrences==0))//do single match if all that's needed + { + /* Set bits in the fingerprint depending on the number of matches in the molecule + and the parameters, numbits and numoccurrences, in the pattern. + The pattern will set or clear numbits bits in the fingerprint. + They will be in numoccurrences+1 groups, each containing an approximately + equal number of bits. + The first group of bits will be set if numMatches > numoccurences; + The second group will be set if numMatches > numoccurrences - 1; + and so on. + So with a pattern with numbits = 4 and numoccurences = 2, + the groups would be 1, 1, and 2 bits. + A molecule with + 1 match to the pattern would give 0011 + 2 matches to the pattern would give 0111 + 3 or more matches to the pattern would give 1111 + */ + int numMatches = ppat->obsmarts.GetUMapList().size(); + int num = ppat->numbits, div = ppat->numoccurrences+1, ngrp; + + int i = n; + while(num) + { + ngrp = (num -1)/div-- +1; //rounds up + num -= ngrp; + while(ngrp--) + if (numMatches > div) { + SetBit(fp,i); + } + i++; + } + } + n += ppat->numbits; + } + + if(foldbits) + Fold(fp, foldbits); + return true; + } + + ///////////////////////////////////////////////////////////////////// + bool ReadPatternFile() + { + //Reads three types of file. 
See below + ifstream ifs; + stringstream errorMsg; + + if (OpenDatafile(ifs, _patternsfile).length() == 0) + { + errorMsg << "Cannot open " << _patternsfile << endl; + obErrorLog.ThrowError(__FUNCTION__, errorMsg.str(), obError); + return false; + } + + string line; + if(!getline(ifs, line)) //first line + return false; + bool smartsfirst = (Trim(line)=="#Comments after SMARTS"); + + _bitcount=0; + do + { + if(Trim(line).size()>0 && line[0]!='#') + { + pattern p; + p.numbits=1; p.numoccurrences=0; //default values + p.bitindex = _bitcount; + istringstream ss(line); + + if(smartsfirst) + { + if(isdigit(line[0])) + { + if(!ParseRDKitFormat(ss, p)) + continue; + } + else + //Original format, which looks like: + // SMARTS description + ss >> p.smartsstring >> p.description; + } + else + { + // Christian Laggner's format: + // description: SMARTS [occurrences [numbits]] + getline(ss, p.description, ':'); + ss >> p.smartsstring; + ss >> p.numoccurrences >> p.numbits; + } + + if(!p.obsmarts.Init(p.smartsstring)) + { + obErrorLog.ThrowError(__FUNCTION__, + "Faulty SMARTS: " + p.description + ' ' + p.smartsstring, obError); + continue; + } + _pats.push_back(p); + _bitcount += p.numbits; + } + }while(getline(ifs,line)); + + if (ifs) + ifs.close(); + return true; + } + +/////////////////////////////////////////////////////////////////////////////// + virtual string DescribeBits(const vector fp, bool bSet=true) + { + //checkmol-type output with tab separated functional group names + stringstream ss; + vector::iterator ppat; + for(ppat=_pats.begin();ppat!=_pats.end();++ppat) + { + int n = ppat->bitindex; + int num = ppat->numbits, div = ppat->numoccurrences+1, ngrp; + while(num) //for each group of bits + { + ngrp = (num + div -1)/div--; //rounds up + num -= ngrp; + if(GetBit(fp, n) == bSet) + { + ss << '\t' << ppat->description; + if(div>0) + ss << '*' << div+1; + break; //ignore the bits signifying a smaller number of occurrences + } + n += ngrp; + } + } + ss << endl; + 
return ss.str(); + } + +/////////////////////////////////////////////////////////////////////////////////// + bool ParseRDKitFormat(istringstream& ss, pattern& p) + { + //rdkit format, e.g. + // 14:('[S,s]-[S,s]',0), # S-S + const int dum = 20; //an arbitrary number in case delimiters in ignore statements not found + string number, comment; + getline(ss, number, ':'); + ss.ignore(dum, '\''); + getline(ss, p.smartsstring, '\''); + if(p.smartsstring[0]=='?') //ignore patterns with SMARTS '?' + p.smartsstring="[999]";//this seems to match nothing; was return false; + ss.ignore(dum,','); + ss >> p.numoccurrences; + ss.ignore(dum,'#'); + getline(ss, comment); + + //description is number + edited commment + Trim(comment); + string::size_type pos; + pos = comment.find("FIX"); + if(pos==string::npos) + pos = comment.find("*NOTE*"); + if(pos!=string::npos) + comment.erase(pos); + p.description = number + ": " + comment; + return true; + } + + +}; //class PatternFP + +//*********************************************** +//Make a global instance +PatternFP FP3PatternFP("FP3"); + +PatternFP FP4PatternFP("FP4", "SMARTS_InteLigand.txt"); +//*********************************************** + +/*! \class PatternFP +A bit is set when there is a match to one of a list +of SMARTS patterns in the datafile, which is specified in the constructor. +If no filename is given, the default filename is patterns.txt. +Fingerprints can be made by declaring a global variable, as in: + +PatternFP FP4PatternFP("FP4", "SMARTS_InteLigand.txt"); + +Alternatively, an entry in plugindefines.txt like: + +PatternFP +MACCS #ID of this fingerprint type +MACCS.txt #File containing the SMARTS patterns + +defines a fingerprint without the need to recompile. + +Three file formats are supported: + - the preferred format (e.g. 
SMARTS_InteLigand.txt in FP4) + - the original format (patterns.txt has an incomplete set of SMARTS patterns) + - a format made by extracting from an RDKit file (MACCS.txt) +The last two require the first line to be: +#Comments after SMARTS + +Lines starting with # are ignored. +For the preferred format each line is of the form: +description: SMARTS [occurrences [numbits]] +A bit is set in the fingerprint for each SMARTS pattern matched. +The optional integer parameters refine this behaviour; the most obvious uses are: + - if occurrences is present and greater than its default value of 0, the bit + is set only if the number of matches to the pattern is greater than occurrences. + - if occurrences is 0 and numbits is greater than its default value of 1, then + the fingerprint has numbits bits set if there is a match. This gives greater weight + to the pattern for use in similarity measures like Tanimoto. + - if the parameters are n-1 and n and the number of matches is m, + a bit is set for each of the conditions m>=n, m>=n-1, ... , m>=1 + This can be used to distinguish structures with many similar atoms like n-alkanes. +The use of other values for the parameters, which can be any positive integer, can give +other analogous behaviours. If numbits is 0 the pattern is ignored. +*/ + +}//namespace + +//! \file finger3.cpp +//! \brief fingerprints based on list of SMARTS patterns diff --git a/src/helper.c b/src/helper.c new file mode 100644 index 0000000..6608a06 --- /dev/null +++ b/src/helper.c @@ -0,0 +1,182 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version.
* + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions used by + * MySQL and related to generic informations. + * @file helper.c + * @short Function definitions of the Mychem helper module. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "helper.h" +#include "helper_wrapper.h" +#include + +#ifdef HAVE_DLOPEN + +my_bool mychem_version_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 0) { + strcpy(message, "Wrong number of arguments: MYCHEM_VERSION() requires no argument"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_NAME_LENGTH; + initid->const_item = 1; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void mychem_version_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *mychem_version(UDF_INIT *initid, UDF_ARGS *args __attribute__((unused)), char *result, unsigned long *length, char *is_null, char *error) +{ + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + *length = strlen(MYCHEM_VERSION); + strncat(initid->ptr, MYCHEM_VERSION, *length); + + return initid->ptr; +} + +my_bool openbabel_version_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 0) { + strcpy(message, "Wrong 
number of arguments: OPENBABEL_VERSION() requires no argument"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_NAME_LENGTH; + initid->const_item = 1; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void openbabel_version_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *openbabel_version(UDF_INIT *initid, UDF_ARGS *args __attribute__((unused)), char *result, unsigned long *length, char *is_null, char *error) +{ + char *babel_version = NULL; + + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + babel_version = getOpenBabelVersion(); + + /* Return NULL if babel_version is NULL */ + if (babel_version == NULL) { + *is_null=1; + *error = 1; + + return NULL; + } + + *length = strlen(babel_version); + strncat(initid->ptr, babel_version, *length); + + free(babel_version); + + return initid->ptr; +} + +my_bool inchi_version_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 0) { + strcpy(message, "Wrong number of arguments: INCHI_VERSION() requires no argument"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_NAME_LENGTH; + initid->const_item = 1; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void inchi_version_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *inchi_version(UDF_INIT *initid, UDF_ARGS *args __attribute__((unused)), char *result, unsigned long *length, char *is_null, char *error) +{ + char *inchi_version = NULL; + + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + inchi_version = getInChIVersion(); + + /* Return NULL if inchi_version is NULL */ + if (inchi_version == NULL) { + *is_null=1; + *error = 1; 
+ + return NULL; + } + + *length = strlen(inchi_version); + strncat(initid->ptr, inchi_version, *length); + + free(inchi_version); + + return initid->ptr; +} +#endif /* HAVE_DLOPEN */ + diff --git a/src/helper.h b/src/helper.h new file mode 100644 index 0000000..e3a2e06 --- /dev/null +++ b/src/helper.h @@ -0,0 +1,137 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * used by MySQL and related to Mychem informations. + * @file helper.h + * @short Function declarations of the Mychem helper module. 
+ * @author Jerome Pansanel + */ + +#ifndef __HELPER_H +#define __HELPER_H + +#ifdef STANDARD +/* STANDARD is defined, don't use any mysql functions */ +#include +#include +#include +#ifdef __WIN__ +typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */ +typedef __int64 longlong; +#else +typedef unsigned long long ulonglong; +typedef long long longlong; +#endif /*__WIN__*/ +#else +#include +#include +#endif /* STANDARD */ +#include +#include +#include /* To get strmov() */ + +#ifdef HAVE_DLOPEN + +/** + * @short Initializes the mychem_version function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool mychem_version_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by mychem_version_init(). + * @param initid The structure filled by mychem_version_init() + */ +void mychem_version_deinit(UDF_INIT *initid); + +/** + * @short Returns the version of Mychem. + * @param initid A structure filled by mychem_version_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The version of Mychem + */ +char *mychem_version(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the openbabel_version function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool openbabel_version_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by openbabel_version_init(). + * @param initid The structure filled by openbabel_version_init() + */ +void openbabel_version_deinit(UDF_INIT *initid); + +/** + * @short Returns the version of the Open Babel library. + * @param initid A structure filled by openbabel_version_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The version of the Open Babel library + */ +char *openbabel_version(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the inchi_version function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool inchi_version_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by inchi_version_init(). + * @param initid The structure filled by inchi_version_init() + */ +void inchi_version_deinit(UDF_INIT *initid); + +/** + * @short Returns the version of the InChI library. 
+ * @param initid A structure filled by inchi_version_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The version of the InChI library + */ +char *inchi_version(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); +#endif /* HAVE_DLOPEN */ + +#endif /* __HELPER_H */ + diff --git a/src/helper_wrapper.cpp b/src/helper_wrapper.cpp new file mode 100644 index 0000000..b457203 --- /dev/null +++ b/src/helper_wrapper.cpp @@ -0,0 +1,66 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions related + * to the helper module. + * @file helper_wrapper.cpp + * @short Mychem Helper Wrapper. 
+ * @author Jerome Pansanel + */ + +#include "helper_wrapper.h" + +#include + +#include +#include + +using namespace std; + +char *getOpenBabelVersion() +{ + // pb: BABEL_VERSION is defined at compilation. BABEL_VERSION should be + // defined at run time Try to get VERSION for the dynamic lib + return strdup(BABEL_VERSION); +} + +char *getInChIVersion() +{ + string version; + + if (!strncmp(BABEL_VERSION, "2.2.0", 5)) { + version = "1.0"; + } + else if (!strncmp(BABEL_VERSION, "2.2.1", 5)) { + version = "1.0"; + } + else if (!strncmp(BABEL_VERSION, "2.2.2", 5)) { + version = "1.02"; + } + else if (!strncmp(BABEL_VERSION, "2.2.3", 5)) { + version = "1.02"; + } + else { + version = "unknown"; + } + + return strdup(version.c_str()); +} diff --git a/src/helper_wrapper.h b/src/helper_wrapper.h new file mode 100644 index 0000000..7c7730c --- /dev/null +++ b/src/helper_wrapper.h @@ -0,0 +1,54 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. 
It contains function declarations + * related to Mychem informations. + * @file helper_wrapper.h + * @short Wrapper for the Mychem helper module. + * @author Jerome Pansanel + */ + +#ifndef __HELPER_WRAPPER_H +#define __HELPER_WRAPPER_H + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @short Returns the version of the Open Babel library. + * @return The version of the Open Babel library. + */ + char *getOpenBabelVersion(); + + /** + * @short Returns the version of the InChI library. + * @return The version of the InChI library. + */ + char *getInChIVersion(); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __HELPER_WRAPPER_H */ + diff --git a/src/modification.c b/src/modification.c new file mode 100644 index 0000000..0aa5d0e --- /dev/null +++ b/src/modification.c @@ -0,0 +1,255 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. 
It contains functions used by + * MySQL and related to molecule modifications. + * @file modification.c + * @short Function definitions of the Mychem modification module. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "modification.h" +#include "modification_wrapper.h" +#include + +#ifdef HAVE_DLOPEN + +my_bool add_hydrogens_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: ADD_HYDROGENS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: ADD_HYDROGENS() requires a STRING"); + return 1; + } + + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void add_hydrogens_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *add_hydrogens(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = ob_add_hydrogens(inputMol); + + /* Return NULL if outputMol is NULL */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool remove_hydrogens_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ 
+ if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: REMOVE_HYDROGENS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: REMOVE_HYDROGENS() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void remove_hydrogens_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *remove_hydrogens(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = ob_remove_hydrogens(inputMol); + + /* Return NULL if outputMol is NULL */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +my_bool strip_salts_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: REMOVE_HYDROGENS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: REMOVE_HYDROGENS() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) 
malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void strip_salts_deinit(UDF_INIT *initid) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *strip_salts(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *outputMol = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + outputMol = ob_strip_salts(inputMol); + + /* Return NULL if outputMol is NULL */ + if (outputMol == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(outputMol); + strncat(initid->ptr, outputMol, *length); + + free(inputMol); + free(outputMol); + + return initid->ptr; +} + +#endif /* HAVE_DLOPEN */ + diff --git a/src/modification.h b/src/modification.h new file mode 100644 index 0000000..8325847 --- /dev/null +++ b/src/modification.h @@ -0,0 +1,138 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * used by MySQL and related to molecule modifications. + * @file modification.h + * @short Function declarations of the Mychem modification module. + * @author Jerome Pansanel + */ + +#ifndef __MODIFICATION_H +#define __MODIFICATION_H + +#ifdef STANDARD +/* STANDARD is defined, don't use any mysql functions */ +#include +#include +#include +#ifdef __WIN__ +typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */ +typedef __int64 longlong; +#else +typedef unsigned long long ulonglong; +typedef long long longlong; +#endif /*__WIN__*/ +#else +#include +#include +#endif /* STANDARD */ +#include +#include +#include /* To get strmov() */ + +#ifdef HAVE_DLOPEN + +/** + * @short Initializes the add_hydrogens function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool add_hydrogens_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by add_hydrogens_init(). + * @param initid The structure filled by add_hydrogens_init() + */ +void add_hydrogens_deinit(UDF_INIT *initid); + +/** + * @short Add hydrogens to a molecule. 
+ * @param initid A structure filled by add_hydrogens_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The hydrogenated molecule + */ +char *add_hydrogens(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the remove_hydrogens function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool remove_hydrogens_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by remove_hydrogens_init(). + * @param initid The structure filled by remove_hydrogens_init() + */ +void remove_hydrogens_deinit(UDF_INIT *initid); + +/** + * @short Remove hydrogens to a molecule. + * @param initid A structure filled by remove_hydrogens_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The dehydrogenated molecule + */ +char *remove_hydrogens(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the strip_salts function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool strip_salts_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by strip_salts_init(). + * @param initid The structure filled by strip_salts_init() + */ +void strip_salts_deinit(UDF_INIT *initid); + +/** + * @short Removes all atoms from the molecule, except for the larger contiguous fragment. + * @param initid A structure filled by strip_salts_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The cleaned molecule + */ +char *strip_salts(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +#endif /* HAVE_DLOPEN */ + +#endif /* __MODIFICATION_H */ + diff --git a/src/modification_wrapper.cpp b/src/modification_wrapper.cpp new file mode 100644 index 0000000..f44a78f --- /dev/null +++ b/src/modification_wrapper.cpp @@ -0,0 +1,130 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions related + * to molecule modifications. + * @file modification_wrapper.cpp + * @short Mychem Modification Wrapper. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "modification_wrapper.h" +#include +#include + +#include +#include + +#include + +using namespace std; +using namespace OpenBabel; + +char *ob_add_hydrogens(const char *molecule) +{ + string instring(molecule); + istringstream inStream(instring); + ostringstream outStream; + string outstring; + + char *retVal = NULL; + + LibHandler lib_ob; + + if (!lib_ob.isLoaded()) { + return retVal; + } + + OBMol mol; + OBConversion conv; + + conv.SetInAndOutFormats(MOLECULE_TYPE,MOLECULE_TYPE); + + conv.Read(&mol,&inStream); + mol.AddHydrogens(false,false); + conv.Write(&mol,&outStream); + + outstring = outStream.str(); + retVal = strdup(outstring.c_str()); + + return retVal; +} + +char *ob_remove_hydrogens(const char *molecule) +{ + string instring(molecule); + istringstream inStream(instring); + ostringstream outStream; + string outstring; + + char *retVal = NULL; + + LibHandler lib_ob; + + if (!lib_ob.isLoaded()) { + return retVal; + } + + OBMol mol; + OBConversion conv; + + if (conv.SetInAndOutFormats(MOLECULE_TYPE,MOLECULE_TYPE)) { + conv.Read(&mol,&inStream); + mol.DeleteHydrogens(); + conv.Write(&mol,&outStream); + outstring = outStream.str(); + 
retVal = strdup(outstring.c_str()); + } + + return retVal; +} + +char *ob_strip_salts(const char *molecule) +{ + string instring(molecule); + istringstream inStream(instring); + ostringstream outStream; + string outstring; + + char *retVal = NULL; + + LibHandler lib_ob; + + if (!lib_ob.isLoaded()) { + return retVal; + } + + OBMol mol; + OBConversion conv(&inStream, &outStream); + + if (conv.SetInAndOutFormats(MOLECULE_TYPE,MOLECULE_TYPE)) { + conv.Read(&mol); + mol.StripSalts(3); + conv.Write(&mol); + outstring = outStream.str(); + retVal = strdup(outstring.c_str()); + } + + return retVal; +} + diff --git a/src/modification_wrapper.h b/src/modification_wrapper.h new file mode 100644 index 0000000..06b5181 --- /dev/null +++ b/src/modification_wrapper.h @@ -0,0 +1,65 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * related to molecule modifications. 
+ * @file modification_wrapper.h + * @short Wrapper for the Mychem modification module. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#ifndef __MODIFICATION_WRAPPER_H +#define __MODIFICATION_WRAPPER_H + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @short Adds the hydrogen atoms of a molecule. + * @param molecule The molecule where to add hydrogens. + * @return The modified molecule. + */ + char *ob_add_hydrogens(const char *molecule); + + /** + * @short Removes the hydrogen atoms of a molecule. + * @param molecule The molecule where to remove hydrogens. + * @return The modified molecule. + */ + char *ob_remove_hydrogens(const char *molecule); + + /** + * @short Removes all atoms except for the larger contiguous fragment. + * @param molecule The molecule where to remove the salts. + * @return The cleaned molecule. + */ + char *ob_strip_salts(const char *molecule); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MODIFICATION_WRAPPER_H */ + diff --git a/src/molmatch.c b/src/molmatch.c new file mode 100644 index 0000000..f7bf74d --- /dev/null +++ b/src/molmatch.c @@ -0,0 +1,579 @@ +/*************************************************************************** + * Copyright (C) 2009-2014 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. 
* + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions used by + * MySQL and related to chemical file comparison. + * @file molmatch.c + * @short Function definitions of the Mychem molmatch module. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "molmatch.h" +#include "molmatch_wrapper.h" +#include +#include +#include + + +#ifdef HAVE_DLOPEN + +my_bool match_substruct_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 2) { + strcpy(message, "Wrong number of arguments: MATCH_SUBSTRUCT() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + strcpy(message,"Wrong argument type: MATCH_SUBSTRUCT() requires a STRING and a STRING"); + return 1; + } + + if (args->args[0] == NULL) { + strcpy(message,"The first argument is null"); + + return 1; + } + + if (args->args[1] == NULL) { + strcpy(message,"The second argument is null"); + + return 1; + } + + if (args->lengths[1] < sizeof(unsigned int)) { + strcpy(message,"The second argument is invalid"); + + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void match_substruct_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong match_substruct(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *queryMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + unsigned int *intptr = (unsigned int *) args->args[1]; + unsigned int totalsize = intptr[0]; + longlong match_bool = 0; + + if (totalsize == 0) { + /* empty molecule */ + free(queryMol); + + return 0; + } 
+ + strncpy(queryMol, args->args[0], args->lengths[0]); + queryMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + match_bool = match_substructure(queryMol, args->args[1]); + + free(queryMol); + + return match_bool; +} + +my_bool substruct_atom_ids_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 2) { + strcpy(message, "Wrong number of arguments: SUBSTRUCT_ATOM_IDS() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + strcpy(message,"Wrong argument type: SUBSTRUCT_ATOM_IDS() requires a STRING and a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void substruct_atom_ids_deinit(UDF_INIT *initid __attribute__((unused))) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *substruct_atom_ids(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *queryMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + unsigned int *intptr = (unsigned int *) args->args[1]; + unsigned int totalsize = intptr[0]; + char *list = NULL; + + if ((args->args[0] == NULL) || (args->args[1] == NULL)) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(queryMol); + + return NULL; + } + + if (totalsize == 0) { + /* empty molecule */ + free(queryMol); + + return 0; + } + + strncpy(queryMol, args->args[0], args->lengths[0]); + queryMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + list = substructure_atom_ids(queryMol, args->args[1]); + + /* Return NULL if list is NULL */ + if (list == NULL) { + *is_null = 1; + *error = 1; + free(queryMol); + + return 0; + } + + 
*length = strlen(list); + strncat(initid->ptr, list, *length); + + free(queryMol); + free(list); + + return initid->ptr; +} + +my_bool tanimoto_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 2) { + strcpy(message, "Wrong number of arguments: TANIMOTO() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + strcpy(message, "Wrong argument type: TANIMOTO() requires a STRING and a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void tanimoto_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +double tanimoto(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null __attribute__((unused)), char *error) +{ + int *intptr1 = NULL; + int *intptr2 = NULL; + unsigned long int *ulintptr1 = NULL; + unsigned long int *ulintptr2 = NULL; + unsigned long int fpLength1 = 0; + unsigned long int fpLength2 = 0; + int andfp = 0, orfp = 0; + unsigned long int andbits = 0, orbits = 0; + unsigned long int i = 0; + + if ((args->args[0] == NULL) || (args->args[1] == NULL)) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + + return 0; + } + + *error = 0; + ulintptr1 = (unsigned long int *) args->args[0]; + fpLength1 = ulintptr1[0]; + ulintptr2 = (unsigned long int *) args->args[1]; + fpLength2 = ulintptr2[0]; + + if (fpLength1 != fpLength2) { + /* The both fingerprint must have the same length) */ + *is_null = 1; + *error = 1; + + return 0; + } + + if ((fpLength1 % sizeof(int)) || (fpLength2 % sizeof(int))) { + /* The length of the fingerprint must be a multiple of long int */ + *is_null = 1; + *error = 1; + + return 0; + } + + ++ulintptr1; + ++ulintptr2; + intptr1 = (int*) ulintptr1; + intptr2 = (int*) ulintptr2; + + /** + * Tanimoto coefficient is defined by: + * Number of bits set in (patternFP & targetFP) / + * Number of bits in (patternFP | targetFP) + */ + for(i = 0; i < fpLength1/sizeof(int); ++i) { + andfp = 
intptr1[i] & intptr2[i]; + orfp = intptr1[i] | intptr2[i]; + for (; andfp; andfp = andfp<<1) { + if (andfp < 0) { ++andbits; } + } + for (; orfp; orfp = orfp<<1) { + if (orfp < 0) { ++orbits; } + } + } + + if (orbits == 0) { + return 0.0; + } + + return (double) andbits / (double) orbits; +} + +my_bool substruct_count_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 2) { + strcpy(message, "Wrong number of arguments: SUBSTRUCT_COUNT() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + strcpy(message,"Wrong argument type: SUBSTRUCT_COUNT() requires a STRING and a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void substruct_count_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong substruct_count(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *queryMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + unsigned int *intptr = (unsigned int *) args->args[1]; + unsigned int totalsize = intptr[0]; + longlong number_of_substructure = 0; + + if ((args->args[0] == NULL) || (args->args[1] == NULL)) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(queryMol); + + return 0; + } + + if (totalsize == 0) { + /* empty molecule */ + free(queryMol); + + return 0; + } + + strncpy(queryMol, args->args[0], args->lengths[0]); + queryMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + number_of_substructure = substructure_count(queryMol, args->args[1]); + + free(queryMol); + + return number_of_substructure; +} + +my_bool bit_fp_and_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 2) { + strcpy(message, "Wrong number of arguments: BIT_FP_AND() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + 
strcpy(message, "Wrong argument type: BIT_FP_AND() requires a STRING and a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void bit_fp_and_deinit(UDF_INIT *initid __attribute__((unused))) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *bit_fp_and(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + unsigned long int *ulintptr1 = NULL; + unsigned long int *ulintptr2 = NULL; + unsigned long int *ulintptr3 = NULL; + int *intptr1 = NULL; + int *intptr2 = NULL; + int *intptr3 = NULL; + unsigned long int fpLength1 = 0; + unsigned long int fpLength2 = 0; + int andInt = 0; + unsigned long int i = 0; + + if ((args->args[0] == NULL) || (args->args[1] == NULL)) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + + return 0; + } + + ulintptr1 = (unsigned long int *) args->args[0]; + fpLength1 = ulintptr1[0]; + ulintptr2 = (unsigned long int *) args->args[1]; + fpLength2 = ulintptr2[0]; + + if (fpLength1 != fpLength2) { + /* The both fingerprint must have the same length) */ + *is_null = 1; + *error = 1; + + return 0; + } + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + ulintptr3 = (unsigned long int *) initid->ptr; + memcpy(ulintptr3, &fpLength1, sizeof(unsigned long int)); + + ++ulintptr1; + intptr1 = (int *) ulintptr1; + ++ulintptr2; + intptr2 = (int *) ulintptr2; + ++ulintptr3; + intptr3 = (int *) ulintptr3; + + for(i = 0; i < fpLength1; ++i) { + andInt = *intptr1 & *intptr2; + memcpy(intptr3, &andInt, sizeof(int)); + ++intptr1; + ++intptr2; + ++intptr3; + } + + *length = fpLength1 + sizeof(unsigned long int); + + return initid->ptr; +} + +my_bool bit_fp_or_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count 
!= 2) { + strcpy(message, "Wrong number of arguments: BIT_FP_OR() requires two arguments"); + return 1; + } + if ((args->arg_type[0] != STRING_RESULT) || (args->arg_type[1] != STRING_RESULT)) { + strcpy(message, "Wrong argument type: BIT_FP_OR() requires a STRING and a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void bit_fp_or_deinit(UDF_INIT *initid __attribute__((unused))) +{ + if (initid->ptr) { + free(initid->ptr); + } +} + +char *bit_fp_or(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + unsigned long int *ulintptr1 = NULL; + unsigned long int *ulintptr2 = NULL; + unsigned long int *ulintptr3 = NULL; + int *intptr1 = NULL; + int *intptr2 = NULL; + int *intptr3 = NULL; + unsigned long int fpLength1 = 0; + unsigned long int fpLength2 = 0; + int orInt = 0; + unsigned long int i = 0; + + if ((args->args[0] == NULL) || (args->args[1] == NULL)) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + + return 0; + } + + ulintptr1 = (unsigned long int *) args->args[0]; + fpLength1 = ulintptr1[0]; + ulintptr2 = (unsigned long int *) args->args[1]; + fpLength2 = ulintptr2[0]; + + if (fpLength1 != fpLength2) { + /* The both fingerprint must have the same length) */ + *is_null = 1; + *error = 1; + + return 0; + } + + memset(initid->ptr, 0, sizeof(char)*initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + ulintptr3 = (unsigned long int *) initid->ptr; + memcpy(ulintptr3, &fpLength1, sizeof(unsigned long int)); + + ++ulintptr1; + intptr1 = (int *) ulintptr1; + ++ulintptr2; + intptr2 = (int *) ulintptr2; + ++ulintptr3; + intptr3 = (int *) ulintptr3; + + for(i = 0; i < fpLength1; ++i) { + orInt = *intptr1 | *intptr2; + memcpy(intptr3, &orInt, sizeof(int)); + ++intptr1; + ++intptr2; + 
++intptr3; + } + + *length = fpLength1 + sizeof(unsigned long int); + + return initid->ptr; +} + +my_bool bit_fp_count_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: BIT_FP_COUNT() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: BIT_FP_COUNT() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void bit_fp_count_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong bit_fp_count(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null __attribute__((unused)), char *error) +{ + int *intptr = NULL; + unsigned long int *ulintptr = NULL; + unsigned long int fpLength = 0; + long int fpIntVal = 0; + unsigned long int bitCount = 0; + unsigned long int i = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + + return 0; + } + + *error = 0; + + ulintptr = (unsigned long int *) args->args[0]; + fpLength = ulintptr[0]; + + if (fpLength % sizeof(int)) { + /* The fingerprint is a multiple of int */ + *is_null = 1; + *error = 1; + + return 0; + } + + ++ulintptr; + + intptr = (int *) ulintptr; + + for(i = 0; i < fpLength/sizeof(int); ++i) { + fpIntVal = intptr[i]; + for (; fpIntVal; fpIntVal = fpIntVal<<1) { + if (fpIntVal < 0) { ++bitCount; } + } + } + + return bitCount; +} + +#endif /* HAVE_DLOPEN */ + diff --git a/src/molmatch.h b/src/molmatch.h new file mode 100644 index 0000000..7d4e49c --- /dev/null +++ b/src/molmatch.h @@ -0,0 +1,247 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software 
Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * used by MySQL and related to chemical file comparison. + * @file molmatch.h + * @short Function declarations of the Mychem molmatch module. + * @author Jerome Pansanel + */ + +#ifndef __MOLMATCH_H +#define __MOLMATCH_H + +#ifdef STANDARD +/* STANDARD is defined, don't use any mysql functions */ +#include +#include +#include +#ifdef __WIN__ +typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */ +typedef __int64 longlong; +#else +typedef unsigned long long ulonglong; +typedef long long longlong; +#endif /*__WIN__*/ +#else +#include +#include +#endif /* STANDARD */ +#include +#include +#include /* To get strmov() */ + +#ifdef HAVE_DLOPEN + +/** + * @short Initializes the match_substruct function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool match_substruct_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by match_substruct_init(). 
+ * @param initid The structure filled by match_substruct_init() + */ +void match_substruct_deinit(UDF_INIT *initid); + +/** + * @short Checks if a molecule is a substructure of another molecule. + * @param initid A structure filled by match_substruct_init() + * @param args A structure which contains arguments and related variables + * @note This is an integer UDF: the value is returned directly, so the + * function takes no result buffer and no length pointer. + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return True if the molecule is a substructure + */ +longlong match_substruct(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the substruct_atom_ids function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool substruct_atom_ids_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by substruct_atom_ids_init(). + * @param initid The structure filled by substruct_atom_ids_init() + */ +void substruct_atom_ids_deinit(UDF_INIT *initid); + +/** + * @short Returns the atom ids of the reference molecule's atoms that are + * contained in substructures matching the query.
+ * @param initid A structure filled by substruct_atom_ids_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The list of atom ids + */ +char *substruct_atom_ids(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the tanimoto function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool tanimoto_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by tanimoto_init(). + * @param initid The structure filled by tanimoto_init() + */ +void tanimoto_deinit(UDF_INIT *initid); + +/** + * @short Calculates the Tanimoto coefficient of two fingerprint bitsets. + * @param initid A structure filled by tanimoto_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The Tanimoto coefficient + */ +double tanimoto(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the substruct_count function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool substruct_count_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by substruct_count_init(). + * @param initid The structure filled by substruct_count_init() + */ +void substruct_count_deinit(UDF_INIT *initid); + +/** + * @short Returns the number of a substructure within a reference structure. + * @param initid A structure filled by substruct_count_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of substructures + */ +longlong substruct_count(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the bit_fp_and function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool bit_fp_and_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by bit_fp_and_init(). + * @param initid The structure filled by bit_fp_and_init() + */ +void bit_fp_and_deinit(UDF_INIT *initid); + +/** + * @short Returns a bitwise AND comparison between two fingerprints. 
+ * @param initid A structure filled by bit_fp_and_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The bitwise comparison + */ +char *bit_fp_and(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the bit_fp_or function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool bit_fp_or_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by bit_fp_or_init(). + * @param initid The structure filled by bit_fp_or_init() + */ +void bit_fp_or_deinit(UDF_INIT *initid); + +/** + * @short Returns a bitwise OR comparison between two fingerprints. + * @param initid A structure filled by bit_fp_or_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The bitwise comparison + */ +char *bit_fp_or(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the bit_fp_count function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool bit_fp_count_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by bit_fp_count_init(). + * @param initid The structure filled by bit_fp_count_init() + */ +void bit_fp_count_deinit(UDF_INIT *initid); + +/** + * @short Returns the number of bits that are set in a binary string. + * @param initid A structure filled by bit_fp_count_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of bits + */ +longlong bit_fp_count(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +#endif /* HAVE_DLOPEN */ + +#endif /* __MOLMATCH_H */ + diff --git a/src/molmatch_wrapper.cpp b/src/molmatch_wrapper.cpp new file mode 100644 index 0000000..31d9dc3 --- /dev/null +++ b/src/molmatch_wrapper.cpp @@ -0,0 +1,187 @@ +/*************************************************************************** + * Copyright (C) 2009-2012 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
* + * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions related + * to chemical file comparison. + * @file molmatch_wrapper.cpp + * @short Mychem Molmatch Wrapper. + * @author Jerome Pansanel + * @author Aurelie De Luca + */ + +#include "molmatch_wrapper.h" +#include +#include +#include + +#include +#include +#include + +#include + +using namespace std; +using namespace OpenBabel; + +long long match_substructure(const char *smarts_pattern, const char *serializedMolecule) +{ /* Returns 1 when the SMARTS pattern matches the unserialized molecule, 0 otherwise (including every failure path) */ + long long int moleculeMatch = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return moleculeMatch; + } + + OBMol mol; + OBSmartsPattern sp; + bool mol_ok; + + mol_ok = unserializeOBMol(&mol, serializedMolecule,true); + + if (!mol_ok) { + cerr << "mol is not ok" << endl; + return 0; + } + + if (mol.Empty()) { + cerr << "mol is empty" << endl; + return 0; + } + if (!sp.Init(smarts_pattern)) { + /* Invalid SMARTS pattern: nothing can match, do not use the pattern */ + return 0; + } + if (mol.NumHvyAtoms() < sp.NumAtoms()) { + /* More heavy atoms in pattern than in serializedMolecule */ + return 0; + } + + if (sp.Match(mol,true)) { + moleculeMatch = 1; + } + else { + moleculeMatch = 0; + } + + return moleculeMatch; +} + +char *substructure_atom_ids(const char *smarts_pattern, const char *serializedMolecule) +{ + vector > maplist; + ostringstream outstream; + + char *retVal = NULL; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { +
return retVal; + } + + OBMol mol; + OBSmartsPattern sp; + bool mol_ok; + + mol_ok = unserializeOBMol(&mol, serializedMolecule,true); + + if (!mol_ok) { + cerr << "mol is not ok" << endl; + return retVal; + } + + if (mol.Empty()) { + cerr << "mol is empty" << endl; + return retVal; + } + + if (!sp.Init(smarts_pattern)) { return strdup(outstream.str().c_str()); } /* invalid SMARTS: return the (still empty) id list without using the pattern */ + + if (mol.NumHvyAtoms() < sp.NumAtoms()) { + /* More heavy atoms in pattern than in serializedMolecule */ + retVal = strdup(outstream.str().c_str()); + return retVal; + } + + sp.Match(mol); + maplist = sp.GetUMapList(); + + if (maplist.size() > 0) { + vector >::iterator mit; + vector::iterator ait; + for (mit = maplist.begin(); mit != maplist.end(); ++mit) { + for (ait = (*mit).begin(); ait != (*mit).end(); ++ait) { + outstream << *ait << " "; + } + outstream << "; "; + } + } + + retVal = strdup(outstream.str().c_str()); + + return retVal; +} + +long long substructure_count(const char *smarts_pattern, const char *serializedMolecule) +{ /* Returns the size of the unique match list of the SMARTS pattern in the unserialized molecule, 0 on every failure path */ + vector > maplist; + + long long int substructureCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return substructureCount; + } + + OBMol mol; + OBSmartsPattern sp; + bool mol_ok; + + mol_ok = unserializeOBMol(&mol, serializedMolecule,true); + + if (!mol_ok) { + cerr << "mol is not ok" << endl; + return 0; + } + + if (mol.Empty()) { + cerr << "mol is empty" << endl; + return 0; + } + + if (!sp.Init(smarts_pattern)) { return 0; } /* invalid SMARTS: nothing to count, do not use the pattern */ + + if (mol.NumHvyAtoms() < sp.NumAtoms()) { + /* More heavy atoms in pattern than in serializedMolecule */ + return 0; + } + + sp.Match(mol); + maplist = sp.GetUMapList(); + + substructureCount = maplist.size(); + + return substructureCount; +} + diff --git a/src/molmatch_wrapper.h b/src/molmatch_wrapper.h new file mode 100644 index 0000000..400713e --- /dev/null +++ b/src/molmatch_wrapper.h @@ -0,0 +1,70 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + *
aureliedeluca@gmail.com -- Developer * + * bjoern@gruening.eu -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * related to chemical file comparison. + * @file molmatch_wrapper.h + * @short Wrapper for the Mychem molmatch module. + * @author Jerome Pansanel + * @author Aurelie De Luca + * @author Bjoern Gruening + */ + +#ifndef __MOLMATCH_WRAPPER_H +#define __MOLMATCH_WRAPPER_H + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @short Checks if a molecule is a substructure of an other molecule. + * @param smarts_pattern The molecule to check (SMARTS format) + * @param serializedMolecule The reference molecule (serialized OBMol object) + * @return 1 if query is a substructure of reference + */ + long long match_substructure(const char *smarts_pattern, const char *serializedMolecule); + + /** + * @short Returns the atom ids of the reference molecule's atoms that are + * contained in substructures matching the query. 
+ * @param smarts_pattern The molecule to check (SMARTS format) + * @param serializedMolecule The reference molecule (serialized OBMol object) + * @return The list of atom ids + */ + char *substructure_atom_ids(const char *smarts_pattern, const char *serializedMolecule); + + /** + * @short Returns the number of substructures within a reference structure. + * @param smarts_pattern The substructure to check (SMARTS format) + * @param serializedMolecule The reference structure (serialized OBMol object) + * @return The number of substructure + */ + long long substructure_count(const char *smarts_pattern, const char *serializedMolecule); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __MOLMATCH_WRAPPER_H */ diff --git a/src/mychem.def b/src/mychem.def new file mode 100644 index 0000000..9db5d04 --- /dev/null +++ b/src/mychem.def @@ -0,0 +1,156 @@ +LIBRARY mychem + +VERSION 0.8.1 + +EXPORTS + molfile_to_molecule_init + molfile_to_molecule_deinit + molfile_to_molecule + molecule_to_molfile_init + molecule_to_molfile_deinit + molecule_to_molfile + V3000_to_molecule_init + V3000_to_molecule_deinit + V3000_to_molecule + molecule_to_V3000_init + molecule_to_V3000_deinit + molecule_to_V3000 + smiles_to_molecule_init + smiles_to_molecule_deinit + smiles_to_molecule + molecule_to_smiles_init + molecule_to_smiles_deinit + molecule_to_smiles + molecule_to_molecule_init + molecule_to_molecule_deinit + molecule_to_molecule + cml_to_molecule_init + cml_to_molecule_deinit + cml_to_molecule + molecule_to_cml_init + molecule_to_cml_deinit + molecule_to_cml + fingerprint_init + fingerprint_deinit + fingerprint + fingerprint2_init + fingerprint2_deinit + fingerprint2 + fingerprint3_init + fingerprint3_deinit + fingerprint3 + fingerprint4_init + fingerprint4_deinit + fingerprint4 + molecule_to_canonical_smiles_init + molecule_to_canonical_smiles_deinit + molecule_to_canonical_smiles + inchi_to_molecule_init + inchi_to_molecule_deinit + inchi_to_molecule + 
molecule_to_inchi_init + molecule_to_inchi_deinit + molecule_to_inchi + molecule_to_serializedOBMol_init + molecule_to_serializedOBMol_deinit + molecule_to_serializedOBMol + pdb_to_molecule_init + pdb_to_molecule_deinit + pdb_to_molecule + mol2_to_molecule_init + mol2_to_molecule_deinit + mol2_to_molecule + molecule_to_mol2_init + molecule_to_mol2_deinit + molecule_to_mol2 + mychem_version_init + mychem_version_deinit + mychem_version + openbabel_version_init + openbabel_version_deinit + openbabel_version + inchi_version_init + inchi_version_deinit + inchi_version + add_hydrogens_init + add_hydrogens_deinit + add_hydrogens + remove_hydrogens_init + remove_hydrogens_deinit + remove_hydrogens + strip_salts_init + strip_salts_deinit + strip_salts + match_substruct_init + match_substruct_deinit + match_substruct + substruct_atom_ids_init + substruct_atom_ids_deinit + substruct_atom_ids + tanimoto_init + tanimoto_deinit + tanimoto + substruct_count_init + substruct_count_deinit + substruct_count + bit_fp_and_init + bit_fp_and_deinit + bit_fp_and + bit_fp_or_init + bit_fp_or_deinit + bit_fp_or + bit_fp_count_init + bit_fp_count_deinit + bit_fp_count + molweight_init + molweight_deinit + molweight + exactmass_init + exactmass_deinit + exactmass + number_of_atoms_init + number_of_atoms_deinit + number_of_atoms + number_of_heavy_atoms_init + number_of_heavy_atoms_deinit + number_of_heavy_atoms + number_of_bonds_init + number_of_bonds_deinit + number_of_bonds + number_of_rotable_bonds_init + number_of_rotable_bonds_deinit + number_of_rotable_bonds + total_charge_init + total_charge_deinit + total_charge + molformula_init + molformula_deinit + molformula + number_of_acceptors_init + number_of_acceptors_deinit + number_of_acceptors + number_of_donors_init + number_of_donors_deinit + number_of_donors + molpsa_init + molpsa_deinit + molpsa + molmr_init + molmr_deinit + molmr + mollogp_init + mollogp_deinit + mollogp + is_2D_init + is_2D_deinit + is_2D + is_3D_init + is_3D_deinit 
+ is_3D + is_chiral_init + is_chiral_deinit + is_chiral + number_of_rings_init + number_of_rings_deinit + number_of_rings + diff --git a/src/mychemdb.sql b/src/mychemdb.sql new file mode 100644 index 0000000..8b233b3 --- /dev/null +++ b/src/mychemdb.sql @@ -0,0 +1,170 @@ +-- +-- This SQL script creates the functions provided by Mychem +-- +USE mysql; + +-- +-- Functions related to chemical format conversion +-- +DROP FUNCTION IF EXISTS molfile_to_molecule; +CREATE FUNCTION molfile_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_molfile; +CREATE FUNCTION molecule_to_molfile RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS V3000_to_molecule; +CREATE FUNCTION V3000_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_V3000; +CREATE FUNCTION molecule_to_V3000 RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS smiles_to_molecule; +CREATE FUNCTION smiles_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_smiles; +CREATE FUNCTION molecule_to_smiles RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_molecule; +CREATE FUNCTION molecule_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS cml_to_molecule; +CREATE FUNCTION cml_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_cml; +CREATE FUNCTION molecule_to_cml RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS fingerprint; +CREATE FUNCTION fingerprint RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS fingerprint2; +CREATE FUNCTION fingerprint2 RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS fingerprint3; +CREATE FUNCTION fingerprint3 RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS fingerprint4; +CREATE FUNCTION fingerprint4 RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_canonical_smiles; +CREATE 
FUNCTION molecule_to_canonical_smiles RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS inchi_to_molecule; +CREATE FUNCTION inchi_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_inchi; +CREATE FUNCTION molecule_to_inchi RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_serializedOBMol; +CREATE FUNCTION molecule_to_serializedOBMol RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS pdb_to_molecule; +CREATE FUNCTION pdb_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS mol2_to_molecule; +CREATE FUNCTION mol2_to_molecule RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molecule_to_mol2; +CREATE FUNCTION molecule_to_mol2 RETURNS STRING SONAME "libmychem.so"; + +-- +-- Functions related to the helper +-- +DROP FUNCTION IF EXISTS mychem_version; +CREATE FUNCTION mychem_version RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS openbabel_version; +CREATE FUNCTION openbabel_version RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS inchi_version; +CREATE FUNCTION inchi_version RETURNS STRING SONAME "libmychem.so"; + +-- +-- Functions related to chemical data modifications +-- +DROP FUNCTION IF EXISTS add_hydrogens; +CREATE FUNCTION add_hydrogens RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS remove_hydrogens; +CREATE FUNCTION remove_hydrogens RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS strip_salts; +CREATE FUNCTION strip_salts RETURNS STRING SONAME "libmychem.so"; + +-- +-- Functions related to chemical match +-- +DROP FUNCTION IF EXISTS match_substruct; +CREATE FUNCTION match_substruct RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS substruct_atom_ids; +CREATE FUNCTION substruct_atom_ids RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS substruct_count; +CREATE FUNCTION substruct_count RETURNS INTEGER SONAME "libmychem.so"; + +DROP 
FUNCTION IF EXISTS tanimoto; +CREATE FUNCTION tanimoto RETURNS REAL SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS bit_fp_and; +CREATE FUNCTION bit_fp_and RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS bit_fp_or; +CREATE FUNCTION bit_fp_or RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS bit_fp_count; +CREATE FUNCTION bit_fp_count RETURNS INTEGER SONAME "libmychem.so"; + +-- +-- Functions related to chemical property calculations +-- +DROP FUNCTION IF EXISTS molweight; +CREATE FUNCTION molweight RETURNS REAL SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS exactmass; +CREATE FUNCTION exactmass RETURNS REAL SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_atoms; +CREATE FUNCTION number_of_atoms RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_heavy_atoms; +CREATE FUNCTION number_of_heavy_atoms RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_bonds; +CREATE FUNCTION number_of_bonds RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_rotable_bonds; +CREATE FUNCTION number_of_rotable_bonds RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS total_charge; +CREATE FUNCTION total_charge RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molformula; +CREATE FUNCTION molformula RETURNS STRING SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_acceptors; +CREATE FUNCTION number_of_acceptors RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_donors; +CREATE FUNCTION number_of_donors RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molpsa; +CREATE FUNCTION molpsa RETURNS REAL SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS molmr; +CREATE FUNCTION molmr RETURNS REAL SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS mollogp; +CREATE FUNCTION mollogp RETURNS REAL SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS is_2D; +CREATE FUNCTION is_2D RETURNS INTEGER SONAME 
"libmychem.so"; + +DROP FUNCTION IF EXISTS is_3D; +CREATE FUNCTION is_3D RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS is_chiral; +CREATE FUNCTION is_chiral RETURNS INTEGER SONAME "libmychem.so"; + +DROP FUNCTION IF EXISTS number_of_rings; +CREATE FUNCTION number_of_rings RETURNS INTEGER SONAME "libmychem.so"; + diff --git a/src/mychemdb_win32.sql b/src/mychemdb_win32.sql new file mode 100644 index 0000000..0c3923b --- /dev/null +++ b/src/mychemdb_win32.sql @@ -0,0 +1,170 @@ +-- +-- This SQL script creates the functions provided by Mychem +-- +USE mysql; + +-- +-- Functions related to chemical format conversion +-- +DROP FUNCTION IF EXISTS molfile_to_molecule; +CREATE FUNCTION molfile_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_molfile; +CREATE FUNCTION molecule_to_molfile RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS V3000_to_molecule; +CREATE FUNCTION V3000_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_V3000; +CREATE FUNCTION molecule_to_V3000 RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS smiles_to_molecule; +CREATE FUNCTION smiles_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_smiles; +CREATE FUNCTION molecule_to_smiles RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_molecule; +CREATE FUNCTION molecule_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS cml_to_molecule; +CREATE FUNCTION cml_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_cml; +CREATE FUNCTION molecule_to_cml RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS fingerprint; +CREATE FUNCTION fingerprint RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS fingerprint2; +CREATE FUNCTION fingerprint2 RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS fingerprint3; +CREATE FUNCTION fingerprint3 RETURNS 
STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS fingerprint4; +CREATE FUNCTION fingerprint4 RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_canonical_smiles; +CREATE FUNCTION molecule_to_canonical_smiles RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS inchi_to_molecule; +CREATE FUNCTION inchi_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_inchi; +CREATE FUNCTION molecule_to_inchi RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_serializedOBMol; +CREATE FUNCTION molecule_to_serializedOBMol RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS pdb_to_molecule; +CREATE FUNCTION pdb_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS mol2_to_molecule; +CREATE FUNCTION mol2_to_molecule RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molecule_to_mol2; +CREATE FUNCTION molecule_to_mol2 RETURNS STRING SONAME "mychem.dll"; + +-- +-- Functions related to the helper +-- +DROP FUNCTION IF EXISTS mychem_version; +CREATE FUNCTION mychem_version RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS openbabel_version; +CREATE FUNCTION openbabel_version RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS inchi_version; +CREATE FUNCTION inchi_version RETURNS STRING SONAME "mychem.dll"; + +-- +-- Functions related to chemical data modifications +-- +DROP FUNCTION IF EXISTS add_hydrogens; +CREATE FUNCTION add_hydrogens RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS remove_hydrogens; +CREATE FUNCTION remove_hydrogens RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS strip_salts; +CREATE FUNCTION strip_salts RETURNS STRING SONAME "mychem.dll"; + +-- +-- Functions related to chemical match +-- +DROP FUNCTION IF EXISTS match_substruct; +CREATE FUNCTION match_substruct RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS substruct_atom_ids; +CREATE FUNCTION 
substruct_atom_ids RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS substruct_count; +CREATE FUNCTION substruct_count RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS tanimoto; +CREATE FUNCTION tanimoto RETURNS REAL SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS bit_fp_and; +CREATE FUNCTION bit_fp_and RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS bit_fp_or; +CREATE FUNCTION bit_fp_or RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS bit_fp_count; +CREATE FUNCTION bit_fp_count RETURNS INTEGER SONAME "mychem.dll"; + +-- +-- Functions related to chemical property calculations +-- +DROP FUNCTION IF EXISTS molweight; +CREATE FUNCTION molweight RETURNS REAL SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS exactmass; +CREATE FUNCTION exactmass RETURNS REAL SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_atoms; +CREATE FUNCTION number_of_atoms RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_heavy_atoms; +CREATE FUNCTION number_of_heavy_atoms RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_bonds; +CREATE FUNCTION number_of_bonds RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_rotable_bonds; +CREATE FUNCTION number_of_rotable_bonds RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS total_charge; +CREATE FUNCTION total_charge RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molformula; +CREATE FUNCTION molformula RETURNS STRING SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_acceptors; +CREATE FUNCTION number_of_acceptors RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_donors; +CREATE FUNCTION number_of_donors RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molpsa; +CREATE FUNCTION molpsa RETURNS REAL SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS molmr; +CREATE FUNCTION molmr RETURNS REAL SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS mollogp; 
+CREATE FUNCTION mollogp RETURNS REAL SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS is_2D; +CREATE FUNCTION is_2D RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS is_3D; +CREATE FUNCTION is_3D RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS is_chiral; +CREATE FUNCTION is_chiral RETURNS INTEGER SONAME "mychem.dll"; + +DROP FUNCTION IF EXISTS number_of_rings; +CREATE FUNCTION number_of_rings RETURNS INTEGER SONAME "mychem.dll"; + diff --git a/src/property.c b/src/property.c new file mode 100644 index 0000000..c34d82e --- /dev/null +++ b/src/property.c @@ -0,0 +1,881 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead * + * aureliedeluca@gmail.com -- Developer * + * bjoern@gruenings.eu -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions used by + * MySQL and related to chemical property calculations. + * @file property.c + * @short Function definitions of the Mychem property module. 
+ * @author Jerome Pansanel + * @author Aurelie De Luca + * @author Bjoern Gruening + */ + +#include "property.h" +#include "property_wrapper.h" +#include + +#ifdef HAVE_DLOPEN + +my_bool molweight_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLWEIGHT() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLWEIGHT() requires a STRING"); + return 1; + } + + initid->decimals = 6; + initid->max_length=19; + initid->maybe_null = 1; + + return 0; +} + +void molweight_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +double molweight(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + double weight = 0.0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + + *is_null = 0; + *error = 0; + + weight = getMolWeight(inputMol); + + free(inputMol); + + return weight; +} + +my_bool exactmass_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: EXACTMASS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: EXACTMASS() requires a STRING"); + return 1; + } + + initid->decimals = 6; + initid->max_length=19; + initid->maybe_null = 1; + + return 0; +} + +void exactmass_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +double exactmass(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + 
double weight = 0.0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + weight = getExactMass(inputMol); + + free(inputMol); + + return weight; +} + +my_bool number_of_atoms_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_ATOMS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: NUMBER_OF_ATOMS() requires a STRING"); + return 1; + } + if (args->arg_count != 1 || args->arg_type[0] != STRING_RESULT) + { + strcpy(message,"You have an error in your SQL syntax; Check the Mychem manual for the right syntax to use with the number_of_atoms function"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void number_of_atoms_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_atoms(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong count = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + count = getAtomCount(inputMol); + + free(inputMol); + + return count; +} + +my_bool number_of_heavy_atoms_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_HEAVY_ATOMS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: NUMBER_OF_HEAVY_ATOMS() requires a 
STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void number_of_heavy_atoms_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_heavy_atoms(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong count = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + count = getHeavyAtomCount(inputMol); + + free(inputMol); + + return count; +} + +my_bool number_of_bonds_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_BONDS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: NUMBER_OF_BONDS() requires a STRING"); + return 1; + } + + initid->maybe_null = 0; + + return 0; +} + +void number_of_bonds_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_bonds(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong count = 0; + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + count = getBondCount(inputMol); + + free(inputMol); + + return count; +} + +my_bool number_of_rotable_bonds_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_ROTABLE_BONDS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: 
NUMBER_OF_ROTABLE_BONDS() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void number_of_rotable_bonds_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_rotable_bonds(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong count = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + count = getRotableBondCount(inputMol); + + free(inputMol); + + return count; +} + +my_bool total_charge_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: TOTAL_CHARGE() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: TOTAL_CHARGE() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void total_charge_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong total_charge(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong count = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error = 0; + + count = getTotalCharge(inputMol); + + free(inputMol); + + return count; +} + +my_bool molformula_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of 
arguments: MOLFORMULA() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLFORMULA() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + initid->max_length = MAX_VALUE_LENGTH; + + if (!(initid->ptr = (char *) malloc(sizeof(char)*initid->max_length))) { + strcpy(message, "Couldn't allocate memory"); + return 1; + } + + return 0; +} + +void molformula_deinit(UDF_INIT *initid) +{ + if(initid->ptr) { + free(initid->ptr); + } +} + +char *molformula(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + char *formula = NULL; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + memset(initid->ptr, 0, initid->max_length); + *result = 0; + *is_null = 0; + *error = 0; + + formula = getFormula(inputMol); + + /* Return NULL is the formula is empty */ + if (formula == NULL) { + *is_null = 1; + *error = 1; + free(inputMol); + + return NULL; + } + + *length = strlen(formula); + strncat(initid->ptr, formula, *length); + + free(inputMol); + free(formula); + + return initid->ptr; +} + +my_bool number_of_acceptors_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_ACCEPTORS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: NUMBER_OF_ACCEPTORS() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void number_of_acceptors_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_acceptors(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, 
char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong acceptors = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + acceptors = getAcceptors(inputMol); + + free(inputMol); + + return acceptors; +} + +my_bool number_of_donors_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_DONORS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: NUMBER_OF_DONORS() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void number_of_donors_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_donors(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong donors = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + donors = getDonors(inputMol); + + free(inputMol); + + return donors; +} + +my_bool molpsa_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLPSA() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLPSA() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void molpsa_deinit(UDF_INIT *initid 
__attribute__((unused))) +{ +} + +double molpsa(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + double PSA = 0.0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + PSA = getPSA(inputMol); + + free(inputMol); + + return PSA; +} + +my_bool molmr_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLMR() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLMR() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void molmr_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +double molmr(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + double MR = 0.0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + MR = getMR(inputMol); + + free(inputMol); + + return MR; +} + +my_bool mollogp_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: MOLLOGP() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: MOLOGP() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void mollogp_deinit(UDF_INIT 
*initid __attribute__((unused))) +{ +} + +double mollogp(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + double LogP = 0.0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + LogP = getLogP(inputMol); + + free(inputMol); + + return LogP; +} + +my_bool is_2D_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: IS_2D() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: IS_2D() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void is_2D_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong is_2D(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong is2DBool = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + is2DBool = is2D(inputMol); + + free(inputMol); + + return is2DBool; +} + +my_bool is_3D_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: IS_3D() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: IS_3D() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void 
is_3D_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong is_3D(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong is3DBool = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + is3DBool = is3D(inputMol); + + free(inputMol); + + return is3DBool; +} + +my_bool is_chiral_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: IS_CHIRAL() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: IS_CHIRAL() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void is_chiral_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong is_chiral(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong isChiralBool = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + isChiralBool = isChiral(inputMol); + + free(inputMol); + + return isChiralBool; +} + +my_bool number_of_rings_init(UDF_INIT *initid, UDF_ARGS *args, char *message) +{ + if (args->arg_count != 1) { + strcpy(message, "Wrong number of arguments: NUMBER_OF_RINGS() requires one argument"); + return 1; + } + if (args->arg_type[0] != STRING_RESULT) { + strcpy(message,"Wrong argument type: 
NUMBER_OF_RINGS() requires a STRING"); + return 1; + } + + initid->maybe_null = 1; + + return 0; +} + +void number_of_rings_deinit(UDF_INIT *initid __attribute__((unused))) +{ +} + +longlong number_of_rings(UDF_INIT *initid __attribute__((unused)), UDF_ARGS *args, char *is_null, char *error) +{ + /* Fix a MySQL end string char issue */ + char *inputMol = (char *) malloc(sizeof(char)*(args->lengths[0]+1)); + longlong count = 0; + + if (args->args[0] == NULL) { + /* Arguments can not be NULL */ + *is_null = 1; + *error = 1; + free(inputMol); + + return 0; + } + + strncpy(inputMol, args->args[0], args->lengths[0]); + inputMol[args->lengths[0]] = 0; + + *is_null = 0; + *error =0; + + count = getRingCount(inputMol); + + free(inputMol); + + return count; +} + +#endif /* HAVE_DLOPEN */ + diff --git a/src/property.h b/src/property.h new file mode 100644 index 0000000..4cfebf3 --- /dev/null +++ b/src/property.h @@ -0,0 +1,518 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * bjoern@gruenings.eu -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. 
* + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * used by MySQL and related to chemical property calculations. + * @file property.h + * @short Function declarations of the Mychem property module. + * @author Jerome Pansanel + * @author Bjoern Gruening + */ + +#ifndef __PROPERTY_H +#define __PROPERTY_H + +#ifdef STANDARD +/* STANDARD is defined, don't use any mysql functions */ +#include +#include +#include +#ifdef __WIN__ +typedef unsigned __int64 ulonglong; /* Microsofts 64 bit types */ +typedef __int64 longlong; +#else +typedef unsigned long long ulonglong; +typedef long long longlong; +#endif /*__WIN__*/ +#else +#include +#include +#endif /* STANDARD */ +#include +#include +#include /* To get strmov() */ + +#ifdef HAVE_DLOPEN + +/** + * @short Initializes the molweight function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molweight_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molweight_init(). + * @param initid The structure filled by molweight_init() + */ +void molweight_deinit(UDF_INIT *initid); + +/** + * @short Calculates the molecular weight of a molecule. 
+ * @param initid A structure filled by molweight_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The molecular weight + */ +double molweight(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the exactmass function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool exactmass_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by exactmass_init(). + * @param initid The structure filled by exactmass_init() + */ +void exactmass_deinit(UDF_INIT *initid); + +/** + * @short Calculates the monoisotopic weight of a molecule. + * @param initid A structure filled by exactmass_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The monoisotopic weight + */ +double exactmass(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the number_of_atoms function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_atoms_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_atoms_init(). + * @param initid The structure filled by number_of_atoms_init() + */ +void number_of_atoms_deinit(UDF_INIT *initid); + +/** + * @short Calculates the number of atoms in a molecule. + * @param initid A structure filled by number_of_atoms_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of atoms + */ +longlong number_of_atoms(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the number_of_heavy_atoms function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_heavy_atoms_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_heavy_atoms_init(). + * @param initid The structure filled by number_of_heavy_atoms_init() + */ +void number_of_heavy_atoms_deinit(UDF_INIT *initid); + +/** + * @short Calculates the number of heavy atoms in a molecule. 
+ * @param initid A structure filled by number_of_heavy_atoms_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of heavy atoms + */ +longlong number_of_heavy_atoms(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the number_of_bonds function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_bonds_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_bonds_init(). + * @param initid The structure filled by number_of_bonds_init() + */ +void number_of_bonds_deinit(UDF_INIT *initid); + +/** + * @short Calculates the number of bonds in a molecule. + * @param initid A structure filled by number_of_bonds_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of bonds + */ +longlong number_of_bonds(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the number_of_rotable_bonds function. 
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_rotable_bonds_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_rotable_bonds_init(). + * @param initid The structure filled by number_of_rotable_bonds_init() + */ +void number_of_rotable_bonds_deinit(UDF_INIT *initid); + +/** + * @short Calculates the number of rotable bonds in a molecule. + * @param initid A structure filled by number_of_rotable_bonds_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of rotable bonds + */ +longlong number_of_rotable_bonds(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the total_charge function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool total_charge_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by total_charge_init(). + * @param initid The structure filled by total_charge_init() + */ +void total_charge_deinit(UDF_INIT *initid); + +/** + * @short Calculates the total charge of a molecule.
+ * @param initid A structure filled by total_charge_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The total charge of a molecule + */ +longlong total_charge(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the molformula function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molformula_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molformula_init(). + * @param initid The structure filled by molformula_init() + */ +void molformula_deinit(UDF_INIT *initid); + +/** + * @short Returns the molecular formula of a molecule. + * @param initid A structure filled by molformula_init() + * @param args A structure which contains arguments and related variables + * @param result A buffer to save result + * @param length A pointer to length of the above buffer + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The molecular formula + */ +char *molformula(UDF_INIT *initid, UDF_ARGS *args, char *result, unsigned long *length, char *is_null, char *error); + +/** + * @short Initializes the number_of_acceptors function.
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_acceptors_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_acceptors_init(). + * @param initid The structure filled by number_of_acceptors_init() + */ +void number_of_acceptors_deinit(UDF_INIT *initid); + +/** + * @short Calculates the number of hydrogen-bond acceptors in a molecule. + * @param initid A structure filled by number_of_acceptors_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of hydrogen-bond acceptors + */ +longlong number_of_acceptors(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the number_of_donors function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_donors_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_donors_init(). + * @param initid The structure filled by number_of_donors_init() + */ +void number_of_donors_deinit(UDF_INIT *initid); + +/** + * @short Calculates the number of hydrogen-bond donors in a molecule.
+ * @param initid A structure filled by number_of_donors_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of hydrogen-bond donors + */ +longlong number_of_donors(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the molpsa function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molpsa_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molpsa_init(). + * @param initid The structure filled by molpsa_init() + */ +void molpsa_deinit(UDF_INIT *initid); + +/** + * @short Predicts the Topological Polar Surface Area (TPSA) of a molecule. + * @param initid A structure filled by molpsa_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The topological polar surface area + */ +double molpsa(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the molmr function.
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool molmr_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by molmr_init(). + * @param initid The structure filled by molmr_init() + */ +void molmr_deinit(UDF_INIT *initid); + +/** + * @short Predicts the Molar Refractivity (MR) of a molecule. + * @param initid A structure filled by molmr_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The molar refractivity + */ +double molmr(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the mollogp function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool mollogp_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by mollogp_init(). + * @param initid The structure filled by mollogp_init() + */ +void mollogp_deinit(UDF_INIT *initid); + +/** + * @short Predicts the LogP of a molecule.
+ * @param initid A structure filled by mollogp_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The LogP + */ +double mollogp(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the is_2D function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool is_2D_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by is_2D_init(). + * @param initid The structure filled by is_2D_init() + */ +void is_2D_deinit(UDF_INIT *initid); + +/** + * @short Checks if a molecule has 2D coordinates. + * @param initid A structure filled by is_2D_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return 1 if the molecule has 2D coordinates + */ +longlong is_2D(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the is_3D function.
+ * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool is_3D_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by is_3D_init(). + * @param initid The structure filled by is_3D_init() + */ +void is_3D_deinit(UDF_INIT *initid); + +/** + * @short Checks if a molecule has 3D coordinates. + * @param initid A structure filled by is_3D_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return 1 if a molecule has 3D coordinates. + */ +longlong is_3D(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the is_chiral function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool is_chiral_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by is_chiral_init(). + * @param initid The structure filled by is_chiral_init() + */ +void is_chiral_deinit(UDF_INIT *initid); + +/** + * @short Checks if a molecule is chiral.
+ * @param initid A structure filled by is_chiral_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return 1 if the molecule is chiral + */ +longlong is_chiral(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +/** + * @short Initializes the number_of_rings function. + * @param initid A structure that the init function should fill + * @param args A structure which contains arguments and related variables + * @param message The error message that should be passed to the user on fail + * @return True if an error is raised during the initialization + */ +my_bool number_of_rings_init(UDF_INIT *initid, UDF_ARGS *args, char *message); + +/** + * @short Should free all resources allocated by number_of_rings_init(). + * @param initid The structure filled by number_of_rings_init() + */ +void number_of_rings_deinit(UDF_INIT *initid); + +/** + * @short Returns the number of rings in a molecule.
+ * @param initid A structure filled by number_of_rings_init() + * @param args A structure which contains arguments and related variables + * @param is_null Set to 1 if the result is null + * @param error Set to 1 if something goes fatally wrong + * @return The number of rings + */ +longlong number_of_rings(UDF_INIT *initid, UDF_ARGS *args, char *is_null, char *error); + +#endif /* HAVE_DLOPEN */ + +#endif /* __PROPERTY_H */ + diff --git a/src/property_wrapper.cpp b/src/property_wrapper.cpp new file mode 100644 index 0000000..5cb4e20 --- /dev/null +++ b/src/property_wrapper.cpp @@ -0,0 +1,498 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * bjoern@gruenings.eu -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions related + * to chemical property calculations.
+ * @file property_wrapper.cpp + * @short Mychem Property Wrapper. + * @author Jerome Pansanel + * @author Aurelie De Luca + * @author Bjoern Gruening + */ + +#include "property_wrapper.h" +#include +#include + +#include +#include +#include +#include +#include "descriptors/groupcontrib.cpp" +#include + +using namespace std; +using namespace OpenBabel; + +#if defined(__CYGWIN__) || defined(__MINGW32__) + // macro to implement static OBPlugin::PluginMapType& Map() + PLUGIN_CPP_FILE(OBDescriptor) +#endif + +double getMolWeight(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + double molWeight = 0.0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return molWeight; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol,&inStream); + molWeight = mol.GetMolWt(); + } + + return molWeight; +} + +double getExactMass(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + double exactMass = 0.0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return exactMass; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + exactMass = mol.GetExactMass(); + } + + return exactMass; +} + +long long int getAtomCount(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int atomCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return atomCount; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + atomCount = mol.NumAtoms(); + } + + return atomCount; +} + +long long int getHeavyAtomCount(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int heavyAtomCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return heavyAtomCount; + } + + OBConversion conv; + OBMol mol; + + if 
(conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + heavyAtomCount = mol.NumHvyAtoms(); + } + + return heavyAtomCount; +} + +long long int getBondCount(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int bondCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return bondCount; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + bondCount = mol.NumBonds(); + } + + return bondCount; +} + +long long int getRotableBondCount(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int rotableBondCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return rotableBondCount; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + rotableBondCount = mol.NumRotors(); + } + + return rotableBondCount; +} + +long long int getTotalCharge(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int totalCharge = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return totalCharge; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + totalCharge = mol.GetTotalCharge(); + } + + return(totalCharge); +} + +char *getFormula(const char *molecule) +{ + string inString(molecule); + string outstring; + istringstream inStream(inString); + + char * formula = NULL; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return formula; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + outstring = mol.GetFormula(); + formula = strdup(outstring.c_str()); + } + + return formula; +} + +long long int getAcceptors(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int acceptorCount = 0; + + LibHandler 
ob_lib; + + if (!ob_lib.isLoaded()) { + return acceptorCount; + } + + OBConversion conv; + OBMol mol; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + OBMolAtomIter atomIter(mol); + + for (; atomIter; ++atomIter) { + if (atomIter->IsHbondAcceptor()) { + ++acceptorCount; + } + } + } + + return acceptorCount; +} + +long long int getDonors(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int donorCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return donorCount; + } + + OBMol mol; + OBConversion conv(&inStream); + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + // iterator over each atom + OBMolAtomIter atomIter(mol); + + for (; atomIter; ++atomIter) { + if (atomIter->IsHbondDonor()) { + ++donorCount; + } + } + } + + return donorCount; +} + +double getPSA(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + double PSA = 0.0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return PSA; + } + + OBMol mol; + OBConversion conv(&inStream); + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol); + // Make sur that TPSA descriptor exits + OBDescriptor* pDescr = OBDescriptor::FindType("TPSA"); + if (pDescr) { + PSA = pDescr->Predict(&mol); + } + } + + return PSA; +} + +double getMR(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + double MR = 0.0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return MR; + } + + OBMol mol; + OBConversion conv(&inStream); + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol); + // Make sur that MR descriptor exits + OBDescriptor* pDescr = OBDescriptor::FindType("MR"); + if (pDescr) { + MR = pDescr->Predict(&mol); + } + } + + return MR; +} + +double getLogP(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + double logP = 0.0; + + LibHandler ob_lib; + + if 
(!ob_lib.isLoaded()) { + return logP; + } + + OBMol mol; + OBConversion conv; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol,&inStream); + // Make sur that logP descriptor exits + OBDescriptor* pDesc = OBDescriptor::FindType("logP"); + if (pDesc) { + logP = pDesc->Predict(&mol); + } + } + + return logP; +} + +long long int is2D(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int Bool = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return Bool; + } + + OBMol mol; + OBConversion conv; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + + if (mol.Has2D ()) { + Bool = 1; + } + } + + return Bool; +} + +long long int is3D(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int Bool = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return Bool; + } + + OBMol mol; + OBConversion conv; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + if (mol.Has3D ()) { + Bool = 1; + } + } + + return Bool; +} + +long long int isChiral(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int Bool = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return Bool; + } + + OBMol mol; + OBConversion conv; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + mol.FindChiralCenters(); + + if (mol.IsChiral()) { + Bool = 1; + } + } + + return Bool; +} + +long long int getRingCount(const char *molecule) +{ + string inString(molecule); + istringstream inStream(inString); + + long long int ringCount = 0; + + LibHandler ob_lib; + + if (!ob_lib.isLoaded()) { + return ringCount; + } + + OBMol mol; + OBConversion conv; + + vector obring; + + if (conv.SetInFormat(MOLECULE_TYPE)) { + conv.Read(&mol, &inStream); + obring = mol.GetSSSR(); + ringCount = obring.size(); + } + + return ringCount; +} + diff --git a/src/property_wrapper.h 
b/src/property_wrapper.h new file mode 100644 index 0000000..6d375aa --- /dev/null +++ b/src/property_wrapper.h @@ -0,0 +1,165 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * aureliedeluca@gmail.com -- Developer * + * bjoern@gruenings.eu -- Developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains function declarations + * related to chemical property calculations. Every function takes the + * molecule as a NUL-terminated string in the MOLECULE_TYPE format. + * @file property_wrapper.h + * @short Wrapper for the Mychem property module. + * @author Jerome Pansanel + * @author Aurelie De Luca + * @author Bjoern Gruening + */ + +#ifndef __PROPERTY_WRAPPER_H +#define __PROPERTY_WRAPPER_H + +#ifdef __cplusplus +extern "C" +{ +#endif /* __cplusplus */ + + /** + * @short Calculates the molecular weight of a molecule. + * @param molecule The molecule used for the calculation. + * @return The molecular weight, or 0.0 if the OpenBabel library could not + * be loaded or the MOLECULE_TYPE input format could not be selected. + */ + double getMolWeight(const char *molecule); + + /** + * @short Calculates the exact mass of a molecule.
+ * @param molecule The molecule used for the calculation. + * @return The exact mass. + */ + double getExactMass(const char *molecule); + + /** + * @short Returns the number of atoms in a molecule. + * @param molecule The molecule used for the calculation. + * @return The number of atoms. + */ + long long int getAtomCount(const char *molecule); + + /** + * @short Returns the number of heavy atoms in a molecule. + * @param molecule The molecule used for the calculation. + * @return The number of heavy atoms. + */ + long long int getHeavyAtomCount(const char *molecule); + + /** + * @short Returns the number of bonds in a molecule. + * @param molecule The molecule used for the calculation. + * @return The number of bonds. + */ + long long int getBondCount(const char *molecule); + + /** + * @short Returns the number of rotable bonds in a molecule. + * @param molecule The molecule used for the calculation. + * @return The number of rotable bonds. + */ + long long int getRotableBondCount(const char *molecule); + + /** + * @short Returns the total charge of a molecule. + * @param molecule The molecule used for the calculation. + * @return The total charge. + */ + long long int getTotalCharge(const char *molecule); + + /** + * @short Returns the formula of a molecule. + * @param molecule The molecule used for the calculation. + * @return The formula as a newly allocated C string (strdup) that the + * caller must release with free(), or NULL on failure. + */ + char *getFormula(const char *molecule); + + /** + * @short Returns the number of hydrogen bond acceptors. + * @param molecule The molecule used for the calculation. + * @return The number of hydrogen bond acceptors. + */ + long long int getAcceptors(const char *molecule); + + /** + * @short Returns the number of hydrogen bond donors. + * @param molecule The molecule used for the calculation. + * @return The number of hydrogen bond donors. + */ + long long int getDonors(const char *molecule); + + /** + * @short Predicts the TPSA (Topological Polar Surface Area) of a molecule.
+ * @param molecule The molecule used for the calculation. + * @return The topological polar surface area. + */ + double getPSA(const char *molecule); + + /** + * @short Predicts the MR (Molar Refractivity) of a molecule. + * @param molecule The molecule used for the calculation. + * @return The molar refractivity. + */ + double getMR(const char *molecule); + + /** + * @short Predicts the LogP of a molecule. + * @param molecule The molecule used for the calculation. + * @return The LogP. + */ + double getLogP(const char *molecule); + + /** + * @short Checks if a molecule has 2D coordinates. + * @param molecule The molecule used for the calculation. + * @return 1 if the molecule has 2D coordinates, 0 otherwise. + */ + long long int is2D(const char *molecule); + + /** + * @short Checks if a molecule has 3D coordinates. + * @param molecule The molecule used for the calculation. + * @return 1 if the molecule has 3D coordinates, 0 otherwise. + */ + long long int is3D(const char *molecule); + + /** + * @short Checks if a molecule is chiral. + * @param molecule The molecule used for the calculation. + * @return 1 if the molecule is chiral, 0 otherwise. + */ + long long int isChiral(const char *molecule); + + /** + * @short Returns the number of rings in a molecule. + * @param molecule The molecule used for the calculation. + * @return The number of rings (SSSR).
+ */ + long long int getRingCount(const char *molecule); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __PROPERTY_WRAPPER_H */ + diff --git a/src/serialization.cpp b/src/serialization.cpp new file mode 100644 index 0000000..705d0fb --- /dev/null +++ b/src/serialization.cpp @@ -0,0 +1,291 @@ +/*************************************************************************** + * Copyright (C) 2009-2012 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions related + * to the serialization of OBMol objects. + * @file serialization.cpp + * @short OBMol Serialization Methods. 
+ * @author Jerome Pansanel +*/ +/* NOTE(review): the five #include directives below carry no header name in this copy; presumably the angle-bracket targets were stripped during extraction - restore them from the repository before building. */ +#include +#include +#include +#include +#include + +using namespace std; +using namespace OpenBabel; +/* _STEREO: one stereo record of the serialized blob. serializeOBMol() fills only the fields that apply to the record type; the remaining fields stay zero because the whole buffer is memset to 0 first. NOTE(review): unsigned long and the enum fields have platform-dependent sizes, so the blob layout presumably differs between builds - confirm blobs are never shared across platforms. */ +typedef struct +{ + char type;/* OBStereo type tag of this record */ + unsigned long atomids[2];/* tetrahedral: center, towards; square planar: center only; cis/trans: begin, end */ + unsigned long refs[4];/* reference ids; the tetrahedral branch writes only refs[0..2] */ + OBStereo::Shape shape;/* written for square planar and cis/trans records */ + OBStereo::View view;/* written for tetrahedral records */ + OBStereo::Winding winding;/* written for tetrahedral records */ +} _STEREO; +/* serializeOBMol: packs an OBMol into one buffer allocated with new char[]. Layout: four unsigned int header words (byte count after the first word, atom count, bond count, stereo record count), then one _ATOM per atom, one _BOND per bond and one _STEREO per entry of the StereoData list. Returns NULL when mol.Empty(). The buffer is not freed in this function; presumably the caller must delete[] it - confirm. */ +char *serializeOBMol(OBMol &mol) { + + if (mol.Empty()) { + return NULL; + } + + unsigned int numatoms = mol.NumAtoms(); + unsigned int numbonds = mol.NumBonds(); + vector::iterator data; + vector stereoData;/* NOTE(review): the vector element type is missing in this copy (presumably stripped angle-bracket text); it has to match what mol.GetAllData() returns */ + if (mol.HasData(OBGenericDataType::StereoData)) { + stereoData = mol.GetAllData(OBGenericDataType::StereoData); + } + unsigned int numstereo = stereoData.size(); + unsigned int totalsize = (numatoms*sizeof(_ATOM))+(numbonds*sizeof(_BOND))+(numstereo*sizeof(_STEREO))+(4*sizeof(unsigned int));/* all records plus the four header words */ + + char *retval = new char[totalsize]; + _ATOM *atomptr; + _BOND *bondptr; + _STEREO *stereoptr; + + //mol.Kekulize(); + + memset(retval,0x0,totalsize); + + unsigned int *uintptr = (unsigned int*) retval; + + *uintptr = totalsize-sizeof(unsigned int);/* header word 1: number of bytes that follow this word */ + + ++uintptr; + + *uintptr = numatoms;/* header word 2 */ + + ++uintptr; + + *uintptr = numbonds;/* header word 3 */ + + ++uintptr; + + *uintptr = numstereo;/* header word 4 */ + + ++uintptr; + + atomptr = (_ATOM*) uintptr; +/* One _ATOM record per atom in FOR_ATOMS_OF_MOL order; the casts narrow each value to the record field type. */ + FOR_ATOMS_OF_MOL(atom, mol) { + atomptr->idx = atom->GetIdx(); + atomptr->hybridization = atom->GetHyb(); + atomptr->atomicnum = (unsigned char) atom->GetAtomicNum(); + atomptr->formalcharge = (char) atom->GetFormalCharge(); + atomptr->isotope = (unsigned short) atom->GetIsotope(); + atomptr->spinmultiplicity = (unsigned char) atom->GetSpinMultiplicity(); + atomptr->aromatic = atom->IsAromatic() ? 1 : 0; + + ++atomptr; + } + + bondptr = (_BOND*) atomptr;/* bond records start right after the last atom record */ + + if (numbonds>0) { + FOR_BONDS_OF_MOL(bond, mol) { + bondptr->beginidx = bond->GetBeginAtomIdx(); + bondptr->endidx = bond->GetEndAtomIdx(); + bondptr->order = (unsigned char) bond->GetBondOrder(); + bondptr->aromatic = bond->IsAromatic() ?
1 : 0; + + ++bondptr; + } + } +/* One _STEREO record per StereoData entry. An entry whose type matches none of the three branches below still consumes a record, which then stays all zero. */ + stereoptr = (_STEREO*) bondptr; + + for (data = stereoData.begin(); data != stereoData.end(); ++data) { + OBStereo::Type type; + type = ((OBStereoBase*)*data)->GetType(); + if (type == OBStereo::Tetrahedral) { + OBTetrahedralStereo *ts = dynamic_cast(*data);/* NOTE(review): the cast target type is missing in this copy (presumably stripped); the result is dereferenced on the next line without a NULL check, here and in the two branches below */ + OBTetrahedralStereo::Config config = ts->GetConfig(); + stereoptr->type = type;/* enum value stored in the char field of the record */ + stereoptr->atomids[0] = config.center; + stereoptr->atomids[1] = config.towards; + stereoptr->refs[0] = config.refs[0]; + stereoptr->refs[1] = config.refs[1]; + stereoptr->refs[2] = config.refs[2]; + stereoptr->winding = config.winding; + stereoptr->view = config.view; + } else if (type == OBStereo::SquarePlanar) { + OBSquarePlanarStereo *sp = dynamic_cast(*data); + OBSquarePlanarStereo::Config config = sp->GetConfig(); + stereoptr->type = type; + stereoptr->atomids[0] = config.center; + stereoptr->refs[0] = config.refs[0]; + stereoptr->refs[1] = config.refs[1]; + stereoptr->refs[2] = config.refs[2]; + stereoptr->refs[3] = config.refs[3]; + stereoptr->shape = config.shape; + } else if (type == OBStereo::CisTrans) { + OBCisTransStereo *ct = dynamic_cast(*data); + OBCisTransStereo::Config config = ct->GetConfig(); + stereoptr->type = type; + stereoptr->atomids[0] = config.begin; + stereoptr->atomids[1] = config.end; + stereoptr->refs[0] = config.refs[0]; + stereoptr->refs[1] = config.refs[1]; + stereoptr->refs[2] = config.refs[2]; + stereoptr->refs[3] = config.refs[3]; + stereoptr->shape = config.shape; + } + + ++stereoptr; + } + + return retval; +} +/* unserializeOBMol: rebuilds the molecule held by pOb from a buffer laid out as serializeOBMol() writes it. The first header word is skipped; the next three give the atom, bond and stereo record counts. Atoms and bonds are always restored; stereo records are only read when stereoEnabled is true. Returns false when AddAtom or AddBond fails or when a bond record has a zero or out-of-range atom index or a zero order; returns true otherwise. NOTE(review): neither pOb, the CastAndClear() result nor serializedInput is checked for NULL, and the counts read from the buffer are trusted - confirm callers only pass blobs produced by serializeOBMol(). */ +bool unserializeOBMol(OBBase* pOb, const char *serializedInput, bool stereoEnabled) +{ + OBMol* pmol = pOb->CastAndClear();/* NOTE(review): template argument apparently stripped in this copy */ + map _mapcd;/* NOTE(review): key and value types missing in this copy; the map is only written inside the chiralWatch branch */ + OBMol &mol = *pmol; + _mapcd.clear(); + bool chiralWatch=false;/* never set to true in this function, so the if (chiralWatch) branch in the atom loop is not executed */ + unsigned int i,natoms,nbonds,nstereo; + + unsigned int *intptr = (unsigned int*) serializedInput; + + ++intptr;/* skips the byte-count header word; it is not used to bound the reads below */ + + natoms = *intptr; + + ++intptr; + + nbonds = *intptr; + + ++intptr; + + nstereo = *intptr; + + ++intptr; + + _ATOM *atomptr =
(_ATOM*) intptr; + + mol.ReserveAtoms(natoms); +/* A single scratch OBAtom is filled from each record, added to mol, then cleared for the next record. */ + OBAtom atom; + + for (i = 1; i <= natoms; i++) { + atom.SetIdx(atomptr->idx); + atom.SetHyb(atomptr->hybridization); + atom.SetAtomicNum((int) atomptr->atomicnum); + atom.SetIsotope((unsigned int) atomptr->isotope); + atom.SetFormalCharge((int) atomptr->formalcharge); + + atom.SetSpinMultiplicity((short) atomptr->spinmultiplicity); + + if(atomptr->aromatic != 0) { + atom.SetAromatic(); + } + + if (!mol.AddAtom(atom)) { + return false; + } + + if (chiralWatch) { + // fill the map with data for each chiral atom + _mapcd[mol.GetAtom(i)] = new OBChiralData; + } + atom.Clear(); + + ++atomptr; + } + + _BOND *bondptr = (_BOND*) atomptr;/* bond records follow the atom records */ + + unsigned int start,end,order,flags; + + for (i = 0;i < nbonds;i++) { + flags = 0; + + start = bondptr->beginidx; + end = bondptr->endidx; + order = (int) bondptr->order; + + if (start == 0 || end == 0 || order == 0 || start > natoms || end > natoms) {/* valid atom indices are 1..natoms; a zero order is rejected as well */ + return false; + } + + order = (unsigned int) (order == 4) ?
5 : order;/* a stored order of 4 is handed to AddBond as 5 (presumably the OpenBabel aromatic bond order - confirm); the cast applies to the comparison only and does not change the result */ + + if (bondptr->aromatic != 0) { + flags |= OB_AROMATIC_BOND; + } + + if (!mol.AddBond(start,end,order,flags)) { + return false; + } + + + ++bondptr; + } +/* Stereo records follow the bond records and are only read on request; each recognised record becomes a new stereo object attached with mol.SetData() (presumably mol takes ownership - confirm). Records with any other type value are skipped. */ + if (stereoEnabled) { + _STEREO *stereoptr = (_STEREO*) bondptr; + + for (i=0; i < nstereo; ++i) { + if (stereoptr->type == OBStereo::Tetrahedral) { + OBTetrahedralStereo::Config cfg; + cfg.center = stereoptr->atomids[0]; + cfg.towards = stereoptr->atomids[1]; + cfg.refs = OBStereo::MakeRefs(stereoptr->refs[0],stereoptr->refs[1],stereoptr->refs[2]); + cfg.winding = stereoptr->winding; + cfg.view = stereoptr->view; + OBTetrahedralStereo *obts = new OBTetrahedralStereo(&mol); + obts->SetConfig(cfg); + mol.SetData(obts); + } else if (stereoptr->type == OBStereo::SquarePlanar) { + OBSquarePlanarStereo::Config cfg; + cfg.center = stereoptr->atomids[0]; + cfg.refs = OBStereo::MakeRefs(stereoptr->refs[0],stereoptr->refs[1],stereoptr->refs[2],stereoptr->refs[3]); + cfg.shape = stereoptr->shape; + OBSquarePlanarStereo *obsp = new OBSquarePlanarStereo(&mol); + obsp->SetConfig(cfg); + mol.SetData(obsp); + } else if (stereoptr->type == OBStereo::CisTrans) { + OBCisTransStereo::Config cfg; + cfg.begin = stereoptr->atomids[0]; + cfg.end = stereoptr->atomids[1]; + cfg.refs = OBStereo::MakeRefs(stereoptr->refs[0],stereoptr->refs[1],stereoptr->refs[2],stereoptr->refs[3]); + cfg.shape = stereoptr->shape; + OBCisTransStereo *obct = new OBCisTransStereo(&mol); + obct->SetConfig(cfg); + mol.SetData(obct); + } + ++stereoptr; + } + mol.SetChiralityPerceived(); + } +/* Aromaticity and Kekule flags are marked as perceived because they come from the stored records; presumably this keeps OpenBabel from recomputing them - confirm. */ + mol.SetAromaticPerceived(); + mol.SetKekulePerceived(); + + return true; +} + diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000..c88ce1f --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,94 @@ +set( MYCHEM_DATA_DIR "${CMAKE_CURRENT_SOURCE_DIR}/data/" ) + +# +# Helper Module CMakeLists.txt File +# +include_directories( + ${MySQL_INCLUDE_DIR} + ${OPENBABEL2_INCLUDE_DIR} + ${CMAKE_BINARY_DIR}/include +
${CMAKE_SOURCE_DIR}/include +) + +add_definitions( -DHAVE_DLOPEN ) + +# +# Conversion Test +# +set( conversion_test_SRCS + conversion_test.cpp + mysql_handler.cpp + test_functions.cpp +) +add_executable( conversion_test ${conversion_test_SRCS} ) +target_link_libraries( conversion_test + ${MySQL_LIBRARIES} +) +add_test( ConversionTest ${CMAKE_CURRENT_BINARY_DIR}/conversion_test + ${MY_ARGS} -d ${MYCHEM_DATA_DIR} +) + +# +# Helper Test +# +set( helper_test_SRCS + helper_test.cpp + mysql_handler.cpp + test_functions.cpp +) +add_executable( helper_test ${helper_test_SRCS} ) +target_link_libraries( helper_test + ${MySQL_LIBRARIES} +) +add_test( HelperTest ${CMAKE_CURRENT_BINARY_DIR}/helper_test + ${MY_ARGS} -d ${MYCHEM_DATA_DIR} +) + +# +# Modification Test +# +set( modification_test_SRCS + modification_test.cpp + mysql_handler.cpp + test_functions.cpp +) +add_executable( modification_test ${modification_test_SRCS} ) +target_link_libraries( modification_test + ${MySQL_LIBRARIES} +) +add_test( ModificationTest ${CMAKE_CURRENT_BINARY_DIR}/modification_test + ${MY_ARGS} -d ${MYCHEM_DATA_DIR} +) + +# +# Molmatch Test +# +set( molmatch_test_SRCS + molmatch_test.cpp + mysql_handler.cpp + test_functions.cpp +) +add_executable( molmatch_test ${molmatch_test_SRCS} ) +target_link_libraries( molmatch_test + ${MySQL_LIBRARIES} +) +add_test( MolmatchTest ${CMAKE_CURRENT_BINARY_DIR}/molmatch_test + ${MY_ARGS} -d ${MYCHEM_DATA_DIR} +) + +# +# Property Test +# +set( property_test_SRCS + property_test.cpp + mysql_handler.cpp + test_functions.cpp +) +add_executable( property_test ${property_test_SRCS} ) +target_link_libraries( property_test + ${MySQL_LIBRARIES} +) +add_test( PropertyTest ${CMAKE_CURRENT_BINARY_DIR}/property_test + ${MY_ARGS} -d ${MYCHEM_DATA_DIR} +) + diff --git a/tests/conversion_test.cpp b/tests/conversion_test.cpp new file mode 100644 index 0000000..b65fb37 --- /dev/null +++ b/tests/conversion_test.cpp @@ -0,0 +1,719 @@ 
+/*************************************************************************** + * Copyright (C) 2009-2014 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * @file conversion_test.cpp + * @brief Test the Mychem conversion module. + * @author Jerome Pansanel + * + * This file is part of the Mychem software. It contains functions for + * testing the conversion module. 
+ */ + +#include "mysql_handler.h" +#include "test_functions.h" +#include +/* NOTE(review): the bare #include directives here have no header name in this copy; presumably the angle-bracket targets were stripped during extraction - restore them from the repository. */ +#include + +#include +#include +#include + +using namespace std; +/* arg_error: prints the usage text to cout. NOTE(review): the usage line does not mention the -b option that main() parses below, while main() only accepts argc 9 or 11 (four or five option/value pairs), so the text looks incomplete - confirm the intended options before changing the string. */ +void arg_error(void) +{ + cout << "conversion_test: missing some arguments\n"; + cout << "Usage: conversion_test -d data_dir -h host -u user -p password\n\n"; + cout << "Please note that the password option is not mandatory.\n"; +} +/* main: parses the -h, -u, -p, -b and -d options, connects to MySQL, then runs one stanza per conversion function: read the input and control files from data_dir, run SELECT func(input), compare the result, print it with printTestResult(). Returns 1 when the connection fails or any stanza failed, 0 otherwise. */ +int main(int argc, char **argv) +{ + unsigned short int count; + string host; + string user; + string passwd; + string data_dir; + string db; +/* argc must be 9 or 11, i.e. four or five option/value pairs after the program name */ + + if ((argc != 9) && (argc != 11)) { + arg_error(); + exit(1); + } +/* Arguments that do not start with - are skipped by this loop. */ + for (count = 1; count < argc; count++) { + if (argv[count][0] == '-') { + switch(argv[count][1]) {/* each case breaks only when a value follows the option; with no value left, control falls through the remaining cases into default, which reports the error and exits */ + case 'h': + ++count; + if (count < argc) { + host = argv[count]; + break; + } + case 'u': + ++count; + if (count < argc) { + user = argv[count]; + break; + } + case 'p': + ++count; + if (count < argc) { + passwd = argv[count]; + break; + } + case 'b': + ++count; + if (count < argc) { + db = argv[count]; + break; + } + case 'd': + ++count; + if (count < argc) { + data_dir = argv[count]; + break; + } + default: + arg_error(); + exit(1); + } + } + } + + /* Initialize the MySQL connection */ + MySQLHandler my_handler; + if (!my_handler.connect(host.c_str(), user.c_str(), passwd.c_str(), db.c_str())) { + return 1; + } + + /* Variable initialisation */ + string funcName; + string filename; + string inputData; + string controlData; + string controlData2; + string query; + string queryResult; + bool currentTestFailed = false;/* result of the stanza currently running */ + bool testFailed = false;/* set once any stanza fails; decides the exit status */ + + /**************************************************************** + * molfile_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molfile_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.mol"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data
*/ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_molfile test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_molfile"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.mol"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * V3000_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "V3000_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.V3000"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + 
/* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_V3000 test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_V3000"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.V3000"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareV3000(queryResult,controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * smiles_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "smiles_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.smi"; + if (!readFile(filename, inputData)) { currentTestFailed 
= true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_smiles test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_smiles"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.smi"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.inchi"; + if (!readFile(filename, inputData)) { 
currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * cml_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "cml_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.cml"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_cml test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_cml"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, 
inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.cml"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * fingerprint tests * + ****************************************************************/ + /* Initialize the variables */ + funcName = "fingerprint"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.mol"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData = "0"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT length(" + funcName + "('" + inputData + "','FP2'))"; + query += "-length(fingerprint2('" + inputData + "'))"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT length(" + funcName + "('" + inputData + "','FP3'))"; + query += "-length(fingerprint3('" + inputData + "'))"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT length(" + funcName + "('" + 
inputData + "','FP4'))"; + query += "-length(fingerprint4('" + inputData + "'))"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_canonical_smiles test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_canonical_smiles"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/L-tryptophan.can"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * inchi_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "inchi_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.inchi"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform 
the query */ + if (!currentTestFailed) {/* NOTE(review): unlike the other stanzas, this one does not call queryResult.clear() before performQuery(); whether that matters depends on whether performQuery() overwrites or appends to its first argument - confirm in mysql_handler.cpp */ + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_inchi test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_inchi"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.inchi"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Add support for libinchi v102 */ + controlData2.clear(); + filename = data_dir + "/glycine.inchi102"; + if (!readFile(filename, controlData2)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) {/* compare() is non-zero on a mismatch: the stanza fails only when the result differs from both control strings */ + if (queryResult.compare(controlData2)) { + currentTestFailed = true; + } + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_serializedOBMol test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_serializedOBMol"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if
(!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "348";/* literal control value; no control file is read for this stanza */ + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!queryResult.compare(0,8, controlData, 0, 8)) {/* NOTE(review): compare() returns 0 on equality, so this marks the stanza failed when the first 8 characters of the result EQUAL the control value and passes it otherwise - the opposite polarity from the string-compare stanzas in this file; confirm which is intended */ + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * pdb_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "pdb_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/1CRN.pdb"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/1CRN.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * mol2_to_molecule test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "mol2_to_molecule"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.mol2"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data
*/ + controlData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (!compareMolecules(queryResult, controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molecule_to_mol2 test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molecule_to_mol2"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/glycine.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + filename = data_dir + "/glycine.mol2"; + if (!readFile(filename, controlData)) { currentTestFailed = true; } + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + my_handler.disconnect(); + + if (testFailed) { + return 1; + } + + return 0; +} diff --git a/tests/data/1CRN.molecule b/tests/data/1CRN.molecule new file mode 100644 index 0000000..ef53281 --- /dev/null +++ b/tests/data/1CRN.molecule @@ -0,0 +1,669 @@ + + OpenBabel08210907243D + +327337 0 0 0 0 0 0 0 0999 V2000 + 17.0470 14.0990 3.6250 N 0 0 0 0 0 + 16.9670 12.7840 4.3380 C 0 0 0 0 0 + 15.6850 12.7550 5.1330 C 0 0 0 0 0 + 15.2680 
13.8250 5.5940 O 0 0 0 0 0 + 18.1700 12.7030 5.3370 C 0 0 0 0 0 + 19.3340 12.8290 4.4630 O 0 0 0 0 0 + 18.1500 11.5460 6.3040 C 0 0 0 0 0 + 15.1150 11.5550 5.2650 N 0 0 0 0 0 + 13.8560 11.4690 6.0660 C 0 0 0 0 0 + 14.1640 10.7850 7.3790 C 0 0 0 0 0 + 14.9930 9.8620 7.4430 O 0 0 0 0 0 + 12.7320 10.7110 5.2610 C 0 0 0 0 0 + 13.3080 9.4390 4.9260 O 0 0 0 0 0 + 12.4840 11.4420 3.8950 C 0 0 0 0 0 + 13.4880 11.2410 8.4170 N 0 0 0 0 0 + 13.6600 10.7070 9.7870 C 0 0 0 0 0 + 12.2690 10.4310 10.3230 C 0 0 0 0 0 + 11.3930 11.3080 10.1850 O 0 0 0 0 0 + 14.3680 11.7480 10.6910 C 0 0 0 0 0 + 15.8850 12.4260 10.0160 S 0 0 0 0 0 + 12.0190 9.2720 10.9280 N 0 0 0 0 0 + 10.6460 8.9910 11.4080 C 0 0 0 0 0 + 10.6540 8.7930 12.9190 C 0 0 0 0 0 + 11.6590 8.2960 13.4910 O 0 0 0 0 0 + 10.0570 7.7520 10.6820 C 0 0 0 0 0 + 9.8370 8.0180 8.9040 S 0 0 0 0 0 + 9.5610 9.1080 13.5630 N 0 0 0 0 0 + 9.4480 9.0340 15.0120 C 0 0 0 0 0 + 9.2880 7.6700 15.6060 C 0 0 0 0 0 + 9.4900 7.5190 16.8190 O 0 0 0 0 0 + 8.2300 9.9570 15.3450 C 0 0 0 0 0 + 7.3380 9.7860 14.1140 C 0 0 0 0 0 + 8.3660 9.8040 12.9580 C 0 0 0 0 0 + 8.8750 6.6860 14.7960 N 0 0 0 0 0 + 8.6730 5.3140 15.2790 C 0 0 0 0 0 + 8.7530 4.3760 14.0830 C 0 0 0 0 0 + 8.7260 4.8580 12.9230 O 0 0 0 0 0 + 7.3400 5.1210 15.9960 C 0 0 0 0 0 + 6.2740 5.2200 15.0310 O 0 0 0 0 0 + 8.8810 3.0750 14.3580 N 0 0 0 0 0 + 8.9120 2.0830 13.2580 C 0 0 0 0 0 + 7.5810 2.0900 12.5060 C 0 0 0 0 0 + 7.6700 2.0310 11.2450 O 0 0 0 0 0 + 9.2070 0.6770 13.9240 C 0 0 0 0 0 + 10.7140 0.7020 14.3120 C 0 0 0 0 0 + 8.8110 -0.4770 12.9690 C 0 0 0 0 0 + 11.1850 -0.5160 15.1420 C 0 0 0 0 0 + 6.4580 2.1620 13.1590 N 0 0 0 0 0 + 5.1450 2.2090 12.4530 C 0 0 0 0 0 + 5.1150 3.3790 11.4610 C 0 0 0 0 0 + 4.6640 3.2680 10.3430 O 0 0 0 0 0 + 3.9950 2.3540 13.4780 C 0 0 0 0 0 + 2.7160 2.8910 12.8690 C 0 0 0 0 0 + 3.7580 1.0320 14.2080 C 0 0 0 0 0 + 5.6060 4.5460 11.9410 N 0 0 0 0 0 + 5.5980 5.7670 11.0820 C 0 0 0 0 0 + 6.4410 5.5270 9.8500 C 0 0 0 0 0 + 6.0520 5.9330 8.7440 O 0 0 0 0 0 + 
6.0220 6.9770 11.8910 C 0 0 0 0 0 + 7.6470 4.9090 10.0050 N 0 0 0 0 0 + 8.4960 4.6090 8.8370 C 0 0 0 0 0 + 7.7980 3.6090 7.8760 C 0 0 0 0 0 + 7.8780 3.7780 6.6510 O 0 0 0 0 0 + 9.8470 4.0200 9.3050 C 0 0 0 0 0 + 10.7520 3.6070 8.1490 C 0 0 0 0 0 + 11.2260 4.6990 7.2440 C 0 0 0 0 0 + 12.1430 5.5710 8.0350 N 0 0 0 0 0 + 12.7580 6.6090 7.4430 C 0 0 0 0 0 + 12.5390 6.9320 6.1580 N 0 0 0 0 0 + 13.6010 7.3220 8.2020 N 0 0 0 0 0 + 7.1860 2.5820 8.4450 N 0 0 0 0 0 + 6.5000 1.5840 7.5650 C 0 0 0 0 0 + 5.3820 2.3130 6.7730 C 0 0 0 0 0 + 5.2130 2.0160 5.5570 O 0 0 0 0 0 + 5.9080 0.4620 8.4000 C 0 0 0 0 0 + 6.9900 -0.2720 9.0120 O 0 0 0 0 0 + 4.6480 3.1820 7.4460 N 0 0 0 0 0 + 3.5450 3.9350 6.7510 C 0 0 0 0 0 + 4.1070 4.8510 5.6910 C 0 0 0 0 0 + 3.5360 5.0010 4.6170 O 0 0 0 0 0 + 2.6630 4.6770 7.7480 C 0 0 0 0 0 + 1.8020 3.7350 8.6100 C 0 0 0 0 0 + 1.5670 2.6130 8.1650 O 0 0 0 0 0 + 1.3940 4.2520 9.7670 N 0 0 0 0 0 + 5.2590 5.4980 6.0050 N 0 0 0 0 0 + 5.9290 6.3580 5.0550 C 0 0 0 0 0 + 6.3040 5.5780 3.7990 C 0 0 0 0 0 + 6.1360 6.0720 2.6530 O 0 0 0 0 0 + 7.1830 6.9940 5.7540 C 0 0 0 0 0 + 7.8840 8.0060 4.8830 C 0 0 0 0 0 + 8.9060 7.5860 4.0270 C 0 0 0 0 0 + 7.5320 9.3730 4.9830 C 0 0 0 0 0 + 9.5600 8.5390 3.1940 C 0 0 0 0 0 + 8.1760 10.2810 4.1450 C 0 0 0 0 0 + 9.1410 9.8450 3.2920 C 0 0 0 0 0 + 6.9000 4.3900 3.9890 N 0 0 0 0 0 + 7.3310 3.6070 2.7910 C 0 0 0 0 0 + 6.1160 3.2100 1.9150 C 0 0 0 0 0 + 6.2400 3.1440 0.6840 O 0 0 0 0 0 + 8.1450 2.4040 3.2400 C 0 0 0 0 0 + 9.5550 2.8560 3.7300 C 0 0 0 0 0 + 10.0130 3.8950 3.3230 O 0 0 0 0 0 + 10.1200 1.9560 4.5390 N 0 0 0 0 0 + 4.9930 2.9270 2.5710 N 0 0 0 0 0 + 3.7820 2.5990 1.7420 C 0 0 0 0 0 + 3.2960 3.8710 1.0040 C 0 0 0 0 0 + 2.9470 3.8170 -0.1890 O 0 0 0 0 0 + 2.6980 1.9530 2.6080 C 0 0 0 0 0 + 1.3840 1.8260 1.8060 C 0 0 0 0 0 + 3.1740 0.5330 3.0050 C 0 0 0 0 0 + 3.3210 4.9870 1.7200 N 0 0 0 0 0 + 2.8900 6.2850 1.1260 C 0 0 0 0 0 + 3.6870 6.5970 -0.1110 C 0 0 0 0 0 + 3.2000 7.1470 -1.1030 O 0 0 0 0 0 + 3.0390 7.3690 2.2400 C 0 
0 0 0 0 + 2.5590 9.0140 1.6490 S 0 0 0 0 0 + 4.9970 6.2270 -0.1000 N 0 0 0 0 0 + 5.8950 6.4890 -1.2130 C 0 0 0 0 0 + 5.7380 5.5600 -2.4090 C 0 0 0 0 0 + 6.2280 5.9010 -3.5070 O 0 0 0 0 0 + 7.3700 6.5070 -0.7310 C 0 0 0 0 0 + 7.7170 7.6870 0.2060 C 0 0 0 0 0 + 7.9490 8.9470 -0.6150 C 0 0 0 0 0 + 9.2120 8.8560 -1.3370 N 0 0 0 0 0 + 9.5370 9.5330 -2.4310 C 0 0 0 0 0 + 8.6590 10.3500 -3.0320 N 0 0 0 0 0 + 10.7930 9.4910 -2.8990 N 0 0 0 0 0 + 5.0510 4.4110 -2.2040 N 0 0 0 0 0 + 4.9330 3.4310 -3.3260 C 0 0 0 0 0 + 4.3970 4.0140 -4.6200 C 0 0 0 0 0 + 4.9880 3.7550 -5.6870 O 0 0 0 0 0 + 4.1960 2.1840 -2.8630 C 0 0 0 0 0 + 4.9600 1.1780 -1.9910 C 0 0 0 0 0 + 3.9070 0.0970 -1.6340 C 0 0 0 0 0 + 6.1290 0.6060 -2.7680 C 0 0 0 0 0 + 3.3290 4.7950 -4.5430 N 0 0 0 0 0 + 2.7920 5.3760 -5.7970 C 0 0 0 0 0 + 3.5730 6.5400 -6.3220 C 0 0 0 0 0 + 3.2600 7.0450 -7.4220 O 0 0 0 0 0 + 1.3580 5.7660 -5.4720 C 0 0 0 0 0 + 1.2230 5.6940 -3.9930 C 0 0 0 0 0 + 2.4210 4.9410 -3.4080 C 0 0 0 0 0 + 4.5650 7.0470 -5.5590 N 0 0 0 0 0 + 5.3660 8.1910 -6.0180 C 0 0 0 0 0 + 5.0070 9.4810 -5.2800 C 0 0 0 0 0 + 5.5350 10.5100 -5.7300 O 0 0 0 0 0 + 4.1810 9.4380 -4.2620 N 0 0 0 0 0 + 3.7670 10.6090 -3.5130 C 0 0 0 0 0 + 5.0170 11.3970 -3.0420 C 0 0 0 0 0 + 5.9470 10.7570 -2.5230 O 0 0 0 0 0 + 2.9920 10.1880 -2.2250 C 0 0 0 0 0 + 2.0510 9.1440 -2.6230 O 0 0 0 0 0 + 2.2600 11.3490 -1.5510 C 0 0 0 0 0 + 4.9710 12.7030 -3.1760 N 0 0 0 0 0 + 6.1430 13.5130 -2.6960 C 0 0 0 0 0 + 6.4000 13.2330 -1.2250 C 0 0 0 0 0 + 5.4850 13.0610 -0.3820 O 0 0 0 0 0 + 5.7030 14.9690 -2.9200 C 0 0 0 0 0 + 4.6760 14.8930 -3.9960 C 0 0 0 0 0 + 3.9640 13.5670 -3.8110 C 0 0 0 0 0 + 7.7280 13.2970 -0.9210 N 0 0 0 0 0 + 8.1140 13.1030 0.5000 C 0 0 0 0 0 + 7.4270 14.0730 1.4100 C 0 0 0 0 0 + 7.0360 13.6820 2.5400 O 0 0 0 0 0 + 9.6480 13.2850 0.6600 C 0 0 0 0 0 + 10.4400 12.0930 0.0630 C 0 0 0 0 0 + 11.9410 12.1700 0.3910 C 0 0 0 0 0 + 12.4160 13.2250 0.6810 O 0 0 0 0 0 + 12.5390 11.0700 0.2920 O 0 0 0 0 0 + 7.2120 15.3340 0.9660 N 0 0 
0 0 0 + 6.6140 16.3170 1.9130 C 0 0 0 0 0 + 5.2120 15.9360 2.3500 C 0 0 0 0 0 + 4.7820 16.1660 3.4950 O 0 0 0 0 0 + 6.6050 17.6950 1.2460 C 0 0 0 0 0 + 4.4450 15.3180 1.4050 N 0 0 0 0 0 + 3.0740 14.8940 1.7560 C 0 0 0 0 0 + 3.0850 13.6430 2.6450 C 0 0 0 0 0 + 2.3150 13.5230 3.5780 O 0 0 0 0 0 + 2.2040 14.6370 0.4620 C 0 0 0 0 0 + 1.8150 16.0480 -0.1290 C 0 0 0 0 0 + 0.9030 13.8640 0.8110 C 0 0 0 0 0 + 0.7560 16.7610 0.7570 C 0 0 0 0 0 + 4.0320 12.7640 2.3130 N 0 0 0 0 0 + 4.1800 11.5490 3.1870 C 0 0 0 0 0 + 4.6320 11.9440 4.5960 C 0 0 0 0 0 + 4.2270 11.2520 5.5470 O 0 0 0 0 0 + 5.0380 10.5180 2.5390 C 0 0 0 0 0 + 4.3490 9.7940 1.0220 S 0 0 0 0 0 + 5.4080 13.0120 4.6940 N 0 0 0 0 0 + 5.8790 13.5020 6.0260 C 0 0 0 0 0 + 4.6960 13.9080 6.8820 C 0 0 0 0 0 + 4.5280 13.4220 8.0250 O 0 0 0 0 0 + 6.8800 14.6150 5.8300 C 0 0 0 0 0 + 3.8270 14.8020 6.3580 N 0 0 0 0 0 + 2.6910 15.2210 7.1940 C 0 0 0 0 0 + 1.6720 14.1320 7.4340 C 0 0 0 0 0 + 0.9470 14.1120 8.4680 O 0 0 0 0 0 + 1.9860 16.5200 6.6140 C 0 0 0 0 0 + 1.6640 16.2210 5.2300 O 0 0 0 0 0 + 2.9140 17.7390 6.7000 C 0 0 0 0 0 + 1.6210 13.1900 6.5110 N 0 0 0 0 0 + 0.7150 12.0450 6.6570 C 0 0 0 0 0 + 1.1250 11.1250 7.8150 C 0 0 0 0 0 + 0.2860 10.6320 8.5450 O 0 0 0 0 0 + 0.7550 11.2290 5.3220 C 0 0 0 0 0 + -0.2030 10.0440 5.3540 C 0 0 0 0 0 + -1.5470 10.3370 5.6450 C 0 0 0 0 0 + 0.1930 8.7500 5.1000 C 0 0 0 0 0 + -2.4960 9.3290 5.6730 C 0 0 0 0 0 + -0.8010 7.7050 5.1560 C 0 0 0 0 0 + -2.0790 8.0310 5.4300 C 0 0 0 0 0 + -3.0970 7.0570 5.4580 O 0 0 0 0 0 + 2.4700 10.9840 7.9950 N 0 0 0 0 0 + 2.9860 9.9940 8.9500 C 0 0 0 0 0 + 3.6090 10.5050 10.2300 C 0 0 0 0 0 + 3.7660 9.7150 11.1860 O 0 0 0 0 0 + 4.0760 9.1030 8.2250 C 0 0 0 0 0 + 5.1250 10.0270 7.8240 O 0 0 0 0 0 + 3.4930 8.3240 7.0350 C 0 0 0 0 0 + 3.9840 11.7640 10.2410 N 0 0 0 0 0 + 4.7690 12.3360 11.3600 C 0 0 0 0 0 + 6.2550 12.2430 11.1060 C 0 0 0 0 0 + 7.0370 12.7500 11.9540 O 0 0 0 0 0 + 6.7100 11.6310 9.9920 N 0 0 0 0 0 + 8.1400 11.6940 9.6350 C 0 0 0 0 0 + 8.5000 
13.1410 9.2060 C 0 0 0 0 0 + 7.5810 13.9490 8.9440 O 0 0 0 0 0 + 8.5040 10.6860 8.5300 C 0 0 0 0 0 + 8.0480 8.9870 8.8810 S 0 0 0 0 0 + 9.7930 13.4100 9.1730 N 0 0 0 0 0 + 10.2800 14.7600 8.8230 C 0 0 0 0 0 + 11.3460 14.6580 7.7430 C 0 0 0 0 0 + 11.9710 13.5830 7.5520 O 0 0 0 0 0 + 10.7900 15.5350 10.0850 C 0 0 0 0 0 + 12.0590 14.8030 10.6710 C 0 0 0 0 0 + 9.6840 15.6860 11.1380 C 0 0 0 0 0 + 12.7330 15.6760 11.7810 C 0 0 0 0 0 + 11.4900 15.7730 7.0380 N 0 0 0 0 0 + 12.5520 15.8770 6.0360 C 0 0 0 0 0 + 13.5900 16.9170 6.5600 C 0 0 0 0 0 + 13.1680 18.0060 6.9450 O 0 0 0 0 0 + 11.9870 16.3600 4.6810 C 0 0 0 0 0 + 10.9140 15.3380 4.1630 C 0 0 0 0 0 + 13.1310 16.5170 3.6290 C 0 0 0 0 0 + 10.1510 16.0240 2.9380 C 0 0 0 0 0 + 14.8560 16.4930 6.5360 N 0 0 0 0 0 + 15.9300 17.4540 6.9410 C 0 0 0 0 0 + 16.9130 17.5500 5.8190 C 0 0 0 0 0 + 17.0970 16.6600 4.9700 O 0 0 0 0 0 + 16.6220 16.9950 8.2850 C 0 0 0 0 0 + 17.3600 15.6510 8.0670 C 0 0 0 0 0 + 15.5920 16.9740 9.4340 C 0 0 0 0 0 + 18.2980 15.2060 9.2190 C 0 0 0 0 0 + 17.6640 18.6690 5.8060 N 0 0 0 0 0 + 18.6350 18.8610 4.7380 C 0 0 0 0 0 + 19.9250 18.0420 4.9490 C 0 0 0 0 0 + 20.5930 17.7420 3.9450 O 0 0 0 0 0 + 18.9450 20.3640 4.7830 C 0 0 0 0 0 + 18.2380 20.9370 5.9080 C 0 0 0 0 0 + 17.3710 19.9000 6.5960 C 0 0 0 0 0 + 20.1720 17.7300 6.2170 N 0 0 0 0 0 + 21.4520 16.9690 6.5130 C 0 0 0 0 0 + 21.1430 15.4780 6.4270 C 0 0 0 0 0 + 20.1380 15.0230 5.8780 O 0 0 0 0 0 + 22.0550 14.7010 7.0320 N 0 0 0 0 0 + 22.0190 13.2420 7.0200 C 0 0 0 0 0 + 21.9440 12.6280 8.3960 C 0 0 0 0 0 + 21.8690 11.3870 8.4350 O 0 0 0 0 0 + 23.2460 12.6970 6.2750 C 0 0 0 0 0 + 21.8940 13.4350 9.4360 N 0 0 0 0 0 + 21.9360 12.9110 10.8090 C 0 0 0 0 0 + 20.6150 13.1910 11.5210 C 0 0 0 0 0 + 20.3570 14.3170 11.9480 O 0 0 0 0 0 + 23.1310 13.6010 11.5930 C 0 0 0 0 0 + 24.2840 13.4010 10.7090 O 0 0 0 0 0 + 23.3400 12.9350 12.9620 C 0 0 0 0 0 + 19.8270 12.1100 11.6420 N 0 0 0 0 0 + 18.5040 12.3120 12.2980 C 0 0 0 0 0 + 18.6840 12.4510 13.7840 C 0 0 0 0 0 + 
19.5330 11.7180 14.3620 O 0 0 0 0 0 + 17.5820 11.1170 11.9960 C 0 0 0 0 0 + 17.1990 10.9290 10.2370 S 0 0 0 0 0 + 17.8800 13.2660 14.4260 N 0 0 0 0 0 + 17.9240 13.4210 15.8770 C 0 0 0 0 0 + 17.3920 12.2060 16.5940 C 0 0 0 0 0 + 16.6520 11.3680 16.0330 O 0 0 0 0 0 + 17.0760 14.6580 16.1450 C 0 0 0 0 0 + 16.0980 14.6890 14.9970 C 0 0 0 0 0 + 16.8590 14.1500 13.7790 C 0 0 0 0 0 + 17.7280 12.1240 17.8840 N 0 0 0 0 0 + 17.3340 10.9560 18.6910 C 0 0 0 0 0 + 15.8750 10.6880 18.8710 C 0 0 0 0 0 + 15.4340 9.5500 19.1660 O 0 0 0 0 0 + 15.0360 11.7470 18.7150 N 0 0 0 0 0 + 13.5640 11.5730 18.8360 C 0 0 0 0 0 + 12.9360 11.2270 17.4700 C 0 0 0 0 0 + 11.7200 11.0400 17.4280 O 0 0 0 0 0 + 12.9330 12.7370 19.5800 C 0 0 0 0 0 + 13.1400 14.0940 18.9580 C 0 0 0 0 0 + 14.1090 14.3030 18.2120 O 0 0 0 0 0 + 12.2670 14.9630 19.2650 O 0 0 0 0 0 + 13.7250 11.1740 16.4250 N 0 0 0 0 0 + 13.2570 10.7450 15.0810 C 0 0 0 0 0 + 14.2750 9.6870 14.6120 C 0 0 0 0 0 + 14.9300 9.8620 13.5680 O 0 0 0 0 0 + 13.2000 11.9140 14.0710 C 0 0 0 0 0 + 12.0000 12.8190 14.3990 C 0 0 0 0 0 + 12.1190 13.8530 15.3320 C 0 0 0 0 0 + 10.7750 12.6170 13.7620 C 0 0 0 0 0 + 11.0450 14.6750 15.6100 C 0 0 0 0 0 + 9.6760 13.4330 14.0480 C 0 0 0 0 0 + 9.8020 14.4560 14.9960 C 0 0 0 0 0 + 8.7400 15.2650 15.2690 O 0 0 0 0 0 + 14.3420 8.6400 15.4220 N 0 0 0 0 0 + 15.4450 7.6670 15.2460 C 0 0 0 0 0 + 15.1710 6.5330 14.2800 C 0 0 0 0 0 + 16.0930 5.7050 14.0390 O 0 0 0 0 0 + 15.6800 7.0990 16.6820 C 0 0 0 0 0 + 13.9660 6.5020 13.7390 N 0 0 0 0 0 + 13.5120 5.3950 12.8780 C 0 0 0 0 0 + 13.3110 5.8530 11.4550 C 0 0 0 0 0 + 13.7330 6.9290 11.0260 O 0 0 0 0 0 + 12.2660 4.7690 13.5010 C 0 0 0 0 0 + 12.5380 4.3040 14.9220 C 0 0 0 0 0 + 11.9820 4.8490 15.8860 O 0 0 0 0 0 + 13.4070 3.2980 15.0150 N 0 0 0 0 0 + 12.7030 4.9730 10.7460 O 0 0 0 0 0 + 1 2 1 0 0 0 + 2 3 1 0 0 0 + 2 5 1 0 0 0 + 3 8 1 0 0 0 + 3 4 2 0 0 0 + 6 5 1 0 0 0 + 5 7 1 0 0 0 + 8 9 1 0 0 0 + 12 9 1 0 0 0 + 9 10 1 0 0 0 + 10 11 2 0 0 0 + 10 15 1 0 0 0 + 14 12 1 0 0 0 + 13 12 
1 0 0 0 + 15 16 1 0 0 0 + 16 17 1 0 0 0 + 16 19 1 0 0 0 + 18 17 2 0 0 0 + 17 21 1 0 0 0 + 20 19 1 0 0 0 + 20282 1 0 0 0 + 21 22 1 0 0 0 + 25 22 1 0 0 0 + 22 23 1 0 0 0 + 23 24 2 0 0 0 + 23 27 1 0 0 0 + 26 25 1 0 0 0 + 26229 1 0 0 0 + 33 27 1 0 0 0 + 27 28 1 0 0 0 + 28 31 1 0 0 0 + 28 29 1 0 0 0 + 34 29 1 0 0 0 + 29 30 2 0 0 0 + 32 31 1 0 0 0 + 33 32 1 0 0 0 + 34 35 1 0 0 0 + 36 35 1 0 0 0 + 35 38 1 0 0 0 + 37 36 2 0 0 0 + 36 40 1 0 0 0 + 39 38 1 0 0 0 + 41 40 1 0 0 0 + 42 41 1 0 0 0 + 41 44 1 0 0 0 + 43 42 2 0 0 0 + 42 48 1 0 0 0 + 46 44 1 0 0 0 + 44 45 1 0 0 0 + 45 47 1 0 0 0 + 49 48 1 0 0 0 + 50 49 1 0 0 0 + 49 52 1 0 0 0 + 51 50 2 0 0 0 + 50 55 1 0 0 0 + 53 52 1 0 0 0 + 52 54 1 0 0 0 + 56 55 1 0 0 0 + 57 56 1 0 0 0 + 56 59 1 0 0 0 + 58 57 2 0 0 0 + 57 60 1 0 0 0 + 61 60 1 0 0 0 + 62 61 1 0 0 0 + 61 64 1 0 0 0 + 63 62 2 0 0 0 + 62 71 1 0 0 0 + 65 64 1 0 0 0 + 66 65 1 0 0 0 + 66 67 1 0 0 0 + 68 67 1 0 0 0 + 69 68 2 0 0 0 + 68 70 1 0 0 0 + 72 71 1 0 0 0 + 73 72 1 0 0 0 + 72 75 1 0 0 0 + 74 73 2 0 0 0 + 73 77 1 0 0 0 + 75 76 1 0 0 0 + 78 77 1 0 0 0 + 79 78 1 0 0 0 + 78 81 1 0 0 0 + 80 79 2 0 0 0 + 79 85 1 0 0 0 + 81 82 1 0 0 0 + 83 82 2 0 0 0 + 82 84 1 0 0 0 + 86 85 1 0 0 0 + 87 86 1 0 0 0 + 86 89 1 0 0 0 + 88 87 2 0 0 0 + 87 96 1 0 0 0 + 90 89 1 0 0 0 + 91 90 2 0 0 0 + 90 92 1 0 0 0 + 93 91 1 0 0 0 + 94 92 2 0 0 0 + 93 95 2 0 0 0 + 95 94 1 0 0 0 + 97 96 1 0 0 0 + 98 97 1 0 0 0 + 97100 1 0 0 0 + 99 98 2 0 0 0 + 98104 1 0 0 0 +100101 1 0 0 0 +102101 2 0 0 0 +101103 1 0 0 0 +105104 1 0 0 0 +106105 1 0 0 0 +105108 1 0 0 0 +107106 2 0 0 0 +106111 1 0 0 0 +109108 1 0 0 0 +108110 1 0 0 0 +112111 1 0 0 0 +113112 1 0 0 0 +112115 1 0 0 0 +114113 2 0 0 0 +113117 1 0 0 0 +116115 1 0 0 0 +116188 1 0 0 0 +118117 1 0 0 0 +119118 1 0 0 0 +118121 1 0 0 0 +120119 2 0 0 0 +119128 1 0 0 0 +121122 1 0 0 0 +123122 1 0 0 0 +124123 1 0 0 0 +125124 1 0 0 0 +126125 2 0 0 0 +127125 1 0 0 0 +129128 1 0 0 0 +130129 1 0 0 0 +129132 1 0 0 0 +131130 2 0 0 0 +130136 1 0 0 0 +132133 1 0 0 0 +135133 
1 0 0 0 +133134 1 0 0 0 +137136 1 0 0 0 +136142 1 0 0 0 +138137 1 0 0 0 +137140 1 0 0 0 +139138 2 0 0 0 +138143 1 0 0 0 +140141 1 0 0 0 +141142 1 0 0 0 +144143 1 0 0 0 +144145 1 0 0 0 +146145 2 0 0 0 +145147 1 0 0 0 +147148 1 0 0 0 +148149 1 0 0 0 +148151 1 0 0 0 +154149 1 0 0 0 +149150 2 0 0 0 +152151 1 0 0 0 +151153 1 0 0 0 +160154 1 0 0 0 +154155 1 0 0 0 +158155 1 0 0 0 +155156 1 0 0 0 +156161 1 0 0 0 +156157 2 0 0 0 +159158 1 0 0 0 +159160 1 0 0 0 +161162 1 0 0 0 +162165 1 0 0 0 +162163 1 0 0 0 +170163 1 0 0 0 +163164 2 0 0 0 +166165 1 0 0 0 +166167 1 0 0 0 +169167 2 0 0 0 +167168 1 0 0 0 +170171 1 0 0 0 +174171 1 0 0 0 +171172 1 0 0 0 +175172 1 0 0 0 +172173 2 0 0 0 +175176 1 0 0 0 +179176 1 0 0 0 +176177 1 0 0 0 +183177 1 0 0 0 +177178 2 0 0 0 +180179 1 0 0 0 +179181 1 0 0 0 +180182 1 0 0 0 +183184 1 0 0 0 +187184 1 0 0 0 +184185 1 0 0 0 +185189 1 0 0 0 +185186 2 0 0 0 +188187 1 0 0 0 +189190 1 0 0 0 +193190 1 0 0 0 +190191 1 0 0 0 +194191 1 0 0 0 +191192 2 0 0 0 +194195 1 0 0 0 +198195 1 0 0 0 +195196 1 0 0 0 +201196 1 0 0 0 +196197 2 0 0 0 +199198 1 0 0 0 +198200 1 0 0 0 +201202 1 0 0 0 +205202 1 0 0 0 +202203 1 0 0 0 +203213 1 0 0 0 +203204 2 0 0 0 +205206 1 0 0 0 +208206 1 0 0 0 +206207 2 0 0 0 +207209 1 0 0 0 +208210 2 0 0 0 +211209 2 0 0 0 +210211 1 0 0 0 +211212 1 0 0 0 +213214 1 0 0 0 +217214 1 0 0 0 +214215 1 0 0 0 +215220 1 0 0 0 +215216 2 0 0 0 +219217 1 0 0 0 +218217 1 0 0 0 +220221 1 0 0 0 +222221 1 0 0 0 +224222 1 0 0 0 +222223 2 0 0 0 +225224 1 0 0 0 +228225 1 0 0 0 +226225 1 0 0 0 +227226 2 0 0 0 +230226 1 0 0 0 +229228 1 0 0 0 +231230 1 0 0 0 +232231 1 0 0 0 +231234 1 0 0 0 +238232 1 0 0 0 +233232 2 0 0 0 +234235 1 0 0 0 +234236 1 0 0 0 +235237 1 0 0 0 +239238 1 0 0 0 +242239 1 0 0 0 +239240 1 0 0 0 +246240 1 0 0 0 +240241 2 0 0 0 +244242 1 0 0 0 +243242 1 0 0 0 +245243 1 0 0 0 +246247 1 0 0 0 +248247 1 0 0 0 +247250 1 0 0 0 +249248 2 0 0 0 +254248 1 0 0 0 +251250 1 0 0 0 +250252 1 0 0 0 +251253 1 0 0 0 +255254 1 0 0 0 +254260 1 0 0 0 +255258 
1 0 0 0 +255256 1 0 0 0 +257256 2 0 0 0 +256261 1 0 0 0 +258259 2 0 0 0 +259260 1 0 0 0 +261262 1 0 0 0 +263262 1 0 0 0 +264263 2 0 0 0 +263265 1 0 0 0 +266265 1 0 0 0 +269266 1 0 0 0 +266267 1 0 0 0 +267268 2 0 0 0 +267270 1 0 0 0 +270271 1 0 0 0 +271272 1 0 0 0 +271274 1 0 0 0 +272277 1 0 0 0 +272273 2 0 0 0 +275274 1 0 0 0 +274276 1 0 0 0 +277278 1 0 0 0 +281278 1 0 0 0 +278279 1 0 0 0 +279280 2 0 0 0 +279283 1 0 0 0 +282281 1 0 0 0 +289283 1 0 0 0 +283284 1 0 0 0 +284287 1 0 0 0 +284285 1 0 0 0 +286285 2 0 0 0 +285290 1 0 0 0 +288287 1 0 0 0 +289288 1 0 0 0 +290291 1 0 0 0 +291292 1 0 0 0 +294292 1 0 0 0 +292293 2 0 0 0 +294295 1 0 0 0 +296295 1 0 0 0 +295298 1 0 0 0 +302296 1 0 0 0 +297296 2 0 0 0 +299298 1 0 0 0 +300299 2 0 0 0 +299301 1 0 0 0 +303302 1 0 0 0 +306303 1 0 0 0 +304303 1 0 0 0 +305304 2 0 0 0 +304314 1 0 0 0 +306307 1 0 0 0 +309307 1 0 0 0 +307308 2 0 0 0 +308310 1 0 0 0 +309311 2 0 0 0 +312310 2 0 0 0 +311312 1 0 0 0 +312313 1 0 0 0 +315314 1 0 0 0 +316315 1 0 0 0 +315318 1 0 0 0 +319316 1 0 0 0 +317316 2 0 0 0 +320319 1 0 0 0 +321320 1 0 0 0 +320323 1 0 0 0 +327321 2 0 0 0 +322321 1 0 0 0 +323324 1 0 0 0 +324326 1 0 0 0 +324325 2 0 0 0 +M END diff --git a/tests/data/1CRN.pdb b/tests/data/1CRN.pdb new file mode 100644 index 0000000..c5ad6ad --- /dev/null +++ b/tests/data/1CRN.pdb @@ -0,0 +1,426 @@ +HEADER PLANT SEED PROTEIN 30-APR-81 1CRN 1CRND 1 +COMPND CRAMBIN 1CRN 4 +SOURCE ABYSSINIAN CABBAGE (CRAMBE ABYSSINICA) SEED 1CRN 5 +AUTHOR W.A.HENDRICKSON,M.M.TEETER 1CRN 6 +REVDAT 5 16-APR-87 1CRND 1 HEADER 1CRND 2 +REVDAT 4 04-MAR-85 1CRNC 1 REMARK 1CRNC 1 +REVDAT 3 30-SEP-83 1CRNB 1 REVDAT 1CRNB 1 +REVDAT 2 03-DEC-81 1CRNA 1 SHEET 1CRNB 2 +REVDAT 1 28-JUL-81 1CRN 0 1CRNB 3 +REMARK 1 1CRN 7 +REMARK 1 REFERENCE 1 1CRNC 2 +REMARK 1 AUTH M.M.TEETER 1CRNC 3 +REMARK 1 TITL WATER STRUCTURE OF A HYDROPHOBIC PROTEIN AT ATOMIC 1CRNC 4 +REMARK 1 TITL 2 RESOLUTION. 
PENTAGON RINGS OF WATER MOLECULES IN 1CRNC 5 +REMARK 1 TITL 3 CRYSTALS OF CRAMBIN 1CRNC 6 +REMARK 1 REF PROC.NAT.ACAD.SCI.USA V. 81 6014 1984 1CRNC 7 +REMARK 1 REFN ASTM PNASA6 US ISSN 0027-8424 040 1CRNC 8 +REMARK 1 REFERENCE 2 1CRNC 9 +REMARK 1 AUTH W.A.HENDRICKSON,M.M.TEETER 1CRN 9 +REMARK 1 TITL STRUCTURE OF THE HYDROPHOBIC PROTEIN CRAMBIN 1CRN 10 +REMARK 1 TITL 2 DETERMINED DIRECTLY FROM THE ANOMALOUS SCATTERING 1CRN 11 +REMARK 1 TITL 3 OF SULPHUR 1CRN 12 +REMARK 1 REF NATURE V. 290 107 1981 1CRN 13 +REMARK 1 REFN ASTM NATUAS UK ISSN 0028-0836 006 1CRN 14 +REMARK 1 REFERENCE 3 1CRNC 10 +REMARK 1 AUTH M.M.TEETER,W.A.HENDRICKSON 1CRN 16 +REMARK 1 TITL HIGHLY ORDERED CRYSTALS OF THE PLANT SEED PROTEIN 1CRN 17 +REMARK 1 TITL 2 CRAMBIN 1CRN 18 +REMARK 1 REF J.MOL.BIOL. V. 127 219 1979 1CRN 19 +REMARK 1 REFN ASTM JMOBAK UK ISSN 0022-2836 070 1CRN 20 +REMARK 2 1CRN 21 +REMARK 2 RESOLUTION. 1.5 ANGSTROMS. 1CRN 22 +REMARK 3 1CRN 23 +REMARK 3 REFINEMENT. RESTRAINED LEAST SQUARES (HENDRICKSON,W.A., 1CRN 24 +REMARK 3 KONNERT,J.H. COMPUTING IN CRYSTALLOGRAPHY, EDS.DIAMOND,R., 1CRN 25 +REMARK 3 RAMASESHAN,S.,VENKATESAN,K. (1980)). 1CRN 26 +REMARK 4 1CRN 27 +REMARK 4 CONFORMATIONAL HETEROGENEITY EXISTS AT ILE 7 AND ILE 25 1CRN 28 +REMARK 4 WHERE CD1 ATOMS TAKE EITHER OF TWO STAGGERED POSSIBILITIES. 1CRN 29 +REMARK 4 COMPOSITIONAL HETEROGENEITY ALSO EXISTS AT POSITIONS 22 AND 1CRN 30 +REMARK 4 25. REFINEMENT PARAMETERS SUGGEST THAT RESIDUE 22 IS ABOUT 1CRN 31 +REMARK 4 60/40 PRO/SER AND THAT RESIDUE 25 IS ABOUT 60/40 ILE/LEU. 1CRN 32 +REMARK 4 THE HETEROGENEITY AT RESIDUE 22 APPARENTLY CAUSES A 1CRN 33 +REMARK 4 DISORDER IN TYR 29 - THE REFINED POSITION OF ITS OH ATOM 1CRN 34 +REMARK 4 MAKES AN IMPOSSIBLY SHORT CONTACT OF 2.6 ANGSTROMS WITH 1CRN 35 +REMARK 4 ATOM CD OF PRO 22 ON A SCREW-RELATED MOLECULE. THE 1CRN 36 +REMARK 4 DEPOSITED COORDINATES ARE ONLY FOR THE MAJOR CONTRIBUTOR AT 1CRN 37 +REMARK 4 EACH SITE (PRO 22 AND ILE 25). 
DEPOSITION OF THE MODEL OF 1CRN 38 +REMARK 4 DISORDER AND SOLVENT STRUCTURE IS DEFERRED UNTIL HIGHER 1CRN 39 +REMARK 4 RESOLUTION REFINEMENT. THE R-FACTOR FOR THE COMPLETE MODEL 1CRN 40 +REMARK 4 INCLUDING HETEROGENEITY AND SOLVENT IS 0.114 ISOTROPIC AND 1CRN 41 +REMARK 4 0.104 ANISOTROPIC AGAINST ALL DATA IN THE 10.0 TO 1.5 1CRN 42 +REMARK 4 ANGSTROM SHELL. 1CRN 43 +REMARK 5 1CRN 44 +REMARK 5 THE SECONDARY STRUCTURE SPECIFICATIONS ARE THOSE DEFINED 1CRN 45 +REMARK 5 IN REFERENCE 1 ABOVE AND DEPEND ON PARTICULAR DEFINITIONS 1CRN 46 +REMARK 5 THAT MAY AFFECT THE DETERMINATION OF END POINTS. PLEASE 1CRN 47 +REMARK 5 CONSULT THE PRIMARY REFERENCE AND EXAMINE STRUCTURAL 1CRN 48 +REMARK 5 DETAILS SUCH AS HYDROGEN BONDING AND CONFORMATION ANGLES 1CRN 49 +REMARK 5 WHEN MAKING USE OF THE SPECIFICATIONS. 1CRN 50 +REMARK 6 1CRNA 1 +REMARK 6 CORRECTION. CORRECT RESIDUE NUMBER ON STRAND 1 OF SHEET S1. 1CRNA 2 +REMARK 6 03-DEC-81. 1CRNA 3 +REMARK 7 1CRNB 4 +REMARK 7 CORRECTION. INSERT REVDAT RECORDS. 30-SEP-83. 1CRNB 5 +REMARK 8 1CRNC 11 +REMARK 8 CORRECTION. INSERT NEW PUBLICATION AS REFERENCE 1 AND 1CRNC 12 +REMARK 8 RENUMBER THE OTHERS. 04-MAR-85. 1CRNC 13 +REMARK 9 1CRND 3 +REMARK 9 CORRECTION. CHANGE DEPOSITION DATE FROM 31-APR-81 TO 1CRND 4 +REMARK 9 30-APR-81. 16-APR-87. 
1CRND 5 +SEQRES 1 46 THR THR CYS CYS PRO SER ILE VAL ALA ARG SER ASN PHE 1CRN 51 +SEQRES 2 46 ASN VAL CYS ARG LEU PRO GLY THR PRO GLU ALA ILE CYS 1CRN 52 +SEQRES 3 46 ALA THR TYR THR GLY CYS ILE ILE ILE PRO GLY ALA THR 1CRN 53 +SEQRES 4 46 CYS PRO GLY ASP TYR ALA ASN 1CRN 54 +HELIX 1 H1 ILE 7 PRO 19 1 3/10 CONFORMATION RES 17,19 1CRN 55 +HELIX 2 H2 GLU 23 THR 30 1 DISTORTED 3/10 AT RES 30 1CRN 56 +SHEET 1 S1 2 THR 1 CYS 4 0 1CRNA 4 +SHEET 2 S1 2 CYS 32 ILE 35 -1 1CRN 58 +TURN 1 T1 PRO 41 TYR 44 1CRN 59 +SSBOND 1 CYS 3 CYS 40 1CRN 60 +SSBOND 2 CYS 4 CYS 32 1CRN 61 +SSBOND 3 CYS 16 CYS 26 1CRN 62 +CRYST1 40.960 18.650 22.520 90.00 90.77 90.00 P 21 2 1CRN 63 +ORIGX1 1.000000 0.000000 0.000000 0.00000 1CRN 64 +ORIGX2 0.000000 1.000000 0.000000 0.00000 1CRN 65 +ORIGX3 0.000000 0.000000 1.000000 0.00000 1CRN 66 +SCALE1 .024414 0.000000 -.000328 0.00000 1CRN 67 +SCALE2 0.000000 .053619 0.000000 0.00000 1CRN 68 +SCALE3 0.000000 0.000000 .044409 0.00000 1CRN 69 +ATOM 1 N THR 1 17.047 14.099 3.625 1.00 13.79 1CRN 70 +ATOM 2 CA THR 1 16.967 12.784 4.338 1.00 10.80 1CRN 71 +ATOM 3 C THR 1 15.685 12.755 5.133 1.00 9.19 1CRN 72 +ATOM 4 O THR 1 15.268 13.825 5.594 1.00 9.85 1CRN 73 +ATOM 5 CB THR 1 18.170 12.703 5.337 1.00 13.02 1CRN 74 +ATOM 6 OG1 THR 1 19.334 12.829 4.463 1.00 15.06 1CRN 75 +ATOM 7 CG2 THR 1 18.150 11.546 6.304 1.00 14.23 1CRN 76 +ATOM 8 N THR 2 15.115 11.555 5.265 1.00 7.81 1CRN 77 +ATOM 9 CA THR 2 13.856 11.469 6.066 1.00 8.31 1CRN 78 +ATOM 10 C THR 2 14.164 10.785 7.379 1.00 5.80 1CRN 79 +ATOM 11 O THR 2 14.993 9.862 7.443 1.00 6.94 1CRN 80 +ATOM 12 CB THR 2 12.732 10.711 5.261 1.00 10.32 1CRN 81 +ATOM 13 OG1 THR 2 13.308 9.439 4.926 1.00 12.81 1CRN 82 +ATOM 14 CG2 THR 2 12.484 11.442 3.895 1.00 11.90 1CRN 83 +ATOM 15 N CYS 3 13.488 11.241 8.417 1.00 5.24 1CRN 84 +ATOM 16 CA CYS 3 13.660 10.707 9.787 1.00 5.39 1CRN 85 +ATOM 17 C CYS 3 12.269 10.431 10.323 1.00 4.45 1CRN 86 +ATOM 18 O CYS 3 11.393 11.308 10.185 1.00 6.54 1CRN 87 +ATOM 19 CB CYS 3 14.368 
11.748 10.691 1.00 5.99 1CRN 88 +ATOM 20 SG CYS 3 15.885 12.426 10.016 1.00 7.01 1CRN 89 +ATOM 21 N CYS 4 12.019 9.272 10.928 1.00 3.90 1CRN 90 +ATOM 22 CA CYS 4 10.646 8.991 11.408 1.00 4.24 1CRN 91 +ATOM 23 C CYS 4 10.654 8.793 12.919 1.00 3.72 1CRN 92 +ATOM 24 O CYS 4 11.659 8.296 13.491 1.00 5.30 1CRN 93 +ATOM 25 CB CYS 4 10.057 7.752 10.682 1.00 4.41 1CRN 94 +ATOM 26 SG CYS 4 9.837 8.018 8.904 1.00 4.72 1CRN 95 +ATOM 27 N PRO 5 9.561 9.108 13.563 1.00 3.96 1CRN 96 +ATOM 28 CA PRO 5 9.448 9.034 15.012 1.00 4.25 1CRN 97 +ATOM 29 C PRO 5 9.288 7.670 15.606 1.00 4.96 1CRN 98 +ATOM 30 O PRO 5 9.490 7.519 16.819 1.00 7.44 1CRN 99 +ATOM 31 CB PRO 5 8.230 9.957 15.345 1.00 5.11 1CRN 100 +ATOM 32 CG PRO 5 7.338 9.786 14.114 1.00 5.24 1CRN 101 +ATOM 33 CD PRO 5 8.366 9.804 12.958 1.00 5.20 1CRN 102 +ATOM 34 N SER 6 8.875 6.686 14.796 1.00 4.83 1CRN 103 +ATOM 35 CA SER 6 8.673 5.314 15.279 1.00 4.45 1CRN 104 +ATOM 36 C SER 6 8.753 4.376 14.083 1.00 4.99 1CRN 105 +ATOM 37 O SER 6 8.726 4.858 12.923 1.00 4.61 1CRN 106 +ATOM 38 CB SER 6 7.340 5.121 15.996 1.00 5.05 1CRN 107 +ATOM 39 OG SER 6 6.274 5.220 15.031 1.00 6.39 1CRN 108 +ATOM 40 N ILE 7 8.881 3.075 14.358 1.00 4.94 1CRN 109 +ATOM 41 CA ILE 7 8.912 2.083 13.258 1.00 6.33 1CRN 110 +ATOM 42 C ILE 7 7.581 2.090 12.506 1.00 5.32 1CRN 111 +ATOM 43 O ILE 7 7.670 2.031 11.245 1.00 6.85 1CRN 112 +ATOM 44 CB ILE 7 9.207 .677 13.924 1.00 8.43 1CRN 113 +ATOM 45 CG1 ILE 7 10.714 .702 14.312 1.00 9.78 1CRN 114 +ATOM 46 CG2 ILE 7 8.811 -.477 12.969 1.00 11.70 1CRN 115 +ATOM 47 CD1 ILE 7 11.185 -.516 15.142 1.00 9.92 1CRN 116 +ATOM 48 N VAL 8 6.458 2.162 13.159 1.00 5.02 1CRN 117 +ATOM 49 CA VAL 8 5.145 2.209 12.453 1.00 6.93 1CRN 118 +ATOM 50 C VAL 8 5.115 3.379 11.461 1.00 5.39 1CRN 119 +ATOM 51 O VAL 8 4.664 3.268 10.343 1.00 6.30 1CRN 120 +ATOM 52 CB VAL 8 3.995 2.354 13.478 1.00 9.64 1CRN 121 +ATOM 53 CG1 VAL 8 2.716 2.891 12.869 1.00 13.85 1CRN 122 +ATOM 54 CG2 VAL 8 3.758 1.032 14.208 1.00 11.97 1CRN 123 +ATOM 55 N ALA 9 
5.606 4.546 11.941 1.00 3.73 1CRN 124 +ATOM 56 CA ALA 9 5.598 5.767 11.082 1.00 3.56 1CRN 125 +ATOM 57 C ALA 9 6.441 5.527 9.850 1.00 4.13 1CRN 126 +ATOM 58 O ALA 9 6.052 5.933 8.744 1.00 4.36 1CRN 127 +ATOM 59 CB ALA 9 6.022 6.977 11.891 1.00 4.80 1CRN 128 +ATOM 60 N ARG 10 7.647 4.909 10.005 1.00 3.73 1CRN 129 +ATOM 61 CA ARG 10 8.496 4.609 8.837 1.00 3.38 1CRN 130 +ATOM 62 C ARG 10 7.798 3.609 7.876 1.00 3.47 1CRN 131 +ATOM 63 O ARG 10 7.878 3.778 6.651 1.00 4.67 1CRN 132 +ATOM 64 CB ARG 10 9.847 4.020 9.305 1.00 3.95 1CRN 133 +ATOM 65 CG ARG 10 10.752 3.607 8.149 1.00 4.55 1CRN 134 +ATOM 66 CD ARG 10 11.226 4.699 7.244 1.00 5.89 1CRN 135 +ATOM 67 NE ARG 10 12.143 5.571 8.035 1.00 6.20 1CRN 136 +ATOM 68 CZ ARG 10 12.758 6.609 7.443 1.00 7.52 1CRN 137 +ATOM 69 NH1 ARG 10 12.539 6.932 6.158 1.00 10.68 1CRN 138 +ATOM 70 NH2 ARG 10 13.601 7.322 8.202 1.00 9.48 1CRN 139 +ATOM 71 N SER 11 7.186 2.582 8.445 1.00 5.19 1CRN 140 +ATOM 72 CA SER 11 6.500 1.584 7.565 1.00 4.60 1CRN 141 +ATOM 73 C SER 11 5.382 2.313 6.773 1.00 4.84 1CRN 142 +ATOM 74 O SER 11 5.213 2.016 5.557 1.00 5.84 1CRN 143 +ATOM 75 CB SER 11 5.908 .462 8.400 1.00 5.91 1CRN 144 +ATOM 76 OG SER 11 6.990 -.272 9.012 1.00 8.38 1CRN 145 +ATOM 77 N ASN 12 4.648 3.182 7.446 1.00 3.54 1CRN 146 +ATOM 78 CA ASN 12 3.545 3.935 6.751 1.00 4.57 1CRN 147 +ATOM 79 C ASN 12 4.107 4.851 5.691 1.00 4.14 1CRN 148 +ATOM 80 O ASN 12 3.536 5.001 4.617 1.00 5.52 1CRN 149 +ATOM 81 CB ASN 12 2.663 4.677 7.748 1.00 6.42 1CRN 150 +ATOM 82 CG ASN 12 1.802 3.735 8.610 1.00 8.25 1CRN 151 +ATOM 83 OD1 ASN 12 1.567 2.613 8.165 1.00 12.72 1CRN 152 +ATOM 84 ND2 ASN 12 1.394 4.252 9.767 1.00 9.92 1CRN 153 +ATOM 85 N PHE 13 5.259 5.498 6.005 1.00 3.43 1CRN 154 +ATOM 86 CA PHE 13 5.929 6.358 5.055 1.00 3.49 1CRN 155 +ATOM 87 C PHE 13 6.304 5.578 3.799 1.00 3.40 1CRN 156 +ATOM 88 O PHE 13 6.136 6.072 2.653 1.00 4.07 1CRN 157 +ATOM 89 CB PHE 13 7.183 6.994 5.754 1.00 5.48 1CRN 158 +ATOM 90 CG PHE 13 7.884 8.006 4.883 1.00 5.57 1CRN 159 +ATOM 
91 CD1 PHE 13 8.906 7.586 4.027 1.00 6.99 1CRN 160 +ATOM 92 CD2 PHE 13 7.532 9.373 4.983 1.00 6.52 1CRN 161 +ATOM 93 CE1 PHE 13 9.560 8.539 3.194 1.00 8.20 1CRN 162 +ATOM 94 CE2 PHE 13 8.176 10.281 4.145 1.00 6.34 1CRN 163 +ATOM 95 CZ PHE 13 9.141 9.845 3.292 1.00 6.84 1CRN 164 +ATOM 96 N ASN 14 6.900 4.390 3.989 1.00 3.64 1CRN 165 +ATOM 97 CA ASN 14 7.331 3.607 2.791 1.00 4.31 1CRN 166 +ATOM 98 C ASN 14 6.116 3.210 1.915 1.00 3.98 1CRN 167 +ATOM 99 O ASN 14 6.240 3.144 .684 1.00 6.22 1CRN 168 +ATOM 100 CB ASN 14 8.145 2.404 3.240 1.00 5.81 1CRN 169 +ATOM 101 CG ASN 14 9.555 2.856 3.730 1.00 6.82 1CRN 170 +ATOM 102 OD1 ASN 14 10.013 3.895 3.323 1.00 9.43 1CRN 171 +ATOM 103 ND2 ASN 14 10.120 1.956 4.539 1.00 8.21 1CRN 172 +ATOM 104 N VAL 15 4.993 2.927 2.571 1.00 3.76 1CRN 173 +ATOM 105 CA VAL 15 3.782 2.599 1.742 1.00 3.98 1CRN 174 +ATOM 106 C VAL 15 3.296 3.871 1.004 1.00 3.80 1CRN 175 +ATOM 107 O VAL 15 2.947 3.817 -.189 1.00 4.85 1CRN 176 +ATOM 108 CB VAL 15 2.698 1.953 2.608 1.00 4.71 1CRN 177 +ATOM 109 CG1 VAL 15 1.384 1.826 1.806 1.00 6.67 1CRN 178 +ATOM 110 CG2 VAL 15 3.174 .533 3.005 1.00 6.26 1CRN 179 +ATOM 111 N CYS 16 3.321 4.987 1.720 1.00 3.79 1CRN 180 +ATOM 112 CA CYS 16 2.890 6.285 1.126 1.00 3.54 1CRN 181 +ATOM 113 C CYS 16 3.687 6.597 -.111 1.00 3.48 1CRN 182 +ATOM 114 O CYS 16 3.200 7.147 -1.103 1.00 4.63 1CRN 183 +ATOM 115 CB CYS 16 3.039 7.369 2.240 1.00 4.58 1CRN 184 +ATOM 116 SG CYS 16 2.559 9.014 1.649 1.00 5.66 1CRN 185 +ATOM 117 N ARG 17 4.997 6.227 -.100 1.00 3.99 1CRN 186 +ATOM 118 CA ARG 17 5.895 6.489 -1.213 1.00 3.83 1CRN 187 +ATOM 119 C ARG 17 5.738 5.560 -2.409 1.00 3.79 1CRN 188 +ATOM 120 O ARG 17 6.228 5.901 -3.507 1.00 5.39 1CRN 189 +ATOM 121 CB ARG 17 7.370 6.507 -.731 1.00 4.11 1CRN 190 +ATOM 122 CG ARG 17 7.717 7.687 .206 1.00 4.69 1CRN 191 +ATOM 123 CD ARG 17 7.949 8.947 -.615 1.00 5.10 1CRN 192 +ATOM 124 NE ARG 17 9.212 8.856 -1.337 1.00 4.71 1CRN 193 +ATOM 125 CZ ARG 17 9.537 9.533 -2.431 1.00 5.28 1CRN 194 +ATOM 126 NH1 ARG 
17 8.659 10.350 -3.032 1.00 6.67 1CRN 195 +ATOM 127 NH2 ARG 17 10.793 9.491 -2.899 1.00 6.41 1CRN 196 +ATOM 128 N LEU 18 5.051 4.411 -2.204 1.00 4.70 1CRN 197 +ATOM 129 CA LEU 18 4.933 3.431 -3.326 1.00 5.46 1CRN 198 +ATOM 130 C LEU 18 4.397 4.014 -4.620 1.00 5.13 1CRN 199 +ATOM 131 O LEU 18 4.988 3.755 -5.687 1.00 5.55 1CRN 200 +ATOM 132 CB LEU 18 4.196 2.184 -2.863 1.00 6.47 1CRN 201 +ATOM 133 CG LEU 18 4.960 1.178 -1.991 1.00 7.43 1CRN 202 +ATOM 134 CD1 LEU 18 3.907 .097 -1.634 1.00 8.70 1CRN 203 +ATOM 135 CD2 LEU 18 6.129 .606 -2.768 1.00 9.39 1CRN 204 +ATOM 136 N PRO 19 3.329 4.795 -4.543 1.00 4.28 1CRN 205 +ATOM 137 CA PRO 19 2.792 5.376 -5.797 1.00 5.38 1CRN 206 +ATOM 138 C PRO 19 3.573 6.540 -6.322 1.00 6.30 1CRN 207 +ATOM 139 O PRO 19 3.260 7.045 -7.422 1.00 9.62 1CRN 208 +ATOM 140 CB PRO 19 1.358 5.766 -5.472 1.00 5.87 1CRN 209 +ATOM 141 CG PRO 19 1.223 5.694 -3.993 1.00 6.47 1CRN 210 +ATOM 142 CD PRO 19 2.421 4.941 -3.408 1.00 6.45 1CRN 211 +ATOM 143 N GLY 20 4.565 7.047 -5.559 1.00 4.94 1CRN 212 +ATOM 144 CA GLY 20 5.366 8.191 -6.018 1.00 5.39 1CRN 213 +ATOM 145 C GLY 20 5.007 9.481 -5.280 1.00 5.03 1CRN 214 +ATOM 146 O GLY 20 5.535 10.510 -5.730 1.00 7.34 1CRN 215 +ATOM 147 N THR 21 4.181 9.438 -4.262 1.00 4.10 1CRN 216 +ATOM 148 CA THR 21 3.767 10.609 -3.513 1.00 3.94 1CRN 217 +ATOM 149 C THR 21 5.017 11.397 -3.042 1.00 3.96 1CRN 218 +ATOM 150 O THR 21 5.947 10.757 -2.523 1.00 5.82 1CRN 219 +ATOM 151 CB THR 21 2.992 10.188 -2.225 1.00 4.13 1CRN 220 +ATOM 152 OG1 THR 21 2.051 9.144 -2.623 1.00 5.45 1CRN 221 +ATOM 153 CG2 THR 21 2.260 11.349 -1.551 1.00 5.41 1CRN 222 +ATOM 154 N PRO 22 4.971 12.703 -3.176 1.00 5.04 1CRN 223 +ATOM 155 CA PRO 22 6.143 13.513 -2.696 1.00 4.69 1CRN 224 +ATOM 156 C PRO 22 6.400 13.233 -1.225 1.00 4.19 1CRN 225 +ATOM 157 O PRO 22 5.485 13.061 -.382 1.00 4.47 1CRN 226 +ATOM 158 CB PRO 22 5.703 14.969 -2.920 1.00 7.12 1CRN 227 +ATOM 159 CG PRO 22 4.676 14.893 -3.996 1.00 7.03 1CRN 228 +ATOM 160 CD PRO 22 3.964 13.567 -3.811 
1.00 4.90 1CRN 229 +ATOM 161 N GLU 23 7.728 13.297 -.921 1.00 5.16 1CRN 230 +ATOM 162 CA GLU 23 8.114 13.103 .500 1.00 5.31 1CRN 231 +ATOM 163 C GLU 23 7.427 14.073 1.410 1.00 4.11 1CRN 232 +ATOM 164 O GLU 23 7.036 13.682 2.540 1.00 5.11 1CRN 233 +ATOM 165 CB GLU 23 9.648 13.285 .660 1.00 6.16 1CRN 234 +ATOM 166 CG GLU 23 10.440 12.093 .063 1.00 7.48 1CRN 235 +ATOM 167 CD GLU 23 11.941 12.170 .391 1.00 9.40 1CRN 236 +ATOM 168 OE1 GLU 23 12.416 13.225 .681 1.00 10.40 1CRN 237 +ATOM 169 OE2 GLU 23 12.539 11.070 .292 1.00 13.32 1CRN 238 +ATOM 170 N ALA 24 7.212 15.334 .966 1.00 4.56 1CRN 239 +ATOM 171 CA ALA 24 6.614 16.317 1.913 1.00 4.49 1CRN 240 +ATOM 172 C ALA 24 5.212 15.936 2.350 1.00 4.10 1CRN 241 +ATOM 173 O ALA 24 4.782 16.166 3.495 1.00 5.64 1CRN 242 +ATOM 174 CB ALA 24 6.605 17.695 1.246 1.00 5.80 1CRN 243 +ATOM 175 N ILE 25 4.445 15.318 1.405 1.00 4.37 1CRN 244 +ATOM 176 CA ILE 25 3.074 14.894 1.756 1.00 5.44 1CRN 245 +ATOM 177 C ILE 25 3.085 13.643 2.645 1.00 4.32 1CRN 246 +ATOM 178 O ILE 25 2.315 13.523 3.578 1.00 4.72 1CRN 247 +ATOM 179 CB ILE 25 2.204 14.637 .462 1.00 6.42 1CRN 248 +ATOM 180 CG1 ILE 25 1.815 16.048 -.129 1.00 7.50 1CRN 249 +ATOM 181 CG2 ILE 25 .903 13.864 .811 1.00 7.65 1CRN 250 +ATOM 182 CD1 ILE 25 .756 16.761 .757 1.00 7.80 1CRN 251 +ATOM 183 N CYS 26 4.032 12.764 2.313 1.00 3.92 1CRN 252 +ATOM 184 CA CYS 26 4.180 11.549 3.187 1.00 4.37 1CRN 253 +ATOM 185 C CYS 26 4.632 11.944 4.596 1.00 3.95 1CRN 254 +ATOM 186 O CYS 26 4.227 11.252 5.547 1.00 4.74 1CRN 255 +ATOM 187 CB CYS 26 5.038 10.518 2.539 1.00 4.63 1CRN 256 +ATOM 188 SG CYS 26 4.349 9.794 1.022 1.00 5.61 1CRN 257 +ATOM 189 N ALA 27 5.408 13.012 4.694 1.00 3.89 1CRN 258 +ATOM 190 CA ALA 27 5.879 13.502 6.026 1.00 4.43 1CRN 259 +ATOM 191 C ALA 27 4.696 13.908 6.882 1.00 4.26 1CRN 260 +ATOM 192 O ALA 27 4.528 13.422 8.025 1.00 5.44 1CRN 261 +ATOM 193 CB ALA 27 6.880 14.615 5.830 1.00 5.36 1CRN 262 +ATOM 194 N THR 28 3.827 14.802 6.358 1.00 4.53 1CRN 263 +ATOM 195 CA THR 28 2.691 
15.221 7.194 1.00 5.08 1CRN 264 +ATOM 196 C THR 28 1.672 14.132 7.434 1.00 4.62 1CRN 265 +ATOM 197 O THR 28 .947 14.112 8.468 1.00 7.80 1CRN 266 +ATOM 198 CB THR 28 1.986 16.520 6.614 1.00 6.03 1CRN 267 +ATOM 199 OG1 THR 28 1.664 16.221 5.230 1.00 7.19 1CRN 268 +ATOM 200 CG2 THR 28 2.914 17.739 6.700 1.00 7.34 1CRN 269 +ATOM 201 N TYR 29 1.621 13.190 6.511 1.00 5.01 1CRN 270 +ATOM 202 CA TYR 29 .715 12.045 6.657 1.00 6.60 1CRN 271 +ATOM 203 C TYR 29 1.125 11.125 7.815 1.00 4.92 1CRN 272 +ATOM 204 O TYR 29 .286 10.632 8.545 1.00 7.13 1CRN 273 +ATOM 205 CB TYR 29 .755 11.229 5.322 1.00 9.66 1CRN 274 +ATOM 206 CG TYR 29 -.203 10.044 5.354 1.00 11.56 1CRN 275 +ATOM 207 CD1 TYR 29 -1.547 10.337 5.645 1.00 12.85 1CRN 276 +ATOM 208 CD2 TYR 29 .193 8.750 5.100 1.00 14.44 1CRN 277 +ATOM 209 CE1 TYR 29 -2.496 9.329 5.673 1.00 16.61 1CRN 278 +ATOM 210 CE2 TYR 29 -.801 7.705 5.156 1.00 17.11 1CRN 279 +ATOM 211 CZ TYR 29 -2.079 8.031 5.430 1.00 19.99 1CRN 280 +ATOM 212 OH TYR 29 -3.097 7.057 5.458 1.00 28.98 1CRN 281 +ATOM 213 N THR 30 2.470 10.984 7.995 1.00 5.31 1CRN 282 +ATOM 214 CA THR 30 2.986 9.994 8.950 1.00 5.70 1CRN 283 +ATOM 215 C THR 30 3.609 10.505 10.230 1.00 6.28 1CRN 284 +ATOM 216 O THR 30 3.766 9.715 11.186 1.00 8.77 1CRN 285 +ATOM 217 CB THR 30 4.076 9.103 8.225 1.00 6.55 1CRN 286 +ATOM 218 OG1 THR 30 5.125 10.027 7.824 1.00 6.57 1CRN 287 +ATOM 219 CG2 THR 30 3.493 8.324 7.035 1.00 7.29 1CRN 288 +ATOM 220 N GLY 31 3.984 11.764 10.241 1.00 4.99 1CRN 289 +ATOM 221 CA GLY 31 4.769 12.336 11.360 1.00 5.50 1CRN 290 +ATOM 222 C GLY 31 6.255 12.243 11.106 1.00 4.19 1CRN 291 +ATOM 223 O GLY 31 7.037 12.750 11.954 1.00 6.12 1CRN 292 +ATOM 224 N CYS 32 6.710 11.631 9.992 1.00 4.30 1CRN 293 +ATOM 225 CA CYS 32 8.140 11.694 9.635 1.00 4.89 1CRN 294 +ATOM 226 C CYS 32 8.500 13.141 9.206 1.00 5.50 1CRN 295 +ATOM 227 O CYS 32 7.581 13.949 8.944 1.00 5.82 1CRN 296 +ATOM 228 CB CYS 32 8.504 10.686 8.530 1.00 4.66 1CRN 297 +ATOM 229 SG CYS 32 8.048 8.987 8.881 1.00 5.33 1CRN 298 
+ATOM 230 N ILE 33 9.793 13.410 9.173 1.00 6.02 1CRN 299 +ATOM 231 CA ILE 33 10.280 14.760 8.823 1.00 5.24 1CRN 300 +ATOM 232 C ILE 33 11.346 14.658 7.743 1.00 5.16 1CRN 301 +ATOM 233 O ILE 33 11.971 13.583 7.552 1.00 7.19 1CRN 302 +ATOM 234 CB ILE 33 10.790 15.535 10.085 1.00 5.49 1CRN 303 +ATOM 235 CG1 ILE 33 12.059 14.803 10.671 1.00 6.85 1CRN 304 +ATOM 236 CG2 ILE 33 9.684 15.686 11.138 1.00 6.45 1CRN 305 +ATOM 237 CD1 ILE 33 12.733 15.676 11.781 1.00 8.94 1CRN 306 +ATOM 238 N ILE 34 11.490 15.773 7.038 1.00 5.52 1CRN 307 +ATOM 239 CA ILE 34 12.552 15.877 6.036 1.00 6.82 1CRN 308 +ATOM 240 C ILE 34 13.590 16.917 6.560 1.00 6.92 1CRN 309 +ATOM 241 O ILE 34 13.168 18.006 6.945 1.00 9.22 1CRN 310 +ATOM 242 CB ILE 34 11.987 16.360 4.681 1.00 8.11 1CRN 311 +ATOM 243 CG1 ILE 34 10.914 15.338 4.163 1.00 9.59 1CRN 312 +ATOM 244 CG2 ILE 34 13.131 16.517 3.629 1.00 9.73 1CRN 313 +ATOM 245 CD1 ILE 34 10.151 16.024 2.938 1.00 13.41 1CRN 314 +ATOM 246 N ILE 35 14.856 16.493 6.536 1.00 7.06 1CRN 315 +ATOM 247 CA ILE 35 15.930 17.454 6.941 1.00 7.52 1CRN 316 +ATOM 248 C ILE 35 16.913 17.550 5.819 1.00 6.63 1CRN 317 +ATOM 249 O ILE 35 17.097 16.660 4.970 1.00 7.90 1CRN 318 +ATOM 250 CB ILE 35 16.622 16.995 8.285 1.00 8.07 1CRN 319 +ATOM 251 CG1 ILE 35 17.360 15.651 8.067 1.00 9.41 1CRN 320 +ATOM 252 CG2 ILE 35 15.592 16.974 9.434 1.00 9.46 1CRN 321 +ATOM 253 CD1 ILE 35 18.298 15.206 9.219 1.00 9.85 1CRN 322 +ATOM 254 N PRO 36 17.664 18.669 5.806 1.00 8.07 1CRN 323 +ATOM 255 CA PRO 36 18.635 18.861 4.738 1.00 8.78 1CRN 324 +ATOM 256 C PRO 36 19.925 18.042 4.949 1.00 8.31 1CRN 325 +ATOM 257 O PRO 36 20.593 17.742 3.945 1.00 9.09 1CRN 326 +ATOM 258 CB PRO 36 18.945 20.364 4.783 1.00 9.67 1CRN 327 +ATOM 259 CG PRO 36 18.238 20.937 5.908 1.00 10.15 1CRN 328 +ATOM 260 CD PRO 36 17.371 19.900 6.596 1.00 9.53 1CRN 329 +ATOM 261 N GLY 37 20.172 17.730 6.217 1.00 8.48 1CRN 330 +ATOM 262 CA GLY 37 21.452 16.969 6.513 1.00 9.20 1CRN 331 +ATOM 263 C GLY 37 21.143 15.478 6.427 1.00 10.41 
1CRN 332 +ATOM 264 O GLY 37 20.138 15.023 5.878 1.00 12.06 1CRN 333 +ATOM 265 N ALA 38 22.055 14.701 7.032 1.00 9.24 1CRN 334 +ATOM 266 CA ALA 38 22.019 13.242 7.020 1.00 9.24 1CRN 335 +ATOM 267 C ALA 38 21.944 12.628 8.396 1.00 9.60 1CRN 336 +ATOM 268 O ALA 38 21.869 11.387 8.435 1.00 13.65 1CRN 337 +ATOM 269 CB ALA 38 23.246 12.697 6.275 1.00 10.43 1CRN 338 +ATOM 270 N THR 39 21.894 13.435 9.436 1.00 8.70 1CRN 339 +ATOM 271 CA THR 39 21.936 12.911 10.809 1.00 9.46 1CRN 340 +ATOM 272 C THR 39 20.615 13.191 11.521 1.00 8.32 1CRN 341 +ATOM 273 O THR 39 20.357 14.317 11.948 1.00 9.89 1CRN 342 +ATOM 274 CB THR 39 23.131 13.601 11.593 1.00 10.72 1CRN 343 +ATOM 275 OG1 THR 39 24.284 13.401 10.709 1.00 11.66 1CRN 344 +ATOM 276 CG2 THR 39 23.340 12.935 12.962 1.00 11.81 1CRN 345 +ATOM 277 N CYS 40 19.827 12.110 11.642 1.00 7.64 1CRN 346 +ATOM 278 CA CYS 40 18.504 12.312 12.298 1.00 8.05 1CRN 347 +ATOM 279 C CYS 40 18.684 12.451 13.784 1.00 7.63 1CRN 348 +ATOM 280 O CYS 40 19.533 11.718 14.362 1.00 9.64 1CRN 349 +ATOM 281 CB CYS 40 17.582 11.117 11.996 1.00 7.80 1CRN 350 +ATOM 282 SG CYS 40 17.199 10.929 10.237 1.00 7.30 1CRN 351 +ATOM 283 N PRO 41 17.880 13.266 14.426 1.00 8.00 1CRN 352 +ATOM 284 CA PRO 41 17.924 13.421 15.877 1.00 8.96 1CRN 353 +ATOM 285 C PRO 41 17.392 12.206 16.594 1.00 9.06 1CRN 354 +ATOM 286 O PRO 41 16.652 11.368 16.033 1.00 8.82 1CRN 355 +ATOM 287 CB PRO 41 17.076 14.658 16.145 1.00 10.39 1CRN 356 +ATOM 288 CG PRO 41 16.098 14.689 14.997 1.00 10.99 1CRN 357 +ATOM 289 CD PRO 41 16.859 14.150 13.779 1.00 10.49 1CRN 358 +ATOM 290 N GLY 42 17.728 12.124 17.884 1.00 7.55 1CRN 359 +ATOM 291 CA GLY 42 17.334 10.956 18.691 1.00 8.00 1CRN 360 +ATOM 292 C GLY 42 15.875 10.688 18.871 1.00 7.22 1CRN 361 +ATOM 293 O GLY 42 15.434 9.550 19.166 1.00 8.41 1CRN 362 +ATOM 294 N ASP 43 15.036 11.747 18.715 1.00 5.54 1CRN 363 +ATOM 295 CA ASP 43 13.564 11.573 18.836 1.00 5.85 1CRN 364 +ATOM 296 C ASP 43 12.936 11.227 17.470 1.00 5.87 1CRN 365 +ATOM 297 O ASP 43 11.720 
11.040 17.428 1.00 7.29 1CRN 366 +ATOM 298 CB ASP 43 12.933 12.737 19.580 1.00 6.72 1CRN 367 +ATOM 299 CG ASP 43 13.140 14.094 18.958 1.00 8.59 1CRN 368 +ATOM 300 OD1 ASP 43 14.109 14.303 18.212 1.00 9.59 1CRN 369 +ATOM 301 OD2 ASP 43 12.267 14.963 19.265 1.00 11.45 1CRN 370 +ATOM 302 N TYR 44 13.725 11.174 16.425 1.00 5.22 1CRN 371 +ATOM 303 CA TYR 44 13.257 10.745 15.081 1.00 5.56 1CRN 372 +ATOM 304 C TYR 44 14.275 9.687 14.612 1.00 4.61 1CRN 373 +ATOM 305 O TYR 44 14.930 9.862 13.568 1.00 6.04 1CRN 374 +ATOM 306 CB TYR 44 13.200 11.914 14.071 1.00 5.41 1CRN 375 +ATOM 307 CG TYR 44 12.000 12.819 14.399 1.00 5.34 1CRN 376 +ATOM 308 CD1 TYR 44 12.119 13.853 15.332 1.00 6.59 1CRN 377 +ATOM 309 CD2 TYR 44 10.775 12.617 13.762 1.00 5.94 1CRN 378 +ATOM 310 CE1 TYR 44 11.045 14.675 15.610 1.00 5.97 1CRN 379 +ATOM 311 CE2 TYR 44 9.676 13.433 14.048 1.00 5.17 1CRN 380 +ATOM 312 CZ TYR 44 9.802 14.456 14.996 1.00 5.96 1CRN 381 +ATOM 313 OH TYR 44 8.740 15.265 15.269 1.00 8.60 1CRN 382 +ATOM 314 N ALA 45 14.342 8.640 15.422 1.00 4.76 1CRN 383 +ATOM 315 CA ALA 45 15.445 7.667 15.246 1.00 5.89 1CRN 384 +ATOM 316 C ALA 45 15.171 6.533 14.280 1.00 6.67 1CRN 385 +ATOM 317 O ALA 45 16.093 5.705 14.039 1.00 7.56 1CRN 386 +ATOM 318 CB ALA 45 15.680 7.099 16.682 1.00 6.82 1CRN 387 +ATOM 319 N ASN 46 13.966 6.502 13.739 1.00 5.80 1CRN 388 +ATOM 320 CA ASN 46 13.512 5.395 12.878 1.00 6.15 1CRN 389 +ATOM 321 C ASN 46 13.311 5.853 11.455 1.00 6.61 1CRN 390 +ATOM 322 O ASN 46 13.733 6.929 11.026 1.00 7.18 1CRN 391 +ATOM 323 CB ASN 46 12.266 4.769 13.501 1.00 7.27 1CRN 392 +ATOM 324 CG ASN 46 12.538 4.304 14.922 1.00 7.98 1CRN 393 +ATOM 325 OD1 ASN 46 11.982 4.849 15.886 1.00 11.00 1CRN 394 +ATOM 326 ND2 ASN 46 13.407 3.298 15.015 1.00 10.32 1CRN 395 +ATOM 327 OXT ASN 46 12.703 4.973 10.746 1.00 7.86 1CRN 396 +TER 328 ASN 46 1CRN 397 +CONECT 20 19 282 1CRN 398 +CONECT 26 25 229 1CRN 399 +CONECT 116 115 188 1CRN 400 +CONECT 188 116 187 1CRN 401 +CONECT 229 26 228 1CRN 402 +CONECT 282 20 
281 1CRN 403 +MASTER 62 0 0 2 2 1 0 6 327 1 6 4 1CRND 6 +END 1CRN 405 diff --git a/tests/data/1H-indole.bin b/tests/data/1H-indole.bin new file mode 100644 index 0000000000000000000000000000000000000000..97f7bc7af972518cba074270482adb2c1009cacb GIT binary patch literal 128 zcmZQz00Jf!5CLLg0}Tue4GJ6$3>-j72Z#a%2n8frfY1TV1Ck&W4InX~LJ%7R0A-M@ G0WksnI0JnE literal 0 HcmV?d00001 diff --git a/tests/data/1H-indole.mol b/tests/data/1H-indole.mol new file mode 100644 index 0000000..3505718 --- /dev/null +++ b/tests/data/1H-indole.mol @@ -0,0 +1,38 @@ +1H-Indole + OpenBabel01020809243D + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 2.2006 -0.0517 0.0199 C 0 0 0 0 0 + 1.7129 -1.3738 0.0753 C 0 0 0 0 0 + 0.3608 -1.6439 0.0767 C 0 0 0 0 0 + -0.5338 -0.5661 0.0212 C 0 0 0 0 0 + 1.3481 1.0300 -0.0355 C 0 0 0 0 0 + -0.0297 0.7622 -0.0344 C 0 0 0 0 0 + -1.9676 -0.4868 0.0062 C 0 0 0 0 0 + -2.3165 0.8462 -0.0565 C 0 0 0 0 0 + 3.2834 0.1111 0.0212 H 0 0 0 0 0 + 2.4325 -2.1978 0.1180 H 0 0 0 0 0 + -0.0188 -2.6690 0.1196 H 0 0 0 0 0 + 1.7253 2.0560 -0.0785 H 0 0 0 0 0 + -2.6436 -1.3371 0.0389 H 0 0 0 0 0 + -3.3116 1.2900 -0.0843 H 0 0 0 0 0 + -1.1411 1.6232 -0.0819 N 0 0 0 0 0 + -1.1010 2.6076 -0.1259 H 0 0 0 0 0 + 1 2 2 0 0 0 + 1 5 1 0 0 0 + 1 9 1 0 0 0 + 2 3 1 0 0 0 + 2 10 1 0 0 0 + 3 4 2 0 0 0 + 3 11 1 0 0 0 + 6 4 1 0 0 0 + 4 7 1 0 0 0 + 5 6 2 0 0 0 + 5 12 1 0 0 0 + 6 15 1 0 0 0 + 8 7 2 0 0 0 + 7 13 1 0 0 0 + 8 14 1 0 0 0 + 15 8 1 0 0 0 + 15 16 1 0 0 0 +M END diff --git a/tests/data/1H-indole.molecule b/tests/data/1H-indole.molecule new file mode 100644 index 0000000..be425ea --- /dev/null +++ b/tests/data/1H-indole.molecule @@ -0,0 +1,38 @@ +1H-Indole + OpenBabel10160914443D + + 16 17 0 0 0 0 0 0 0 0999 V2000 + 2.2006 -0.0517 0.0199 C 0 0 0 0 0 + 1.7129 -1.3738 0.0753 C 0 0 0 0 0 + 0.3608 -1.6439 0.0767 C 0 0 0 0 0 + -0.5338 -0.5661 0.0212 C 0 0 0 0 0 + 1.3481 1.0300 -0.0355 C 0 0 0 0 0 + -0.0297 0.7622 -0.0344 C 0 0 0 0 0 + -1.9676 -0.4868 0.0062 C 0 0 0 0 0 + -2.3165 0.8462 -0.0565 C 0 0 0 0 
0 + 3.2834 0.1111 0.0212 H 0 0 0 0 0 + 2.4325 -2.1978 0.1180 H 0 0 0 0 0 + -0.0188 -2.6690 0.1196 H 0 0 0 0 0 + 1.7253 2.0560 -0.0785 H 0 0 0 0 0 + -2.6436 -1.3371 0.0389 H 0 0 0 0 0 + -3.3116 1.2900 -0.0843 H 0 0 0 0 0 + -1.1411 1.6232 -0.0819 N 0 0 0 0 0 + -1.1010 2.6076 -0.1259 H 0 0 0 0 0 + 1 2 2 0 0 0 + 1 5 1 0 0 0 + 1 9 1 0 0 0 + 2 3 1 0 0 0 + 2 10 1 0 0 0 + 3 4 2 0 0 0 + 3 11 1 0 0 0 + 6 4 1 0 0 0 + 4 7 1 0 0 0 + 5 6 2 0 0 0 + 5 12 1 0 0 0 + 6 15 1 0 0 0 + 8 7 2 0 0 0 + 7 13 1 0 0 0 + 8 14 1 0 0 0 + 15 8 1 0 0 0 + 15 16 1 0 0 0 +M END diff --git a/tests/data/1H-indole.smi b/tests/data/1H-indole.smi new file mode 100644 index 0000000..a6c7d84 --- /dev/null +++ b/tests/data/1H-indole.smi @@ -0,0 +1 @@ +c1ccc2c(c1)[nH]cc2 diff --git a/tests/data/L-tryptophan.bin b/tests/data/L-tryptophan.bin new file mode 100644 index 0000000000000000000000000000000000000000..980798c65ac73e3d1c1cbbd2e6fee8534641abc9 GIT binary patch literal 128 zcmX9%u?>JQ40Eff3KBy{28K#J!5BW_ly*a0!-?47C{_~7PBQ@6kWn~5o8au8c0)ys qK;Qhvf`mvC?IhDe%&gGhG@ literal 0 HcmV?d00001 diff --git a/tests/data/L-tryptophan.can b/tests/data/L-tryptophan.can new file mode 100644 index 0000000..e20c2eb --- /dev/null +++ b/tests/data/L-tryptophan.can @@ -0,0 +1 @@ +OC(=O)[C@H](Cc1c[nH]c2c1cccc2)N diff --git a/tests/data/L-tryptophan.mol b/tests/data/L-tryptophan.mol new file mode 100644 index 0000000..0735f23 --- /dev/null +++ b/tests/data/L-tryptophan.mol @@ -0,0 +1,60 @@ +(2S)-2-Amino-3-(1H-indol-3-yl)propanoic acid + OpenBabel02241017523D + + 27 28 0 0 0 0 0 0 0 0999 V2000 + -1.2761 1.1145 -1.7676 N 0 0 0 0 0 + 0.1814 1.0854 -2.0341 C 0 0 0 0 0 + 0.3863 0.9241 -3.5348 C 0 0 0 0 0 + -0.0023 0.0374 -4.2729 O 0 0 0 0 0 + 1.1196 1.8994 -4.1188 O 0 0 0 0 0 + 0.9435 -0.0387 -1.3079 C 0 0 0 0 0 + 2.0062 -0.0231 -1.6206 H 0 0 0 0 0 + 0.5503 -1.0330 -1.6051 H 0 0 0 0 0 + 0.8606 0.1121 0.1597 C 0 0 0 0 0 + 1.6330 0.9465 0.9436 C 0 0 0 0 0 + 1.2095 0.8403 2.2864 N 0 0 0 0 0 + 1.7623 1.1156 3.0570 H 0 0 0 0 0 + 0.2223 
-0.1694 2.3582 C 0 0 0 0 0 + -0.0368 -0.6065 1.0323 C 0 0 0 0 0 + -1.0212 -1.5762 0.8026 C 0 0 0 0 0 + -1.2284 -1.9193 -0.2168 H 0 0 0 0 0 + -1.7094 -2.0814 1.8868 C 0 0 0 0 0 + -2.4818 -2.8423 1.7336 H 0 0 0 0 0 + -1.4420 -1.6413 3.1980 C 0 0 0 0 0 + -2.0131 -2.0728 4.0268 H 0 0 0 0 0 + -0.4809 -0.6851 3.4558 C 0 0 0 0 0 + -0.2721 -0.3430 4.4739 H 0 0 0 0 0 + 2.4247 1.6287 0.6300 H 0 0 0 0 0 + -1.4198 1.2742 -0.7921 H 0 0 0 0 0 + -1.6876 0.2396 -2.0209 H 0 0 0 0 0 + 0.5828 2.0752 -1.6981 H 0 0 0 0 0 + 1.1890 1.7393 -5.0550 H 0 0 0 0 0 + 1 2 1 0 0 0 + 1 24 1 0 0 0 + 1 25 1 0 0 0 + 2 3 1 0 0 0 + 2 6 1 0 0 0 + 2 26 1 0 0 0 + 3 5 1 0 0 0 + 3 4 2 0 0 0 + 5 27 1 0 0 0 + 6 7 1 0 0 0 + 6 8 1 0 0 0 + 6 9 1 0 0 0 + 9 10 2 0 0 0 + 9 14 1 0 0 0 + 10 11 1 0 0 0 + 10 23 1 0 0 0 + 11 12 1 0 0 0 + 11 13 1 0 0 0 + 13 14 1 0 0 0 + 13 21 2 0 0 0 + 14 15 2 0 0 0 + 15 16 1 0 0 0 + 15 17 1 0 0 0 + 17 18 1 0 0 0 + 17 19 2 0 0 0 + 19 20 1 0 0 0 + 19 21 1 0 0 0 + 21 22 1 0 0 0 +M END diff --git a/tests/data/L-tryptophan.molecule b/tests/data/L-tryptophan.molecule new file mode 100644 index 0000000..201f12c --- /dev/null +++ b/tests/data/L-tryptophan.molecule @@ -0,0 +1,60 @@ +(2S)-2-Amino-3-(1H-indol-3-yl)propanoic acid + OpenBabel12040811403D + + 27 28 0 0 0 0 0 0 0 0999 V2000 + -1.2761 1.1145 -1.7676 N 0 0 0 0 0 + 0.1814 1.0854 -2.0341 C 0 0 0 0 0 + 0.3863 0.9241 -3.5348 C 0 0 0 0 0 + -0.0023 0.0374 -4.2729 O 0 0 0 0 0 + 1.1196 1.8994 -4.1188 O 0 0 0 0 0 + 0.9435 -0.0387 -1.3079 C 0 0 0 0 0 + 2.0062 -0.0231 -1.6206 H 0 0 0 0 0 + 0.5503 -1.0330 -1.6051 H 0 0 0 0 0 + 0.8606 0.1121 0.1597 C 0 0 0 0 0 + 1.6330 0.9465 0.9436 C 0 0 0 0 0 + 1.2095 0.8403 2.2864 N 0 0 0 0 0 + 1.7623 1.1156 3.0570 H 0 0 0 0 0 + 0.2223 -0.1694 2.3582 C 0 0 0 0 0 + -0.0368 -0.6065 1.0323 C 0 0 0 0 0 + -1.0212 -1.5762 0.8026 C 0 0 0 0 0 + -1.2284 -1.9193 -0.2168 H 0 0 0 0 0 + -1.7094 -2.0814 1.8868 C 0 0 0 0 0 + -2.4818 -2.8423 1.7336 H 0 0 0 0 0 + -1.4420 -1.6413 3.1980 C 0 0 0 0 0 + -2.0131 -2.0728 4.0268 H 0 0 0 
0 0 + -0.4809 -0.6851 3.4558 C 0 0 0 0 0 + -0.2721 -0.3430 4.4739 H 0 0 0 0 0 + 2.4247 1.6287 0.6300 H 0 0 0 0 0 + -1.4198 1.2742 -0.7921 H 0 0 0 0 0 + -1.6876 0.2396 -2.0209 H 0 0 0 0 0 + 0.5828 2.0752 -1.6981 H 0 0 0 0 0 + 1.1890 1.7393 -5.0550 H 0 0 0 0 0 + 1 2 1 0 0 0 + 1 24 1 0 0 0 + 1 25 1 0 0 0 + 2 3 1 0 0 0 + 2 6 1 0 0 0 + 2 26 1 0 0 0 + 3 5 1 0 0 0 + 3 4 2 0 0 0 + 5 27 1 0 0 0 + 6 7 1 0 0 0 + 6 8 1 0 0 0 + 6 9 1 0 0 0 + 9 10 2 0 0 0 + 9 14 1 0 0 0 + 10 11 1 0 0 0 + 10 23 1 0 0 0 + 11 12 1 0 0 0 + 11 13 1 0 0 0 + 13 14 1 0 0 0 + 13 21 2 0 0 0 + 14 15 2 0 0 0 + 15 16 1 0 0 0 + 15 17 1 0 0 0 + 17 18 1 0 0 0 + 17 19 2 0 0 0 + 19 20 1 0 0 0 + 19 21 1 0 0 0 + 21 22 1 0 0 0 +M END diff --git a/tests/data/glycine.V3000 b/tests/data/glycine.V3000 new file mode 100644 index 0000000..fdca7bf --- /dev/null +++ b/tests/data/glycine.V3000 @@ -0,0 +1,21 @@ +glycine + OpenBabel12040811162D + + 0 0 0 0 0 999 V3000 +M V30 BEGIN CTAB +M V30 COUNTS 5 4 0 0 0 +M V30 BEGIN ATOM +M V30 1 C 206 126 0 0 +M V30 2 C 227.007 113.871 0 0 +M V30 3 O 227.007 89.6139 0 0 +M V30 4 O 248.015 126 0 0 +M V30 5 N 184.993 113.871 0 0 +M V30 END ATOM +M V30 BEGIN BOND +M V30 1 1 1 2 +M V30 2 1 1 5 +M V30 3 2 2 3 +M V30 4 1 2 4 +M V30 END BOND +M V30 END CTAB +M END diff --git a/tests/data/glycine.cml b/tests/data/glycine.cml new file mode 100644 index 0000000..f85212d --- /dev/null +++ b/tests/data/glycine.cml @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff --git a/tests/data/glycine.inchi b/tests/data/glycine.inchi new file mode 100644 index 0000000..3da90c1 --- /dev/null +++ b/tests/data/glycine.inchi @@ -0,0 +1 @@ +InChI=1/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5)/f/h4H diff --git a/tests/data/glycine.inchi102 b/tests/data/glycine.inchi102 new file mode 100644 index 0000000..0f88b8e --- /dev/null +++ b/tests/data/glycine.inchi102 @@ -0,0 +1 @@ +InChI=1S/C2H5NO2/c3-1-2(4)5/h1,3H2,(H,4,5) diff --git a/tests/data/glycine.mol b/tests/data/glycine.mol new file mode 100644 index 0000000..8075c58 --- 
/dev/null +++ b/tests/data/glycine.mol @@ -0,0 +1,14 @@ +glycine + OpenBabel12040809022D + + 5 4 0 0 0 0 0 0 0 0999 V2000 + 206.0000 126.0000 0.0000 C 0 0 0 0 0 + 227.0075 113.8713 0.0000 C 0 0 0 0 0 + 227.0075 89.6139 0.0000 O 0 0 0 0 0 + 248.0151 126.0000 0.0000 O 0 0 0 0 0 + 184.9925 113.8713 0.0000 N 0 0 0 0 0 + 1 2 1 0 0 0 + 1 5 1 0 0 0 + 2 3 2 0 0 0 + 2 4 1 0 0 0 +M END diff --git a/tests/data/glycine.mol2 b/tests/data/glycine.mol2 new file mode 100644 index 0000000..6ed6cfc --- /dev/null +++ b/tests/data/glycine.mol2 @@ -0,0 +1,17 @@ +@MOLECULE +glycine + 5 4 0 0 0 +SMALL +GASTEIGER + +@ATOM + 1 CA 206.0000 126.0000 0.0000 C.3 1 GLY1 0.2227 + 2 C 227.0075 113.8713 0.0000 C.2 1 GLY1 0.3839 + 3 O 227.0075 89.6139 0.0000 O.co2 1 GLY1 -0.2440 + 4 OXT 248.0151 126.0000 0.0000 O.co2 1 GLY1 -0.2440 + 5 N 184.9925 113.8713 0.0000 N.3 1 GLY1 -0.1186 +@BOND + 1 1 2 1 + 2 1 5 1 + 3 2 3 ar + 4 2 4 ar diff --git a/tests/data/glycine.molecule b/tests/data/glycine.molecule new file mode 100644 index 0000000..18d2fc9 --- /dev/null +++ b/tests/data/glycine.molecule @@ -0,0 +1,14 @@ +glycine + OpenBabel12040811142D + + 5 4 0 0 0 0 0 0 0 0999 V2000 + 206.0000 126.0000 0.0000 C 0 0 0 0 0 + 227.0075 113.8713 0.0000 C 0 0 0 0 0 + 227.0075 89.6139 0.0000 O 0 0 0 0 0 + 248.0151 126.0000 0.0000 O 0 0 0 0 0 + 184.9925 113.8713 0.0000 N 0 0 0 0 0 + 1 2 1 0 0 0 + 1 5 1 0 0 0 + 2 3 2 0 0 0 + 2 4 1 0 0 0 +M END diff --git a/tests/data/glycine.smi b/tests/data/glycine.smi new file mode 100644 index 0000000..8a99fae --- /dev/null +++ b/tests/data/glycine.smi @@ -0,0 +1 @@ +C(C(=O)O)N diff --git a/tests/data/glycine_sodium_salt.mol b/tests/data/glycine_sodium_salt.mol new file mode 100644 index 0000000..86f1164 --- /dev/null +++ b/tests/data/glycine_sodium_salt.mol @@ -0,0 +1,16 @@ + + OpenBabel12040809562D + + 6 4 0 0 0 0 0 0 0 0999 V2000 + 0.0000 0.0000 0.0000 N 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 + 0.0000 
0.0000 0.0000 O 0 0 0 0 0 + 0.0000 0.0000 0.0000 Na 0 0 0 0 0 + 1 2 1 0 0 0 + 2 3 1 0 0 0 + 3 4 1 0 0 0 + 4 5 1 0 0 0 +M CHG 2 5 -1 6 1 +M END diff --git a/tests/data/glycine_wo_sodium.mol b/tests/data/glycine_wo_sodium.mol new file mode 100644 index 0000000..3a6b422 --- /dev/null +++ b/tests/data/glycine_wo_sodium.mol @@ -0,0 +1,15 @@ + + OpenBabel12040809582D + + 5 4 0 0 0 0 0 0 0 0999 V2000 + 0.0000 0.0000 0.0000 N 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 + 0.0000 0.0000 0.0000 C 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 + 0.0000 0.0000 0.0000 O 0 0 0 0 0 + 1 2 1 0 0 0 + 2 3 1 0 0 0 + 3 4 1 0 0 0 + 4 5 1 0 0 0 +M CHG 1 5 -1 +M END diff --git a/tests/data/hydrogenated_glycine.mol b/tests/data/hydrogenated_glycine.mol new file mode 100644 index 0000000..dbde646 --- /dev/null +++ b/tests/data/hydrogenated_glycine.mol @@ -0,0 +1,24 @@ +glycine + OpenBabel12040809082D + + 10 9 0 0 0 0 0 0 0 0999 V2000 + 206.0000 126.0000 0.0000 C 0 0 0 0 0 + 227.0075 113.8713 0.0000 C 0 0 0 0 0 + 227.0075 89.6139 0.0000 O 0 0 0 0 0 + 248.0151 126.0000 0.0000 O 0 0 0 0 0 + 184.9925 113.8713 0.0000 N 0 0 0 0 0 + 206.0000 126.7208 -0.8832 H 0 0 0 0 0 + 206.0000 126.4888 1.0299 H 0 0 0 0 0 + 248.0151 127.0635 0.0000 H 0 0 0 0 0 + 184.8694 113.3367 -0.9765 H 0 0 0 0 0 + 184.1380 114.5759 0.1668 H 0 0 0 0 0 + 1 2 1 0 0 0 + 1 5 1 0 0 0 + 1 6 1 0 0 0 + 1 7 1 0 0 0 + 2 3 2 0 0 0 + 2 4 1 0 0 0 + 4 8 1 0 0 0 + 5 9 1 0 0 0 + 5 10 1 0 0 0 +M END diff --git a/tests/helper_test.cpp b/tests/helper_test.cpp new file mode 100644 index 0000000..f81bdee --- /dev/null +++ b/tests/helper_test.cpp @@ -0,0 +1,175 @@ +/*************************************************************************** + * Copyright (C) 2009-2014 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either 
version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. *
+ ***************************************************************************/
+
+/**
+ * @file helper_test.cpp
+ * @brief Test the Mychem helper module.
+ * @author Jerome Pansanel
+ *
+ * This file is part of the Mychem software. It contains functions for
+ * testing the helper module.
+ */
+
+#include "mysql_handler.h"
+#include "test_functions.h"
+#include
+
+#include
+
+#include
+#include
+#include
+
+using namespace std;
+/* Print the command-line usage reminder on standard output. */
+void arg_error(void)
+{
+  cout << "helper_test: missing some arguments\n";
+  cout << "Usage: helper_test -d data_dir -h host -u user -b database -p password\n\n";
+  cout << "Please note that the password option is not mandatory.\n";
+}
+/* Parse the connection options, run every test query, return 1 on any failure. */
+int main(int argc, char **argv)
+{
+  unsigned short int count;
+  string host;
+  string user;
+  string passwd;
+  string data_dir;
+  string db;
+
+  /* Expect the program name plus 4 or 5 option/value pairs (-p may be omitted) */
+  if ((argc != 9) && (argc != 11)) {
+    arg_error();
+    exit(1);
+  }
+
+  for (count = 1; count < argc; count++) {
+    if (argv[count][0] == '-') {
+      switch(argv[count][1]) {
+      case 'h':
+        ++count;
+        if (count < argc) {
+          host = argv[count];
+          break;
+        } /* no value left: cascade through the next cases down to default */
+      case 'u':
+        ++count;
+        if (count < argc) {
+          user = argv[count];
+          break;
+        } /* fall through */
+      case 'p':
+        ++count;
+        if (count < argc) {
+          passwd = argv[count];
+          break;
+        } /* fall through */
+      case 'b':
+        ++count;
+        if (count < argc) {
+          db = argv[count];
+          break;
+        } /* fall through */
+      case 'd':
+        ++count;
+        if (count < argc) {
+          data_dir = argv[count];
+          break;
+        } /* fall through */
+      default: /* unknown option, or an option without its value */
+        arg_error();
+        exit(1);
+      }
+    }
+  }
+
+  /* Initialize the MySQL connection */
+  MySQLHandler my_handler;
+  if (!my_handler.connect(host.c_str(), user.c_str(), passwd.c_str(), db.c_str())) {
+    return 1;
+  }
+
+  /* Variable initialisation */
+  string funcName;
+  string inputData;
+  string controlData;
+  string query;
+  string queryResult;
+  bool testFailed = false; /* set as soon as one test fails */
+  bool currentTestFailed = false; /* reset before each test */
+
+  /****************************************************************
+   * mychem_version test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "mychem_version";
+  currentTestFailed = false;
+
+  /* Set control data */
+  controlData = MYCHEM_VERSION;
+
+  /* Set and perform the query */
+  queryResult.clear();
+  query = "SELECT " + funcName + "()";
+  if (!my_handler.performQuery(queryResult, query)) {
+    currentTestFailed = true;
+  }
+  else if (queryResult.compare(controlData)) { /* compare() is non-zero when the strings differ */
+    currentTestFailed = true;
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * openbabel_version test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "openbabel_version";
+  currentTestFailed = false;
+
+  /* Set control data */
+  controlData = BABEL_VERSION;
+
+  /* Set and perform the query */
+  queryResult.clear();
+  query = "SELECT " + funcName + "()";
+  if (!my_handler.performQuery(queryResult, query)) {
+    currentTestFailed = true;
+  }
+  else if (queryResult.compare(controlData)) {
+    currentTestFailed = true;
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  my_handler.disconnect();
+
+  if (testFailed) {
+    return 1;
+  }
+
+  return 0;
+}
+
diff --git a/tests/modification_test.cpp b/tests/modification_test.cpp
new file mode 100644
index 0000000..0228fbc
--- /dev/null
+++ b/tests/modification_test.cpp
@@ -0,0 +1,226 @@
+/*************************************************************************** + * Copyright (C) 2009-2014 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions for + * testing the modification module. + * @file modification_test.cpp + * @brief Test the Mychem modification module. 
+ * @author Jerome Pansanel
+ */
+
+#include "mysql_handler.h"
+#include "test_functions.h"
+#include
+
+#include
+
+#include
+#include
+#include
+
+using namespace std;
+/* Print the command-line usage reminder on standard output. */
+void arg_error(void)
+{
+  cout << "modification_test: missing some arguments\n";
+  cout << "Usage: modification_test -d data_dir -h host -u user -b database -p password\n\n";
+  cout << "Please note that the password option is not mandatory.\n";
+}
+/* Parse the connection options, run every test query, return 1 on any failure. */
+int main(int argc, char **argv)
+{
+  unsigned short int count;
+  string host;
+  string user;
+  string passwd;
+  string data_dir;
+  string db;
+
+  /* Expect the program name plus 4 or 5 option/value pairs (-p may be omitted) */
+  if ((argc != 9) && (argc != 11)) {
+    arg_error();
+    exit(1);
+  }
+
+  for (count = 1; count < argc; count++) {
+    if (argv[count][0] == '-') {
+      switch(argv[count][1]) {
+      case 'h':
+        ++count;
+        if (count < argc) {
+          host = argv[count];
+          break;
+        } /* no value left: cascade through the next cases down to default */
+      case 'u':
+        ++count;
+        if (count < argc) {
+          user = argv[count];
+          break;
+        } /* fall through */
+      case 'p':
+        ++count;
+        if (count < argc) {
+          passwd = argv[count];
+          break;
+        } /* fall through */
+      case 'b':
+        ++count;
+        if (count < argc) {
+          db = argv[count];
+          break;
+        } /* fall through */
+      case 'd':
+        ++count;
+        if (count < argc) {
+          data_dir = argv[count];
+          break;
+        } /* fall through */
+      default: /* unknown option, or an option without its value */
+        arg_error();
+        exit(1);
+      }
+    }
+  }
+
+  /* Initialize the MySQL connection */
+  MySQLHandler my_handler;
+  if (!my_handler.connect(host.c_str(), user.c_str(), passwd.c_str(), db.c_str())) {
+    return 1;
+  }
+
+  /* Variable initialisation */
+  string funcName;
+  string filename;
+  string inputData;
+  string controlData;
+  string query;
+  string queryResult;
+  bool currentTestFailed = false; /* reset before each test */
+  bool testFailed = false; /* set as soon as one test fails */
+
+  /****************************************************************
+   * add_hydrogens test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "add_hydrogens";
+  currentTestFailed = false;
+
+  /* Set input data */
+  inputData.clear();
+  filename = data_dir + "/glycine.mol";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  filename = data_dir + "/hydrogenated_glycine.mol";
+  if (!readFile(filename, controlData)) { currentTestFailed = true; }
+
+  /* Set and perform the query */
+  if (!currentTestFailed) { /* skipped when a fixture file could not be read */
+    queryResult.clear();
+    query = "SELECT " + funcName + "('" + inputData + "')";
+    if (!my_handler.performQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (!compareMolecules(queryResult, controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * remove_hydrogens test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "remove_hydrogens";
+  currentTestFailed = false;
+
+  /* Set input data */
+  inputData.clear();
+  filename = data_dir + "/hydrogenated_glycine.mol";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  filename = data_dir + "/glycine.mol";
+  if (!readFile(filename, controlData)) { currentTestFailed = true; }
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT " + funcName + "('" + inputData + "')";
+    if (!my_handler.performQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (!compareMolecules(queryResult, controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * strip_salts test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "strip_salts";
+  currentTestFailed = false;
+
+  /* Set input data */
+  inputData.clear();
+  filename = data_dir + "/glycine_sodium_salt.mol";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  filename = data_dir + "/glycine_wo_sodium.mol";
+  if (!readFile(filename, controlData)) { currentTestFailed = true; }
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT " + funcName + "('" + inputData + "')";
+    if (!my_handler.performQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (!compareMolecules(queryResult, controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  my_handler.disconnect();
+
+  if (testFailed) {
+    return 1;
+  }
+
+  return 0;
+}
+
diff --git a/tests/molmatch_test.cpp b/tests/molmatch_test.cpp
new file mode 100644
index 0000000..5db0df4
--- /dev/null
+++ b/tests/molmatch_test.cpp
@@ -0,0 +1,384 @@
+/***************************************************************************
+ * Copyright (C) 2009-2014 by CNRS *
+ * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU General Public License as published by *
+ * the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. *
+ ***************************************************************************/
+
+/**
+ * @file molmatch_test.cpp
+ * @brief Test the Mychem molmatch module.
+ * @author Jerome Pansanel
+ *
+ * This file is part of the Mychem software. It contains functions for
+ * testing the molmatch module.
+ */
+
+#include "mysql_handler.h"
+#include "test_functions.h"
+#include
+
+#include
+
+#include
+#include
+#include
+
+using namespace std;
+/* Print the command-line usage reminder on standard output. */
+void arg_error(void)
+{
+  cout << "molmatch_test: missing some arguments\n";
+  cout << "Usage: molmatch_test -d data_dir -h host -u user -b database -p password\n\n";
+  cout << "Please note that the password option is not mandatory.\n";
+}
+/* Parse the connection options, run every test query, return 1 on any failure. */
+int main(int argc, char **argv)
+{
+  unsigned short int count;
+  string host;
+  string user;
+  string passwd;
+  string data_dir;
+  string db;
+
+  /* Expect the program name plus 4 or 5 option/value pairs (-p may be omitted) */
+  if ((argc != 9) && (argc != 11)) {
+    arg_error();
+    exit(1);
+  }
+
+  for (count = 1; count < argc; count++) {
+    if (argv[count][0] == '-') {
+      switch(argv[count][1]) {
+      case 'h':
+        ++count;
+        if (count < argc) {
+          host = argv[count];
+          break;
+        } /* no value left: cascade through the next cases down to default */
+      case 'u':
+        ++count;
+        if (count < argc) {
+          user = argv[count];
+          break;
+        } /* fall through */
+      case 'p':
+        ++count;
+        if (count < argc) {
+          passwd = argv[count];
+          break;
+        } /* fall through */
+      case 'b':
+        ++count;
+        if (count < argc) {
+          db = argv[count];
+          break;
+        } /* fall through */
+      case 'd':
+        ++count;
+        if (count < argc) {
+          data_dir = argv[count];
+          break;
+        } /* fall through */
+      default: /* unknown option, or an option without its value */
+        arg_error();
+        exit(1);
+      }
+    }
+  }
+
+  /* Initialize the MySQL connection */
+  MySQLHandler my_handler;
+  if (!my_handler.connect(host.c_str(), user.c_str(), passwd.c_str(), db.c_str())) {
+    return 1;
+  }
+
+  /* Variable initialisation */
+  string funcName;
+  string filename;
+  string inputData;
+  string referenceStructure;
+  string controlData;
+  string query;
+  string queryResult;
+  bool currentTestFailed = false; /* reset before each test */
+  bool testFailed = false; /* set as soon as one test fails */
+
+  /****************************************************************
+   * match_substruct test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "match_substruct";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.smi";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set reference structure */
+  referenceStructure.clear();
+  filename = data_dir + "/L-tryptophan.mol";
+  if (!readBinaryFile(filename, referenceStructure)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "1";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) { /* skipped when a fixture file could not be read */
+    queryResult.clear();
+    query = "SELECT " + funcName + "('" + inputData + "',MOLECULE_TO_SERIALIZEDOBMOL('" + referenceStructure + "'))";
+    if (!my_handler.performBinaryQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(controlData)) { /* compare() is non-zero when the strings differ */
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * substruct_atom_ids test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "substruct_atom_ids";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.smi";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set reference structure */
+  referenceStructure.clear();
+  filename = data_dir + "/L-tryptophan.mol";
+  if (!readBinaryFile(filename, referenceStructure)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "19 17 15 14 13 21 11 10 9 ; ";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT " + funcName + "('" + inputData + "',MOLECULE_TO_SERIALIZEDOBMOL('" + referenceStructure + "'))";
+    if (!my_handler.performBinaryQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * substruct_count test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "substruct_count";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.smi";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set reference structure */
+  referenceStructure.clear();
+  filename = data_dir + "/L-tryptophan.mol";
+  if (!readBinaryFile(filename, referenceStructure)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "1";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT " + funcName + "('" + inputData + "',MOLECULE_TO_SERIALIZEDOBMOL('" + referenceStructure + "'))";
+    if (!my_handler.performBinaryQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * tanimoto test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "tanimoto";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.molecule";
+  if (!readBinaryFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set reference structure */
+  referenceStructure.clear();
+  filename = data_dir + "/L-tryptophan.molecule";
+  if (!readBinaryFile(filename, referenceStructure)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "0.30263157894737";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT " + funcName + "(fingerprint2('" + inputData + "'),fingerprint2('" + referenceStructure + "'))";
+    if (!my_handler.performBinaryQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(0, 8, controlData, 0, 8)) { /* only the first 8 characters are compared */
+      currentTestFailed = true;
+      cout << queryResult << endl;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * bit_fp_and test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "bit_fp_and";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.molecule";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set reference structure */
+  referenceStructure.clear();
+  filename = data_dir + "/L-tryptophan.molecule";
+  if (!readFile(filename, referenceStructure)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "1";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT tanimoto(" + funcName + "(fingerprint2('" + inputData + "'),fingerprint2('" + referenceStructure + "')),fingerprint2('" + inputData + "'))";
+    if (!my_handler.performQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * bit_fp_or test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "bit_fp_or";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.molecule";
+  if (!readFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set reference structure */
+  referenceStructure.clear();
+  filename = data_dir + "/L-tryptophan.molecule";
+  if (!readFile(filename, referenceStructure)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "0.30263157894737";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT tanimoto(" + funcName + "(fingerprint2('" + inputData + "'),fingerprint2('" + referenceStructure + "')),fingerprint2('" + inputData + "'))";
+    if (!my_handler.performQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(0, 8, controlData, 0, 8)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  /****************************************************************
+   * bit_fp_count test *
+   ****************************************************************/
+  /* Initialize the variables */
+  funcName = "bit_fp_count";
+  currentTestFailed = false;
+
+  /* Set substructure data */
+  inputData.clear();
+  filename = data_dir + "/1H-indole.molecule";
+  if (!readBinaryFile(filename, inputData)) { currentTestFailed = true; }
+
+  /* Set control data */
+  controlData.clear();
+  controlData = "55";
+
+  /* Set and perform the query */
+  if (!currentTestFailed) {
+    queryResult.clear();
+    query = "SELECT " + funcName + "(fingerprint2('" + inputData + "'))";
+    if (!my_handler.performBinaryQuery(queryResult, query)) {
+      currentTestFailed = true;
+    }
+    else if (queryResult.compare(controlData)) {
+      currentTestFailed = true;
+    }
+  }
+
+  if (currentTestFailed) { testFailed = true; }
+
+  printTestResult(funcName, currentTestFailed);
+
+  my_handler.disconnect();
+
+  if (testFailed) {
+    return 1;
+  }
+
+  return 0;
+}
+
diff --git a/tests/mysql_handler.cpp b/tests/mysql_handler.cpp
new file mode 100644
index 0000000..9fb4efa
--- 
/dev/null +++ b/tests/mysql_handler.cpp @@ -0,0 +1,134 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions for + * handling a MySQL connection. + * @file mysql_handler.cpp + * @brief MySQL Handler. 
+ * @author Jerome Pansanel + */ + +#include "mysql_handler.h" + +#include + +#include + +using namespace std; + +MySQLHandler::MySQLHandler() +{ + mysql_init(&_mysql); + _link = NULL; +} + +MySQLHandler::~MySQLHandler() +{ + if (_link) { + mysql_close(_link); + _link = NULL; + } +} + +bool MySQLHandler::connect(string host, string user, string passwd, string db) +{ + if (!(_link = mysql_real_connect(&_mysql, host.c_str(), user.c_str(), passwd.c_str(), db.c_str(), 0, NULL, 0))) { + cout << "Error: could not connect to MySQL!\n" << mysql_error(&_mysql) << endl; + return false; + } + + _mysql.reconnect = 1; + + return true; +} + +bool MySQLHandler::performQuery(string& buffer, string query) +{ + MYSQL_RES *res = NULL; + + if (mysql_query(_link, query.c_str())) { + cerr << "Error: query failed (" << mysql_error(_link) << ")\n"; + return false; + } + + if (!(res = mysql_store_result(_link))) { + cerr << "Error: could not get result from " << mysql_error(_link) << endl; + mysql_free_result(res); + return false; + } + + MYSQL_ROW row = mysql_fetch_row(res); + + if (row == NULL) { + cerr << "Error: query result is empty\n"; + mysql_free_result(res); + return false; + } + + if (row[0] == NULL) { + buffer = ""; + } + else { + buffer = row[0]; + } + + mysql_free_result(res); + + return true; +} + +bool MySQLHandler::performBinaryQuery(string& buffer, string query) +{ + MYSQL_RES *res = NULL; + + if (mysql_real_query(_link, query.c_str(), query.size())) { + cerr << "Error: query failed (" << mysql_error(_link) << ")\n"; + return false; + } + + if (!(res = mysql_store_result(_link))) { + cerr << "Error: could not get result from " << mysql_error(_link) << endl; + mysql_free_result(res); + return false; + } + + MYSQL_ROW row = mysql_fetch_row(res); + + if (row == NULL) { + cerr << "Error: query result is empty\n"; + mysql_free_result(res); + return false; + } + + buffer = row[0]; + + mysql_free_result(res); + + return true; +} + +void MySQLHandler::disconnect() +{ + if (_link) 
{ + mysql_close(_link); + _link = NULL; + } +} diff --git a/tests/mysql_handler.h b/tests/mysql_handler.h new file mode 100644 index 0000000..2ca4271 --- /dev/null +++ b/tests/mysql_handler.h @@ -0,0 +1,66 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * This file is part of the Mychem software. It contains functions for + * handling a MySQL connection. + * @file mysql_handler.h + * @brief MySQL Handler. + * @author Jerome Pansanel + */ + +#include +#include + +class MySQLHandler +{ + private: + MYSQL _mysql; + MYSQL * _link; + + public: + /** + * @brief Default constructor. + * + * Initializes @c. + */ + MySQLHandler(); + + /** + * @brief Default destructor. 
+ */ + ~MySQLHandler(); + + /** + * @brief desc + * A complete description + * @param host The host + * @param user The user + * @param passwd The password + * @param db The database + * @return bool + */ + bool connect(std::string host, std::string user, std::string passwd, std::string db); + bool performQuery(std::string &buffer, std::string query); + bool performBinaryQuery(std::string &buffer, std::string query); + void disconnect(); +}; + + diff --git a/tests/property_test.cpp b/tests/property_test.cpp new file mode 100644 index 0000000..abbdaac --- /dev/null +++ b/tests/property_test.cpp @@ -0,0 +1,673 @@ +/*************************************************************************** + * Copyright (C) 2009-2014 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * @file property_test.cpp + * @brief Test the Mychem property module. + * @author Jerome Pansanel + * + * This file is part of the Mychem software. It contains functions for + * testing the property module. 
+ */ + +#include "mysql_handler.h" +#include "test_functions.h" +#include + +#include + +#include +#include +#include + +using namespace std; + +void arg_error(void) +{ + cout << "property_test: missing some arguments\n"; + cout << "Usage: property_test -d data_dir -h host -u user -p password\n\n"; + cout << "Please note that the password option is not mandatory.\n"; +} + +int main(int argc, char **argv) +{ + unsigned short int count; + string host; + string user; + string passwd; + string data_dir; + string db; + + + if ((argc != 9) && (argc != 11)) { + arg_error(); + exit(1); + } + + for (count = 1; count < argc; count++) { + if (argv[count][0] == '-') { + switch(argv[count][1]) { + case 'h': + ++count; + if (count < argc) { + host = argv[count]; + break; + } + case 'u': + ++count; + if (count < argc) { + user = argv[count]; + break; + } + case 'p': + ++count; + if (count < argc) { + passwd = argv[count]; + break; + } + case 'b': + ++count; + if (count < argc) { + db = argv[count]; + break; + } + case 'd': + ++count; + if (count < argc) { + data_dir = argv[count]; + break; + } + default: + arg_error(); + exit(1); + } + } + } + + /* Initialize the MySQL connection */ + MySQLHandler my_handler; + if (!my_handler.connect(host.c_str(), user.c_str(), passwd.c_str(), db.c_str())) { + return 1; + } + + /* Variable initialisation */ + string funcName; + string filename; + string inputData; + string controlData; + string query; + string queryResult; + bool currentTestFailed = false; + bool testFailed = false; + + /**************************************************************** + * molweight test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molweight"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + 
controlData = "204.225180"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(0, 8, controlData, 0, 8)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * exactmass test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "exactmass"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "204.089878"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(0, 8, controlData, 0, 8)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_atoms test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "number_of_atoms"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "27"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + 
inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_heavy_atoms test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "number_of_heavy_atoms"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "15"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_bonds test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "number_of_bonds"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "28"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + 
currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_rotable_bonds test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "number_of_rotable_bonds"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "3"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * total_charge test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "total_charge"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "0"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + 
/**************************************************************** + * molformula test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molformula"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "C11H12N2O2"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_acceptors test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "number_of_acceptors"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "3"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_donors test * + ****************************************************************/ + /* 
Initialize the variables */ + funcName = "number_of_donors"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "3"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molpsa test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molpsa"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "79.11"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT ROUND(" + funcName + "('" + inputData + "'),2)"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * molmr test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "molmr"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if 
(!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "57.3579"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT ROUND(" + funcName + "('" + inputData + "'),4)"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * mollogp test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "mollogp"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "1.8226"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT ROUND(" + funcName + "('" + inputData + "'),4)"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * is_2D test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "is_2D"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "1"; + + /* Set and perform the query */ + if 
(!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + cout << queryResult << endl; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * is_3D test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "is_3D"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "1"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * is_chiral test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "is_chiral"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "1"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = 
true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + /**************************************************************** + * number_of_rings test * + ****************************************************************/ + /* Initialize the variables */ + funcName = "number_of_rings"; + currentTestFailed = false; + + /* Set input data */ + inputData.clear(); + filename = data_dir + "/L-tryptophan.molecule"; + if (!readFile(filename, inputData)) { currentTestFailed = true; } + + /* Set control data */ + controlData.clear(); + controlData = "2"; + + /* Set and perform the query */ + if (!currentTestFailed) { + queryResult.clear(); + query = "SELECT " + funcName + "('" + inputData + "')"; + if (!my_handler.performQuery(queryResult, query)) { + currentTestFailed = true; + } + else if (queryResult.compare(controlData)) { + currentTestFailed = true; + } + } + + if (currentTestFailed) { testFailed = true; } + + printTestResult(funcName, currentTestFailed); + + my_handler.disconnect(); + + if (testFailed) { + return 1; + } + + return 0; +} + diff --git a/tests/test_functions.cpp b/tests/test_functions.cpp new file mode 100644 index 0000000..5467366 --- /dev/null +++ b/tests/test_functions.cpp @@ -0,0 +1,404 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. 
#include <fstream>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
#include <vector>

/**
 * @brief Split a text into lines.
 *
 * Each line is appended to @p stringList with its terminating newline
 * character. An empty line is stored as a single space. A last line that is
 * not terminated by a newline is appended as is.
 * @param str The text to split
 * @param stringList Receives the lines (existing elements are kept)
 */
void splitString(const std::string str, std::vector<std::string> &stringList)
{
    std::string::size_type lineStart = 0;

    while (lineStart < str.size()) {
        const std::string::size_type newlinePos = str.find('\n', lineStart);

        if (newlinePos == std::string::npos) {
            /* last line, without terminating newline */
            stringList.push_back(str.substr(lineStart));
            break;
        }
        if (newlinePos == lineStart) {
            /* empty line */
            stringList.push_back(" ");
        }
        else {
            stringList.push_back(str.substr(lineStart, newlinePos - lineStart + 1));
        }
        lineStart = newlinePos + 1;
    }
}

/**
 * @brief Split a string into tokens.
 * @param vcr Receives the tokens (cleared first)
 * @param buffer The string to split
 * @param delimstr The set of delimiter characters
 * @return false if @p buffer is empty or @p delimstr is a null pointer
 */
bool tokenize(std::vector<std::string> &vcr, std::string buffer, const char *delimstr)
{
    vcr.clear();
    if (buffer.empty() || !delimstr) {
        return false;
    }

    std::string::size_type startpos = buffer.find_first_not_of(delimstr);
    while (startpos != std::string::npos) {
        const std::string::size_type endpos = buffer.find_first_of(delimstr, startpos);
        if (endpos == std::string::npos) {
            /* last token: it runs to the end of the buffer */
            vcr.push_back(buffer.substr(startpos));
            break;
        }
        vcr.push_back(buffer.substr(startpos, endpos - startpos));
        startpos = buffer.find_first_not_of(delimstr, endpos + 1);
    }

    return true;
}

/**
 * @brief Compare two maps of atom counts.
 *
 * NOTE(review): the template arguments were not readable in the reviewed
 * source; the value type is assumed to be int -- confirm against the
 * declaration in test_functions.h.
 * @param firstMolMap Number of atoms per atom type for the first molecule
 * @param secondMolMap Number of atoms per atom type for the second molecule
 * @return true if both maps hold the same atom types with the same counts
 */
bool compareMoleculeMap(std::map<std::string, int> firstMolMap, std::map<std::string, int> secondMolMap)
{
    return firstMolMap == secondMolMap;
}

namespace {

/* Report an invalid molecule on the standard error output; always returns false. */
bool reportInvalidMolecule(const char *label, const char *format)
{
    std::cerr << "Error: The " << label << " molecule is not a valid " << format << std::endl;
    return false;
}

/* Read an unsigned integer from a fixed-width field. */
bool parseCount(const std::string &field, unsigned int &value)
{
    std::istringstream stream(field);
    return !(stream >> value).fail();
}

/* Collect the atom types and the declared bond count of a MDL molfile. */
bool summarizeMolfile(const std::string &molecule, const char *label,
                      std::map<std::string, int> &atoms, unsigned int &bondCount)
{
    /* The counts line follows the three header lines */
    const std::vector<std::string>::size_type countsIndex = 3;
    /* Position of the atom symbol field in an atom line */
    const std::string::size_type symbolColumn = 31;

    std::vector<std::string> lines;
    splitString(molecule, lines);

    if (lines.size() <= countsIndex) {
        return reportInvalidMolecule(label, "molecule");
    }

    const std::string &counts = lines[countsIndex];
    unsigned int atomCount = 0;
    if (counts.size() <= 3
        || !parseCount(counts.substr(0, 3), atomCount)
        || !parseCount(counts.substr(3, 3), bondCount)) {
        return reportInvalidMolecule(label, "molecule");
    }

    for (std::vector<std::string>::size_type i = countsIndex + 1;
         i < lines.size() && i < countsIndex + 1 + atomCount; ++i) {
        /* A short line would make substr() throw */
        if (lines[i].size() <= symbolColumn) {
            return reportInvalidMolecule(label, "molecule");
        }
        std::istringstream field(lines[i].substr(symbolColumn, 3));
        std::string symbol;
        if (!(field >> symbol)) {
            return reportInvalidMolecule(label, "molecule");
        }
        ++atoms[symbol];
    }

    return true;
}

/* Collect the atom types and count the bond lines of a V3000 molfile. */
bool summarizeV3000(const std::string &molecule, const char *label,
                    std::map<std::string, int> &atoms, unsigned int &bondCount)
{
    std::vector<std::string> lines;
    std::vector<std::string> fields;
    bool inAtom = false;
    bool inBond = false;

    splitString(molecule, lines);

    for (std::vector<std::string>::size_type i = 0; i < lines.size(); ++i) {
        const std::string &line = lines[i];

        /* Block markers only change the state: they hold no atom or bond */
        if (line.find("BEGIN ATOM") != std::string::npos) {
            if (inBond || inAtom) {
                return reportInvalidMolecule(label, "V3000 molecule");
            }
            inAtom = true;
            continue;
        }
        if (line.find("END ATOM") != std::string::npos) {
            if (inBond || !inAtom) {
                return reportInvalidMolecule(label, "V3000 molecule");
            }
            inAtom = false;
            continue;
        }
        if (line.find("BEGIN BOND") != std::string::npos) {
            if (inBond || inAtom) {
                return reportInvalidMolecule(label, "V3000 molecule");
            }
            inBond = true;
            continue;
        }
        if (line.find("END BOND") != std::string::npos) {
            if (!inBond || inAtom) {
                return reportInvalidMolecule(label, "V3000 molecule");
            }
            inBond = false;
            continue;
        }

        if (inAtom) {
            /* The atom type is the fourth field of an atom line */
            tokenize(fields, line, " \t\n\r");
            if (fields.size() < 4) {
                return reportInvalidMolecule(label, "molecule");
            }
            ++atoms[fields[3]];
        }
        else if (inBond) {
            bondCount += 1;
        }
    }

    /* A block that is never closed */
    if (inAtom || inBond) {
        return reportInvalidMolecule(label, "V3000 molecule");
    }

    return true;
}

} /* anonymous namespace */

/**
 * @brief Compare two molecules given as MDL molfiles.
 *
 * The molecules are considered identical if they hold the same number of
 * atoms of each type and declare the same number of bonds. The atom order
 * and the coordinates are not compared.
 * @param first_molecule The first molecule
 * @param second_molecule The second molecule
 * @return true if the molecules are identical, false if they differ or if
 *         one of them cannot be parsed
 */
bool compareMolecules(std::string first_molecule, std::string second_molecule)
{
    std::map<std::string, int> firstMoleculeAtoms;
    std::map<std::string, int> secondMoleculeAtoms;
    unsigned int firstMoleculeBondCount = 0;
    unsigned int secondMoleculeBondCount = 0;

    if (!summarizeMolfile(first_molecule, "first", firstMoleculeAtoms, firstMoleculeBondCount)) {
        return false;
    }
    if (!summarizeMolfile(second_molecule, "second", secondMoleculeAtoms, secondMoleculeBondCount)) {
        return false;
    }

    /* Check the similarity */
    if (!compareMoleculeMap(firstMoleculeAtoms, secondMoleculeAtoms)) {
        return false;
    }

    return firstMoleculeBondCount == secondMoleculeBondCount;
}

/**
 * @brief Compare two molecules given as V3000 molfiles.
 *
 * The molecules are considered identical if their ATOM blocks hold the same
 * number of atoms of each type and their BOND blocks the same number of
 * lines.
 * @param first_molecule The first molecule
 * @param second_molecule The second molecule
 * @return true if the molecules are identical, false if they differ or if
 *         one of them cannot be parsed
 */
bool compareV3000(std::string first_molecule, std::string second_molecule)
{
    std::map<std::string, int> firstMoleculeAtoms;
    std::map<std::string, int> secondMoleculeAtoms;
    unsigned int firstMoleculeBondCount = 0;
    unsigned int secondMoleculeBondCount = 0;

    if (!summarizeV3000(first_molecule, "first", firstMoleculeAtoms, firstMoleculeBondCount)) {
        return false;
    }
    if (!summarizeV3000(second_molecule, "second", secondMoleculeAtoms, secondMoleculeBondCount)) {
        return false;
    }

    /* Check the similarity */
    if (!compareMoleculeMap(firstMoleculeAtoms, secondMoleculeAtoms)) {
        return false;
    }

    return firstMoleculeBondCount == secondMoleculeBondCount;
}

/**
 * @brief Print the result of a test on the standard output.
 *
 * The test name is padded with spaces up to a fixed column; a longer name
 * is printed without padding.
 * @param testName The name of the test
 * @param testFailed true if the test failed
 */
void printTestResult(std::string testName, bool testFailed)
{
    const std::string::size_type nameWidth = 46;

    std::cout << "Testing " << testName << " ...";
    if (testName.size() < nameWidth) {
        std::cout << std::string(nameWidth - testName.size(), ' ');
    }
    if (testFailed) {
        std::cout << "[FAILED]\n";
    }
    else {
        std::cout << " [OK]\n";
    }
}

/**
 * @brief Append the content of a text file to a string.
 *
 * One trailing newline character, if any, is removed from the resulting
 * buffer.
 * @param filename The file to read
 * @param buffer Receives the file content (existing content is kept)
 * @return false if the file cannot be opened
 */
bool readFile(std::string filename, std::string& buffer)
{
    std::ifstream fileStream(filename.c_str(), std::ios::in);

    if (!fileStream) {
        std::cerr << "Error: cannot read " << filename << std::endl;
        return false;
    }
    buffer.append(std::istreambuf_iterator<char>(fileStream), std::istreambuf_iterator<char>());
    fileStream.close();

    /* Remove a newline character added by Open Babel */
    if (!buffer.empty() && buffer[buffer.size()-1] == '\n') {
        buffer.resize(buffer.size()-1);
    }

    return true;
}

/**
 * @brief Append the content of a binary file to a string.
 * @param filename The file to read
 * @param binaryString Receives the file content, byte for byte (existing
 *                     content is kept)
 * @return false if the file cannot be opened
 */
bool readBinaryFile(std::string filename, std::string& binaryString)
{
    std::ifstream fileStream(filename.c_str(), std::ios::binary);

    if (!fileStream.is_open()) {
        std::cerr << "Error: cannot read " << filename << std::endl;
        return false;
    }
    binaryString.append(std::istreambuf_iterator<char>(fileStream), std::istreambuf_iterator<char>());
    fileStream.close();

    return true;
}
a/tests/test_functions.h b/tests/test_functions.h new file mode 100644 index 0000000..a7e3789 --- /dev/null +++ b/tests/test_functions.h @@ -0,0 +1,47 @@ +/*************************************************************************** + * Copyright (C) 2009-2011 by CNRS * + * jerome.pansanel@iphc.cnrs.fr -- Project founder and lead developer * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301, USA. * + ***************************************************************************/ + +/** + * @file test_functions.h + * @brief Provide functions used by the test programs. + * @author Jerome Pansanel + * + * This file is part of the Mychem software. It contains functions used + * by the test programs. + */ + +#include +#include + +/** + * @short Compare two molecules (MDL Mol format) + * @param first_molecule The first molecule + * @param second_molecule The second molecule + * @return true if both molecules have the same number of atoms of each + * type and the same number of bonds (coordinates and connectivity + * are not compared) + */ +bool compareMolecules(std::string first_molecule, std::string second_molecule); +/** + * @short Compare two molecules (MDL V3000 format) + * @param first_molecule The first molecule + * @param second_molecule The second molecule + * @return true if both molecules have the same number of atoms of each + * type and the same number of bonds; false otherwise or if a + * BEGIN/END ATOM or BOND marker is misplaced + */ +bool compareV3000(std::string first_molecule, std::string second_molecule); +/** + * @short Print the one-line result of a test on the standard output + * @param testName The name of the test, printed after "Testing" + * @param testFailed true prints "[FAILED]", false prints "[OK]" + */ +void printTestResult(std::string testName, bool testFailed); +/** + * @short Append the content of a text file to a buffer + * @param filename The path of the file to read + * @param buffer The string the file content is appended to + * @return false if the file cannot be opened, true otherwise + */ +bool readFile(std::string filename, std::string& buffer); +/** + * @short Append the raw bytes of a file to a string + * @param filename The path of the file to read + * @param binaryString The string the bytes are appended to + * @return false if the file cannot be opened, true otherwise + */ +bool readBinaryFile(std::string filename, std::string& binaryString);