-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit cafe1f9
Showing
40 changed files
with
5,016 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# --- Build output ---------------------------------------------------------
# Anything at any depth whose name starts with "cmake-build".
cmake-build*
# Directories named "build" or "dist" at any depth.
**/build/
**/dist/

# --- IDE / editor files ---------------------------------------------------
# ".idea" and "nbproject" directories (IDE project metadata).
.idea/
**/nbproject/
# Files ending in "~" (editor backup copies).
*~

# --- Local, repository-root-only entries ------------------------------------
# The leading "/" anchors these to the repository root.
/pgdumps/
/tpctl.config
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
# Top-level build configuration for the tpctools command-line utilities.
cmake_minimum_required(VERSION 3.5)
project(tpctools)

# Make the project's own CMake modules discoverable by include()/find_package().
# PROJECT_SOURCE_DIR (rather than CMAKE_SOURCE_DIR) keeps this correct when the
# project is embedded in a larger build via add_subdirectory().
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/")

# Build everything as C++11 and fail at configure time if the compiler cannot.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Append to CMAKE_CXX_FLAGS instead of overwriting it, so flags supplied by the
# user or a toolchain file (e.g. -DCMAKE_CXX_FLAGS=..., CXXFLAGS) are preserved.
#   -DBOOST_NO_CXX11_SCOPED_ENUMS : defines that Boost configuration macro
#   -w                            : suppresses all compiler warnings
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBOOST_NO_CXX11_SCOPED_ENUMS -w")

# Produce library files without the conventional "lib" name prefix.
set(CMAKE_STATIC_LIBRARY_PREFIX "")
set(CMAKE_SHARED_LIBRARY_PREFIX "")

# Default the install prefix to /usr/local, but only when the user did not pick
# one; an explicit -DCMAKE_INSTALL_PREFIX=... must win.
# CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT is provided by CMake >= 3.7; on
# older versions the condition is false and CMake's built-in default is kept.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "Installation prefix" FORCE)
endif()

# RPATH handling: binaries are linked with their install-time RPATH already in
# the build tree, pointing at <prefix>/lib plus the directories of any linked
# libraries that live outside the build tree.
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)

|
||
# External dependencies. Configuration stops if any of them cannot be found.
# The include directories are added at directory scope, so they apply to every
# target defined after this point.
find_package(APR REQUIRED)
include_directories(${APR_INCLUDE_DIR})

find_package(Lucene++ REQUIRED)
include_directories(${Lucene++_INCLUDE_DIRS})

find_package(Textpresso REQUIRED)
include_directories(${Textpresso_INCLUDE_DIR})

# Extra include directory for a libtpc checkout. This used to be hard-coded to
# one developer's home directory; it is now a cache entry (same default, so
# existing setups keep working) that can be overridden with
#   cmake -DTPCTOOLS_LIBTPC_INCLUDE_DIR=/path/to/libtpc ...
# or set to an empty value to skip it.
set(TPCTOOLS_LIBTPC_INCLUDE_DIR "/home/valerio/workspace/caltech/libtpc"
    CACHE PATH "Directory added to the include path for libtpc headers")
if(TPCTOOLS_LIBTPC_INCLUDE_DIR)
  include_directories("${TPCTOOLS_LIBTPC_INCLUDE_DIR}")
endif()
|
||
#### Main projects ####

# Every target_link_libraries() call below uses the PRIVATE keyword: these are
# executables, nothing links against them, and the keyword-less signature has
# legacy semantics. Library order is kept exactly as it was.

add_executable(printbibfromnxmlorcasfile
    printbibfromnxmlorcasfile/main.cpp
    printbibfromnxmlorcasfile/cmdline.h)
target_link_libraries(printbibfromnxmlorcasfile PRIVATE
    uima xerces-c boost_system boost_filesystem boost_iostreams boost_regex)

#### Subprojects ####

# Thread support is linked through the imported Threads::Threads target rather
# than the raw library name "pthread", letting CMake pick the right flags.
find_package(Threads REQUIRED)

# NOTE(review): CImg_SYSTEM_LIBS is not set anywhere in this file, so it expands
# to nothing unless it is provided from outside (cache / command line) - confirm.
add_executable(ppm2jpg ppm2jpg/main.cpp)
target_link_libraries(ppm2jpg PRIVATE
    boost_system boost_filesystem boost_program_options
    Threads::Threads ${CImg_SYSTEM_LIBS})

add_executable(cas2index cas2index/cas2index.cpp)
target_link_libraries(cas2index PRIVATE
    ${Textpresso_LIBRARY} boost_filesystem boost_system boost_program_options lucene++)

add_executable(updatecorpuscounter cas2index/update_corpus_counter.cpp)
target_link_libraries(updatecorpuscounter PRIVATE
    ${Textpresso_LIBRARY} boost_filesystem boost_system boost_program_options lucene++)

add_executable(indexmerger
    cas2index/index_merger.cpp
    lucene/CaseSensitiveAnalyzer.cpp)
target_link_libraries(indexmerger PRIVATE
    uima boost_filesystem boost_system lucene++)

add_executable(articles2cas
    articles2cas/articles2cas.cpp
    articles2cas/Utils.h
    articles2cas/Utils.cpp)
target_link_libraries(articles2cas PRIVATE
    ${Textpresso_LIBRARY} boost_filesystem boost_system boost_program_options boost_iostreams)

# getbib and getbib4nxml share the two top-level global headers and link the
# same set of libraries.
add_executable(getbib
    getbib/getbib.cpp
    getbib/getbibUtils.h
    getbib/getbibUtils.cpp
    TextpressoCentralGlobalDefinitions.h
    TextpressoCentralGlobals.h)
target_link_libraries(getbib PRIVATE
    lucene++ xerces-c icuuc boost_system uima boost_filesystem boost_iostreams)

add_executable(getbib4nxml
    getbib/getbib4nxml.cpp
    getbib/getbib4nxmlUtils.h
    getbib/getbib4nxmlUtils.cpp
    TextpressoCentralGlobalDefinitions.h
    TextpressoCentralGlobals.h)
target_link_libraries(getbib4nxml PRIVATE
    lucene++ xerces-c icuuc boost_system uima boost_filesystem boost_iostreams)

add_executable(saveidstodb cas2index/saveidstodb.cpp)
target_link_libraries(saveidstodb PRIVATE
    lucene++ boost_filesystem boost_system boost_program_options ${Textpresso_LIBRARY}
    db_cxx db_stl)
|
||
#### INSTALL ####

# All destinations are relative paths, which CMake resolves against
# CMAKE_INSTALL_PREFIX at install time. With the default prefix the files land
# in the same place as before, but relative destinations also honour
# `cmake --install <dir> --prefix <other>` and relocatable packaging, which
# absolute "${CMAKE_INSTALL_PREFIX}/bin" destinations do not.
#
# NOTE(review): ppm2jpg, indexmerger and printbibfromnxmlorcasfile are built but
# have no install rule, although the ppm2jpg/ppm2jpgCas.sh script is installed -
# confirm whether that is intentional.

# Executables built by this project.
install(TARGETS
            getbib
            getbib4nxml
            cas2index
            saveidstodb
            updatecorpuscounter
            articles2cas
        RUNTIME DESTINATION bin)

# Helper scripts, installed with execute permission next to the binaries.
install(PROGRAMS
            cas2index/create_index_multi_literatures.sh
            cas2index/create_single_index.sh
            ppm2jpg/ppm2jpgCas.sh
            run_tpc_pipeline_incremental.sh
            getpdfs/getpdfs.py
            getbibinfoforpdffromserver/download_pdfinfo.pl
            getbibinfoforpdffromserver/extract_pdfbibinfo.pl
            articles2cas/convertallarticles2cas.sh
        DESTINATION bin)
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
// Global file containing all global definitions.
//
// Only preprocessor constants live here: file-system locations, database
// identifiers and table names, and one numeric display limit.

#ifndef TEXTPRESSOCENTRALGLOBALDEFINITIONS_H
#define TEXTPRESSOCENTRALGLOBALDEFINITIONS_H

// Are these definitions really global? Otherwise move them back to their local project.

// Paths of the UIMA descriptor XML files.
#define PDF2TPCASDESCRIPTOR "/usr/local/uima_descriptors/TpTokenizer.xml"
#define XML2TPCASDESCRIPTOR "/usr/local/uima_descriptors/TxTokenizer.xml"
#define TPCAS2LINDEXDESCRIPTOR "/usr/local/uima_descriptors/Tpcas2Lindex.xml"

// Locations below the Textpresso installation directory.
#define AVAILABLELITERATUREFILE "/usr/local/textpresso/luceneindex/subindex.config"
#define USERUPLOADROOTDIR "/usr/local/textpresso/useruploads"

// Single source of truth for the database identifier shared by every PG*/AUTH*
// macro below (presumably a PostgreSQL connection string - confirm against the
// code that consumes it). It previously appeared as the same literal on fifteen
// separate lines. Being guarded by #ifndef, it can be overridden at compile
// time, e.g. -DPGDEFAULTCONNECTION='"dbname=other"'.
#ifndef PGDEFAULTCONNECTION
#define PGDEFAULTCONNECTION "dbname=www-data"
#endif

// Ontology browser / ontology.
#define PGONTOLOGYBROWSER PGDEFAULTCONNECTION
// The original macro name is misspelled ("BROWSWER"). It is kept because other
// code may reference it; the correctly spelled alias expands to the same value.
#define PGONTOLOGYBROWSWERCOLUMNS "ontologybrowsercolumnnames"
#define PGONTOLOGYBROWSERCOLUMNS PGONTOLOGYBROWSWERCOLUMNS
#define PGONTOLOGY PGDEFAULTCONNECTION
#define PGONTOLOGYTABLENAME "tpontology"
#define PGONTOLOGYTMPTABLENAME "tmptpontology"

// Curation and relation tables.
#define PGCURATION PGDEFAULTCONNECTION
#define PGCURATIONTABLENAME "tpcuration"
#define PCRELATIONSTABLENAME "pcrelations"
#define PCRELATIONSTMPTABLENAME "tmppcrelations"
#define PADCRELATIONSTABLENAME "padcrelations"
#define PADCRELATIONSTMPTABLENAME "tmppadcrelations"
#define STOPWORDTABLENAME "stopwords"

// Literature preferences and preloaded categories.
#define PGLITERATURE PGDEFAULTCONNECTION
#define PGLITPREFTABLENAME "literaturepreference"
#define PGPRELOADEDCATEGORIES PGDEFAULTCONNECTION
#define PGPRELOADEDCATTABLENAME "preloadedcategories"

// Tip of the day.
#define PGTIPOFDAY PGDEFAULTCONNECTION
#define PGTIPOFDAYTABLENAME "tipoftheday"

// Curation fields, forms and viewer data.
#define PGCURATIONFIELDS PGDEFAULTCONNECTION
#define PGCURATIONFIELDSTABLENAME "curationfields"
#define PGCURATIONFORMS PGDEFAULTCONNECTION
#define PGCURATIONFORMSTABLENAME "curationforms"
#define PGCURATIONDATAFROMVIEWER PGDEFAULTCONNECTION
#define PGCURATIONDATAFROMVIEWERTABLENAME "curationdatafromviewer"

// Ontology list and prepopulation.
#define PGLISTOFONTOLOGIES PGDEFAULTCONNECTION
#define PGLISTOFONTOLOGIESTABLENAME "listofontologies"
#define PGPREPOPULATION PGDEFAULTCONNECTION
#define PGPREPOPULATIONTABLENAME "prepopulation"

// Authentication and per-literature permissions.
#define AUTHIDENTITIES PGDEFAULTCONNECTION
#define AUTHIDENTITIESTABLENAME "auth_identity"
#define PGLITERATUREPERMISSION PGDEFAULTCONNECTION
#define PGLITERATUREPERMISSIONTABLENAME "literaturepermissions"

// User-interface preferences.
#define PGCUSTOMCOLORS PGDEFAULTCONNECTION
#define PGCUSTOMCOLORSTABLENAME "customcolor"
#define PGDIALOGPREFERENCES PGDEFAULTCONNECTION
#define PGDIALOGPREFERENCESTABLENAME "dialogpreferencestable"

// Numeric limit; going by the name, the maximum number of words displayed for
// a sentence-search result - confirm against the code that uses it.
#define SENTENCE_SEARCH_MAX_NUM_DISPLAY_WORDS 50

#endif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// Global file containing all global definitions.
//
// Defines the delimiter / tag-prefix tables and the list of server URLs, along
// with the element count of each table.

#ifndef TEXTPRESSOCENTRALGLOBALS_H
#define TEXTPRESSOCENTRALGLOBALS_H

// Are these definitions really global? Otherwise move them back to their local project.

#include "TextpressoCentralGlobalDefinitions.h"

#include <string>  // std::string is used below; do not rely on a transitive include.

#include <uima/api.hpp>

// NOTE(review): G_initT, G_initS and G_initP are non-const objects *defined* in
// a header, so they have external linkage. Including this header from more than
// one translation unit of the same program would produce duplicate-definition
// link errors. The linkage is left untouched here because other code may depend
// on it - confirm, then consider making them const or moving the definitions
// into a single .cpp file.

// If a composite delimiter exists, then there cannot be another delimiter
// that is a subset of that composite token delimiter. Decompose it accordingly.
// This applies to token and sentence delimiter

// Presumably the token delimiters (see the comment above) - confirm with users.
// NOTE(review): the em dash entry is a non-ASCII character inside a narrow
// string literal; how it is converted to a UnicodeString presumably depends on
// the source-file encoding and the default converter - confirm.
UnicodeString G_initT[] = {
    " ", "\n", "\t", "'", "\"",
    "/", "—", "(", ")", "[",
    "]", "{", "}", ":", ". ",
    "; ", ", ", "! ", "? "
};
// Number of entries in G_initT (19), derived from the array so the two cannot
// drift apart when an entry is added or removed.
const int G_initT_No = static_cast<int>(sizeof(G_initT) / sizeof(G_initT[0]));

// Presumably the sentence delimiters: '.', '!' or '?' followed by newline,
// space, tab or '<'.
UnicodeString G_initS[] = {
    ".\n", "!\n", "?\n", ". ", "! ", "? ",
    ".\t", "!\t", "?\t", ".<", "!<", "?<"
};
// Number of entries in G_initS (12).
const int G_initS_No = static_cast<int>(sizeof(G_initS) / sizeof(G_initS[0]));

// Strings that all start with the "<_pdf " tag opener followed by a tag name.
UnicodeString G_initP[] = {
    "<_pdf _image", "<_pdf _sbr", "<_pdf _hbr", "<_pdf _fsc",
    "<_pdf _fnc", "<_pdf _ydiff", "<_pdf _cr", "<_pdf _page"
};
// Number of entries in G_initP (8).
const int G_initP_No = static_cast<int>(sizeof(G_initP) / sizeof(G_initP[0]));

// Server endpoint URLs.
const std::string ServerNames[] = {
    "http://goldturtle.caltech.edu/cgi-bin/ReceivePost.cgi",
    "http://go-genkisugi.rhcloud.com/capella",
    "http://localhost/cgi-bin/ReceivePost.cgi"
};
// Number of entries in ServerNames (3).
const int ServerNames_No = static_cast<int>(sizeof(ServerNames) / sizeof(ServerNames[0]));

// Opening and closing character sequences of a "<_pdf .../>" tag.
const UnicodeString usG_pdftagstart("<_pdf ");
const UnicodeString usG_pdftagend("/>");

#endif
Oops, something went wrong.