Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
valearna committed Oct 18, 2017
0 parents commit cafe1f9
Show file tree
Hide file tree
Showing 40 changed files with 5,016 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Anything whose name starts with "cmake-build" (presumably IDE-generated
# CMake build trees -- confirm).
cmake-build*
# IDE project directory (presumably JetBrains/CLion settings -- confirm).
.idea/
# Directories named build, dist or nbproject at any depth of the tree.
**/build/
**/dist/
**/nbproject/
# Directory "pgdumps" at the repository root only (leading slash anchors it).
/pgdumps/
# Files ending in "~" (presumably editor backup files -- confirm).
*~
# File "tpctl.config" at the repository root only.
/tpctl.config
86 changes: 86 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
cmake_minimum_required(VERSION 3.5)
project(tpctools)

# Make the Find*.cmake modules shipped under cmake/Modules visible to
# find_package(). PROJECT_SOURCE_DIR is used instead of CMAKE_SOURCE_DIR so the
# path still resolves when this project is added via add_subdirectory().
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake/Modules/")

# Build every target as C++11 and fail at configure time if unsupported.
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Append to, rather than overwrite, CMAKE_CXX_FLAGS so that flags coming from
# the CXXFLAGS environment variable, a toolchain file or -DCMAKE_CXX_FLAGS=...
# are preserved.
# NOTE(review): "-w" silences all compiler warnings on GCC/Clang; consider
# dropping it once the code base builds cleanly.
string(APPEND CMAKE_CXX_FLAGS " -DBOOST_NO_CXX11_SCOPED_ENUMS -w")

# Produce library files without the conventional "lib" prefix.
set(CMAKE_STATIC_LIBRARY_PREFIX "")
set(CMAKE_SHARED_LIBRARY_PREFIX "")

# Default the install prefix to /usr/local, but only when the user did not
# choose one; an explicit -DCMAKE_INSTALL_PREFIX=... must win.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
    set(CMAKE_INSTALL_PREFIX "/usr/local" CACHE PATH "Install path prefix" FORCE)
endif()

# RPATH handling: binaries are linked with their install-time RPATH already in
# place (<prefix>/lib plus the directories of the libraries linked against),
# so they do not need relinking at install time.
set(CMAKE_SKIP_BUILD_RPATH FALSE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib")
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)

# Locate the external dependencies and add their header directories to the
# include path of every target defined after this point in this directory.
find_package(APR REQUIRED)
include_directories(${APR_INCLUDE_DIR})

find_package(Lucene++ REQUIRED)
include_directories(${Lucene++_INCLUDE_DIRS})

find_package(Textpresso REQUIRED)
include_directories(${Textpresso_INCLUDE_DIR})

# Extra include directory for the libtpc sources. It used to be hard-coded to
# one developer's checkout; it is now a cache entry so other machines can
# override it with -DTPCTOOLS_LIBTPC_INCLUDE_DIR=<path>. The default keeps the
# previous value so existing build trees behave exactly as before.
set(TPCTOOLS_LIBTPC_INCLUDE_DIR "/home/valerio/workspace/caltech/libtpc"
    CACHE PATH "Directory containing the libtpc headers")
if(TPCTOOLS_LIBTPC_INCLUDE_DIR)
    include_directories("${TPCTOOLS_LIBTPC_INCLUDE_DIR}")
endif()

#### Main projects ####

# All targets below are executables, so every link dependency is PRIVATE.
# The explicit keyword also avoids the legacy keyword-less signature of
# target_link_libraries().

add_executable(printbibfromnxmlorcasfile
        printbibfromnxmlorcasfile/main.cpp
        printbibfromnxmlorcasfile/cmdline.h)
target_link_libraries(printbibfromnxmlorcasfile PRIVATE
        uima xerces-c boost_system boost_filesystem boost_iostreams boost_regex)

#### Subprojects ####

# Provides the imported target Threads::Threads, which carries the right
# compiler/linker flags for the platform's thread library.
find_package(Threads REQUIRED)

add_executable(ppm2jpg ppm2jpg/main.cpp)
# NOTE(review): CImg_SYSTEM_LIBS is not set anywhere in this file; unless one
# of the find modules defines it, it expands to nothing -- confirm.
target_link_libraries(ppm2jpg PRIVATE
        boost_system boost_filesystem boost_program_options Threads::Threads ${CImg_SYSTEM_LIBS})

add_executable(cas2index cas2index/cas2index.cpp)
target_link_libraries(cas2index PRIVATE
        ${Textpresso_LIBRARY} boost_filesystem boost_system boost_program_options lucene++)

add_executable(updatecorpuscounter cas2index/update_corpus_counter.cpp)
target_link_libraries(updatecorpuscounter PRIVATE
        ${Textpresso_LIBRARY} boost_filesystem boost_system boost_program_options lucene++)

add_executable(indexmerger
        cas2index/index_merger.cpp
        lucene/CaseSensitiveAnalyzer.cpp)
target_link_libraries(indexmerger PRIVATE
        uima boost_filesystem boost_system lucene++)

add_executable(articles2cas
        articles2cas/articles2cas.cpp
        articles2cas/Utils.h
        articles2cas/Utils.cpp)
target_link_libraries(articles2cas PRIVATE
        ${Textpresso_LIBRARY} boost_filesystem boost_system boost_program_options boost_iostreams)

add_executable(getbib
        getbib/getbib.cpp
        getbib/getbibUtils.h
        getbib/getbibUtils.cpp
        TextpressoCentralGlobalDefinitions.h
        TextpressoCentralGlobals.h)
target_link_libraries(getbib PRIVATE
        lucene++ xerces-c icuuc boost_system uima boost_filesystem boost_iostreams)

add_executable(getbib4nxml
        getbib/getbib4nxml.cpp
        getbib/getbib4nxmlUtils.h
        getbib/getbib4nxmlUtils.cpp
        TextpressoCentralGlobalDefinitions.h
        TextpressoCentralGlobals.h)
target_link_libraries(getbib4nxml PRIVATE
        lucene++ xerces-c icuuc boost_system uima boost_filesystem boost_iostreams)

add_executable(saveidstodb cas2index/saveidstodb.cpp)
target_link_libraries(saveidstodb PRIVATE
        lucene++ boost_filesystem boost_system boost_program_options ${Textpresso_LIBRARY}
        db_cxx db_stl)

#### INSTALL ####

# Destinations are given relative to the install prefix. CMake resolves them
# against CMAKE_INSTALL_PREFIX at install time, so `cmake --install --prefix`,
# DESTDIR staging and CPack all work; the previous absolute
# ${CMAKE_INSTALL_PREFIX}/bin form froze the prefix at configure time.
#
# NOTE(review): printbibfromnxmlorcasfile, ppm2jpg and indexmerger are built
# but not installed, although ppm2jpg/ppm2jpgCas.sh is -- confirm that this is
# intentional.

# Compiled executables.
install(TARGETS
            getbib
            getbib4nxml
            cas2index
            saveidstodb
            updatecorpuscounter
            articles2cas
        RUNTIME DESTINATION bin)

# Helper scripts, installed with execute permission.
install(PROGRAMS
            cas2index/create_index_multi_literatures.sh
            cas2index/create_single_index.sh
            ppm2jpg/ppm2jpgCas.sh
            run_tpc_pipeline_incremental.sh
            getpdfs/getpdfs.py
            getbibinfoforpdffromserver/download_pdfinfo.pl
            getbibinfoforpdffromserver/extract_pdfbibinfo.pl
            articles2cas/convertallarticles2cas.sh
        DESTINATION bin)



52 changes: 52 additions & 0 deletions TextpressoCentralGlobalDefinitions.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Compile-time constants shared across the Textpresso Central tools:
// file-system locations, database connection strings and table names.

#ifndef TEXTPRESSOCENTRALGLOBALDEFINITIONS_H
#define TEXTPRESSOCENTRALGLOBALDEFINITIONS_H

// TODO: check whether each of these really needs to be global; anything used
// by a single project should move back into that project.

// --- Descriptor XML files under /usr/local/uima_descriptors ---------------
#define PDF2TPCASDESCRIPTOR                 "/usr/local/uima_descriptors/TpTokenizer.xml"
#define XML2TPCASDESCRIPTOR                 "/usr/local/uima_descriptors/TxTokenizer.xml"
#define TPCAS2LINDEXDESCRIPTOR              "/usr/local/uima_descriptors/Tpcas2Lindex.xml"

// --- Locations under /usr/local/textpresso --------------------------------
#define AVAILABLELITERATUREFILE             "/usr/local/textpresso/luceneindex/subindex.config"
#define USERUPLOADROOTDIR                   "/usr/local/textpresso/useruploads"

// --- Database settings -----------------------------------------------------
// Each "dbname=..." value is paired with the table name(s) that follow it.
// NOTE(review): the values look like PostgreSQL connection strings -- confirm.

// Ontology browser and ontology tables.
// NOTE(review): "BROWSWER" below is misspelled, but the name is kept as-is
// because code elsewhere may reference it.
#define PGONTOLOGYBROWSER                   "dbname=www-data"
#define PGONTOLOGYBROWSWERCOLUMNS           "ontologybrowsercolumnnames"
#define PGONTOLOGY                          "dbname=www-data"
#define PGONTOLOGYTABLENAME                 "tpontology"
#define PGONTOLOGYTMPTABLENAME              "tmptpontology"

// Curation, relation and stop-word tables.
#define PGCURATION                          "dbname=www-data"
#define PGCURATIONTABLENAME                 "tpcuration"
#define PCRELATIONSTABLENAME                "pcrelations"
#define PCRELATIONSTMPTABLENAME             "tmppcrelations"
#define PADCRELATIONSTABLENAME              "padcrelations"
#define PADCRELATIONSTMPTABLENAME           "tmppadcrelations"
#define STOPWORDTABLENAME                   "stopwords"

// Literature preferences.
#define PGLITERATURE                        "dbname=www-data"
#define PGLITPREFTABLENAME                  "literaturepreference"

// Preloaded categories.
#define PGPRELOADEDCATEGORIES               "dbname=www-data"
#define PGPRELOADEDCATTABLENAME             "preloadedcategories"

// Tip of the day.
#define PGTIPOFDAY                          "dbname=www-data"
#define PGTIPOFDAYTABLENAME                 "tipoftheday"

// Curation fields, forms and data coming from the viewer.
#define PGCURATIONFIELDS                    "dbname=www-data"
#define PGCURATIONFIELDSTABLENAME           "curationfields"
#define PGCURATIONFORMS                     "dbname=www-data"
#define PGCURATIONFORMSTABLENAME            "curationforms"
#define PGCURATIONDATAFROMVIEWER            "dbname=www-data"
#define PGCURATIONDATAFROMVIEWERTABLENAME   "curationdatafromviewer"

// List of ontologies and prepopulation.
#define PGLISTOFONTOLOGIES                  "dbname=www-data"
#define PGLISTOFONTOLOGIESTABLENAME         "listofontologies"
#define PGPREPOPULATION                     "dbname=www-data"
#define PGPREPOPULATIONTABLENAME            "prepopulation"

// Authentication identities and literature permissions.
#define AUTHIDENTITIES                      "dbname=www-data"
#define AUTHIDENTITIESTABLENAME             "auth_identity"
#define PGLITERATUREPERMISSION              "dbname=www-data"
#define PGLITERATUREPERMISSIONTABLENAME     "literaturepermissions"

// User-interface preferences.
#define PGCUSTOMCOLORS                      "dbname=www-data"
#define PGCUSTOMCOLORSTABLENAME             "customcolor"
#define PGDIALOGPREFERENCES                 "dbname=www-data"
#define PGDIALOGPREFERENCESTABLENAME        "dialogpreferencestable"

// --- Miscellaneous ---------------------------------------------------------
#define SENTENCE_SEARCH_MAX_NUM_DISPLAY_WORDS 50

#endif
41 changes: 41 additions & 0 deletions TextpressoCentralGlobals.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Global delimiter tables, PDF tag markers and server URLs shared by the
// Textpresso Central tools.

#ifndef TEXTPRESSOCENTRALGLOBALS_H
#define TEXTPRESSOCENTRALGLOBALS_H


// TODO: check whether these definitions really need to be global; otherwise
// move them back to their local project.

#include "TextpressoCentralGlobalDefinitions.h"

#include <string>

#include <uima/api.hpp>

// The arrays below are defined (not merely declared) in this header. They are
// marked `static` so that each translation unit including the header gets its
// own copy; without it the non-const arrays have external linkage and the
// linker reports multiple definitions as soon as two source files include
// this header.
//
// The *_No element counts are derived from the arrays with sizeof, so they
// cannot drift out of sync when an entry is added or removed.

// If a composite delimiter exists, then there cannot be another delimiter
// that is a subset of that composite token delimiter. Decompose it accordingly.
// This applies to token and sentence delimiters.

// Token delimiters.
// NOTE(review): the entry after "/" is an empty string; this looks like a
// lost character (possibly a backslash) -- confirm against the consumers.
static UnicodeString G_initT[] = {
    " ", "\n", "\t", "'", "\"",
    "/", "", "(", ")", "[",
    "]", "{", "}", ":", ". ",
    "; ", ", ", "! ", "? "
};
const int G_initT_No = static_cast<int>(sizeof(G_initT) / sizeof(G_initT[0]));

// Sentence delimiters.
static UnicodeString G_initS[] = {
    ".\n", "!\n", "?\n", ". ", "! ", "? ",
    ".\t", "!\t", "?\t", ".<", "!<", "?<"
};
const int G_initS_No = static_cast<int>(sizeof(G_initS) / sizeof(G_initS[0]));

// Opening prefixes of the "<_pdf ..." tags.
static UnicodeString G_initP[] = {
    "<_pdf _image", "<_pdf _sbr", "<_pdf _hbr",
    "<_pdf _fsc", "<_pdf _fnc", "<_pdf _ydiff", "<_pdf _cr", "<_pdf _page"
};
const int G_initP_No = static_cast<int>(sizeof(G_initP) / sizeof(G_initP[0]));

// Server endpoint URLs.
const std::string ServerNames[] = {
    "http://goldturtle.caltech.edu/cgi-bin/ReceivePost.cgi",
    "http://go-genkisugi.rhcloud.com/capella",
    "http://localhost/cgi-bin/ReceivePost.cgi"
};
const int ServerNames_No = static_cast<int>(sizeof(ServerNames) / sizeof(ServerNames[0]));

// Start and end markers of a PDF tag, as UnicodeString. The std::string
// variants are kept commented out, as in the original.
//const std::string G_pdftagstart("<_pdf ");
//const std::string G_pdftagend("/>");
const UnicodeString usG_pdftagstart("<_pdf ");
const UnicodeString usG_pdftagend("/>");

#endif
Loading

0 comments on commit cafe1f9

Please sign in to comment.