run.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
logger = logging.getLogger(__name__)
# noinspection PyUnresolvedReferences
import logging_config

if __name__ == "__main__":
    logger.info('init run: origin')
    import argparse
    import config

    parser = argparse.ArgumentParser(
        description='learn graph patterns',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        '--sparql_endpoint',
        help="the SPARQL endpoint to query",
        action="store",
        default=config.SPARQL_ENDPOINT,
    )
    parser.add_argument(
        "--associations_filename",
        help="ground truth source target file used for training and "
             "evaluation",
        action="store",
        default=config.GT_ASSOCIATIONS_FILENAME,
    )
    parser.add_argument(
        "--print_train_test_sets",
        help="print the sets used for training and testing",
        action="store",
        default=True,
        type=config.str_to_bool,
    )
    parser.add_argument(
        "--splitting_variant",
        help="how to split the train, validation & test set",
        action="store",
        default="random",
        choices=config.SPLITTING_VARIANTS,
    )
    parser.add_argument(
        "--reset",
        help="remove result files of a previous training if they exist "
             "(otherwise the previous training's model will be loaded; if "
             "that training wasn't complete, its last completed run will be "
             "loaded and training will continue)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--print_topn_raw_patterns",
        help="how many of the found (raw, unclustered) patterns to print out",
        action="store",
        type=int,
        default=0,
    )
    parser.add_argument(
        "--print_edge_only_connected_patterns",
        help="print edge-only connected and mixed variable patterns "
             "separately",
        action="store",
        default=True,
        type=config.str_to_bool,
    )
    parser.add_argument(
        "--show_precision_loss_by_query_reduction",
        help="show a plot of the expected precision degradation of "
             "prediction (on the training set) if the number of queries per "
             "prediction is limited (max_q)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--max_queries",
        help="limit the number of queries per prediction (0: no limit)",
        action="store",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--clustering_variant",
        help="if specified, use this clustering variant for query reduction; "
             "otherwise the best of several variants is selected",
        action="store",
        type=str,
        default=None,
    )
    parser.add_argument(
        "--print_query_patterns",
        help="print the graph patterns which are used to make predictions",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "--predict",
        help="evaluate the learned patterns by predicting the targets for "
             "the sources in the specified set of ground truth source-target "
             "pairs and comparing the predicted targets to the ground truth "
             "targets. During development of this algorithm, for parameter "
             "tuning, and to get an upper bound, use 'train_set'; for the "
             "final evaluation use 'test_set'. To disable evaluation, set "
             "this to ''.",
        action="store",
        type=str,
        choices=("test_set", "train_set", "manual", ""),
        default="",
    )
    parser.add_argument(
        "--fusion_methods",
        help="which fusion methods to train / use. During prediction, each "
             "of the learned patterns can generate a list of target "
             "candidates. Fusion re-combines these into a single ranked list "
             "of predicted targets. By default this will train and use all "
             "implemented fusion methods. Any one of them, or a ','-delimited "
             "list of them, can be given to reduce the output (just make sure "
             "you ran --predict=train_set on them before). The shorthands "
             "'basic' and 'classifier' are also supported.",
        action="store",
        type=str,
        default=None,
    )
    cfg_group = parser.add_argument_group(
        'Advanced config overrides',
        'The following allow overriding default values from config/defaults.py'
    )
    config.arg_parse_config_vars(cfg_group)

    prog_args = vars(parser.parse_args())
    # the following were aliased above, make sure they're updated globally
    prog_args.update({
        'SPARQL_ENDPOINT': prog_args['sparql_endpoint'],
        'GT_ASSOCIATIONS_FILENAME': prog_args['associations_filename'],
    })
    config.finalize(prog_args)

    from gp_learner import main
    main(**prog_args)
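# When this module is imported by a worker process rather than run as a
# script, only the worker init is logged; argument parsing and training
# happen in the origin process above.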
else:
    logger.info('init run: worker')
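# A minimal example invocation (illustrative only; the endpoint URL and the
# associations file path below are assumptions, not project defaults):
#   python run.py --sparql_endpoint http://localhost:8890/sparql \
#       --associations_filename my_associations.csv \
#       --predict=train_set --fusion_methods=basic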