Merge branch 'srw2.0' of github.com:TeamCohen/ProPPR into srw2.0

TeamCohen · Jun 22, 2015 · 47ff65c · 47ff65c
2 parents 7c6cae3 + 305eeee
commit 47ff65c
Show file tree

Hide file tree

Showing 13 changed files with 94 additions and 127 deletions.
diff --git a/.gitignore b/.gitignore
@@ -16,3 +16,7 @@ bin
 test-results
 out
 kbp_prototype
+.classpath
+.pydevproject
+.settings
+.Rhistory
diff --git a/doc/minAlphaProjection.tex b/doc/minAlphaProjection.tex
diff --git a/doc/proppr-math/deriv.pdf b/doc/proppr-math/deriv.pdf
diff --git a/doc/proppr-math/deriv.tex b/doc/proppr-math/deriv.tex
@@ -36,7 +36,7 @@ \section{Derivation: PPR and its derivative}
 parameters $\vek{w}$.
 
 \begin{eqnarray}
-\vek{p}^{t+1} & \equiv & \alpha \vek{s} + (1-\alpha) \M \vek{p}^t \\
+\vek{p}^{t+1} & \equiv & \alpha \vek{s} + (1-\alpha) \M^\top \vek{p}^t \\
 \vek{d}^t & \equiv &  \ddw \vek{p}^t
 \end{eqnarray}
 
@@ -62,10 +62,10 @@ \section{Derivation: PPR and its derivative}
 
 \begin{eqnarray}
 \vek{d}^{t+1}  & =  &  \ddw \vek{p}^{t+1} \\
-               & =  &  \ddw \left( \alpha \vek{s} + (1-\alpha) \M \vek{p}^t \right) \\
-               & =  &  (1-\alpha) \ddw \M \vek{p}^t \\
-               & =  &  (1-\alpha) \left( (\ddw \M) \vek{p}^t  + \M \ddw\vek{p}^t \right) \\
-               & =  &  (1-\alpha) \left( (\ddw \M) \vek{p}^t  + \M \vek{d}^t \right)
+               & =  &  \ddw \left( \alpha \vek{s} + (1-\alpha) \M^\top \vek{p}^t \right) \\
+               & =  &  (1-\alpha) \ddw \M^\top \vek{p}^t \\
+               & =  &  (1-\alpha) \left( (\ddw \M^\top) \vek{p}^t  + \M^\top \ddw\vek{p}^t \right) \\
+               & =  &  (1-\alpha) \left( (\ddw \M^\top) \vek{p}^t  + \M^\top \vek{d}^t \right)
 \end{eqnarray}
 
 Now let's look at $\ddw\M$, which I'll denote $\dM$ below.  Note that
@@ -166,8 +166,8 @@ \section{Computation}
 Then you can start with $\vek{p}^0 = \vek{d}^0 = \vek{0}$ and iterate
 
 \begin{eqnarray}
-\vek{p}^{t+1}  & =  & \alpha \vek{s} + (1-\alpha) \M \vek{p}^t \\
-\vek{d}^{t+1}  & =  & (1-\alpha) \left( \dM \vek{p}^t  + \M \vek{d}^t \right)
+\vek{p}^{t+1}  & =  & \alpha \vek{s} + (1-\alpha) \M^\top \vek{p}^t \\
+\vek{d}^{t+1}  & =  & (1-\alpha) \left( \dM^\top \vek{p}^t  + \M^\top \vek{d}^t \right)
 \end{eqnarray}
 
 In more detail, the iteration for the updates on $\vek{p}$ is shown
@@ -197,7 +197,7 @@ \section{Computation}
   \begin{enumerate}
   \item For each neighbor $v$ of $u$
     \begin{enumerate}
-      \item For each $i$ in $\dM_{uv}$
+      \item For each $i$ in $\dM_{vu}$
 
       \[ \vek{d}^{t+1}_{v,i} +\!\!= (1-\alpha) \vek{p}^t_u \dM_{uv,i} \]
       \item For each $i$ in $\vek{d}^t_{v}$
@@ -335,14 +335,14 @@ \section{Inference: PPR and APR}
 routine \texttt{proveState}$(u_0)$, where $u_0$ is the initial query
 node (aka the initial state of the proof graph).
 
-William's got a new version which calls this iteratively with smaller
-and smaller $\epsilon$'s, which seems to work faster.  That suggests
-that it's helpful to do pushes on the nodes with larger $\vek{r}$
-values first.  With that in mind, another thing we could consider
-would be storing $u$'s in a heap/priority queue, ordered by $\vek{r}$
-values.  I know this idea has been used in the past but I don't know
-how much it helps.  Obviously there's an overhead for the heap but my
-guess is that's not going to dominate.
+William Wang's got a new version which calls this iteratively with
+smaller and smaller $\epsilon$'s, which seems to work faster.  That
+suggests that it's helpful to do pushes on the nodes with larger
+$\vek{r}$ values first.  With that in mind, another thing we could
+consider would be storing $u$'s in a heap/priority queue, ordered by
+$\vek{r}$ values.  I know this idea has been used in the past but I
+don't know how much it helps.  Obviously there's an overhead for the
+heap but my guess is that's not going to dominate.
 
 \bigskip
 

diff --git a/doc/cikm-full.dot → doc/publication-graphs/cikm-full.dot b/doc/cikm-full.dot → doc/publication-graphs/cikm-full.dot
diff --git a/doc/redblue.dot → doc/publication-graphs/redblue.dot b/doc/redblue.dot → doc/publication-graphs/redblue.dot
diff --git a/scripts/proppr b/scripts/proppr
@@ -12,75 +12,80 @@ logging.basicConfig(level=logging.INFO)
 
 def doMain():
    logging.info('ProPPR v2')
+   opts = {'dryRun':False}
    com = getArg(0)
+   if com == "-n":
+       opts['dryRun'] = True
+       sys.argv = sys.argv[1:]
+       com = getArg(0)
    if com == "help":
-      doHelp()
+      doHelp(opts)
    elif com == "set":
-      doSet()
+      doSet(opts)
    elif com == "settings":
-      doSettings()
+      doSettings(opts)
    elif com == "compile":
-      doCompile()
+      doCompile(opts)
    elif com == "answer":
-      doAnswer()
+      doAnswer(opts)
    elif com == "ground":
-      doGround()
+      doGround(opts)
    elif com == "eval":
-      doEval()
+      doEval(opts)
    elif com == "train":
-      doTrain()
+      doTrain(opts)
    else:
-      doHelp()
+      doHelp(opts)
 
 # subcommands
 
-def doSet():
+def doSet(o):
    opts = loadOptionDict()  #also load command-line options
    saveOptionDict(opts)
 shortHelpMsg['set'] = ' OPTION VALUE: store a default setting in proppr.settings, where OPTION starts with --'
 
-def doSettings():
+def doSettings(o):
    for key,val in loadOptionDict().items():
       print key,val
 shortHelpMsg['settings'] = ': show current default settings in proppr.settings'
 
-def doAnswer():
+def doAnswer(o):
    stem,examples = stemExt(getArg(1),'.examples')
    solutions = getArg(2,stem+'.solutions.txt')
-   invokeMain('QueryAnswerer','--queries',examples,'--solutions',solutions)
+   invokeMain(o,'QueryAnswerer','--queries',examples,'--solutions',solutions)
    logging.info('answers in '+solutions)
 shortHelpMsg['answer'] = ' INPUT.examples [OUTPUT.solutions.txt]: answer queries from file and write solutions'
 
-def doGround():
+def doGround(o):
    stem,examples = stemExt(getArg(1),'.examples')
    grounded = getArg(2,stem+'.examples.grounded')
-   invokeMain('Grounder','--queries',examples,'--grounded',grounded)
+   invokeMain(o,'Grounder','--queries',examples,'--grounded',grounded)
    logging.info('grounded to '+grounded)
 shortHelpMsg['ground'] = ' INPUT.examples [OUTPUT.grounded]: answer queries from file and write graphs'
-def doEval():
+
+def doEval(o):
    stem,examples = stemExt(getArg(1),'.examples')
    solutions = getArg(2, stem+'.solutions.txt')
    opts = getArgvOptions() or ['--metric','map']
    root = getRoot()
-   callProcess(['python',root+'/scripts/answermetrics.py','--data',examples,'--answers',solutions] + opts)
+   callProcess(o,['python',root+'/scripts/answermetrics.py','--data',examples,'--answers',solutions] + opts)
 shortHelpMsg['eval'] = ' INPUT.examples INPUT.solutions.txt [--metric mrr|recall|ap] [--echo]: evaluate proposed solutions' 
 
-def doCompile():
+def doCompile(o):
    stem,src = stemExt(getArg(1),'.ppr')
    dst = getArg(2,stem+".wam")
    root = getRoot()
-   callProcess(['python',root+'/src/scripts/compiler.py','serialize',src],stdout=open(dst,'w'))
+   callProcess(o,['python',root+'/src/scripts/compiler.py','serialize',src],stdout=open(dst,'w'))
    logging.info('compiled '+ src + ' to ' + dst)
 shortHelpMsg['compile'] = ' INPUT.ppr [OUTPUT.wam]: compile a ProPPR program to abstract machine'
 
-def doTrain():
+def doTrain(o):
    stem,src = stemExt(getArg(1),'.examples.grounded')
    dst = getArg(2,stem+'.params')
-   invokeMain('Trainer','--train',src,'--params',dst)
+   invokeMain(o,'Trainer','--train',src,'--params',dst)
 shortHelpMsg['train'] = ' INPUT.examples.grounded [OUTPUT.params]: learn parameters for the prover'
 
-shortHelpMsg['help'] = ': this help message'
-def doHelp():
+def doHelp(o):
    print 'ProPPR v2: commands are:'
    for com in shortHelpMsg:
       print '  proppr '+ com + shortHelpMsg[com]
@@ -89,6 +94,7 @@ def doHelp():
    print '      e.g. "proppr answer test.examples --prover tr"'
    print '  (2) for all commands that invoke Java mains, appending "--help" will give help'
    print '      on available additional options'
+shortHelpMsg['help'] = ': this help message'
 
 ## helpers
 
@@ -102,7 +108,7 @@ def stemExt(fileName,expectedExtension):
    else:
       return (fileName,fileName+expectedExtension)
 
-def invokeMain(main,*mainProgArgs):
+def invokeMain(o,main,*mainProgArgs):
    """Invoke a main program, supplying options from the settings file
    and/or the command-line options."""
    root = getRoot()
@@ -112,12 +118,13 @@ def invokeMain(main,*mainProgArgs):
    for (key,val) in loadOptionDict().items():
       optArgs.append(key)
       optArgs.append(val)
-   callProcess(['java','-cp',classPath,mainClass] + list(mainProgArgs) + optArgs)
+   callProcess(o,['java','-cp',classPath,mainClass] + list(mainProgArgs) + optArgs)
 
-def callProcess(args,**kw):
+def callProcess(o,args,**kw):
    """Call a process, tracing the actual call."""
    if kw: logging.info('subprocess call options: '+ str(kw))
    logging.info('calling: ' + ' '.join(args))
+   if o['dryRun']: return
    subprocess.call(args,**kw)
 
 def getRoot():

diff --git a/src/java/main/edu/cmu/ml/proppr/graph/ArrayLearningGraphBuilder.java b/src/java/main/edu/cmu/ml/proppr/graph/ArrayLearningGraphBuilder.java
@@ -5,6 +5,7 @@
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
 
 public class ArrayLearningGraphBuilder extends LearningGraphBuilder {
@@ -74,23 +75,29 @@ public void freeze(LearningGraph g) {
 		}
 		int edge_cursor=0;
 		int label_cursor=0;
+		int label_deps=0;
+		HashSet<Integer> outgoingFeatures = null;
 		for (int u=0; u<current.node_hi; u++) {
 			current.node_near_lo[u]=edge_cursor;
+			if (current.labelDependencySize() < 0) outgoingFeatures = new HashSet<Integer>();
 			if (outlinks[u] != null) {
 				for (RWOutlink o : outlinks[u]) {
 					current.edge_dest[edge_cursor] = o.nodeid;
 					current.edge_labels_lo[edge_cursor] = label_cursor;
 					for(Map.Entry<String,Double> it : o.fd.entrySet()) {
 						current.label_feature_id[label_cursor] = ((LearningGraph) g).featureLibrary.getId(it.getKey());
 						current.label_feature_weight[label_cursor] = it.getValue();
+						if (current.labelDependencySize() < 0) outgoingFeatures.add(current.label_feature_id[label_cursor]);
 						label_cursor++;
 					}
 					current.edge_labels_hi[edge_cursor] = label_cursor;
 					edge_cursor++;
 				}
+				if (current.labelDependencySize() < 0) label_deps += outgoingFeatures.size();
 			}
 			current.node_near_hi[u]=edge_cursor;
 		}
+		if (current.labelDependencySize() < 0) current.setLabelDependencies(label_deps);
 		init();
 	}
 

diff --git a/src/java/main/edu/cmu/ml/proppr/graph/LearningGraph.java b/src/java/main/edu/cmu/ml/proppr/graph/LearningGraph.java
@@ -67,7 +67,8 @@ public void setLabelDependencies(int i) {
 	public void serialize(StringBuilder serialized) {
 		serialized.append(nodeSize()) // nodes
 		.append(LearningGraphBuilder.TAB).append(edgeSize()) //edges
-		.append(LearningGraphBuilder.TAB).append(labelDependencySize()); // label dependencies
+		.append(LearningGraphBuilder.TAB).append(labelDependencySize()) // label dependencies
+		.append(LearningGraphBuilder.TAB);
 		for (int i = 0; i<getFeatureSet().size(); i++) {
 			if (i>0) serialized.append(LearningGraphBuilder.FEATURE_INDEX_DELIM);
 			serialized.append(featureLibrary.getSymbol(i+1));

diff --git a/src/java/main/edu/cmu/ml/proppr/learn/SRW.java b/src/java/main/edu/cmu/ml/proppr/learn/SRW.java
@@ -151,7 +151,9 @@ protected void load(ParamVector params, PosNegRWExample example) {
 			}
 			// end (c)
 
-			if (tu==0 && udeg>0) { throw new IllegalStateException("tu=0 at u="+uid+"; example "+ex.toString()); }
+//			if (tu==0 && udeg>0) { 
+//				throw new IllegalStateException("tu=0 at u="+uid+"; example "+ex.toString()); 
+//			}
 
 			// begin (d): for each neighbor v of u,
 			ex.dM_lo[uid] = new int[udeg];
@@ -170,7 +172,11 @@ protected void load(ParamVector params, PosNegRWExample example) {
 				for (int lid = ex.getGraph().edge_labels_lo[eid], dfuvi = 0; lid < ex.getGraph().edge_labels_hi[eid]; lid++, dfuvi++) {
 					int fid = ex.getGraph().label_feature_id[lid];
 					dM_features.add(fid);
-					double dMuvi = scale * (tu * dfu[xvi][dfuvi] - c.weightingScheme.edgeWeight(suv[xvi]) * dtu.get(fid));
+					double dMuvi = (tu * dfu[xvi][dfuvi] - c.weightingScheme.edgeWeight(suv[xvi]) * dtu.get(fid));
+					if (tu == 0) { 
+						if (dMuvi != 0)
+							throw new IllegalStateException("tu=0 at u="+uid+"; example "+ex.toString()); 
+					} else dMuvi *= scale; 
 					dM_values.add(dMuvi);
 					seenFeatures[dfuvi] = fid; //save this feature so we can skip it later
 				}
@@ -187,7 +193,10 @@ protected void load(ParamVector params, PosNegRWExample example) {
 				}
 				ex.dM_hi[uid][xvi] = dM_features.size();
 				// also create the scalar M_{uv} = f(s_{uv}) / t_u
-				ex.M[uid][xvi] = (c.weightingScheme.edgeWeight(suv[xvi]) / tu);
+				ex.M[uid][xvi] = c.weightingScheme.edgeWeight(suv[xvi]);
+				if (tu==0) {
+					if (ex.M[uid][xvi] != 0) throw new IllegalStateException("tu=0 at u="+uid+"; example "+ex.toString());
+				} else ex.M[uid][xvi] /= tu;
 			}
 		}
 		// discard extendible version in favor of primitive array

diff --git a/src/java/main/edu/cmu/ml/proppr/prove/wam/WamInterpreter.java b/src/java/main/edu/cmu/ml/proppr/prove/wam/WamInterpreter.java
@@ -140,7 +140,7 @@ private void doFeatureFindallDFS(State state, int depth) throws LogicProgramExce
 		if (depth>=MAXDEPTH) throw new IllegalStateException("depth bound "+MAXDEPTH+" exceeded in feature computation");
 		if (!state.isCompleted()) {
 			// wwcmod: replace false with true so that you compute the features as well
-			for (Outlink o : wamOutlinks(state,false)) {
+			for (Outlink o : wamOutlinks(state,true)) {
 				doFeatureFindallDFS(o.child, depth+1);
 			}
 		}
@@ -353,7 +353,8 @@ public void freport() throws LogicProgramException {
 				reportFeature(g,1.0);
 			}
 		}
-		if (this.featureStack.isEmpty()) reportFeature(new Goal("_no_features_"),1.0);
+		if (this.featureStack.isEmpty()) 
+			reportFeature(new Goal("_no_features_"),1.0);
 		this.state.incrementProgramCounter();
 	}
 	public void ffindall(int address) throws LogicProgramException {

diff --git a/src/java/main/edu/cmu/ml/proppr/prove/wam/plugins/FactsPlugin.java b/src/java/main/edu/cmu/ml/proppr/prove/wam/plugins/FactsPlugin.java
@@ -177,6 +177,10 @@ public boolean equals(Object o) {
 			JumpArgKey f = (JumpArgKey) o;
 			return this.jump.equals(f.jump) && this.arg.equals(f.arg);
 		}
+		@Override
+		public String toString() {
+			return jump+":"+arg;
+		}
 	}
 
 	public static class JumpArgArgKey extends JumpArgKey {