Skip to content

Commit

Permalink
Baseline experiments for Beetle dataset.
Browse files Browse the repository at this point in the history
  • Loading branch information
zesch committed Dec 18, 2013
1 parent cf23967 commit b818af8
Show file tree
Hide file tree
Showing 364 changed files with 25,234 additions and 0 deletions.
58 changes: 58 additions & 0 deletions semeval2013/semeval2013.task7/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,62 @@
<version>0.0.1-SNAPSHOT</version>
</parent>
<artifactId>semeval2013.task7</artifactId>
<dependencies>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.lexsemresource</groupId>
<artifactId>
de.tudarmstadt.ukp.dkpro.lexsemresource.wordnet-asl
</artifactId>
<version>0.8.0</version>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>
de.tudarmstadt.ukp.dkpro.core.io.bincas-asl
</artifactId>
</dependency>
<dependency>
<groupId>dkpro.similarity</groupId>
<artifactId>dkpro.similarity.algorithms.vsm-asl</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.lab</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.lab.core</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>
de.tudarmstadt.ukp.dkpro.core.opennlp-asl
</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-gpl
</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.jazzy-asl</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>
de.tudarmstadt.ukp.dkpro.core.stanfordnlp-model-parser-en-pcfg
</artifactId>
<version>20120709.1</version>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.core</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.core.tokit-asl</artifactId>
</dependency>
<dependency>
<groupId>dkpro.similarity</groupId>
<artifactId>dkpro.similarity.algorithms.lexical-asl</artifactId>
</dependency>
<dependency>
<groupId>de.tudarmstadt.ukp.dkpro.tc</groupId>
<artifactId>de.tudarmstadt.ukp.dkpro.tc.weka-gpl</artifactId>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
package semeval2013.task7;

import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;

import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.fit.factory.ExternalResourceFactory;
import org.apache.uima.resource.ExternalResourceDescription;
import org.apache.uima.resource.ResourceInitializationException;

import semeval2013.task7.Common.TaskFormat;
import semeval2013.task7.report.SimpleOverviewReport;
import semeval2013.task7.task.AlwaysCorrectBaselineTask;
import semeval2013.task7.task.RandomBaselineTask;
import semeval2013.task7.task.SimpleUnsupervisedTask;
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter;
import de.tudarmstadt.ukp.dkpro.lab.Lab;
import de.tudarmstadt.ukp.dkpro.lab.task.Dimension;
import de.tudarmstadt.ukp.dkpro.lab.task.ParameterSpace;
import de.tudarmstadt.ukp.dkpro.lab.task.impl.BatchTask;
import de.tudarmstadt.ukp.dkpro.lab.task.impl.BatchTask.ExecutionPolicy;
import de.tudarmstadt.ukp.dkpro.lab.task.impl.TaskBase;
import dkpro.similarity.algorithms.lexical.uima.string.GreedyStringTilingMeasureResource;

/**
 * Entry point that runs the baseline experiments on the Beetle dataset.
 *
 * <p>{@link #main(String[])} runs, in this order: a {@link RandomBaselineTask} and an
 * {@link AlwaysCorrectBaselineTask} for the two-, three- and five-way task formats, followed by
 * a {@link SimpleUnsupervisedTask} for the two-way format. Every experiment is wrapped in a
 * {@link BatchTask} and executed through {@link Lab}.
 */
public class BeetleBaselines
{

    public static final String LANGUAGE = "en";

    // Thresholds explored by the unsupervised setup. Only 0.5 is currently enabled; a full sweep
    // would be {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f}.
    public static final Float[] thresholds = new Float[] {0.5f};

    public static final Boolean[] toLowerCase = new Boolean[] { true };

    public static final String stopwordList = "classpath:/stopwords/english_stopwords.txt";

    public static final String SPELL_VOCABULARY = "classpath:/vocabulary/en_US_dict.txt";

    /**
     * Runs all baseline experiments sequentially.
     *
     * @param args command line arguments (not used)
     * @throws Exception if building or running any of the experiments fails
     */
    public static void main(String[] args)
        throws Exception
    {
        // Task formats for which the trivial baselines are computed, in execution order.
        TaskFormat[] baselineFormats = {
                TaskFormat.twoWay, TaskFormat.threeWay, TaskFormat.fiveWay
        };

        for (TaskFormat format : baselineFormats) {
            runRandomBaseline(format, datasetSpace(format));
        }

        for (TaskFormat format : baselineFormats) {
            runAlwaysCorrectBaseline(format, datasetSpace(format));
        }

        runSimpleUnsupervised(
                TaskFormat.twoWay,
                new ParameterSpace(
                        Dimension.create("dataset", new String[]{"/beetle/twoWay/"}),
                        Dimension.create("threshold", thresholds)
                ),
                ExternalResourceFactory.createExternalResourceDescription(
                        GreedyStringTilingMeasureResource.class,
                        GreedyStringTilingMeasureResource.PARAM_MIN_MATCH_LENGTH, "2"
                )
        );
    }

    /**
     * Builds a parameter space with a single "dataset" dimension that holds the Beetle dataset
     * location for the given task format, e.g. {@code /beetle/twoWay/}.
     *
     * @param taskFormat the task format whose constant name is used as the folder name
     * @return a new parameter space containing only the dataset dimension
     */
    private static ParameterSpace datasetSpace(TaskFormat taskFormat)
    {
        String location = "/beetle/" + taskFormat.name() + "/";
        return new ParameterSpace(
                Dimension.create("dataset", new String[]{location})
        );
    }

    /**
     * Wraps a single task in a {@link BatchTask} and runs it with the default {@link Lab}
     * instance. The batch uses {@link ExecutionPolicy#RUN_AGAIN} and attaches a
     * {@link SimpleOverviewReport}.
     *
     * @param batchType the type identifier set on the batch
     * @param task the task to add to the batch
     * @param pSpace the parameter space the batch runs over
     * @throws Exception if running the batch fails
     */
    private static void runBatch(String batchType, TaskBase task, ParameterSpace pSpace)
        throws Exception
    {
        // Define the overall task scenario
        BatchTask batch = new BatchTask();
        batch.setType(batchType);
        batch.setParameterSpace(pSpace);
        batch.addTask(task);
        batch.setExecutionPolicy(ExecutionPolicy.RUN_AGAIN);
        batch.addReport(SimpleOverviewReport.class);

        // Run
        Lab.getInstance().run(batch);
    }

    /**
     * Runs a {@link RandomBaselineTask} for the given task format.
     *
     * @param taskFormat the task format passed to the task
     * @param pSpace the parameter space to run over
     * @throws Exception if running the batch fails
     */
    private static void runRandomBaseline(TaskFormat taskFormat, ParameterSpace pSpace)
        throws Exception
    {
        TaskBase randomTask = new RandomBaselineTask(taskFormat);
        runBatch("SemEval2013Task7RandomBaseline", randomTask, pSpace);
    }

    /**
     * Runs an {@link AlwaysCorrectBaselineTask} for the given task format.
     *
     * @param taskFormat the task format passed to the task
     * @param pSpace the parameter space to run over
     * @throws Exception if running the batch fails
     */
    private static void runAlwaysCorrectBaseline(TaskFormat taskFormat, ParameterSpace pSpace)
        throws Exception
    {
        TaskBase alwaysCorrectTask = new AlwaysCorrectBaselineTask(taskFormat);
        runBatch("SemEval2013Task7AlwaysCorrectBaseline", alwaysCorrectTask, pSpace);
    }

    /**
     * Runs a {@link SimpleUnsupervisedTask} for the given task format.
     *
     * @param taskFormat the task format passed to the task
     * @param pSpace the parameter space to run over
     * @param externalResource the external resource description handed to the task
     * @throws Exception if running the batch fails
     */
    private static void runSimpleUnsupervised(TaskFormat taskFormat, ParameterSpace pSpace, ExternalResourceDescription externalResource)
        throws Exception
    {
        TaskBase unsupervisedTask = new SimpleUnsupervisedTask(taskFormat, externalResource);
        // NOTE(review): the type says "Supervised" although the task is the unsupervised one.
        // Kept unchanged because existing output may be keyed on this identifier - confirm
        // before renaming.
        runBatch("SemEval2013Task7SimpleSupervised", unsupervisedTask, pSpace);
    }

    /**
     * Creates the preprocessing engine description, which currently consists of a
     * {@link BreakIteratorSegmenter} only.
     *
     * @return the description built by wrapping the segmenter description
     * @throws ResourceInitializationException if a description cannot be created
     */
    public static AnalysisEngineDescription getPreprocessing()
        throws ResourceInitializationException
    {
        // Spell checking (with SPELL_VOCABULARY), POS tagging and parsing were present as
        // disabled components in earlier revisions; only segmentation is active.
        return createEngineDescription(
                createEngineDescription(
                        BreakIteratorSegmenter.class
                )
        );
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package semeval2013.task7;


/**
 * Shared enumerations used across the SemEval 2013 Task 7 code: dataset identifiers, task
 * formats, and the label inventories belonging to each format.
 *
 * <p>Constant names are referenced by name elsewhere (e.g. via {@code name()}), so neither the
 * spelling nor the declaration order should be changed.
 */
public class Common
{

    /** Identifiers of the available datasets. */
    public enum Dataset { beetle, sciEntsBank }

    /** The task variants. */
    public enum TaskFormat { fiveWay, threeWay, twoWay, partialEntailment }

    /** Label inventory of the five-way format. */
    public enum Labels5way {
        correct, partially_correct_incomplete, contradictory, irrelevant, non_domain
    }

    /** Label inventory of the three-way format. */
    public enum Labels3way { correct, contradictory, incorrect }

    /** Label inventory of the two-way format. */
    public enum Labels2way { correct, incorrect }

    /** Outcome inventory of the partial entailment format. */
    public enum PartialEntailmentOutcomes { Expressed, Unaddressed }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package semeval2013.task7;

import org.apache.uima.collection.CollectionReaderDescription;
import org.apache.uima.fit.factory.CollectionReaderFactory;
import org.apache.uima.fit.pipeline.JCasIterable;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import semeval2013.task7.Common.Labels2way;
import semeval2013.task7.io.SemEval2013Task7Reader;
import semeval2013.task7.type.StudentAnswer;

/**
 * Stand-alone baseline that reads the Beetle two-way training data, predicts
 * {@code Labels2way.correct} for every {@link StudentAnswer}, and prints the resulting accuracy
 * against the labels stored on the answers.
 */
public class SimpleBaseline
{

    /**
     * Reads all XML files below the training folder, prints one "gold - computed" line per
     * student answer and finally the accuracy of the constant prediction.
     *
     * @param args command line arguments (not used)
     * @throws Exception if the reader cannot be created or reading the data fails
     */
    public static void main(String[] args)
        throws Exception
    {
        String path = "src/main/resources/train/beetle/twoWay/";

        CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription(
                SemEval2013Task7Reader.class,
                SemEval2013Task7Reader.PARAM_SOURCE_LOCATION, path,
                SemEval2013Task7Reader.PARAM_PATTERNS, new String[] {
                    SemEval2013Task7Reader.INCLUDE_PREFIX + "*.xml"
                }
        );

        // The prediction is the same for every answer, so compute it once.
        final String computed = Labels2way.correct.name();

        int correct = 0;
        int wrong = 0;

        for (JCas jcas : new JCasIterable(reader)) {
            for (StudentAnswer studentAnswer : JCasUtil.select(jcas, StudentAnswer.class)) {

                String gold = studentAnswer.getLabel();

                System.out.println(gold + " - " + computed);

                // Compare from the non-null side so an answer without a label counts as wrong
                // instead of aborting the run with a NullPointerException.
                if (computed.equals(gold)) {
                    correct++;
                }
                else {
                    wrong++;
                }
            }
        }

        int total = correct + wrong;
        if (total == 0) {
            // Avoid printing "NaN" when nothing was read.
            System.out.println("Accuracy: n/a (no student answers found in " + path + ")");
        }
        else {
            System.out.println("Accuracy: " + (double) correct / total);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package semeval2013.task7.baseline;

import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import semeval2013.task7.Common.Labels2way;
import semeval2013.task7.Common.TaskFormat;
import semeval2013.task7.type.GoldAssessmentResult;
import semeval2013.task7.type.StudentAnswer;

/**
 * Annotator that attaches a {@link GoldAssessmentResult} carrying the label
 * {@code Labels2way.correct} to every {@link StudentAnswer} in the CAS.
 */
public class AlwaysCorrectBaseline
    extends JCasAnnotator_ImplBase
{

    public static final String PARAM_TASK_FORMAT = "TaskFormat";
    /** Mandatory configuration parameter; not read by {@link #process(JCas)}. */
    @ConfigurationParameter(name = PARAM_TASK_FORMAT, mandatory = true)
    protected TaskFormat taskFormat;

    /**
     * Adds one result annotation per student answer, spanning the same offsets as the answer.
     *
     * @param jcas the CAS whose student answers are labelled
     * @throws AnalysisEngineProcessException declared by the annotator contract
     */
    @Override
    public void process(JCas jcas)
        throws AnalysisEngineProcessException
    {
        final String label = Labels2way.correct.name();
        for (StudentAnswer answer : JCasUtil.select(jcas, StudentAnswer.class)) {
            markAnswer(jcas, answer, label);
        }
    }

    /** Creates, fills and indexes the result annotation covering the given answer. */
    private void markAnswer(JCas jcas, StudentAnswer answer, String label)
    {
        int begin = answer.getBegin();
        int end = answer.getEnd();
        GoldAssessmentResult assessment = new GoldAssessmentResult(jcas, begin, end);
        assessment.setResult(label);
        assessment.addToIndexes();
    }
}
Loading

0 comments on commit b818af8

Please sign in to comment.