-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Baseline experiments for Beetle dataset.
- Loading branch information
Showing
364 changed files
with
25,234 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
170 changes: 170 additions & 0 deletions
170
semeval2013/semeval2013.task7/src/main/java/semeval2013/task7/BeetleBaselines.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,170 @@ | ||
package semeval2013.task7; | ||
|
||
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription; | ||
|
||
import org.apache.uima.analysis_engine.AnalysisEngineDescription; | ||
import org.apache.uima.fit.factory.ExternalResourceFactory; | ||
import org.apache.uima.resource.ExternalResourceDescription; | ||
import org.apache.uima.resource.ResourceInitializationException; | ||
|
||
import semeval2013.task7.Common.TaskFormat; | ||
import semeval2013.task7.report.SimpleOverviewReport; | ||
import semeval2013.task7.task.AlwaysCorrectBaselineTask; | ||
import semeval2013.task7.task.RandomBaselineTask; | ||
import semeval2013.task7.task.SimpleUnsupervisedTask; | ||
import de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter; | ||
import de.tudarmstadt.ukp.dkpro.lab.Lab; | ||
import de.tudarmstadt.ukp.dkpro.lab.task.Dimension; | ||
import de.tudarmstadt.ukp.dkpro.lab.task.ParameterSpace; | ||
import de.tudarmstadt.ukp.dkpro.lab.task.impl.BatchTask; | ||
import de.tudarmstadt.ukp.dkpro.lab.task.impl.BatchTask.ExecutionPolicy; | ||
import de.tudarmstadt.ukp.dkpro.lab.task.impl.TaskBase; | ||
import dkpro.similarity.algorithms.lexical.uima.string.GreedyStringTilingMeasureResource; | ||
|
||
public class BeetleBaselines | ||
{ | ||
|
||
public static final String LANGUAGE = "en"; | ||
|
||
// for unsupervised | ||
// public static final Float[] thresholds = new Float[] {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f}; | ||
public static final Float[] thresholds = new Float[] {0.5f}; | ||
|
||
public static final Boolean[] toLowerCase = new Boolean[] { true }; | ||
|
||
public static final String stopwordList = "classpath:/stopwords/english_stopwords.txt"; | ||
|
||
public static final String SPELL_VOCABULARY = "classpath:/vocabulary/en_US_dict.txt"; | ||
|
||
public static void main(String[] args) | ||
throws Exception | ||
{ | ||
|
||
runRandomBaseline( | ||
TaskFormat.twoWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/twoWay/"}) | ||
) | ||
); | ||
runRandomBaseline( | ||
TaskFormat.threeWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/threeWay/"}) | ||
) | ||
); | ||
runRandomBaseline( | ||
TaskFormat.fiveWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/fiveWay/"}) | ||
) | ||
); | ||
|
||
runAlwaysCorrectBaseline( | ||
TaskFormat.twoWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/twoWay/"}) | ||
) | ||
); | ||
runAlwaysCorrectBaseline( | ||
TaskFormat.threeWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/threeWay/"}) | ||
) | ||
); | ||
runAlwaysCorrectBaseline( | ||
TaskFormat.fiveWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/fiveWay/"}) | ||
) | ||
); | ||
|
||
runSimpleUnsupervised( | ||
TaskFormat.twoWay, | ||
new ParameterSpace( | ||
Dimension.create("dataset", new String[]{"/beetle/twoWay/"}), | ||
Dimension.create("threshold", thresholds) | ||
), | ||
ExternalResourceFactory.createExternalResourceDescription( | ||
GreedyStringTilingMeasureResource.class, | ||
GreedyStringTilingMeasureResource.PARAM_MIN_MATCH_LENGTH, "2" | ||
) | ||
); | ||
} | ||
|
||
private static void runRandomBaseline(TaskFormat taskFormat, ParameterSpace pSpace) | ||
throws Exception | ||
{ | ||
TaskBase unsupervisedTask = new RandomBaselineTask(taskFormat); | ||
|
||
// Define the overall task scenario | ||
BatchTask batch = new BatchTask(); | ||
batch.setType("SemEval2013Task7RandomBaseline"); | ||
batch.setParameterSpace(pSpace); | ||
batch.addTask(unsupervisedTask); | ||
batch.setExecutionPolicy(ExecutionPolicy.RUN_AGAIN); | ||
batch.addReport(SimpleOverviewReport.class); | ||
|
||
// Run | ||
Lab.getInstance().run(batch); | ||
// Lab.newInstance("/lab/debug_context.xml").run(batch); | ||
} | ||
|
||
private static void runAlwaysCorrectBaseline(TaskFormat taskFormat, ParameterSpace pSpace) | ||
throws Exception | ||
{ | ||
TaskBase unsupervisedTask = new AlwaysCorrectBaselineTask(taskFormat); | ||
|
||
// Define the overall task scenario | ||
BatchTask batch = new BatchTask(); | ||
batch.setType("SemEval2013Task7AlwaysCorrectBaseline"); | ||
batch.setParameterSpace(pSpace); | ||
batch.addTask(unsupervisedTask); | ||
batch.setExecutionPolicy(ExecutionPolicy.RUN_AGAIN); | ||
batch.addReport(SimpleOverviewReport.class); | ||
|
||
// Run | ||
Lab.getInstance().run(batch); | ||
// Lab.newInstance("/lab/debug_context.xml").run(batch); | ||
} | ||
|
||
private static void runSimpleUnsupervised(TaskFormat taskFormat, ParameterSpace pSpace, ExternalResourceDescription externalResource) | ||
throws Exception | ||
{ | ||
TaskBase unsupervisedTask = new SimpleUnsupervisedTask(taskFormat, externalResource); | ||
|
||
// Define the overall task scenario | ||
BatchTask batch = new BatchTask(); | ||
batch.setType("SemEval2013Task7SimpleSupervised"); | ||
batch.setParameterSpace(pSpace); | ||
batch.addTask(unsupervisedTask); | ||
batch.setExecutionPolicy(ExecutionPolicy.RUN_AGAIN); | ||
batch.addReport(SimpleOverviewReport.class); | ||
|
||
// Run | ||
Lab.getInstance().run(batch); | ||
// Lab.newInstance("/lab/debug_context.xml").run(batch); | ||
} | ||
|
||
public static AnalysisEngineDescription getPreprocessing() | ||
throws ResourceInitializationException | ||
{ | ||
|
||
return createEngineDescription( | ||
createEngineDescription( | ||
BreakIteratorSegmenter.class | ||
) | ||
// ), | ||
// createEngineDescription( | ||
// SpellChecker.class, | ||
// SpellChecker.PARAM_MODEL_LOCATION, SPELL_VOCABULARY | ||
// ), | ||
// createEngineDescription( | ||
// OpenNlpPosTagger.class | ||
// ), | ||
// createEngineDescription( | ||
// StanfordParser.class, | ||
// StanfordParser.PARAM_VARIANT,"pcfg" | ||
// ) | ||
); | ||
} | ||
} |
43 changes: 43 additions & 0 deletions
43
semeval2013/semeval2013.task7/src/main/java/semeval2013/task7/Common.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package semeval2013.task7; | ||
|
||
|
||
/**
 * Holder for the enumerations shared across the task 7 code: data sets, task formats and
 * the label inventories.
 */
public class Common
{

    /** The available data sets. */
    public enum Dataset { beetle, sciEntsBank }

    /** The task formats an experiment can be run in. */
    public enum TaskFormat { fiveWay, threeWay, twoWay, partialEntailment }

    /** Five-label inventory (presumably used with {@link TaskFormat#fiveWay}). */
    public enum Labels5way {
        correct, partially_correct_incomplete, contradictory, irrelevant, non_domain
    }

    /** Three-label inventory (presumably used with {@link TaskFormat#threeWay}). */
    public enum Labels3way { correct, contradictory, incorrect }

    /** Two-label inventory (presumably used with {@link TaskFormat#twoWay}). */
    public enum Labels2way { correct, incorrect }

    /** Outcomes of the partial entailment setting. */
    public enum PartialEntailmentOutcomes { Expressed, Unaddressed }
}
51 changes: 51 additions & 0 deletions
51
semeval2013/semeval2013.task7/src/main/java/semeval2013/task7/SimpleBaseline.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package semeval2013.task7; | ||
|
||
import org.apache.uima.collection.CollectionReaderDescription; | ||
import org.apache.uima.fit.factory.CollectionReaderFactory; | ||
import org.apache.uima.fit.pipeline.JCasIterable; | ||
import org.apache.uima.fit.util.JCasUtil; | ||
import org.apache.uima.jcas.JCas; | ||
|
||
import semeval2013.task7.Common.Labels2way; | ||
import semeval2013.task7.io.SemEval2013Task7Reader; | ||
import semeval2013.task7.type.StudentAnswer; | ||
|
||
public class SimpleBaseline | ||
{ | ||
|
||
public static void main(String[] args) | ||
throws Exception | ||
{ | ||
String path = "src/main/resources/train/beetle/twoWay/"; | ||
|
||
CollectionReaderDescription reader = CollectionReaderFactory.createReaderDescription( | ||
SemEval2013Task7Reader.class, | ||
SemEval2013Task7Reader.PARAM_SOURCE_LOCATION, path, | ||
SemEval2013Task7Reader.PARAM_PATTERNS, new String[] { | ||
SemEval2013Task7Reader.INCLUDE_PREFIX + "*.xml" | ||
} | ||
); | ||
|
||
int correct = 0; | ||
int wrong = 0; | ||
|
||
for (JCas jcas : new JCasIterable(reader)) { | ||
for (StudentAnswer studentAnswer : JCasUtil.select(jcas, StudentAnswer.class)) { | ||
|
||
String gold = studentAnswer.getLabel(); | ||
String computed = Labels2way.correct.name(); | ||
|
||
System.out.println(gold + " - " + computed); | ||
|
||
if (gold.equals(computed)) { | ||
correct++; | ||
} | ||
else { | ||
wrong++; | ||
} | ||
} | ||
} | ||
|
||
System.out.println("Accuracy: " + (double) correct / (correct + wrong)); | ||
} | ||
} |
32 changes: 32 additions & 0 deletions
32
...013/semeval2013.task7/src/main/java/semeval2013/task7/baseline/AlwaysCorrectBaseline.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package semeval2013.task7.baseline; | ||
|
||
import org.apache.uima.analysis_engine.AnalysisEngineProcessException; | ||
import org.apache.uima.fit.component.JCasAnnotator_ImplBase; | ||
import org.apache.uima.fit.descriptor.ConfigurationParameter; | ||
import org.apache.uima.fit.util.JCasUtil; | ||
import org.apache.uima.jcas.JCas; | ||
|
||
import semeval2013.task7.Common.Labels2way; | ||
import semeval2013.task7.Common.TaskFormat; | ||
import semeval2013.task7.type.GoldAssessmentResult; | ||
import semeval2013.task7.type.StudentAnswer; | ||
|
||
public class AlwaysCorrectBaseline | ||
extends JCasAnnotator_ImplBase | ||
{ | ||
|
||
public static final String PARAM_TASK_FORMAT = "TaskFormat"; | ||
@ConfigurationParameter(name = PARAM_TASK_FORMAT, mandatory = true) | ||
protected TaskFormat taskFormat; | ||
|
||
@Override | ||
public void process(JCas jcas) | ||
throws AnalysisEngineProcessException | ||
{ | ||
for (StudentAnswer studentAnswer : JCasUtil.select(jcas, StudentAnswer.class)) { | ||
GoldAssessmentResult result = new GoldAssessmentResult(jcas, studentAnswer.getBegin(), studentAnswer.getEnd()); | ||
result.setResult(Labels2way.correct.name()); | ||
result.addToIndexes(); | ||
} | ||
} | ||
} |
Oops, something went wrong.