Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions dkpro-core-tokit-asl/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,6 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
<dependency>
<groupId>commons-jxpath</groupId>
<artifactId>commons-jxpath</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>org.dkpro.core</groupId>
<artifactId>dkpro-core-api-segmentation-asl</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import java.util.Iterator;
import java.util.List;

import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
Expand Down Expand Up @@ -79,15 +78,15 @@ public static enum LemmaMode
@ConfigurationParameter(name = PARAM_ANNOTATION_TYPE, mandatory = true)
private String annotationType;

/**
* A constraint on the annotations that should be considered in form of a JXPath statement.
* Example: set {@link #PARAM_ANNOTATION_TYPE} to a {@code NamedEntity} type and set the
* {@link #PARAM_CONSTRAINT} to {@code ".[value = 'LOCATION']"} to merge only tokens that are
* part of a location named entity.
*/
public static final String PARAM_CONSTRAINT = "constraint";
@ConfigurationParameter(name = PARAM_CONSTRAINT, mandatory = false)
private String constraint;
// /**
// * A constraint on the annotations that should be considered in form of a JXPath statement.
// * Example: set {@link #PARAM_ANNOTATION_TYPE} to a {@code NamedEntity} type and set the
// * {@link #PARAM_CONSTRAINT} to {@code ".[value = 'LOCATION']"} to merge only tokens that are
// * part of a location named entity.
// */
// public static final String PARAM_CONSTRAINT = "constraint";
// @ConfigurationParameter(name = PARAM_CONSTRAINT, mandatory = false)
// private String constraint;

/**
* Configure what should happen to the lemma of the merged tokens. It is possible to JOIN the
Expand Down Expand Up @@ -174,13 +173,13 @@ public void process(JCas aJCas) throws AnalysisEngineProcessException
continue;
}

if (constraint != null) {
JXPathContext ctx = JXPathContext.newContext(cover);
boolean match = ctx.iterate(constraint).hasNext();
if (!match) {
continue;
}
}
// if (constraint != null) {
// JXPathContext ctx = JXPathContext.newContext(cover);
// boolean match = ctx.iterate(constraint).hasNext();
// if (!match) {
// continue;
// }
// }

Iterator<Token> i = covered.iterator();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,14 @@
package org.dkpro.core.tokit;

import static java.util.Arrays.asList;
import static java.util.stream.Collectors.toList;
import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
import static org.apache.uima.fit.util.JCasUtil.select;
import static org.apache.uima.fit.util.JCasUtil.toText;
import static org.junit.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.jxpath.ClassFunctions;
import org.apache.commons.jxpath.DynamicPropertyHandler;
import org.apache.commons.jxpath.ExpressionContext;
import org.apache.commons.jxpath.JXPathContext;
import org.apache.commons.jxpath.JXPathIntrospector;
import org.apache.uima.UIMAException;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.factory.JCasBuilder;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.CasUtil;
Expand Down Expand Up @@ -71,46 +57,48 @@ public void testSimpleMerge() throws Exception
filter.process(jcas);

assertEquals(asList("I", "love", "New York", "."),
pick(select(jcas, Token.class), "cas:text()"));
jcas.select(Token.class).map(Token::getCoveredText).collect(toList()));
}

/**
 * Merges the tokens covered by a named entity when the JXPath constraint
 * {@code .[value = 'LOCATION']} matches, and checks that "New" and "York" end up as one token.
 */
@Test
public void testWithConstraintMatch() throws Exception
{
    JCas jcas = initCas();

    AnalysisEngine merger = createEngine(TokenMerger.class,
            TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
            TokenMerger.PARAM_CONSTRAINT, ".[value = 'LOCATION']");
    merger.process(jcas);

    List<String> expected = asList("I", "love", "New York", ".");
    List<String> actual = toText(select(jcas, Token.class));
    assertEquals(expected, actual);
}

/**
 * Runs the merger with the JXPath constraint {@code .[value = 'PERSON']} and checks that the
 * tokens "New" and "York" stay separate.
 */
@Test
public void testWithConstraintNoMatch() throws Exception
{
    JCas jcas = initCas();

    AnalysisEngine merger = createEngine(TokenMerger.class,
            TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
            TokenMerger.PARAM_CONSTRAINT, ".[value = 'PERSON']");
    merger.process(jcas);

    List<String> expected = asList("I", "love", "New", "York", ".");
    List<String> actual = toText(select(jcas, Token.class));
    assertEquals(expected, actual);
}
// @Test
// public void testWithConstraintMatch() throws Exception
// {
// AnalysisEngine filter = createEngine(TokenMerger.class,
// TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
// TokenMerger.PARAM_CONSTRAINT, ".[value = 'LOCATION']");
//
// JCas jcas = initCas();
// filter.process(jcas);
//
// assertEquals(asList("I", "love", "New York", "."), toText(select(jcas, Token.class)));
// }
//
// @Test
// public void testWithConstraintNoMatch() throws Exception
// {
// AnalysisEngine filter = createEngine(TokenMerger.class,
// TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class,
// TokenMerger.PARAM_CONSTRAINT, ".[value = 'PERSON']");
//
// JCas jcas = initCas();
// filter.process(jcas);
//
// assertEquals(asList("I", "love", "New", "York", "."), toText(select(jcas, Token.class)));
// }

@Test
public void testSimpleMergeLemmaJoin() throws Exception
{
AnalysisEngine filter = createEngine(TokenMerger.class, TokenMerger.PARAM_ANNOTATION_TYPE,
NamedEntity.class, TokenMerger.PARAM_LEMMA_MODE, LemmaMode.JOIN);
AnalysisEngine filter = createEngine(
TokenMerger.class, //
TokenMerger.PARAM_ANNOTATION_TYPE, NamedEntity.class, //
TokenMerger.PARAM_LEMMA_MODE, LemmaMode.JOIN);

JCas jcas = initCas();
filter.process(jcas);

assertEquals(asList("I", "love", "new york", "."),
pick(select(jcas, Token.class), "./lemma/value"));
jcas.select(Token.class).map(t -> t.getLemma().getValue()).collect(toList()));
}

private JCas initCas() throws UIMAException
Expand Down Expand Up @@ -153,101 +141,6 @@ private Token setLemmaPos(Token aToken, Class<? extends POS> aPosType, String aP
return aToken;
}

// =============================================================================================
// == JXPath helper methods
// =============================================================================================

// The JXPath introspector registry is global, and the static pick(...) helper relies on this
// registration. A static initializer guarantees it has happened once the class is loaded,
// instead of only after an instance of the enclosing class has been constructed.
static {
    JXPathIntrospector.registerDynamicClass(FeatureStructure.class,
            FeatureStructureHandler.class);
}

/**
 * JXPath property handler that exposes the features of a UIMA {@link FeatureStructure} as
 * dynamic, read-only properties addressed by their short (base) names.
 */
public static class FeatureStructureHandler
    implements DynamicPropertyHandler
{
    /**
     * Lists the short names of all features of the given feature structure's type.
     *
     * @param aObject
     *            the object to inspect; it is cast to {@link FeatureStructure}.
     * @return the short feature names in the order in which the type lists its features.
     */
    @Override
    public String[] getPropertyNames(Object aObject)
    {
        FeatureStructure fs = (FeatureStructure) aObject;
        return fs.getType().getFeatures().stream()
                .map(Feature::getShortName)
                .toArray(String[]::new);
    }

    /**
     * Reads the value of the feature with the given base name.
     *
     * @param aObject
     *            the object to read from; it is cast to {@link FeatureStructure}.
     * @param aPropertyName
     *            the base name of the feature.
     * @return the feature value, read with the accessor matching the feature's range type
     *         (primitive values are boxed), or {@code null} if the type has no feature of
     *         that name.
     */
    @Override
    public Object getProperty(Object aObject, String aPropertyName)
    {
        FeatureStructure fs = (FeatureStructure) aObject;
        Feature f = fs.getType().getFeatureByBaseName(aPropertyName);
        if (f == null) {
            // Unknown feature name: report "no value" instead of failing with a
            // NullPointerException on the range lookup below.
            return null;
        }

        // Look the range type name up once; every branch below compares against it.
        String rangeName = f.getRange().getName();
        if (CAS.TYPE_NAME_BOOLEAN.equals(rangeName)) {
            return fs.getBooleanValue(f);
        }
        else if (CAS.TYPE_NAME_BYTE.equals(rangeName)) {
            return fs.getByteValue(f);
        }
        else if (CAS.TYPE_NAME_DOUBLE.equals(rangeName)) {
            return fs.getDoubleValue(f);
        }
        else if (CAS.TYPE_NAME_FLOAT.equals(rangeName)) {
            return fs.getFloatValue(f);
        }
        else if (CAS.TYPE_NAME_INTEGER.equals(rangeName)) {
            return fs.getIntValue(f);
        }
        else if (CAS.TYPE_NAME_LONG.equals(rangeName)) {
            return fs.getLongValue(f);
        }
        else if (CAS.TYPE_NAME_SHORT.equals(rangeName)) {
            return fs.getShortValue(f);
        }
        else if (CAS.TYPE_NAME_STRING.equals(rangeName)) {
            return fs.getStringValue(f);
        }
        else {
            // Any other range is read as a nested feature structure.
            return fs.getFeatureValue(f);
        }
    }

    /**
     * Writing properties is not supported by this handler.
     *
     * @throws UnsupportedOperationException
     *             always.
     */
    @Override
    public void setProperty(Object aObject, String aPropertyName, Object aValue)
    {
        throw new UnsupportedOperationException();
    }
}

/**
 * Evaluates a JXPath expression against each element of a collection and gathers every
 * selected node into one list.
 * <p>
 * Each element gets its own {@link JXPathContext}, with the functions of
 * {@link JXPathCasFunctions} made available under the {@code cas} namespace.
 *
 * @param aContext
 *            the objects to evaluate the expression against, one after the other.
 * @param aPath
 *            the JXPath expression.
 * @return all selected nodes, in iteration order of {@code aContext}.
 */
@SuppressWarnings("unchecked")
public static List<Object> pick(Collection<?> aContext, String aPath)
{
    List<Object> selected = new ArrayList<>();
    aContext.forEach(element -> {
        JXPathContext elementContext = JXPathContext.newContext(element);
        elementContext.setFunctions(new ClassFunctions(JXPathCasFunctions.class, "cas"));
        // selectNodes returns a raw List, hence the unchecked suppression on the method.
        selected.addAll(elementContext.selectNodes(aPath));
    });
    return selected;
}

/**
 * Extension functions for JXPath expressions evaluated on CAS objects.
 */
public static class JXPathCasFunctions
{
    /**
     * Returns the text of the current context node.
     *
     * @param aCtx
     *            the expression context supplying the current node.
     * @return the covered text if the node value is an {@link AnnotationFS}, otherwise the
     *         result of {@link String#valueOf(Object)} on the node value.
     */
    public static String text(ExpressionContext aCtx)
    {
        Object node = aCtx.getContextNodePointer().getValue();
        return node instanceof AnnotationFS
                ? ((AnnotationFS) node).getCoveredText()
                : String.valueOf(node);
    }
}

@Rule
public DkproTestContext testContext = new DkproTestContext();
}