fixed pom definitions, bumped dependency version for the dhp-schema module, removed unnecessary dependencies

This commit is contained in:
Claudio Atzori 2024-07-17 11:58:48 +02:00
parent db9c54c944
commit 83327239de
15 changed files with 91 additions and 210 deletions

View File

@ -25,10 +25,10 @@ public class TreeNodeStats implements Serializable {
}
public int fieldsCount() {
if(ignoreUndefined)
if (ignoreUndefined)
return this.results.size();
else
return this.results.size() - undefinedCount(); //do not count undefined
return this.results.size() - undefinedCount(); // do not count undefined
}
public int undefinedCount() {
@ -87,20 +87,17 @@ public class TreeNodeStats implements Serializable {
if (fs.getResult() == -1) {
if (fs.isCountIfUndefined()) {
min = 0.0;
}
else {
} else {
min = -1;
}
}
else {
} else {
min = fs.getResult();
}
}
}
if (ignoreUndefined) {
return min==-1.0? 0.0 : min;
}
else {
return min == -1.0 ? 0.0 : min;
} else {
return min;
}
}
@ -111,7 +108,7 @@ public class TreeNodeStats implements Serializable {
if (fieldStats.getResult() >= fieldStats.getThreshold())
return 1.0;
}
if (!ignoreUndefined && undefinedCount()>0){
if (!ignoreUndefined && undefinedCount() > 0) {
return -1.0;
} else {
return 0.0;
@ -124,7 +121,7 @@ public class TreeNodeStats implements Serializable {
if (fieldStats.getResult() == -1) {
if (fieldStats.isCountIfUndefined())
return ignoreUndefined? 0.0 : -1.0;
return ignoreUndefined ? 0.0 : -1.0;
} else {
if (fieldStats.getResult() < fieldStats.getThreshold())
return 0.0;

View File

@ -43,9 +43,9 @@ public class TreeProcessor {
TreeNodeStats stats = currentNode.evaluate(doc1, doc2, config);
treeStats.addNodeStats(nextNodeName, stats);
double finalScore = stats.getFinalScore(currentNode.getAggregation());
if(finalScore == -1.0)
if (finalScore == -1.0)
nextNodeName = currentNode.getUndefined();
else if (finalScore >= currentNode.getThreshold()) {
nextNodeName = currentNode.getPositive();
@ -53,7 +53,6 @@ public class TreeProcessor {
nextNodeName = currentNode.getNegative();
}
} while (MatchType.parse(nextNodeName) == MatchType.UNDEFINED);
treeStats.setResult(MatchType.parse(nextNodeName));

View File

@ -3,12 +3,12 @@ package eu.dnetlib.pace.clustering;
import java.util.Map;
import com.mongodb.connection.Cluster;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.mongodb.connection.Cluster;
import eu.dnetlib.pace.AbstractPaceTest;
import eu.dnetlib.pace.common.AbstractPaceFunctions;

View File

@ -61,7 +61,7 @@ public class ComparatorTest extends AbstractPaceTest {
assertEquals(0.0, codeMatch.distance("testing1 key::1", "testing2 key::2", conf));
// both names with codes (1 same, 1 different)
assertEquals(0.5,codeMatch.distance("key::1 key::2 testing1", "key::1 testing", conf));
assertEquals(0.5, codeMatch.distance("key::1 key::2 testing1", "key::1 testing", conf));
}

View File

@ -51,48 +51,5 @@
<artifactId>hadoop-distcp</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-api</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon-dom</artifactId>
</exclusion>
<exclusion>
<groupId>jgrapht</groupId>
<artifactId>jgrapht</artifactId>
</exclusion>
<exclusion>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.*</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>apache</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
</project>

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager;
import java.io.Serializable;
import java.io.StringReader;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
@ -22,7 +21,6 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterables;
import com.google.common.collect.Sets;
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -65,7 +63,7 @@ public class ISClient implements Serializable {
.map(t -> buildDirectory(basePath, t))
.collect(Collectors.toList()))
.orElseThrow(() -> new IllegalStateException("empty set list"));
} catch (ActionManagerException | ISLookUpException e) {
} catch (ISLookUpException e) {
throw new IllegalStateException("unable to query ActionSets info from the IS");
}
}
@ -89,31 +87,18 @@ public class ISClient implements Serializable {
return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight());
}
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException {
private String getBasePathHDFS(ISLookUpService isLookup) throws ISLookUpException {
return queryServiceProperty(isLookup, "basePath");
}
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
throws ActionManagerException {
throws ISLookUpException {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName
+ "']/@value/string()";
log.debug("quering for service property: {}", q);
try {
final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value);
} catch (ISLookUpException e) {
String msg = "Error accessing service profile, using query: " + q;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (NoSuchElementException e) {
String msg = "missing service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (IllegalArgumentException e) {
String msg = "found more than one service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
}
final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value);
}
}

View File

@ -106,7 +106,6 @@ public class CreateActionSetFromWebEntries implements Serializable {
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
return ret
.iterator();
}, Encoders.bean(Relation.class))

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.researchfi;
import java.util.ArrayList;
@ -32,7 +33,7 @@ public class ResearchFiCollectorPlugin implements CollectorPlugin {
@Override
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report)
throws CollectorException {
throws CollectorException {
final String authUrl = api.getParams().get("auth_url");
final String clientId = api.getParams().get("auth_client_id");
@ -45,7 +46,8 @@ public class ResearchFiCollectorPlugin implements CollectorPlugin {
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), false);
}
private String authenticate(final String authUrl, final String clientId, final String clientSecret) throws CollectorException {
private String authenticate(final String authUrl, final String clientId, final String clientSecret)
throws CollectorException {
try (final CloseableHttpClient client = HttpClients.createDefault()) {
final HttpPost req = new HttpPost(authUrl);
final List<NameValuePair> params = new ArrayList<>();
@ -59,7 +61,9 @@ public class ResearchFiCollectorPlugin implements CollectorPlugin {
final String content = IOUtils.toString(response.getEntity().getContent());
final JSONObject obj = new JSONObject(content);
final String token = obj.getString("access_token");
if (StringUtils.isNotBlank(token)) { return token; }
if (StringUtils.isNotBlank(token)) {
return token;
}
}
} catch (final Throwable e) {
log.warn("Error obtaining access token", e);

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.researchfi;
import java.util.Iterator;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.researchfi;
import java.util.HashSet;
@ -23,7 +24,9 @@ public class ResearchFiCollectorPluginTest {
final ApiDescriptor api = new ApiDescriptor();
api.setBaseUrl("https://research.fi/api/rest/v1/funding-decisions?FunderName=AKA&FundingStartYearFrom=2022");
api.setProtocol("research_fi");
api.getParams().put("auth_url", "https://researchfi-auth.2.rahtiapp.fi/realms/publicapi/protocol/openid-connect/token");
api
.getParams()
.put("auth_url", "https://researchfi-auth.2.rahtiapp.fi/realms/publicapi/protocol/openid-connect/token");
api.getParams().put("auth_client_id", "");
api.getParams().put("auth_client_secret", "");

View File

@ -3,13 +3,13 @@ package eu.dnetlib.dhp.oa.dedup.jpath;
import java.io.IOException;
import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest;
import org.apache.commons.io.IOUtils;
import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;
import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest;
import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.SparkModel;
@ -33,13 +33,14 @@ class JsonPathTest {
}
@Test
void jsonToModelTest() throws IOException{
void jsonToModelTest() throws IOException {
DedupConfig conf = DedupConfig
.load(IOUtils
.toString(
SparkOpenorgsDedupTest.class
.getResourceAsStream(
"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
.load(
IOUtils
.toString(
SparkOpenorgsDedupTest.class
.getResourceAsStream(
"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json"));

View File

@ -37,6 +37,7 @@ import eu.dnetlib.dhp.schema.solr.Measure;
import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
import eu.dnetlib.dhp.schema.solr.Organization;
import eu.dnetlib.dhp.schema.solr.Pid;
import eu.dnetlib.dhp.schema.solr.Project;
import eu.dnetlib.dhp.schema.solr.Result;
import eu.dnetlib.dhp.schema.solr.Subject;

View File

@ -51,49 +51,6 @@
<artifactId>hadoop-distcp</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-api</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon-dom</artifactId>
</exclusion>
<exclusion>
<groupId>jgrapht</groupId>
<artifactId>jgrapht</artifactId>
</exclusion>
<exclusion>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.*</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>apache</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>

27
pom.xml
View File

@ -440,29 +440,6 @@
<scope>provided</scope>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId>
<version>${dnet-actionmanager-common.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-api</artifactId>
<version>${dnet-actionmanager-api.version}</version>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>cnr-misc-utils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>cnr-rmi-api</artifactId>
@ -960,7 +937,7 @@
<commons.logging.version>1.1.3</commons.logging.version>
<commons-validator.version>1.7</commons-validator.version>
<dateparser.version>1.0.7</dateparser.version>
<dhp-schemas.version>[6.1.4-SNAPSHOT]</dhp-schemas.version>
<dhp-schemas.version>[7.0.0]</dhp-schemas.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.guava.version>11.0.2</dhp.guava.version>
@ -969,8 +946,6 @@
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
<dhp.site.skip>true</dhp.site.skip>
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
<google.gson.version>2.2.2</google.gson.version>
<log4j.version>1.2.17</log4j.version>