fixed pom definitions, bumped dependency version for the dhp-schemas module, removed unnecessary dependencies

This commit is contained in:
Claudio Atzori 2024-07-17 11:58:48 +02:00
parent db9c54c944
commit 83327239de
15 changed files with 91 additions and 210 deletions

View File

@ -25,10 +25,10 @@ public class TreeNodeStats implements Serializable {
} }
public int fieldsCount() { public int fieldsCount() {
if(ignoreUndefined) if (ignoreUndefined)
return this.results.size(); return this.results.size();
else else
return this.results.size() - undefinedCount(); //do not count undefined return this.results.size() - undefinedCount(); // do not count undefined
} }
public int undefinedCount() { public int undefinedCount() {
@ -87,20 +87,17 @@ public class TreeNodeStats implements Serializable {
if (fs.getResult() == -1) { if (fs.getResult() == -1) {
if (fs.isCountIfUndefined()) { if (fs.isCountIfUndefined()) {
min = 0.0; min = 0.0;
} } else {
else {
min = -1; min = -1;
} }
} } else {
else {
min = fs.getResult(); min = fs.getResult();
} }
} }
} }
if (ignoreUndefined) { if (ignoreUndefined) {
return min==-1.0? 0.0 : min; return min == -1.0 ? 0.0 : min;
} } else {
else {
return min; return min;
} }
} }
@ -111,7 +108,7 @@ public class TreeNodeStats implements Serializable {
if (fieldStats.getResult() >= fieldStats.getThreshold()) if (fieldStats.getResult() >= fieldStats.getThreshold())
return 1.0; return 1.0;
} }
if (!ignoreUndefined && undefinedCount()>0){ if (!ignoreUndefined && undefinedCount() > 0) {
return -1.0; return -1.0;
} else { } else {
return 0.0; return 0.0;
@ -124,7 +121,7 @@ public class TreeNodeStats implements Serializable {
if (fieldStats.getResult() == -1) { if (fieldStats.getResult() == -1) {
if (fieldStats.isCountIfUndefined()) if (fieldStats.isCountIfUndefined())
return ignoreUndefined? 0.0 : -1.0; return ignoreUndefined ? 0.0 : -1.0;
} else { } else {
if (fieldStats.getResult() < fieldStats.getThreshold()) if (fieldStats.getResult() < fieldStats.getThreshold())
return 0.0; return 0.0;

View File

@ -45,7 +45,7 @@ public class TreeProcessor {
treeStats.addNodeStats(nextNodeName, stats); treeStats.addNodeStats(nextNodeName, stats);
double finalScore = stats.getFinalScore(currentNode.getAggregation()); double finalScore = stats.getFinalScore(currentNode.getAggregation());
if(finalScore == -1.0) if (finalScore == -1.0)
nextNodeName = currentNode.getUndefined(); nextNodeName = currentNode.getUndefined();
else if (finalScore >= currentNode.getThreshold()) { else if (finalScore >= currentNode.getThreshold()) {
nextNodeName = currentNode.getPositive(); nextNodeName = currentNode.getPositive();
@ -53,7 +53,6 @@ public class TreeProcessor {
nextNodeName = currentNode.getNegative(); nextNodeName = currentNode.getNegative();
} }
} while (MatchType.parse(nextNodeName) == MatchType.UNDEFINED); } while (MatchType.parse(nextNodeName) == MatchType.UNDEFINED);
treeStats.setResult(MatchType.parse(nextNodeName)); treeStats.setResult(MatchType.parse(nextNodeName));

View File

@ -3,12 +3,12 @@ package eu.dnetlib.pace.clustering;
import java.util.Map; import java.util.Map;
import com.mongodb.connection.Cluster;
import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.mongodb.connection.Cluster;
import eu.dnetlib.pace.AbstractPaceTest; import eu.dnetlib.pace.AbstractPaceTest;
import eu.dnetlib.pace.common.AbstractPaceFunctions; import eu.dnetlib.pace.common.AbstractPaceFunctions;

View File

@ -61,7 +61,7 @@ public class ComparatorTest extends AbstractPaceTest {
assertEquals(0.0, codeMatch.distance("testing1 key::1", "testing2 key::2", conf)); assertEquals(0.0, codeMatch.distance("testing1 key::1", "testing2 key::2", conf));
// both names with codes (1 same, 1 different) // both names with codes (1 same, 1 different)
assertEquals(0.5,codeMatch.distance("key::1 key::2 testing1", "key::1 testing", conf)); assertEquals(0.5, codeMatch.distance("key::1 key::2 testing1", "key::1 testing", conf));
} }

View File

@ -51,48 +51,5 @@
<artifactId>hadoop-distcp</artifactId> <artifactId>hadoop-distcp</artifactId>
</dependency> </dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-api</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon-dom</artifactId>
</exclusion>
<exclusion>
<groupId>jgrapht</groupId>
<artifactId>jgrapht</artifactId>
</exclusion>
<exclusion>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.*</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>apache</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@ -4,7 +4,6 @@ package eu.dnetlib.dhp.actionmanager;
import java.io.Serializable; import java.io.Serializable;
import java.io.StringReader; import java.io.StringReader;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException;
import java.util.Optional; import java.util.Optional;
import java.util.Set; import java.util.Set;
import java.util.stream.Collectors; import java.util.stream.Collectors;
@ -22,7 +21,6 @@ import com.google.common.base.Splitter;
import com.google.common.collect.Iterables; import com.google.common.collect.Iterables;
import com.google.common.collect.Sets; import com.google.common.collect.Sets;
import eu.dnetlib.actionmanager.rmi.ActionManagerException;
import eu.dnetlib.dhp.utils.ISLookupClientFactory; import eu.dnetlib.dhp.utils.ISLookupClientFactory;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpException;
import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService; import eu.dnetlib.enabling.is.lookup.rmi.ISLookUpService;
@ -65,7 +63,7 @@ public class ISClient implements Serializable {
.map(t -> buildDirectory(basePath, t)) .map(t -> buildDirectory(basePath, t))
.collect(Collectors.toList())) .collect(Collectors.toList()))
.orElseThrow(() -> new IllegalStateException("empty set list")); .orElseThrow(() -> new IllegalStateException("empty set list"));
} catch (ActionManagerException | ISLookUpException e) { } catch (ISLookUpException e) {
throw new IllegalStateException("unable to query ActionSets info from the IS"); throw new IllegalStateException("unable to query ActionSets info from the IS");
} }
} }
@ -89,31 +87,18 @@ public class ISClient implements Serializable {
return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight()); return Joiner.on("/").join(basePath, t.getMiddle(), t.getRight());
} }
private String getBasePathHDFS(ISLookUpService isLookup) throws ActionManagerException { private String getBasePathHDFS(ISLookUpService isLookup) throws ISLookUpException {
return queryServiceProperty(isLookup, "basePath"); return queryServiceProperty(isLookup, "basePath");
} }
private String queryServiceProperty(ISLookUpService isLookup, final String propertyName) private String queryServiceProperty(ISLookUpService isLookup, final String propertyName)
throws ActionManagerException { throws ISLookUpException {
final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='" final String q = "for $x in /RESOURCE_PROFILE[.//RESOURCE_TYPE/@value='ActionManagerServiceResourceType'] return $x//SERVICE_PROPERTIES/PROPERTY[./@ key='"
+ propertyName + propertyName
+ "']/@value/string()"; + "']/@value/string()";
log.debug("quering for service property: {}", q); log.debug("quering for service property: {}", q);
try {
final List<String> value = isLookup.quickSearchProfile(q); final List<String> value = isLookup.quickSearchProfile(q);
return Iterables.getOnlyElement(value); return Iterables.getOnlyElement(value);
} catch (ISLookUpException e) {
String msg = "Error accessing service profile, using query: " + q;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (NoSuchElementException e) {
String msg = "missing service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
} catch (IllegalArgumentException e) {
String msg = "found more than one service property: " + propertyName;
log.error(msg, e);
throw new ActionManagerException(msg, e);
}
} }
} }

View File

@ -106,7 +106,6 @@ public class CreateActionSetFromWebEntries implements Serializable {
+ IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror"))); + IdentifierFactory.md5(PidCleaner.normalizePidValue("ROR", row.getAs("ror")));
ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror)); ret.addAll(createAffiliationRelationPairDOI(row.getAs("doi"), ror));
return ret return ret
.iterator(); .iterator();
}, Encoders.bean(Relation.class)) }, Encoders.bean(Relation.class))

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.researchfi; package eu.dnetlib.dhp.collection.plugin.researchfi;
import java.util.ArrayList; import java.util.ArrayList;
@ -32,7 +33,7 @@ public class ResearchFiCollectorPlugin implements CollectorPlugin {
@Override @Override
public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report) public Stream<String> collect(final ApiDescriptor api, final AggregatorReport report)
throws CollectorException { throws CollectorException {
final String authUrl = api.getParams().get("auth_url"); final String authUrl = api.getParams().get("auth_url");
final String clientId = api.getParams().get("auth_client_id"); final String clientId = api.getParams().get("auth_client_id");
@ -45,7 +46,8 @@ public class ResearchFiCollectorPlugin implements CollectorPlugin {
return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), false); return StreamSupport.stream(Spliterators.spliteratorUnknownSize(iter, Spliterator.ORDERED), false);
} }
private String authenticate(final String authUrl, final String clientId, final String clientSecret) throws CollectorException { private String authenticate(final String authUrl, final String clientId, final String clientSecret)
throws CollectorException {
try (final CloseableHttpClient client = HttpClients.createDefault()) { try (final CloseableHttpClient client = HttpClients.createDefault()) {
final HttpPost req = new HttpPost(authUrl); final HttpPost req = new HttpPost(authUrl);
final List<NameValuePair> params = new ArrayList<>(); final List<NameValuePair> params = new ArrayList<>();
@ -59,7 +61,9 @@ public class ResearchFiCollectorPlugin implements CollectorPlugin {
final String content = IOUtils.toString(response.getEntity().getContent()); final String content = IOUtils.toString(response.getEntity().getContent());
final JSONObject obj = new JSONObject(content); final JSONObject obj = new JSONObject(content);
final String token = obj.getString("access_token"); final String token = obj.getString("access_token");
if (StringUtils.isNotBlank(token)) { return token; } if (StringUtils.isNotBlank(token)) {
return token;
}
} }
} catch (final Throwable e) { } catch (final Throwable e) {
log.warn("Error obtaining access token", e); log.warn("Error obtaining access token", e);

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.researchfi; package eu.dnetlib.dhp.collection.plugin.researchfi;
import java.util.Iterator; import java.util.Iterator;

View File

@ -1,3 +1,4 @@
package eu.dnetlib.dhp.collection.plugin.researchfi; package eu.dnetlib.dhp.collection.plugin.researchfi;
import java.util.HashSet; import java.util.HashSet;
@ -23,7 +24,9 @@ public class ResearchFiCollectorPluginTest {
final ApiDescriptor api = new ApiDescriptor(); final ApiDescriptor api = new ApiDescriptor();
api.setBaseUrl("https://research.fi/api/rest/v1/funding-decisions?FunderName=AKA&FundingStartYearFrom=2022"); api.setBaseUrl("https://research.fi/api/rest/v1/funding-decisions?FunderName=AKA&FundingStartYearFrom=2022");
api.setProtocol("research_fi"); api.setProtocol("research_fi");
api.getParams().put("auth_url", "https://researchfi-auth.2.rahtiapp.fi/realms/publicapi/protocol/openid-connect/token"); api
.getParams()
.put("auth_url", "https://researchfi-auth.2.rahtiapp.fi/realms/publicapi/protocol/openid-connect/token");
api.getParams().put("auth_client_id", ""); api.getParams().put("auth_client_id", "");
api.getParams().put("auth_client_secret", ""); api.getParams().put("auth_client_secret", "");

View File

@ -3,13 +3,13 @@ package eu.dnetlib.dhp.oa.dedup.jpath;
import java.io.IOException; import java.io.IOException;
import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest;
import org.apache.commons.io.IOUtils; import org.apache.commons.io.IOUtils;
import org.apache.spark.sql.Row; import org.apache.spark.sql.Row;
import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils; import org.junit.platform.commons.util.StringUtils;
import eu.dnetlib.dhp.oa.dedup.SparkOpenorgsDedupTest;
import eu.dnetlib.pace.config.DedupConfig; import eu.dnetlib.pace.config.DedupConfig;
import eu.dnetlib.pace.model.SparkModel; import eu.dnetlib.pace.model.SparkModel;
@ -33,13 +33,14 @@ class JsonPathTest {
} }
@Test @Test
void jsonToModelTest() throws IOException{ void jsonToModelTest() throws IOException {
DedupConfig conf = DedupConfig DedupConfig conf = DedupConfig
.load(IOUtils .load(
.toString( IOUtils
SparkOpenorgsDedupTest.class .toString(
.getResourceAsStream( SparkOpenorgsDedupTest.class
"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json"))); .getResourceAsStream(
"/eu/dnetlib/dhp/dedup/conf/org.curr.conf.json")));
final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json")); final String org = IOUtils.toString(getClass().getResourceAsStream("organization_example1.json"));

View File

@ -37,6 +37,7 @@ import eu.dnetlib.dhp.schema.solr.Measure;
import eu.dnetlib.dhp.schema.solr.OpenAccessColor; import eu.dnetlib.dhp.schema.solr.OpenAccessColor;
import eu.dnetlib.dhp.schema.solr.OpenAccessRoute; import eu.dnetlib.dhp.schema.solr.OpenAccessRoute;
import eu.dnetlib.dhp.schema.solr.Organization; import eu.dnetlib.dhp.schema.solr.Organization;
import eu.dnetlib.dhp.schema.solr.Pid;
import eu.dnetlib.dhp.schema.solr.Project; import eu.dnetlib.dhp.schema.solr.Project;
import eu.dnetlib.dhp.schema.solr.Result; import eu.dnetlib.dhp.schema.solr.Result;
import eu.dnetlib.dhp.schema.solr.Subject; import eu.dnetlib.dhp.schema.solr.Subject;

View File

@ -51,49 +51,6 @@
<artifactId>hadoop-distcp</artifactId> <artifactId>hadoop-distcp</artifactId>
</dependency> </dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-api</artifactId>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-openaireplus-mapping-utils</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon</artifactId>
</exclusion>
<exclusion>
<groupId>saxonica</groupId>
<artifactId>saxon-dom</artifactId>
</exclusion>
<exclusion>
<groupId>jgrapht</groupId>
<artifactId>jgrapht</artifactId>
</exclusion>
<exclusion>
<groupId>net.sf.ehcache</groupId>
<artifactId>ehcache</artifactId>
</exclusion>
<exclusion>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.*</groupId>
<artifactId>*</artifactId>
</exclusion>
<exclusion>
<groupId>apache</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency> <dependency>
<groupId>org.apache.httpcomponents</groupId> <groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId> <artifactId>httpclient</artifactId>

27
pom.xml
View File

@ -440,29 +440,6 @@
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-common</artifactId>
<version>${dnet-actionmanager-common.version}</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>eu.dnetlib</groupId>
<artifactId>dnet-actionmanager-api</artifactId>
<version>${dnet-actionmanager-api.version}</version>
<exclusions>
<exclusion>
<groupId>eu.dnetlib</groupId>
<artifactId>cnr-misc-utils</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency> <dependency>
<groupId>eu.dnetlib</groupId> <groupId>eu.dnetlib</groupId>
<artifactId>cnr-rmi-api</artifactId> <artifactId>cnr-rmi-api</artifactId>
@ -960,7 +937,7 @@
<commons.logging.version>1.1.3</commons.logging.version> <commons.logging.version>1.1.3</commons.logging.version>
<commons-validator.version>1.7</commons-validator.version> <commons-validator.version>1.7</commons-validator.version>
<dateparser.version>1.0.7</dateparser.version> <dateparser.version>1.0.7</dateparser.version>
<dhp-schemas.version>[6.1.4-SNAPSHOT]</dhp-schemas.version> <dhp-schemas.version>[7.0.0]</dhp-schemas.version>
<dhp.cdh.version>cdh5.9.2</dhp.cdh.version> <dhp.cdh.version>cdh5.9.2</dhp.cdh.version>
<dhp.commons.lang.version>3.5</dhp.commons.lang.version> <dhp.commons.lang.version>3.5</dhp.commons.lang.version>
<dhp.guava.version>11.0.2</dhp.guava.version> <dhp.guava.version>11.0.2</dhp.guava.version>
@ -969,8 +946,6 @@
<dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version> <dhp.oozie.version>4.1.0-${dhp.cdh.version}</dhp.oozie.version>
<dhp.site.skip>true</dhp.site.skip> <dhp.site.skip>true</dhp.site.skip>
<dhp.spark.version>2.4.0.cloudera2</dhp.spark.version> <dhp.spark.version>2.4.0.cloudera2</dhp.spark.version>
<dnet-actionmanager-api.version>[4.0.3]</dnet-actionmanager-api.version>
<dnet-actionmanager-common.version>[6.0.5]</dnet-actionmanager-common.version>
<dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version> <dnet-openaire-broker-common.version>[3.1.6]</dnet-openaire-broker-common.version>
<google.gson.version>2.2.2</google.gson.version> <google.gson.version>2.2.2</google.gson.version>
<log4j.version>1.2.17</log4j.version> <log4j.version>1.2.17</log4j.version>