forked from D-Net/dnet-hadoop
Compare commits
1 commit
main ... metadata_c

Author | SHA1 | Date
---|---|---
Claudio Atzori | d7b67469a7 |
Deleted: eu/dnetlib/dhp/transformation/xslt/DataFetcher.java
@@ -1,134 +0,0 @@

package eu.dnetlib.dhp.transformation.xslt;

import java.io.Serializable;

import net.sf.saxon.s9api.*;

import org.apache.commons.io.IOUtils;
import org.json.JSONObject;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;

/**
 * This class fetches JSON from a provided link and returns Dublin Core
 * creator elements. This functionality is needed in particular for OSF preprints.
 */
public class DataFetcher implements ExtensionFunction, Serializable {

	/**
	 * Fetches a JSON object from a given URL.
	 *
	 * @param url a URL from the metadata, pointing to the authors in JSON format
	 * @return the parsed JSON object
	 * @throws IOException if the content cannot be read
	 */
	static JSONObject getJson(URL url) throws IOException {
		String json = IOUtils.toString(url);
		return new JSONObject(json);
	}

	/**
	 * Extracts the author names from a given JSON object.
	 *
	 * @param jsonObject the JSON returned by the OSF contributors endpoint
	 * @return the list of author full names
	 */
	static List<String> getAuthorsFromJson(JSONObject jsonObject) {
		List<String> authors = new ArrayList<>();
		int countOfAuthors = jsonObject.getJSONArray("data").length();
		for (int i = 0; i < countOfAuthors; i++) {
			authors.add(jsonObject
				.getJSONArray("data")
				.getJSONObject(i)
				.getJSONObject("embeds")
				.getJSONObject("users")
				.getJSONObject("data")
				.getJSONObject("attributes")
				.getString("full_name"));
		}
		return authors;
	}

	/**
	 * Transforms a list of author names into Dublin Core creator elements.
	 *
	 * @param authors the list of author full names
	 * @return the Dublin Core list of authors
	 */
	static List<String> transformListToDublinCore(List<String> authors) {
		List<String> dublinCoreAuthors = new ArrayList<>();
		for (String author : authors) {
			// Split the full name into first and last name, following the OpenAIRE v3 guidelines at:
			// https://guidelines.openaire.eu/en/latest/literature/field_creator.html
			// "surname", "initials" ("first name") "prefix".
			// Note: this assumes the full name is exactly two space-separated tokens;
			// a single-token name would make parts[1] throw an ArrayIndexOutOfBoundsException.
			String[] parts = author.split(" ");
			String firstName = parts[0];
			String lastName = parts[1];
			char initialOfFirstName = firstName.charAt(0);

			dublinCoreAuthors.add(
				"<dc:creator>" + lastName + ", " + initialOfFirstName + ". (" + firstName + ")" + "</dc:creator>");
		}
		return dublinCoreAuthors;
	}

	/**
	 * Fetches the authors from a given URL and transforms them into Dublin Core creator elements.
	 */
	public static String getAndTransformAuthors(URL url) throws IOException {
		return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
	}

	/**
	 * Extracts the link to the full text from a given JSON object.
	 *
	 * @param jsonObject the JSON returned by the OSF files endpoint
	 * @return the download URL of the full text
	 */
	private static String getLinkToFulltextFromJson(JSONObject jsonObject) throws MalformedURLException {
		// Note: the link to the JSON containing the full-text link is in the "primary_file"
		// attribute, and in the resulting JSON, "links" -> "download" holds the URL to the full text.
		return jsonObject
			.getJSONObject("data")
			.getJSONObject("links")
			.getString("download");
	}

	/**
	 * Fetches the link to the full text from a given URL and returns it in a suitable format.
	 */
	public static String getFullTextLinkAndTransform(URL url) throws IOException {
		return getLinkToFulltextFromJson(getJson(url));
	}

	// Stub implementations required by the Saxon ExtensionFunction interface.
	@Override
	public QName getName() {
		return null;
	}

	@Override
	public SequenceType getResultType() {
		return null;
	}

	@Override
	public SequenceType[] getArgumentTypes() {
		return new SequenceType[0];
	}

	@Override
	public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
		return null;
	}
}
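For context, a minimal sketch of how the removed helper would be called directly. The OSF endpoint URL comes from the test class further below; the wrapper class itself is hypothetical and not part of the commit:

import java.net.URI;
import java.net.URL;

import eu.dnetlib.dhp.transformation.xslt.DataFetcher;

public class DataFetcherUsage {
	public static void main(String[] args) throws Exception {
		// OSF contributors endpoint for a preprint, as used in DataFetcherTest
		URL contributors = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
		// Prints something like: <dc:creator>Doe, J. (John)</dc:creator>, <dc:creator>...
		System.out.println(DataFetcher.getAndTransformAuthors(contributors));
	}
}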
@@ -55,8 +55,6 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, MetadataRecord>
 		processor.registerExtensionFunction(new DateCleaner());
 		processor.registerExtensionFunction(new PersonCleaner());
-
-		processor.registerExtensionFunction(new DataFetcher());

 		final XsltCompiler comp = processor.newXsltCompiler();
 		QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
 		comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
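For background, registerExtensionFunction is the Saxon s9api hook that makes a Java ExtensionFunction callable from a stylesheet under its declared QName. A minimal sketch of the mechanism, with hypothetical names (not one of the actual dnet extension functions):

import net.sf.saxon.s9api.*;

public class UppercaseFunction implements ExtensionFunction {

	@Override
	public QName getName() {
		// The stylesheet refers to the function by this namespace + local name.
		return new QName("http://example.org/ext", "toUpper");
	}

	@Override
	public SequenceType getResultType() {
		return SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE);
	}

	@Override
	public SequenceType[] getArgumentTypes() {
		return new SequenceType[] {
			SequenceType.makeSequenceType(ItemType.STRING, OccurrenceIndicator.ONE)
		};
	}

	@Override
	public XdmValue call(XdmValue[] arguments) throws SaxonApiException {
		return new XdmAtomicValue(arguments[0].itemAt(0).getStringValue().toUpperCase());
	}
}

Once registered with processor.registerExtensionFunction(new UppercaseFunction()), a stylesheet declaring xmlns:ext="http://example.org/ext" can call ext:toUpper(...), just as the workflow stylesheets use the DateCleaner and PersonCleaner functions registered above.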
Deleted: eu/dnetlib/dhp/transformation/xslt/DataFetcherTest.java
@@ -1,68 +0,0 @@

package eu.dnetlib.dhp.transformation.xslt;

import org.json.JSONObject;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.List;

import static org.junit.jupiter.api.Assertions.*;

class DataFetcherTest {

	@BeforeEach
	void setUp() {
	}

	@AfterEach
	void tearDown() {
	}

	// Note: these tests require network access to api.osf.io and print the
	// fetched values instead of asserting on them.

	@Test
	void getJson() throws IOException, URISyntaxException {
		URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
		JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);

		String x = testJsonObj
			.getJSONArray("data")
			.getJSONObject(0)
			.getJSONObject("embeds")
			.getJSONObject("users")
			.getJSONObject("data")
			.getJSONObject("attributes")
			.getString("full_name");
		System.out.println(x);
		System.out.println(testJsonObj.getJSONArray("data").length());
		testJsonObj.getJSONArray("data").forEach(System.out::println);
	}

	@Test
	void getAuthorsFromJson() throws IOException, URISyntaxException {
		URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
		JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
		List<String> authors = DataFetcher.getAuthorsFromJson(testJsonObj);
		System.out.println(authors);
		System.out.println(DataFetcher.transformListToDublinCore(authors));
	}

	@Test
	void getAndTransformAuthors() throws IOException, URISyntaxException {
		URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
		System.out.println(DataFetcher.getAndTransformAuthors(contributorsUrl));
	}

	@Test
	void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
		URL linkToFullTextDocument = new URI("https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json").toURL();
		System.out.println(DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument));
	}
}
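The removed tests above only print their results and depend on live calls to api.osf.io. For illustration, a sketch of how one of them could assert instead, written as a drop-in method for the deleted class above (the expectations are hypothetical; actual values depend on the remote record):

	@Test
	void getAuthorsFromJsonReturnsDublinCoreCreators() throws IOException, URISyntaxException {
		URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
		List<String> authors = DataFetcher.getAuthorsFromJson(DataFetcher.getJson(contributorsUrl));

		// the preprint is expected to have at least one contributor
		assertFalse(authors.isEmpty());

		// every author should be rendered as a dc:creator element
		for (String dc : DataFetcher.transformListToDublinCore(authors)) {
			assertTrue(dc.startsWith("<dc:creator>") && dc.endsWith("</dc:creator>"));
		}
	}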
@@ -26,15 +26,15 @@ import eu.dnetlib.dhp.schema.oaf.Publication;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.Software;

-public class PrepareSimpleEntitiesJob {
+public class PrepareSimpleEntititiesJob {

-	private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntitiesJob.class);
+	private static final Logger log = LoggerFactory.getLogger(PrepareSimpleEntititiesJob.class);

 	public static void main(final String[] args) throws Exception {
 		final ArgumentApplicationParser parser = new ArgumentApplicationParser(
 			IOUtils
 				.toString(
-					PrepareSimpleEntitiesJob.class
+					PrepareSimpleEntititiesJob.class
 						.getResourceAsStream("/eu/dnetlib/dhp/broker/oa/common_params.json")));
 		parser.parseArgument(args);
@@ -160,7 +160,8 @@ public class ConversionUtils {
 			.stream()
 			.filter(Objects::nonNull)
 			.filter(pid -> pid.getQualifier() != null)
-			.filter(pid -> StringUtils.startsWithIgnoreCase(pid.getQualifier().getClassid(), ModelConstants.ORCID))
+			.filter(pid -> pid.getQualifier().getClassid() != null)
+			.filter(pid -> pid.getQualifier().getClassid().equalsIgnoreCase(ModelConstants.ORCID))
 			.map(StructuredProperty::getValue)
 			.map(ConversionUtils::cleanOrcid)
 			.filter(StringUtils::isNotBlank)
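This change tightens the pid selection: the old predicate accepted any classid that merely started with the ORCID prefix, while the new pair of predicates is null-safe and requires an exact, case-insensitive match. A small illustration, assuming ModelConstants.ORCID is the string "orcid" and that prefixed variants such as "orcid_pending" can occur in the data (both are assumptions for this sketch):

import org.apache.commons.lang3.StringUtils;

public class OrcidFilterExample {
	public static void main(String[] args) {
		String orcid = "orcid"; // stand-in for ModelConstants.ORCID
		String classid = "orcid_pending";

		// old predicate: prefix match, also accepts "orcid_pending"
		System.out.println(StringUtils.startsWithIgnoreCase(classid, orcid)); // true

		// new predicates: null-safe exact match, accepts only "orcid"
		System.out.println(classid != null && classid.equalsIgnoreCase(orcid)); // false
	}
}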
@@ -7,7 +7,7 @@
 	</property>
 	<property>
 		<name>outputDir</name>
-		<description>the path where the generated data will be stored</description>
+		<description>the path where the the generated data will be stored</description>
 	</property>
 	<property>
 		<name>datasourceIdWhitelist</name>
@@ -179,18 +179,17 @@
 			<master>yarn</master>
 			<mode>cluster</mode>
 			<name>PrepareSimpleEntititiesJob</name>
-			<class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntitiesJob</class>
+			<class>eu.dnetlib.dhp.broker.oa.PrepareSimpleEntititiesJob</class>
 			<jar>dhp-broker-events-${projectVersion}.jar</jar>
 			<spark-opts>
 				--executor-cores=${sparkExecutorCores}
 				--executor-memory=${sparkExecutorMemory}
 				--driver-memory=${sparkDriverMemory}
 				--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
 				--conf spark.extraListeners=${spark2ExtraListeners}
 				--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
 				--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
 				--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
-				--conf spark.sql.shuffle.partitions=5000
+				--conf spark.sql.shuffle.partitions=3840
 			</spark-opts>
 			<arg>--graphPath</arg><arg>${graphInputPath}</arg>
 			<arg>--workingDir</arg><arg>${workingDir}</arg>
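This and the following hunks replace the per-job spark.sql.shuffle.partitions values (5000, 8000, 10000, 2000) with the single value 3840 across all broker jobs. For what the option controls, a sketch in plain Spark (the application wiring here is hypothetical, only the setting itself is from the commit):

import org.apache.spark.sql.SparkSession;

public class ShufflePartitionsExample {
	public static void main(String[] args) {
		SparkSession spark = SparkSession
			.builder()
			.appName("PrepareSimpleEntititiesJob")
			.config("spark.sql.shuffle.partitions", "3840") // unified value used by the broker jobs
			.getOrCreate();
		// Any wide transformation (join, groupBy, ...) now shuffles into 3840 partitions
		// instead of a per-job tuned count.
		spark.stop();
	}
}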
@ -210,12 +209,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -236,12 +234,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -261,12 +258,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=5000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -286,12 +282,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=10000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -311,12 +306,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=2000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -338,12 +332,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -363,12 +356,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -388,12 +380,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -413,12 +404,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -438,12 +428,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -463,12 +452,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--graphPath</arg><arg>${graphInputPath}</arg>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
|
@ -488,12 +476,11 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
--conf spark.eventLog.dir=${nameNode}${spark2EventLogDir}
|
||||
--conf spark.sql.shuffle.partitions=8000
|
||||
--conf spark.sql.shuffle.partitions=3840
|
||||
</spark-opts>
|
||||
<arg>--workingDir</arg><arg>${workingDir}</arg>
|
||||
<arg>--outputDir</arg><arg>${outputDir}</arg>
|
||||
|
@ -516,7 +503,6 @@
|
|||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
|
@ -549,7 +535,6 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
|
@ -577,7 +562,6 @@
|
|||
--executor-cores=${sparkExecutorCores}
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
--conf spark.yarn.historyServer.address=${spark2YarnHistoryServerAddress}
|
||||
|
@ -601,7 +585,6 @@
|
|||
<spark-opts>
|
||||
--executor-memory=${sparkExecutorMemory}
|
||||
--driver-memory=${sparkDriverMemory}
|
||||
--conf spark.executor.memoryOverhead=${sparkExecutorMemory}
|
||||
--conf spark.dynamicAllocation.maxExecutors=${sparkMaxExecutorsForIndexing}
|
||||
--conf spark.extraListeners=${spark2ExtraListeners}
|
||||
--conf spark.sql.queryExecutionListeners=${spark2SqlQueryExecutionListeners}
|
||||
|
|
|
Deleted: eu/dnetlib/dhp/broker/oa/matchers/simple/EnrichMissingAuthorOrcidTest.java
@@ -1,64 +0,0 @@

package eu.dnetlib.dhp.broker.oa.matchers.simple;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.List;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import eu.dnetlib.broker.objects.OaBrokerAuthor;
import eu.dnetlib.broker.objects.OaBrokerMainEntity;

class EnrichMissingAuthorOrcidTest {

	final EnrichMissingAuthorOrcid matcher = new EnrichMissingAuthorOrcid();

	@BeforeEach
	void setUp() throws Exception {
	}

	// No authors on either side: no differences expected.
	@Test
	void testFindDifferences_1() {
		final OaBrokerMainEntity source = new OaBrokerMainEntity();
		final OaBrokerMainEntity target = new OaBrokerMainEntity();
		final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
		assertTrue(list.isEmpty());
	}

	// The source author carries an ORCID that the target author misses: one difference expected.
	@Test
	void testFindDifferences_2() {
		final OaBrokerMainEntity source = new OaBrokerMainEntity();
		final OaBrokerMainEntity target = new OaBrokerMainEntity();

		source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
		target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));

		final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
		assertEquals(1, list.size());
	}

	// Only the target author carries the ORCID: nothing to enrich.
	@Test
	void testFindDifferences_3() {
		final OaBrokerMainEntity source = new OaBrokerMainEntity();
		final OaBrokerMainEntity target = new OaBrokerMainEntity();

		source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", null));
		target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));

		final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
		assertTrue(list.isEmpty());
	}

	// Both authors already carry the same ORCID: nothing to enrich.
	@Test
	void testFindDifferences_4() {
		final OaBrokerMainEntity source = new OaBrokerMainEntity();
		final OaBrokerMainEntity target = new OaBrokerMainEntity();
		source.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));
		target.getCreators().add(new OaBrokerAuthor("Claudio Atzori", "0000-0001-9613-6639"));

		final List<OaBrokerAuthor> list = this.matcher.findDifferences(source, target);
		assertTrue(list.isEmpty());
	}

}
@@ -2,31 +2,27 @@
 package eu.dnetlib.dhp.broker.oa.util;

 import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertNull;

 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;

 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;

 import eu.dnetlib.broker.objects.OaBrokerMainEntity;
 import eu.dnetlib.broker.objects.OaBrokerTypedValue;
-import eu.dnetlib.dhp.schema.common.ModelConstants;
-import eu.dnetlib.dhp.schema.oaf.Author;
 import eu.dnetlib.dhp.schema.oaf.Instance;
 import eu.dnetlib.dhp.schema.oaf.Qualifier;
 import eu.dnetlib.dhp.schema.oaf.Result;
 import eu.dnetlib.dhp.schema.oaf.StructuredProperty;

-public class ConversionUtilsTest {
+class ConversionUtilsTest {

 	@BeforeEach
-	public void setUp() throws Exception {}
+	void setUp() throws Exception {
+	}

 	@Test
-	public void testAllResultPids() {
+	void testAllResultPids() {
 		final Qualifier qf = new Qualifier();
 		qf.setClassid("test");
 		qf.setClassname("test");
@@ -95,42 +91,4 @@ public class ConversionUtilsTest {
 		assertEquals(6, list.size());
 	}

-	public void testOafResultToBrokerResult() {
-
-		final Author a1 = createAuthor("Michele Artini", "0000-0002-4406-428X");
-		final Author a2 = createAuthor("Claudio Atzori", "http://orcid.org/0000-0001-9613-6639");
-		final Author a3 = createAuthor("Alessia Bardi", null);
-
-		final Result r = new Result();
-		r.setAuthor(Arrays.asList(a1, a2, a3));
-
-		final OaBrokerMainEntity br = ConversionUtils.oafResultToBrokerResult(r);
-
-		assertEquals(3, br.getCreators().size());
-		assertEquals("0000-0002-4406-428X", br.getCreators().get(0).getOrcid());
-		assertEquals("0000-0001-9613-6639", br.getCreators().get(1).getOrcid());
-		assertNull(br.getCreators().get(2).getOrcid());
-	}
-
-	private Author createAuthor(final String name, final String orcid) {
-
-		final Author a = new Author();
-		a.setFullname("Michele Artini");
-
-		if (orcid != null) {
-			final Qualifier q = new Qualifier();
-			q.setClassid(ModelConstants.ORCID);
-			q.setClassname(ModelConstants.ORCID);
-			q.setSchemeid("dnet:pids");
-			q.setSchemename("dnet:pids");
-
-			final StructuredProperty pid = new StructuredProperty();
-			pid.setQualifier(q);
-			pid.setValue(orcid);
-
-			a.setPid(Arrays.asList(pid));
-		}
-		return a;
-	}
-
 }
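Incidentally, the removed createAuthor helper ignores its name parameter and hard-codes "Michele Artini" as the fullname. A corrected sketch of the same helper, changing only that line:

	private Author createAuthor(final String name, final String orcid) {
		final Author a = new Author();
		a.setFullname(name); // use the parameter instead of a hard-coded value

		if (orcid != null) {
			final Qualifier q = new Qualifier();
			q.setClassid(ModelConstants.ORCID);
			q.setClassname(ModelConstants.ORCID);
			q.setSchemeid("dnet:pids");
			q.setSchemename("dnet:pids");

			final StructuredProperty pid = new StructuredProperty();
			pid.setQualifier(q);
			pid.setValue(orcid);

			a.setPid(Arrays.asList(pid));
		}
		return a;
	}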
@@ -128,14 +128,12 @@ public class SolrAdminApplication implements Closeable {

 	public SolrResponse deleteAlias(String aliasName) throws SolrServerException, IOException {
 		CollectionAdminRequest.DeleteAlias deleteAliasRequest = CollectionAdminRequest.deleteAlias(aliasName);
 		log.info("deleting alias: {}", aliasName);
 		return deleteAliasRequest.process(solrClient);
 	}

 	public SolrResponse createAlias(String aliasName, String collection) throws IOException, SolrServerException {
 		CollectionAdminRequest.CreateAlias createAliasRequest = CollectionAdminRequest
 			.createAlias(aliasName, collection);
 		log.info("creating alias: {} for collection: {}", aliasName, collection);
 		return createAliasRequest.process(solrClient);
 	}
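The two helpers above wrap SolrJ collection-admin requests. A standalone sketch of the same calls against a SolrCloud cluster; the ZooKeeper address, alias and collection names are hypothetical:

import java.util.Collections;
import java.util.Optional;

import org.apache.solr.client.solrj.impl.CloudSolrClient;
import org.apache.solr.client.solrj.request.CollectionAdminRequest;

public class AliasExample {
	public static void main(String[] args) throws Exception {
		try (CloudSolrClient client = new CloudSolrClient.Builder(
			Collections.singletonList("localhost:9983"), Optional.empty()).build()) {

			// point the public alias at a freshly built collection...
			CollectionAdminRequest.createAlias("publicSearch", "openaireIndex_2024").process(client);

			// ...and drop the alias when the collection is retired
			CollectionAdminRequest.deleteAlias("publicSearch").process(client);
		}
	}
}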
@@ -1,4 +1,4 @@
-<workflow-app name="Promote Graph Stats" xmlns="uri:oozie:workflow:0.5">
+<workflow-app name="Graph Stats" xmlns="uri:oozie:workflow:0.5">
 	<parameters>
 		<property>
 			<name>stats_db_name</name>