Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
sab | ef6c90cc64 | |
sab | df82f8beb9 | |
sab | 53787dbf67 | |
sab | bbb79273a3 | |
sab | 7f39375ba8 |
|
@ -0,0 +1,134 @@
|
||||||
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import net.sf.saxon.s9api.*;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
 * This class fetches JSON from a provided URL and converts the data it contains
 * into Dublin Core elements. This functionality is needed in particular for OSF Preprints.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
public class DataFetcher implements ExtensionFunction, Serializable {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method fetches JSON object from a given URL
|
||||||
|
* @param url a url in the metadata for fetching authors in JSON format
|
||||||
|
* @return
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
static JSONObject getJson(URL url) throws IOException {
|
||||||
|
|
||||||
|
String json = IOUtils.toString(url);
|
||||||
|
return new JSONObject(json);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method extracts authors from a given JSON
|
||||||
|
*
|
||||||
|
* @param jsonObject
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static List<String> getAuthorsFromJson(JSONObject jsonObject) {
|
||||||
|
List<String> authors = new ArrayList<>();
|
||||||
|
// count of authors
|
||||||
|
int countOfAuthors = jsonObject.getJSONArray("data").length();
|
||||||
|
for (int i = 0; i < countOfAuthors; i++) {
|
||||||
|
|
||||||
|
authors.add(jsonObject
|
||||||
|
.getJSONArray("data")
|
||||||
|
.getJSONObject(i)
|
||||||
|
.getJSONObject("embeds")
|
||||||
|
.getJSONObject("users")
|
||||||
|
.getJSONObject("data")
|
||||||
|
.getJSONObject("attributes")
|
||||||
|
.getString("full_name"));
|
||||||
|
}
|
||||||
|
return authors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method transforms list of authors into Dublin Core
|
||||||
|
* @param authors
|
||||||
|
* @return Dublin Core list of authors
|
||||||
|
*/
|
||||||
|
static List<String> transformListToDublinCore(List<String> authors) {
|
||||||
|
|
||||||
|
List<String> dublinCoreAuthors = new ArrayList<>();
|
||||||
|
for (String author : authors){
|
||||||
|
|
||||||
|
//splitting full name into first and last names according to OpenAIRE v3 guidelines at:
|
||||||
|
// https://guidelines.openaire.eu/en/latest/literature/field_creator.html
|
||||||
|
// “surname”, “initials” (“first name”) “prefix”.
|
||||||
|
String[] parts = author.split(" ");
|
||||||
|
String firstName = parts[0];
|
||||||
|
String lastName = parts[1];
|
||||||
|
char initialOfFirstName = firstName.charAt(0);
|
||||||
|
|
||||||
|
dublinCoreAuthors.add(
|
||||||
|
"<dc:creator>" + lastName + ", " + initialOfFirstName + ". (" + firstName + ")" + "</dc:creator>");
|
||||||
|
}
|
||||||
|
return dublinCoreAuthors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a public method which fetches authors and transform them into Dublin Core
|
||||||
|
*/
|
||||||
|
public static String getAndTransformAuthors(URL url) throws IOException{
|
||||||
|
return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method extracts link to fulltext from a given JSON
|
||||||
|
*
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
static private String getLinkToFulltextFromJson(JSONObject jsonObject) throws MalformedURLException {
|
||||||
|
|
||||||
|
// note: Link to JSON containing fulltextlink is in "primary_file" attribute.
|
||||||
|
// And in the resultant JSON, “links->download” contains the URL to fulltext
|
||||||
|
|
||||||
|
return jsonObject
|
||||||
|
.getJSONObject("data")
|
||||||
|
.getJSONObject("links")
|
||||||
|
.getString("download");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a public method which fetches link to full text and returns it as a suitable format
|
||||||
|
*/
|
||||||
|
public static String getFullTextLinkAndTransform (URL url )throws IOException{
|
||||||
|
|
||||||
|
return getLinkToFulltextFromJson(getJson(url));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public QName getName() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SequenceType getResultType() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SequenceType[] getArgumentTypes() {
|
||||||
|
return new SequenceType[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
|
@ -55,6 +55,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
||||||
processor.registerExtensionFunction(new DateCleaner());
|
processor.registerExtensionFunction(new DateCleaner());
|
||||||
processor.registerExtensionFunction(new PersonCleaner());
|
processor.registerExtensionFunction(new PersonCleaner());
|
||||||
|
|
||||||
|
processor.registerExtensionFunction(new DataFetcher());
|
||||||
|
|
||||||
final XsltCompiler comp = processor.newXsltCompiler();
|
final XsltCompiler comp = processor.newXsltCompiler();
|
||||||
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
|
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
|
||||||
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
|
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
package eu.dnetlib.dhp.transformation.xslt;
|
||||||
|
|
||||||
|
import org.json.JSONObject;
|
||||||
|
import org.junit.jupiter.api.AfterEach;
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.MalformedURLException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
class DataFetcherTest {
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
void setUp() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterEach
|
||||||
|
void tearDown() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getJson() throws IOException, URISyntaxException {
|
||||||
|
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
||||||
|
JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
|
||||||
|
|
||||||
|
String x = testJsonObj
|
||||||
|
.getJSONArray("data")
|
||||||
|
.getJSONObject(0)
|
||||||
|
.getJSONObject("embeds")
|
||||||
|
.getJSONObject("users")
|
||||||
|
.getJSONObject("data")
|
||||||
|
.getJSONObject("attributes")
|
||||||
|
.getString("full_name");
|
||||||
|
System.out.println(x);
|
||||||
|
System.out.println(testJsonObj.getJSONArray("data").length());
|
||||||
|
testJsonObj.getJSONArray("data").forEach(System.out::println);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getAuthorsFromJson() throws IOException, URISyntaxException {
|
||||||
|
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
||||||
|
JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
|
||||||
|
List<String> authors = DataFetcher.getAuthorsFromJson(testJsonObj);
|
||||||
|
System.out.println(authors);
|
||||||
|
System.out.println(DataFetcher.transformListToDublinCore(authors));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getAndTransformAuthors() throws IOException, URISyntaxException {
|
||||||
|
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
||||||
|
System.out.println(DataFetcher.getAndTransformAuthors(contributorsUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
|
||||||
|
URL linkToFullTextDocument = new URI("https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json").toURL();
|
||||||
|
System.out.println(DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument));
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue