Compare commits
5 Commits
Author | SHA1 | Date |
---|---|---|
sab | ef6c90cc64 | |
sab | df82f8beb9 | |
sab | 53787dbf67 | |
sab | bbb79273a3 | |
sab | 7f39375ba8 |
|
@ -0,0 +1,134 @@
|
|||
package eu.dnetlib.dhp.transformation.xslt;
|
||||
|
||||
import java.io.Serializable;
|
||||
import net.sf.saxon.s9api.*;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.json.JSONObject;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* This class fetches JSON from a provided link and returns
|
||||
* a Dublin Core. This functionality is particularly needed for OSF Preprints
|
||||
*/
|
||||
|
||||
|
||||
public class DataFetcher implements ExtensionFunction, Serializable {
|
||||
|
||||
/**
|
||||
* This method fetches JSON object from a given URL
|
||||
* @param url a url in the metadata for fetching authors in JSON format
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
static JSONObject getJson(URL url) throws IOException {
|
||||
|
||||
String json = IOUtils.toString(url);
|
||||
return new JSONObject(json);
|
||||
}
|
||||
|
||||
/**
|
||||
* This method extracts authors from a given JSON
|
||||
*
|
||||
* @param jsonObject
|
||||
* @return
|
||||
*/
|
||||
static List<String> getAuthorsFromJson(JSONObject jsonObject) {
|
||||
List<String> authors = new ArrayList<>();
|
||||
// count of authors
|
||||
int countOfAuthors = jsonObject.getJSONArray("data").length();
|
||||
for (int i = 0; i < countOfAuthors; i++) {
|
||||
|
||||
authors.add(jsonObject
|
||||
.getJSONArray("data")
|
||||
.getJSONObject(i)
|
||||
.getJSONObject("embeds")
|
||||
.getJSONObject("users")
|
||||
.getJSONObject("data")
|
||||
.getJSONObject("attributes")
|
||||
.getString("full_name"));
|
||||
}
|
||||
return authors;
|
||||
}
|
||||
|
||||
/**
|
||||
* This method transforms list of authors into Dublin Core
|
||||
* @param authors
|
||||
* @return Dublin Core list of authors
|
||||
*/
|
||||
static List<String> transformListToDublinCore(List<String> authors) {
|
||||
|
||||
List<String> dublinCoreAuthors = new ArrayList<>();
|
||||
for (String author : authors){
|
||||
|
||||
//splitting full name into first and last names according to OpenAIRE v3 guidelines at:
|
||||
// https://guidelines.openaire.eu/en/latest/literature/field_creator.html
|
||||
// “surname”, “initials” (“first name”) “prefix”.
|
||||
String[] parts = author.split(" ");
|
||||
String firstName = parts[0];
|
||||
String lastName = parts[1];
|
||||
char initialOfFirstName = firstName.charAt(0);
|
||||
|
||||
dublinCoreAuthors.add(
|
||||
"<dc:creator>" + lastName + ", " + initialOfFirstName + ". (" + firstName + ")" + "</dc:creator>");
|
||||
}
|
||||
return dublinCoreAuthors;
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a public method which fetches authors and transform them into Dublin Core
|
||||
*/
|
||||
public static String getAndTransformAuthors(URL url) throws IOException{
|
||||
return String.join(", ", transformListToDublinCore(getAuthorsFromJson(getJson(url))));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* This method extracts link to fulltext from a given JSON
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
static private String getLinkToFulltextFromJson(JSONObject jsonObject) throws MalformedURLException {
|
||||
|
||||
// note: Link to JSON containing fulltextlink is in "primary_file" attribute.
|
||||
// And in the resultant JSON, “links->download” contains the URL to fulltext
|
||||
|
||||
return jsonObject
|
||||
.getJSONObject("data")
|
||||
.getJSONObject("links")
|
||||
.getString("download");
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a public method which fetches link to full text and returns it as a suitable format
|
||||
*/
|
||||
public static String getFullTextLinkAndTransform (URL url )throws IOException{
|
||||
|
||||
return getLinkToFulltextFromJson(getJson(url));
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public QName getName() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType getResultType() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SequenceType[] getArgumentTypes() {
|
||||
return new SequenceType[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
public XdmValue call(XdmValue[] xdmValues) throws SaxonApiException {
|
||||
return null;
|
||||
}
|
||||
}
|
|
@ -55,6 +55,8 @@ public class XSLTTransformationFunction implements MapFunction<MetadataRecord, M
|
|||
processor.registerExtensionFunction(new DateCleaner());
|
||||
processor.registerExtensionFunction(new PersonCleaner());
|
||||
|
||||
processor.registerExtensionFunction(new DataFetcher());
|
||||
|
||||
final XsltCompiler comp = processor.newXsltCompiler();
|
||||
QName datasourceIDParam = new QName(DATASOURCE_ID_PARAM);
|
||||
comp.setParameter(datasourceIDParam, new XdmAtomicValue(value.getProvenance().getDatasourceId()));
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
package eu.dnetlib.dhp.transformation.xslt;
|
||||
|
||||
import org.json.JSONObject;
|
||||
import org.junit.jupiter.api.AfterEach;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
class DataFetcherTest {
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
void tearDown() {
|
||||
}
|
||||
|
||||
@Test
|
||||
void getJson() throws IOException, URISyntaxException {
|
||||
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
||||
JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
|
||||
|
||||
String x = testJsonObj
|
||||
.getJSONArray("data")
|
||||
.getJSONObject(0)
|
||||
.getJSONObject("embeds")
|
||||
.getJSONObject("users")
|
||||
.getJSONObject("data")
|
||||
.getJSONObject("attributes")
|
||||
.getString("full_name");
|
||||
System.out.println(x);
|
||||
System.out.println(testJsonObj.getJSONArray("data").length());
|
||||
testJsonObj.getJSONArray("data").forEach(System.out::println);
|
||||
}
|
||||
|
||||
@Test
|
||||
void getAuthorsFromJson() throws IOException, URISyntaxException {
|
||||
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
||||
JSONObject testJsonObj = DataFetcher.getJson(contributorsUrl);
|
||||
List<String> authors = DataFetcher.getAuthorsFromJson(testJsonObj);
|
||||
System.out.println(authors);
|
||||
System.out.println(DataFetcher.transformListToDublinCore(authors));
|
||||
}
|
||||
|
||||
@Test
|
||||
void getAndTransformAuthors() throws IOException, URISyntaxException {
|
||||
URL contributorsUrl = new URI("https://api.osf.io/v2/preprints/mrwqb/contributors/?format=json").toURL();
|
||||
System.out.println(DataFetcher.getAndTransformAuthors(contributorsUrl));
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
void getLinkToFulltextFromJson() throws URISyntaxException, IOException {
|
||||
URL linkToFullTextDocument = new URI("https://api.osf.io/v2/files/5de7c96f84c479000c7928af/?format=json").toURL();
|
||||
System.out.println(DataFetcher.getFullTextLinkAndTransform(linkToFullTextDocument));
|
||||
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue