dnet-hadoop/dhp-workflows/dhp-graph-mapper/src/test/java/eu/dnetlib/dhp/enrich/orcid/ORCIDAuthorEnricherTest.java


package eu.dnetlib.dhp.enrich.orcid;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Objects;

import org.junit.jupiter.api.Test;
import org.junit.platform.commons.util.StringUtils;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;

import eu.dnetlib.dhp.schema.oaf.Author;

/**
 * Walks two classpath fixtures in lock-step (publication authors and ORCID authors) and prints,
 * for each record, how many authors carry an ORCID pid before and after enrichment.
 *
 * <p>
 * NOTE: the enrichment call itself is currently stubbed with an empty list, so this test only
 * verifies that both fixtures can be read and parsed; it makes no assertions on the result.
 */
public class ORCIDAuthorEnricherTest {

	private static final String PUBLICATION_SAMPLE = "/eu/dnetlib/dhp/enrich/orcid/authors_publication_sample.json";

	private static final String ORCID_SAMPLE = "/eu/dnetlib/dhp/enrich/orcid/authors_orcid_sample.json";

	/**
	 * Reads the publication fixture two lines at a time (identifier line, then author-list JSON)
	 * and the ORCID fixture alongside it (matched pid line, ORCID id line, then author-list JSON),
	 * printing per-record statistics to standard output.
	 *
	 * @throws Exception if a fixture is missing, cannot be read, or contains JSON that does not
	 *                   map to a list of {@link Author}
	 */
	@Test
	public void testEnrcichAuthor() throws Exception {
		final ObjectMapper mapper = new ObjectMapper();
		final TypeReference<List<Author>> authorListType = new TypeReference<List<Author>>() {
		};

		// try-with-resources guarantees both readers are closed even when parsing fails mid-file
		try (BufferedReader pr = openResource(PUBLICATION_SAMPLE);
			BufferedReader or = openResource(ORCID_SAMPLE)) {

			String pubLine;
			while ((pubLine = pr.readLine()) != null) {
				final String pubId = pubLine;
				final String MatchPidOrcid = or.readLine();
				final String pubOrcid = or.readLine();

				final String data = pr.readLine();

				// NOTE(review): the ORCID author line is consumed only when the publication author
				// line is not blank; confirm the fixtures never contain a blank publication line,
				// otherwise the two readers would drift out of sync.
				if (StringUtils.isNotBlank(data)) {
					final List<Author> publicationAuthors = mapper.readValue(data, authorListType);

					final String orcidData = Objects
						.requireNonNull(
							or.readLine(),
							"ORCID sample ended before the author list for publication " + pubId);
					final List<Author> orcidAuthors = mapper.readValue(orcidData, authorListType);

					System.out.printf("OAF ID = %s \n", pubId);
					System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);
					System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());
					System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());
					System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);

					final long originalAuthorWithPiD = countAuthorsWithOrcid(publicationAuthors);
					final long start = System.currentTimeMillis();

					// Enrichment is stubbed out: earlier revisions called
					// AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors) and later
					// SparkEnrichGraphWithOrcidAuthors.enrichOrcid(publicationAuthors, orcidAuthors).
					final List<Author> enrichedList = Collections.emptyList();

					final long enrichedAuthorWithPid = countAuthorsWithOrcid(enrichedList);

					final long totalTime = (System.currentTimeMillis() - start) / 1000;
					System.out
						.printf(
							"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime,
							originalAuthorWithPiD,
							enrichedAuthorWithPid);

					System.out.println("=================");
				}
			}
		}
	}

	/**
	 * Opens a classpath resource as a UTF-8 buffered reader.
	 *
	 * @param path absolute classpath location of the resource
	 * @return a reader over the resource; the caller is responsible for closing it
	 * @throws NullPointerException if the resource is not on the classpath
	 */
	private static BufferedReader openResource(final String path) {
		return new BufferedReader(new InputStreamReader(
			Objects
				.requireNonNull(
					ORCIDAuthorEnricherTest.class.getResourceAsStream(path),
					"Missing test resource: " + path),
			StandardCharsets.UTF_8));
	}

	/**
	 * Counts the authors having at least one pid whose qualifier class id contains "orcid"
	 * (case-insensitive). Authors without pids, and pids without a qualifier or class id, are
	 * skipped.
	 *
	 * @param authors the authors to inspect
	 * @return the number of authors with an ORCID-like pid
	 */
	private static long countAuthorsWithOrcid(final List<Author> authors) {
		return authors
			.stream()
			.filter(
				a -> a.getPid() != null && a
					.getPid()
					.stream()
					.anyMatch(
						p -> p.getQualifier() != null
							&& p.getQualifier().getClassid() != null
							// Locale.ROOT keeps the match stable under locales with special casing
							// rules (e.g. Turkish dotless i)
							&& p.getQualifier().getClassid().toLowerCase(Locale.ROOT).contains("orcid")))
			.count();
	}

}
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00
Enrich authors with ORCID info using new matching algorithm 2024-02-04 10:07:15 +01:00			`package eu.dnetlib.dhp.enrich.orcid;`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00
			`import java.io.BufferedReader;`
			`import java.io.InputStreamReader;`
Enrich authors with ORCID info using new matching algorithm 2024-02-04 10:07:15 +01:00			`import java.util.Collections;`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00			`import java.util.List;`
Implemented Author Merger for ORCID that takes into account the case when name and surname are swapped 2023-11-28 08:43:56 +01:00			`import java.util.Objects;`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00
			`import org.junit.jupiter.api.Test;`
			`import org.junit.platform.commons.util.StringUtils;`

			`import com.fasterxml.jackson.core.type.TypeReference;`
			`import com.fasterxml.jackson.databind.ObjectMapper;`

			`import eu.dnetlib.dhp.schema.oaf.Author;`

Enrich authors with ORCID info using new matching algorithm 2024-02-04 10:07:15 +01:00			`public class ORCIDAuthorEnricherTest {`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00
Implemented Author Merger for ORCID that takes into account the case when name and surname are swapped 2023-11-28 08:43:56 +01:00			`@Test`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00			`public void testEnrcichAuthor() throws Exception {`
			`final ObjectMapper mapper = new ObjectMapper();`

			`BufferedReader pr = new BufferedReader(new InputStreamReader(`
Changed implementation of check similarity to verify exact match of name instead of the first char 2023-11-29 11:17:41 +01:00			`Objects`
			`.requireNonNull(`
Enrich authors with ORCID info using new matching algorithm 2024-02-04 10:07:15 +01:00			`ORCIDAuthorEnricherTest.class`
			`.getResourceAsStream("/eu/dnetlib/dhp/enrich/orcid/authors_publication_sample.json"))));`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00			`BufferedReader or = new BufferedReader(new InputStreamReader(`
Changed implementation of check similarity to verify exact match of name instead of the first char 2023-11-29 11:17:41 +01:00			`Objects`
			`.requireNonNull(`
Enrich authors with ORCID info using new matching algorithm 2024-02-04 10:07:15 +01:00			`ORCIDAuthorEnricherTest.class`
			`.getResourceAsStream("/eu/dnetlib/dhp/enrich/orcid/authors_orcid_sample.json"))));`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00
			`TypeReference<List<Author>> aclass = new TypeReference<List<Author>>() {`
			`};`
			`String pubLine;`

			`int i = 0;`
			`while ((pubLine = pr.readLine()) != null) {`
			`final String pubId = pubLine;`
			`final String MatchPidOrcid = or.readLine();`
			`final String pubOrcid = or.readLine();`

			`final String data = pr.readLine();`

			`if (StringUtils.isNotBlank(data)) {`
			`List<Author> publicationAuthors = mapper.readValue(data, aclass);`
			`List<Author> orcidAuthors = mapper.readValue(or.readLine(), aclass);`
			`System.out.printf("OAF ID = %s \n", pubId);`
			`System.out.printf("ORCID Intersected ID = %s \n", pubOrcid);`
			`System.out.printf("OAF Author Size = %d \n", publicationAuthors.size());`
			`System.out.printf("Oricd Author Size = %d \n", orcidAuthors.size());`
			`System.out.printf("Oricd Matched PID = %s \n", MatchPidOrcid);`

			`long originalAuthorWithPiD = publicationAuthors`
			`.stream()`
			`.filter(`
			`a -> a.getPid() != null && a`
			`.getPid()`
			`.stream()`
			`.anyMatch(`
			`p -> p.getQualifier() != null`
			`&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))`
			`.count();`
			`long start = System.currentTimeMillis();`

			`// final List<Author> enrichedList = AuthorMerger.enrichOrcid(publicationAuthors, orcidAuthors);`
Enrich authors with ORCID info using new matching algorithm 2024-02-04 10:07:15 +01:00			`final List<Author> enrichedList = Collections.emptyList(); // SparkEnrichGraphWithOrcidAuthors.enrichOrcid(publicationAuthors,`
			`// orcidAuthors);`
Implemented ORCID Enrichment 2023-11-24 12:39:58 +01:00
			`long enrichedAuthorWithPid = enrichedList`
			`.stream()`
			`.filter(`
			`a -> a.getPid() != null && a`
			`.getPid()`
			`.stream()`
			`.anyMatch(`
			`p -> p.getQualifier() != null`
			`&& p.getQualifier().getClassid().toLowerCase().contains("orcid")))`
			`.count();`

			`long totalTime = (System.currentTimeMillis() - start) / 1000;`
			`System.out`
			`.printf(`
			`"Enriched authors in %d seconds from %d pid to %d pid \n", totalTime, originalAuthorWithPiD,`
			`enrichedAuthorWithPid);`

			`System.out.println("=================");`
			`}`
			`}`
			`}`

			`}`