'use strict';
import {properties} from "../../explore/src/environments/environment";
import {SearchResearchResultsService} from "../../explore/src/app/openaireLibrary/services/searchResearchResults.service";
import {ResultPreview} from "../../explore/src/app/openaireLibrary/utils/result-preview/result-preview";
import {Identifier} from "../../explore/src/app/openaireLibrary/utils/string-utils.class";
import {SearchFields} from "../../explore/src/app/openaireLibrary/utils/properties/searchFields";
import {ContextsService} from "../../explore/src/app/openaireLibrary/claims/claim-utils/service/contexts.service";

const request = require('superagent');

/**
 * Requests the refine filters from `refineUrl` and, for every usable filter value,
 * issues one search request whose response is handed to `parseAllUrls` together
 * with a Set shared across all requests.
 *
 * Behaviour visible in this function:
 *  - values without a name/id, or whose name marks them as unknown / not available /
 *    unidentified / Undetermined, are logged and skipped;
 *  - for the "community" key, `communitiesPromise` is awaited first and only ids listed
 *    in `publicCommunities` are queried;
 *  - 500 ms are awaited before each search request is started;
 *  - failed requests are logged and recorded in the error file instead of aborting the run;
 *  - once all requests have settled, the duplicate/unique counters are logged, "\n" is
 *    appended to the output file and the "total_time" console timer is ended.
 *
 * @param resultsPerUrl value sent as the `size` query parameter of every search request
 */
function get(resultsPerUrl) {
  // setTimeout without a delay: the work below starts only after the current call stack has unwound.
  setTimeout(() => {
    const searchFields = new SearchFields();
    const fieldIdsMap = searchFields.RESULT_FIELDS;

    request.get(refineUrl, async function (err: any, refineResponse: any) {
      if (!refineResponse && err) {
        console.error("Error getting refine filters ", err);
        return;
      }

      // A response without a body or without 'refineResults' yields no keys instead of
      // crashing on `null.length`.
      const refineResults = (refineResponse.body && refineResponse.body['refineResults']) || {};
      const keys = Object.keys(refineResults);
      console.log("number of keys: " + keys.length);

      const allUrls = new Set();
      const promiseArray: Promise<void>[] = [];

      for (const key of keys) {
        if (key == "community") {
          // publicCommunities is consulted below, so wait for the communities request first.
          await communitiesPromise;
        }

        const values = refineResults[key] || [];
        console.log("key: " + key + ", number of values: " + values.length);

        // Without a field definition there is no equality operator to build the filter with.
        const field = fieldIdsMap[key];
        if (!field) {
          console.error("No search field definition for key: " + key);
          continue;
        }

        for (const value of values) {
          if (!value || !value.name || !value.id
            || value.name.toLowerCase().includes('unknown')
            || value.name.toLowerCase().includes('not available')
            || value.name == "unidentified"
            || value.name == "Undetermined") {
            console.log("filtered out: " + (value ? ("name: " + value.name + " - id: " + value.id) : value));
            continue;
          }

          if (key == "community") {
            // Only the part of the id before the first "||" is compared with publicCommunities.
            const valueId = value.id.split("||")[0];
            if (!valueId || !publicCommunities.includes(valueId)) {
              console.log("hidden community: " + valueId);
              continue;
            }
          }

          const url = resultsUrlPrefix + "&fq=" + key + " " + field.equalityOperator + " \"" + encodeURIComponent(value.id) + "\"" + "&type=results&page=0&size=" + resultsPerUrl;

          // Pause before starting the next request.
          await new Promise(resolve => setTimeout(resolve, 500));

          promiseArray.push(
            new Promise<void>((resolve, reject) => {
              request.get(url, function (err: any, response: any) {
                if (!response && err) {
                  reject(err);
                  return;
                }
                try {
                  parseAllUrls(response, allUrls);
                  resolve();
                } catch (parseError) {
                  // Settle the promise even when parsing throws; otherwise Promise.all below
                  // would never complete.
                  reject(parseError);
                }
              });
            }).catch(error => {
              console.error("Error getting results ", error);
              // appendFileSync accepts strings/buffers only, so the error is stringified explicitly.
              fs.appendFileSync("./" + errorFileName, "no response " + url + " " + String(error) + "\n");
            }));
        }
        console.log("");
      }

      await Promise.all(promiseArray);
      console.log("\nDuplicate urls: " + alreadyin + " vs unique urls: " + notin);

      // NOTE(review): only "\n" is appended here; if the output file is an XML sitemap it
      // presumably also needs its closing root tag - confirm against the original script.
      fs.appendFile("./" + fileName, "\n", function (err) {
        if (err) {
          return console.log("Error appending in file " + fileName + ": ", err);
        }
        console.timeEnd("total_time");
      });
    });
  });
}
// }); function parseAllUrls(response: any, allUrls: any) { // let allUrls: any = []; let responses: any = response.body['results']; let searchResearchResultsService: any = new SearchResearchResultsService(); // if(responses) { // let length = Array.isArray(responses) ? responses.length : 1; // for (let i = 0; i < length; i++) { // let resData = Array.isArray(responses) ? 
responses[i]['result']['metadata']['oaf:entity']['oaf:result'] : responses['result']['metadata']['oaf:entity']['oaf:result']; // // if (resData['pid']) { // if (!Array.isArray(resData['pid'])) { // if (resData['pid'].classid && resData['pid'].classid == 'doi') { // if (resData['pid'].content != '' && resData['pid'].content != null) { // console.log("|"+resData['pid'].content+"| "+(typeof resData['pid'].content)); // resData['pid'].content.replace("https://doi.org/", ""); // } // } // } // } // } // } let searchResults: any = searchResearchResultsService.parseResults("result", responses, properties); if(searchResults.length < 100 && searchResults.length > 0) { console.log("num of results: "+searchResults.length + " " + response.request.url); } if(searchResults.length == 0) { fs.appendFileSync("./"+errorFileName, response.statusCode+" "+response.request.url+"/n"); } for(let j=0; j\n" + ""; fs.writeFile("./"+fileName, sitemap, function(err) { if(err) { return console.log("Error writing in file "+fileName+": ", err); } }); getCommunities(); get(resultsPerUrl); } let fileName; let errorFileName; const fs = require('fs'); let alreadyin = 0; // duplicate urls let notin= 0; let communitiesPromise; let publicCommunities = []; const refineUrl = "https://services.openaire.eu/search/v2/api/resources2/?format=json&refine=true&fields=resultbestaccessright&fields=relfunder&fields=instancetypename&fields=resultlanguagename&fields=community&fields=resulthostingdatasource&fields=country&type=results&page=0&size=0"; const resultsUrlPrefix = "https://services.openaire.eu/search/v2/api/resources2/?format=json"; const landingPrefix = "https://explore.openaire.eu/search/"; const contextUrl = "https://services.openaire.eu/openaire/contexts/"; buildSiteMap(100);