Updated OpenGraph checkURL to retrieve images correctly when a permanent HTTP redirect is encountered

git-svn-id: https://svn.research-infrastructures.eu/d4science/gcube/trunk/portlets/user/share-updates@132816 82a268e6-3cf1-43bd-a215-b396298e98cf
2016-10-06 12:49:55 +00:00 · 2016-10-06 12:49:55 +00:00 · 7332351c32
parent f898cb6180
commit 7332351c32
8 changed files with 59 additions and 23 deletions
--- a/.classpath
+++ b/.classpath
@ -1,19 +1,27 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <classpath>
-	<classpathentry kind="src" output="target/share-updates-2.0.0-SNAPSHOT/WEB-INF/classes" path="src/main/java">
+	<classpathentry kind="src" output="target/share-updates-2.1.0-SNAPSHOT/WEB-INF/classes" path="src/main/java">
 		<attributes>
 			<attribute name="optional" value="true"/>
 			<attribute name="maven.pomderived" value="true"/>
 		</attributes>
 	</classpathentry>
-	<classpathentry including="**/*.java" kind="src" output="src/main/webapp/WEB-INF/classes" path="src/main/resources"/>
+	<classpathentry excluding="**" kind="src" output="target/share-updates-2.1.0-SNAPSHOT/WEB-INF/classes" path="src/main/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
 	<classpathentry kind="src" output="target/test-classes" path="src/test/java">
 		<attributes>
 			<attribute name="optional" value="true"/>
 			<attribute name="maven.pomderived" value="true"/>
 		</attributes>
 	</classpathentry>
-	<classpathentry including="**/*.java" kind="src" output="target/test-classes" path="src/test/resources"/>
+	<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
+		<attributes>
+			<attribute name="maven.pomderived" value="true"/>
+		</attributes>
+	</classpathentry>
 	<classpathentry kind="con" path="com.google.gwt.eclipse.core.GWT_CONTAINER"/>
 	<classpathentry kind="con" path="org.maven.ide.eclipse.MAVEN2_CLASSPATH_CONTAINER">
 		<attributes>
@ -31,5 +39,5 @@
 			<attribute name="maven.pomderived" value="true"/>
 		</attributes>
 	</classpathentry>
-	<classpathentry kind="output" path="target/share-updates-2.0.0-SNAPSHOT/WEB-INF/classes"/>
+	<classpathentry kind="output" path="target/share-updates-2.1.0-SNAPSHOT/WEB-INF/classes"/>
 </classpath>
--- a/.project
+++ b/.project
@ -55,5 +55,6 @@
 		<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
 		<nature>org.eclipse.wst.jsdt.core.jsNature</nature>
 		<nature>com.google.gwt.eclipse.core.gwtNature</nature>
+		<nature>com.liferay.ide.core.liferayNature</nature>
 	</natures>
 </projectDescription>
--- a/.settings/org.eclipse.wst.common.component
+++ b/.settings/org.eclipse.wst.common.component
@ -5,7 +5,10 @@
    <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/java"/>
        <wb-resource deploy-path="/WEB-INF/classes" source-path="/target/generated-sources/gwt"/>
        <wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/resources"/>
-        <dependent-module archiveName="social-util-library-1.0.0-SNAPSHOT.jar" deploy-path="/WEB-INF/lib" handle="module:/resource/social-util-library/social-util-library">
+        <dependent-module archiveName="gcube-widgets-2.0.0-SNAPSHOT.jar" deploy-path="/WEB-INF/lib" handle="module:/resource/gcube-widgets/gcube-widgets">
+            <dependency-type>uses</dependency-type>
+        </dependent-module>
+        <dependent-module archiveName="social-util-library-1.0.1-SNAPSHOT.jar" deploy-path="/WEB-INF/lib" handle="module:/resource/social-util-library/social-util-library">
            <dependency-type>uses</dependency-type>
        </dependent-module>
    <property name="java-output-path" value="/${module}/target/www/WEB-INF/classes"/>
--- a/.tern-project
+++ b/.tern-project
@ -1 +1,17 @@
-{"ide":{"scriptPaths":[]},"plugins":{"aui":{},"liferay":{},"yui":{}},"libs":["ecma5","browser"]}
+{
+  "plugins": {
+    "aui": {
+      
+    },
+    "liferay": {
+      
+    },
+    "yui": {
+      
+    }
+  },
+  "libs": [
+    "ecma5",
+    "browser"
+  ]
+}
--- a/distro/changelog.xml
+++ b/distro/changelog.xml
@ -1,4 +1,9 @@
 <ReleaseNotes>
+<Changeset component="org.gcube.portlets-user.shareupdates.2-1-0"
+		date="2016-09-29">
+		<Change>Updated OpenGraph checkURL to retrieve images correctly when a permanent HTTP redirect is encountered</Change>
+	</Changeset>
+
 	<Changeset component="org.gcube.portlets-user.shareupdates.2-0-0"
 		date="2016-06-29">
 		<Change>Updated to Liferay 6.2.5</Change>
--- a/pom.xml
+++ b/pom.xml
@ -13,7 +13,7 @@
 	<groupId>org.gcube.portlets.user</groupId>
 	<artifactId>share-updates</artifactId>
 	<packaging>war</packaging>
-	<version>2.0.0-SNAPSHOT</version>
+	<version>2.1.0-SNAPSHOT</version>
 	<name>gCube Share Updates Portlet</name>
 	<description>
 		gCube Share Updates for exchanging updates with other users of VREs.
--- a/src/main/java/org/gcube/portlets/user/shareupdates/server/ShareUpdateServiceImpl.java
+++ b/src/main/java/org/gcube/portlets/user/shareupdates/server/ShareUpdateServiceImpl.java
@ -107,7 +107,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements	Shar


 	/**
-	 * the current ASLSession
+	 * the current ASLSession
 	 * @return the session
 	 */
 	private ASLSession getASLSession() {
@ -122,7 +122,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements	Shar
 	}
 	public String getDevelopmentUser() {
 		String user = TEST_USER;
-		//		user = "costantino.perciante";
+				user = "costantino.perciante";
 		return user;
 	}
 	/**
@ -770,10 +770,10 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements	Shar
 		ArrayList<String> imageUrls = new ArrayList<String>();
 		//get the host from the url
 		String host = pageURL.getHost().replaceAll("www.", "");
-
 		//try openGraph First
 		OpenGraph ogLink = null;
 		try {
+			System.out.println("linkToCheck=" + linkToCheck);
 			ogLink = new OpenGraph(linkToCheck, true, siteConnection);
 			if (ogLink == null || ogLink.getContent("title") == null) { 
 				//there is no OpenGraph for this link
@ -787,7 +787,7 @@ public class ShareUpdateServiceImpl extends RemoteServiceServlet implements	Shar
 				description = ((description.length() > 256) ? description.substring(0, 256)+"..." : description);
 				//look for the image ask the guesser if not present
 				if (ogLink.getContent("image") != null) {
-					String imageUrl = TextTransfromUtils.getImageUrlFromSrcAttribute(pageURL, ogLink.getContent("image"));
+					String imageUrl = TextTransfromUtils.getImageUrlFromSrcAttribute(ogLink.getRealURL(), ogLink.getContent("image"));
 					imageUrls.add(imageUrl);
 					_log.trace("OpenGraph getImage = " +imageUrl) ;
 				}
--- a/src/main/java/org/gcube/portlets/user/shareupdates/server/opengraph/OpenGraph.java
+++ b/src/main/java/org/gcube/portlets/user/shareupdates/server/opengraph/OpenGraph.java
@ -1,10 +1,6 @@
 package org.gcube.portlets.user.shareupdates.server.opengraph;

-import org.htmlcleaner.HtmlCleaner;
-import org.htmlcleaner.TagNode;
-
 import java.io.BufferedReader;
-import java.io.IOException;
 import java.io.InputStreamReader;
 import java.net.URL;
 import java.net.URLConnection;
@ -14,15 +10,20 @@ import java.util.Hashtable;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;

+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.TagNode;
+
 /**
 * A Java object representation of an Open Graph enabled webpage.
 * A simplified layer over a Hastable.
 *
 * @author Callum Jones
+ * @author Massimiliano Assante
 */
-public class OpenGraph
-{
+public class OpenGraph {
 	private String pageUrl;
+	
+	private URL realURL;
 	private ArrayList<OpenGraphNamespace> pageNamespaces;
 	private Hashtable<String, ArrayList<MetaElement>> metaAttributes;
 	private String baseType;
@ -47,8 +48,7 @@ public class OpenGraph
 	/**
 	 * Create an open graph representation for generating your own Open Graph object
 	 */
-	public OpenGraph()
-	{
+	public OpenGraph() {
 		pageNamespaces = new ArrayList<OpenGraphNamespace>();
 		metaAttributes = new Hashtable<String, ArrayList<MetaElement>>();
 		hasChanged = false;
@ -70,14 +70,13 @@ public class OpenGraph
 		// download the (X)HTML content, but only up to the closing head tag. We do not want to waste resources parsing irrelevant content
 		Charset charset = getConnectionCharset(siteConnection);
 		BufferedReader dis = new BufferedReader(new InputStreamReader(siteConnection.getInputStream(), charset));
+		this.realURL = siteConnection.getURL();
 		String inputLine;
 		StringBuffer headContents = new StringBuffer();

 		// Loop through each line, looking for the closing head element
-		while ((inputLine = dis.readLine()) != null)
-		{
-			if (inputLine.contains("</head>"))
-			{
+		while ((inputLine = dis.readLine()) != null) {
+			if (inputLine.contains("</head>")) {
 				inputLine = inputLine.substring(0, inputLine.indexOf("</head>") + 7);
 				inputLine = inputLine.concat("<body></body></html>");
 				headContents.append(inputLine + "\r\n");
@ -156,6 +155,10 @@ public class OpenGraph
 		pageUrl = realURL.toExternalForm();
 	}

+	public URL getRealURL() {
+		return realURL;
+	}
+
 	/**
 	 * Gets the charset for specified connection.
 	 * Content Type header is parsed to get the charset name.