-
March 3rd, 2014, 02:15 AM
#1
Has anyone here got any experience on htmlparser?
I am trying to get the actual picture behind the thumbnails.
But I couldn't get to them if you try the following code.
Anyone can help me to fix this?
Thanks
Jack
Code:
package spiderdemo;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import org.apache.commons.io.FilenameUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.util.EntityUtils;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.lexerapplications.thumbelina.Thumbelina;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.nodes.TextNode;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeList;
public class SpiderDemo {
private static final String hkepcURL = "http://www.hkepc.com";
private static void downloadArticleImage() throws Exception {
Parser parser = new Parser();
parser.setURL(hkepcURL);
NodeList imageTags = parser.parse(new NodeClassFilter(ImageTag.class));
for(int i=0; i<imageTags.size(); i++){
try {
ImageTag it = (ImageTag)imageTags.elementAt(i);
Thumbelina thumbnaila = new Thumbelina(it.getImageURL());
// has url
if (thumbnaila.getCurrentURL() != null) {
System.out.println("Picture Link:" + thumbnaila.getCurrentURL());
HttpClient httpclient = HttpClientBuilder.create().build();
// has no url
if (thumbnaila.getCurrentURL() != null) {
HttpGet httpget = new HttpGet(thumbnaila.getCurrentURL());
///////////////////////
HttpResponse response = httpclient.execute(httpget);
HttpEntity entity = response.getEntity();
if(null != entity){
byte[] images = EntityUtils.toByteArray(entity);
String name = FilenameUtils.getName(thumbnaila.getCurrentURL());
if (name != null) {
//System.out.println("Saving to D:/Download/" + name);
File yourFile = new File("D:/Download/" + name);
if(!yourFile.exists()) {
yourFile.createNewFile();
}
FileOutputStream oFile = new FileOutputStream(yourFile, false);
if (oFile != null) {
oFile.write(images);
System.out.println("Picture [" + thumbnaila.getCurrentURL() + "] Download Completed");
}
}
}
}
}
} catch (IOException e) {
continue;
}
}
}
public static void main(String argv[]) throws Exception {
downloadArticleImage();
}
}
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|