Categories
Source Code in Java for quick fixes

Java code to get all your URLs to make a sitemap

This Java program uses some existing libraries to copy files.
The logic is to parse the website, extract all the URLs, and visit them recursively until all of the links have been visited. Sometimes our pages contain links to external websites, so we scan only the webpages whose URL contains a phrase string — i.e., the website name — which is constant across all pages.

import java.net.*;
import java.io.*;
import javax.xml.parsers.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.jsoup.nodes.Document;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.jsoup.select.Selector;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.w3c.tidy.Tidy;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import java.util.Scanner;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import redstone.xmlrpc.XmlRpcFault;
import redstone.xmlrpc.XmlRpcArray;
import net.bican.wordpress.Page;
import net.bican.wordpress.PageDefinition;
import net.bican.wordpress.Wordpress;
import java.net.MalformedURLException;
import java.util.List;
import java.util.*;
import java.util.LinkedList;

public class getallurl {

public static class GenQueue {
private LinkedList list = new LinkedList();
public void enqueue(String item) {
list.addLast(item);
}
public String dequeue() {
return list.poll().toString();
}
public boolean hasItems() {
return !list.isEmpty();
}
public int size() {
return list.size();
}
public int indexOf(String item) {
return list.indexOf(item);
}

}

public static void main(String args[]) throws Exception {

DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
dbf.setValidating(false);
dbf.setNamespaceAware(true);
dbf.setIgnoringComments(false);
dbf.setIgnoringElementContentWhitespace(false);
dbf.setExpandEntityReferences(false);
DocumentBuilder db = dbf.newDocumentBuilder();
String oracle = args[0];
String phrase = args[1];

Document doc = Jsoup.connect(args[0]).get();
List linksvisited = new ArrayList ();
GenQueue linkstobevisted = new GenQueue();
getURLlist(oracle,linksvisited,linkstobevisted,phrase);
while(linkstobevisted.hasItems())
{
String newlink = linkstobevisted.dequeue();
getURLlist(newlink,linksvisited,linkstobevisted,phrase);
}

}

public static int getURLlist(String ul, List linksvisited, GenQueue linkstobevisted, String phrase) throws Exception
{
int i = 0;
try{
Document doc = Jsoup.connect(ul).get();
System.out.println(ul);
linksvisited.add(ul);

Elements links = doc.select(“a[href]”);
Iterator itr = links.iterator();
while(itr.hasNext()) {
Element el = itr.next();
i++;
String st = el.attr(“abs:href”);

if (st.contains(“http://”) && st.contains(phrase) && !(st.equals(ul)) && !(linksvisited.contains(st)) && (linkstobevisted.indexOf(st) == -1))
{
linkstobevisted.enqueue(st);

if (s != -1){
System.out.println( “Copying ………… ” + st);
copywebsitemt cp = new copywebsitemt();
cp.copyw(st);
Elements linksimages = doc.select(“img[src$=.jpg]”);
Iterator itrimages = linksimages.iterator();
while(itrimages.hasNext()) {
Element elimages = itrimages.next();
String myURL = “http://”;
String stimages = elimages.attr(“src”);
stimages = stimages.replace(“../”, “”);
stimages = stimages.replace(“http://”, “”);
stimages = myURL + stimages;
System.out.println( “Copying ………… ” + stimages);
copywebsiteimages cpimages = new copywebsiteimages();
cpimages.copyw(stimages);
}
}

}
}

catch (IOException e) {
System.err.println(“Caught IOException: ” + e.getMessage());
return -1;
}
return i;
}

public static int getURLlistimages(String ul, List linksvisited) throws Exception
{
int i = 0;
try{
Document doc = Jsoup.connect(ul).get();

System.out.println(ul);
String myURL = “http://mercurytrip.com/”;

linksvisited.add(ul);
Elements links = doc.select(“img[src$=.jpg]”);
System.out.println(links);
Iterator itr = links.iterator();

while(itr.hasNext()) {
Element el = itr.next();
i++;
String st = el.attr(“src”);
st = st.replace(“../”, “”);
st = myURL + st;
if (!(st.equals(ul)) && !(linksvisited.contains(st)))
{

System.out.println( “Copying ………… ” + st);
//copywebsiteimages cp = new copywebsiteimages();
//cp.copyw(st);
}
}

}

catch (IOException e) {
System.err.println(“Caught IOException: ” + e.getMessage());
return -1;
}
return i;
}
}

Categories
Source Code in Java for quick fixes

Java code to get all pictures from your facebook page

Facebook does allow an individual user to zip and download all pictures that he or she has ever uploaded.
If you are a business user then you do not have that ability.
If you are an individual and would like to download all your friends’ pictures and the pictures that you have been tagged in – how do you do it?

You will have to use the facebook graph API.
Once you log in to your account and get the graphical interface for graph API you can run some cool queries to get information about your graph.
One useful thing to do with it is to get a list of URLs with images from your page.
Once you have the list of image URLs stored away in a file, then use this Java program to extract the files to your computer to use for prints etc…

Having difficulty doing it on your own – we can do it for you.

import java.net.*;
import java.io.*;
import java.util.Date;
import java.util.StringTokenizer;
import javax.imageio.stream.ImageOutputStream;
import javax.imageio.stream.ImageInputStream;
import javax.imageio.*;
import java.util.*;
import java.awt.image.BufferedImage;
/**
 * Downloads every image listed (one URL per line) in test.txt to the local
 * directory C:/i/, recreating each URL's path as local subdirectories.
 */
class copywebsitefb
{
    public static void main(String[] args)
    {
        BufferedReader br = null;
        try
        {
            // BUG FIX: dropped the pointless DataInputStream wrapper and the
            // unused dirFlag local; the reader is now closed in the finally below.
            br = new BufferedReader(new InputStreamReader(new FileInputStream("test.txt")));
            String strLine;
            while ((strLine = br.readLine()) != null) {
                URL url = new URL(strLine);
                System.out.println("Opening connection to " + strLine + "…");
                URLConnection urlC = url.openConnection();

                // Local target mirrors the URL's path under C:/i
                File f = new File("C:/i" + url.getFile());
                System.out.println(f.toString());
                try {
                    System.out.println(f.getParentFile().mkdirs());
                } catch (SecurityException se) {
                    System.out.println("Error while creating directory in Java:" + se);
                }

                BufferedInputStream in1 = null;
                FileOutputStream out1 = null;
                try {
                    in1 = new BufferedInputStream(urlC.getInputStream());
                    // BUG FIX: write to the same File we created directories for —
                    // the original mixed "C:/i" (mkdirs) with "C:\\i\\" (output),
                    // producing a different, possibly nonexistent path.
                    out1 = new FileOutputStream(f);

                    byte[] bytesIn = new byte[1024];
                    int n;
                    while ((n = in1.read(bytesIn)) >= 0) {
                        out1.write(bytesIn, 0, n);
                    }
                } finally {
                    // BUG FIX: close both streams even when the copy fails part-way
                    // (the original leaked them on any IOException).
                    if (out1 != null) out1.close();
                    if (in1 != null) in1.close();
                }
            }
        }
        catch (MalformedURLException e)
        { System.err.println(e.toString()); }
        catch (IOException e)
        { System.err.println(e.toString()); }
        finally {
            if (br != null) {
                try { br.close(); } catch (IOException ignored) { /* best-effort close */ }
            }
        }
    }
}