CheckLinks.java |
/* * $Id: CheckLinks.java,v 1.44 2012/03/15 21:07:39 agoubard Exp $ * * See the COPYRIGHT file for redistribution and use restrictions. */ package org.xins.server; import java.io.IOException; import java.net.ConnectException; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.http.HttpResponse; import org.apache.http.client.methods.HttpOptions; import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.conn.ConnectTimeoutException; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; import org.apache.http.params.CoreConnectionPNames; import org.w3c.dom.Element; import org.xins.common.MandatoryArgumentChecker; import org.xins.common.Utils; import org.xins.common.service.Descriptor; import org.xins.common.service.TargetDescriptor; import org.xins.common.xml.ElementBuilder; /** * Checks all the links in the given <code>descriptor</code>s list and builds * a <code>FunctionResult</code>. It connects to each link in * {@link TargetDescriptor}s in {@link Descriptor}s list using a * {@link URLChecker} and calculates the total links count and * total links failures. The returned {@link FunctionResult} contains * information about total links checked, failures and details. * * The following example uses a {@link CheckLinks} object to get the * {@link FunctionResult}. * * <blockquote><pre> * FunctionResult result = CheckLinks.checkLinks(descriptorList); * * // Returns parameters * result.getParameters(); * </pre></blockquote> * * @version $Revision: 1.44 $ $Date: 2012/03/15 21:07:39 $ * @author <a href="mailto:tauseef.rehman@orange-ftgroup.com">Tauseef Rehman</a> */ class CheckLinks { /** * The failure message to be added in the <code>FunctionResult</code> when * the exception is <code>UnknownHostException</code>. */ private static final String UNKNOWN_HOST = "UnknownHost"; /** * The failure message to be added in the <code>FunctionResult</code> when * the exception is <code>ConnectTimeoutException</code> or the message * of the exception starts with "Connect timed out". */ private static final String CONNECTION_TIMEOUT = "ConnectionTimeout"; /** * The failure message to be added in the <code>FunctionResult</code> when * the exception is <code>ConnectException</code>. */ private static final String CONNECTION_REFUSAL = "ConnectionRefusal"; /** * The failure message to be added in the <code>FunctionResult</code> when * the exception is <code>SocketTimeoutException</code>. */ private static final String SOCKET_TIMEOUT = "SocketTimeout"; /** * The failure message to be added in the <code>FunctionResult</code> when * the exception is <code>IOException</code>. */ private static final String OTHER_IO_ERROR = "OtherIOError"; /** * The failure message to be added in the <code>FunctionResult</code> when * the exception is an unknown <code>Exception</code>. */ private static final String OTHER_FAILURE = "OtherFailure"; /** * The success message to be added in the <code>FunctionResult</code>. */ private static final String SUCCESS = "Success"; /** * HTTP retry handler that does not allow any retries. */ private static DefaultHttpRequestRetryHandler NO_RETRIES = new DefaultHttpRequestRetryHandler(0, false); /** * Checks all the links in <code>TargetDescriptor</code>s inside the * <code>Descriptor</code> list and builds a <code>FunctionResult</code>. * First gets all the {@link TargetDescriptor}s from the * {@link Descriptor}s list then creates {@link URLChecker} threads with * {@link TargetDescriptor}s and runs them. When all the threads have * finished execution, the {@link FunctionResult} is built and returned. * The returned {@link FunctionResult} contains all the links which were * checked with their results. * * @param descriptors * the list of {@link Descriptor}s defined in the runtime properties, * cannot be <code>null</code>. * * @return * the constructed {@link FunctionResult} object, never * <code>null</code>. * * @throws IllegalArgumentException * if <code>descriptors == null</code>. */ static FunctionResult checkLinks(List<Descriptor> descriptors) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("descriptors", descriptors); List<URLChecker> threads = new ArrayList<URLChecker>(); if (!descriptors.isEmpty()) { // Get all the targets from the descriptor list List<TargetDescriptor> targetDescriptors = getTargetDescriptors(descriptors); // Create the thread for each target and run them threads = createAndRunUrlCheckers(targetDescriptors); // Get the biggest time-out from all the targets int timeout = getBiggestTimeout(targetDescriptors); // Wait till all the threads finish their execution or timedout. waitTillThreadsRunning(threads, timeout); // Confirm all threads have finished their execution. confirmThreadsStopped(threads); } // Start building the result FunctionResult builder = new FunctionResult(); int errorCount = (!descriptors.isEmpty()) ? addCheckElements(builder, threads) : 0; builder.param("linkCount", String.valueOf(threads.size())); builder.param("errorCount", String.valueOf(errorCount)); return builder; } /** * Creates a list of <code>TargetDescriptor</code>s from the * given <code>Descriptor</code>s list. Each {@link Descriptor} in the * list contains a list of {@link TargetDescriptor}s, which are added to * the returned list. * * @param descriptors * the list of {@link Descriptor}s, cannot be <code>null</code>. * * @return * the constructed {@link TargetDescriptor}s list, never * <code>null</code>. * * @throws IllegalArgumentException * if <code>descriptors == null</code>. */ private static List<TargetDescriptor> getTargetDescriptors(List<Descriptor> descriptors) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("descriptors", descriptors); List targetDescriptors = new ArrayList(); // Each descriptor in the list contains target descriptors, so // iterate over descriptors and get all the target descriptors, then // iterate over each target descriptor and get the individual // target descriptors. for (Descriptor descriptor : descriptors) { for (TargetDescriptor targetDescriptor : descriptor) { // Add all the target descriptors in a list targetDescriptors.add(targetDescriptor); } } return targetDescriptors; } /** * Creates and runs a thread for each <code>TargetDescriptor</code> in the * given list. Each {@link TargetDescriptor} in the list contains a URL. A * {@link URLChecker} thread is created for each {@link TargetDescriptor}, * which tries to connect to the URL provided in the * {@link TargetDescriptor}. Each thread is then added to a list which is * returned. * * @param targetDescriptors * the list of {@link TargetDescriptor}s which needs to be checked, * cannot be <code>null</code>. * * @return * the constructed {@link URLChecker}s list, never <code>null</code>. * * @throws IllegalArgumentException * if <code>targetDescriptors == null</code>. */ private static List<URLChecker> createAndRunUrlCheckers(List<TargetDescriptor> targetDescriptors) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("targetDescriptors", targetDescriptors); // Iterate over all target descriptors List<URLChecker> threads = new ArrayList<URLChecker>(); for (TargetDescriptor target : targetDescriptors) { // Create a thread for the target descriptor URLChecker urlThread = new URLChecker(target); // Start the thread with target descriptor urlThread.start(); // Store the thread just started in a list threads.add(urlThread); } return threads; } /** * Returns the biggest time-out of all the URLs defined in * <code>TargetDescriptor</code>s list. Each {@link TargetDescriptor} in * the list has total time-out. The biggest of all of them is returned. * This time-out is then used to setup the time-outs of the * {@link URLChecker} threads. * * @param targetDescriptors * the list of {@link TargetDescriptor}s, cannot be <code>null</code>. * * @return * the biggest time-out from the list, or <code>-1</code> if none of the * target descriptors defines a time-out. * * @throws IllegalArgumentException * if <code>targetDescriptors == null</code>. */ private static int getBiggestTimeout(List<TargetDescriptor> targetDescriptors) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("targetDescriptors", targetDescriptors); int biggestTimeout = -1; // Iterate over all target descriptors for (TargetDescriptor target : targetDescriptors) { // Try to get the biggest time out of all the target descriptors if (biggestTimeout < target.getTotalTimeOut()) { biggestTimeout = target.getTotalTimeOut(); } } return biggestTimeout; } /** * Sets up the time-out for each thread and waits till each thread finishes * execution. The time-out is the biggest time-out of all the URLs in * {@link TargetDescriptor}s. Timeout for every next thread also considers * the time which is already spent and that time is subtracted from the * time-out for the current thread. * * @param threads * the list of {@link URLChecker} threads, cannot be <code>null</code>. * * @param timeout * the time-out for {@link URLChecker} threads. * * @throws IllegalArgumentException * if <code>threads == null</code>. */ private static void waitTillThreadsRunning(List<URLChecker> threads, int timeout) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("threads", threads); long threadTimeout = timeout; // Storing the time approximately when the first thread was started long startTime = System.currentTimeMillis(); try { for (URLChecker urlThread : threads) { urlThread.join(threadTimeout); // If the previous thread was setup with a certain time-out // the next thread should be setup with a time-out subtracted // by the time which is already passed. long endTime = System.currentTimeMillis(); long timePassed = endTime - startTime; threadTimeout = timeout - timePassed; // If the time-out becomes negative, it means that the total // time-out interval has passed now we do not need to setup // time-out for threads and they all should have finished // execution by now. if (threadTimeout <= 0) { return; } } } catch (InterruptedException exception) { // The exception is thrown when another thread has interrupted // the current thread. This should never happen so it should log // a programming error and throw a ProgrammingException. throw Utils.logProgrammingError(exception); } } /** * Confimrs that each <code>URLChecker</code> has finished its execution. * If some threads are still running, inforce a connection time-out and let * it run and ignore. * * @param threads * the list of {@link URLChecker} threads, cannot be <code>null</code>. * * @throws IllegalArgumentException * if <code>threads == null</code>. */ private static void confirmThreadsStopped(List<URLChecker> threads) throws IllegalArgumentException { for (URLChecker urlThread : threads) { // Check if thread is still alive. if (urlThread.isAlive()) { // Enforce a time-out for the thread and log it. urlThread.enforceTimeout(); Log.log_3505(urlThread.getURL()); } } } /** * Builds the <code>FunctionResult</code> for all the URLs checked. It * iterates over the list of all {@link URLChecker} threads and gets the * information like the total time each thread took to execute and the * result of the execution. The information is added in an * {@link ElementBuilder} object using which {@link org.w3c.dom.Element} * is created which then is added to the passed {@link FunctionResult}. * * @param builder * the {@link FunctionResult} where the result is added, cannot be * <code>null</code>. * * @param threads * the list of {@link URLChecker} threads, cannot be <code>null</code>. * * @return * the total number of URLs without success. * * @throws IllegalArgumentException * if <code>builder == null || threads == null</code>. */ private static int addCheckElements(FunctionResult builder, List<URLChecker> threads) throws IllegalArgumentException { int errorCount = 0; // Iterate over the threads of target descriptors and create the check element. for (URLChecker urlThread : threads) { Element check = builder.getDataElementBuilder().createElement("check"); check.setAttribute("url", urlThread.getURL()); check.setAttribute("duration", Long.toString(urlThread.getDuration())); check.setAttribute("result", getResult(urlThread)); builder.getDataElement().appendChild(check); if (!urlThread.getSuccess()) { errorCount ++; } } return errorCount; } /** * Returns the value for the result parameter which is added in the * <code>FunctionBuilder</code>. The value of the result depends on the * success or failure of the passed {@link URLChecker} thread. If the * {@link URLChecker} thread gives a success, the status code of the * {@link URLChecker} thread is used to create the value for result * parameter, otherwise the exception in the {@link URLChecker} thread * determines the value for the result parameter. * * @param urlThread * the {@link URLChecker} thread for which the result value is to * detemined, cannot be <code>null</code>. * * @return * the result message, never <code>null</code>. * * @throws IllegalArgumentException * if <code>urlThread == null || urlThread.hasRun() == false</code>. */ private static String getResult(URLChecker urlThread) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("urlThread", urlThread); if (! urlThread.hasRun()) { throw new IllegalArgumentException("urlThread().hasRun() == false"); } if (urlThread.getSuccess()) { return SUCCESS; } else { return getResult(urlThread.getException(), urlThread.getURL()); } } /** * Returns the value for the result parameter which is added in the * <code>FunctionBuilder</code> when the <code>URLChecker</code> thread * failed to connect the URL. The value for the result parameter depends * on the exception occured in the {@link URLChecker} thread. The * exception is passed to this method. Based on the type of exception, an * appropriate value is returned. * * @param exception * the {@link Throwable} exception occured in the {@link URLChecker} * thread, cannot be <code>null</code>. * * @param url * the url which threw the exception, cannot be <code>null</code>. * * @return * the result message, never <code>null</code>. * * @throws IllegalArgumentException * if <code>exception == null</code>. */ private static String getResult(Throwable exception, String url) throws IllegalArgumentException { // Check preconditions. MandatoryArgumentChecker.check("exception", exception, "url", url); String exceptionName = exception.getClass().getName(); String result; // DNS error, unknown host name if (exception instanceof UnknownHostException) { result = UNKNOWN_HOST; // Connection time-out } else if (exception instanceof ConnectTimeoutException) { result = CONNECTION_TIMEOUT; // Connection refused } else if (exception instanceof ConnectException) { result = CONNECTION_REFUSAL; // SocketTimeoutException is not available in older Java versions, // so we do not refer to the class to avoid a NoClassDefFoundError. } else if (exception instanceof java.net.SocketTimeoutException) { result = SOCKET_TIMEOUT; // Other I/O error } else if (exception instanceof IOException) { result = OTHER_IO_ERROR; // Other error, apparently not an I/O error } else { result = OTHER_FAILURE; } // Log the result and exception. Log.log_3502(exception, url, result); return result; } /** * Creates a new <code>CheckLinks</code> object. */ private CheckLinks() { // empty } /** * Tries to connect to a URL provided in the * <code>TargetDescriptor</code>. Runs as a separate thread. The URL is * connected by sending a request associated with an HTTP * <code>OPTIONS</code> method. Also calculates the total time to * connect to the provided URL. * * <p>The following example uses a {@link CheckLinks} object to get the * {@link FunctionResult}. * * <blockquote><pre>TargetDescriptor target = new TargetDescriptor(); * target.setURL("www.hotmail.com"); * * URLChecker urlThread = new URLChecker(target); * urlThread.start(); * * String URL = urlThread.getURL(); * int duration = urlThread.getDuration(); * boolean success = urlThread.getSuccess(); * if (!success) { * exception = urlThread.getException(); * }</pre></blockquote> * * @version $Revision: 1.44 $ $Date: 2012/03/15 21:07:39 $ * @author <a href="mailto:tauseef.rehman@orange-ftgroup.com">Tauseef Rehman</a> */ private static final class URLChecker extends Thread { /** * The target descriptor for which the URL needs to be checked. Never * <code>null</code>. */ private final TargetDescriptor _targetDescriptor; /** * The URL to be checked. Never <code>null</code>. */ private final String _url; /** * The exception thrown when accessing the URL. Can be * <code>null</code> if the <code>URLChecker</code> has not run yet, or * if there was no error. */ private Throwable _exception; /** * The result of the URL check. Is <code>true</code> if the * <code>URLChecker</code> has run and was successful. If either of * these conditions is not met, then <code>false</code>. */ private boolean _success; /** * The time taken to check the URL. Initially <code>-1</code>. */ private long _duration; /** * The status code returned when the URL was called. Initially * <code>-1</code>, when the <code>URLChecker</code> was not run yet. */ private int _statusCode; /** * Constructs a new <code>URLChecker</code> for the specified target * descriptor. * * @param targetDescriptor * the {@link TargetDescriptor}, whose URL needs to be checked, * cannot be <code>null</code>. * * @throws IllegalArgumentException * if <code>targetDescriptor == null</code>. */ public URLChecker(TargetDescriptor targetDescriptor) throws IllegalArgumentException { // Check preconditions MandatoryArgumentChecker.check("targetDescriptor", targetDescriptor); // Initialize fields _targetDescriptor = targetDescriptor; _url = targetDescriptor.getURL(); _duration = -1; _statusCode = -1; // Check postconditions if (_url == null) { throw Utils.logProgrammingError("_url == null"); } } /** * Runs this thread. It tries to connect to the URL provided in the * {@link TargetDescriptor}. The URL is connected by sending a request * associated with an HTTP <code>OPTIONS</code> method. It also * calculates the total time to connect to the provided URL and saves * the exception in case an exception occurs. * * @throws IllegalStateException * if this <code>URLChecker</code> has already run. */ public void run() throws IllegalStateException { // Check preconditions if (hasRun()) { throw new IllegalStateException("This URLChecker for URL: " + _url + "has already run."); } // Logging the start of this thread. Log.log_3503(_url, _targetDescriptor.getTotalTimeOut(), _targetDescriptor.getConnectionTimeOut(), _targetDescriptor.getSocketTimeOut()); // Register current time, to compute total duration later long startTime = System.currentTimeMillis(); HttpRequestBase optionsMethod = null; try { DefaultHttpClient client = new DefaultHttpClient(); // Set the socket time-out for the URL. client.getParams().setIntParameter(CoreConnectionPNames.SO_TIMEOUT, _targetDescriptor.getSocketTimeOut()); // Set the connection time-out for the URL. client.getParams().setIntParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, _targetDescriptor.getConnectionTimeOut()); client.setHttpRequestRetryHandler(NO_RETRIES); // Create a new OptionsMethod with the URL, this will represent // a request for information about the communication options // available on the request/response chain identified by the url. // This method allows the client to determine the options and/or // requirements associated with a resource, or the capabilities // of a server, without implying a resource action or initiating // a resource retrieval. optionsMethod = new HttpOptions(_url); // Execute the OptionsMethod. client.execute(optionsMethod); // Successfully executed, so set the success as true. _success = true; } catch (Throwable exception) { // Save the exception and set the success as false as the // execution was failed. _exception = exception; _success = false; } finally { optionsMethod.abort(); } // Calculate the total time taken to check the URL. _duration = System.currentTimeMillis() - startTime; // Logging the stopping of this thread. Log.log_3504(_url, _duration); } /** * Checks if this <code>URLChecker</code> has already run. * * @return * <code>true</code> if this <code>URLChecker</code> has already run, * or <code>false</code> otherwise. */ boolean hasRun() { return (_duration >= 0); } /** * Checks if this <code>URLChecker</code> has already run and if not, * throws an exception. * * @throws IllegalStateException * if this <code>URLChecker</code> has not run yet. */ private void assertHasRun() throws IllegalStateException { if (!hasRun()) { String message = "This URLChecker has not run yet. URL: \"" + _url + "\"."; throw new IllegalStateException(message); } } /** * Returns the total time it took to connect to the URL. * * @return * the total duration in milliseconds, or <code>-1</code> if this * thread has not run. * * @throws IllegalStateException * if this <code>URLChecker</code> has not run yet. */ public long getDuration() throws IllegalStateException { assertHasRun(); return _duration; } /** * Returns the flag indicating if the URL was connected successfully. * * @return * the success flag, Is <code>true</code> if this thread has run and * was successful. If either of these conditions is not met, * then <code>false</code>. * * @throws IllegalStateException * if this <code>URLChecker</code> has not run yet. */ public boolean getSuccess() throws IllegalStateException { assertHasRun(); return _success; } /** * Returns the status code of the method execution. * * @return * the status code returned when the URL was called. <code>-1</code>, * when this thread has not run. * * @throws IllegalStateException * if this <code>URLChecker</code> has not run yet. */ public int getStatusCode() throws IllegalStateException { assertHasRun(); return _statusCode; } /** * Returns the URL which was connected. * * @return * the URL, never <code>null</code>. * * @throws IllegalStateException * if this <code>URLChecker</code> has not run yet. */ public String getURL() throws IllegalStateException { assertHasRun(); return _url; } /** * Returns the exception thrown while trying to connect to the URL. * * @return * the exception, can be <code>null</code>. * * @throws IllegalStateException * if this <code>URLChecker</code> has not run yet. */ public Throwable getException() throws IllegalStateException { assertHasRun(); return _exception; } /** * Enforces a time-out on the <code>URLChecker</code> thread. Actualy * the thread is allowed to run and ignored. So set the duration as the * initial connection time-out value and create a new * {@link ConnectException}. */ public void enforceTimeout() { if (! hasRun()) { // Set the duration as was defined for connection time-out _duration = _targetDescriptor.getConnectionTimeOut(); // Create a new ConnectException. _exception = new ConnectException("Connect timed out"); // XXX: Currently it is observed that mostly the URLs which are // expected to throw a ConnectTimeoutException keeps on running // but we need to take care of the situation when because of some // other reason the thread is still active. } } } }