Saturday, June 2, 2012

A Utility To Inline Shared HTML

A website typically has sections that are shared among multiple pages, for example a common header and footer. It makes sense to factor the shared sections into separate included files. But the include mechanism is not standard: some servers support SSI, other servers require PHP includes or JSP, and still others have no server-side support for includes, so the including has to be done with JavaScript writes. None of these approaches is satisfactory. With a server-side mechanism that is not universally supported, files may require rewriting when changing hosting providers. With a client-side mechanism, there are more requests to the server. This utility eliminates both problems by inlining included HTML, CSS, and JavaScript before deployment:
/**
 * Copyright (c) 2012 jimandlisa LLC. All rights reserved.
 * This program is made available under the terms of the
 * Eclipse Public License v1.0, which is available at
 * http://www.opensource.org/licenses/eclipse-1.0.php.
 */

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Given a directory of source for a website, recursively navigates the directory structure,
 * inlining included files (which may in turn include files), and saving the result to a staging
 * directory. This eliminates the need for include-file solutions such as SSI, PHP, or Javascript.
 * It also speeds up page loads and eliminates requests. Included files must be located in a directory
 * called "includes" located in the root of the directory containing the source; pages in any
 * subdirectory resolve their includes against that one root-level directory. Include directives
 * must have the prefix and suffix pairs defined by INCLUDES_DELIMITERS, and the name of the included
 * file must be between the left and right parentheses defined by the prefix and suffix. Included files
 * must have simple names (no directories in the names), and must contain text in an ASCII-compatible
 * encoding (for example UTF-8); bytes are passed through unchanged, only line terminators are
 * normalized to the platform line separator. Does not process individual files, only the entire site;
 * for large sites this would not be acceptable, but for our application it only takes ~30 ms.
 * Included files must form a directed acyclic graph (no cycles); a cycle is reported as an error
 * naming the chain of includes that led to it.
 * @author Jim Showalter
 */
public class Includer {

    /**
     * Reserved name of includes directory. Must be contained directly within source directory.
     * Could of course be turned into an argument to the program, but was not necessary for
     * our application. Directories with this name are never copied to the target, at any depth.
     */
    private static final String INCLUDES_DIR = "includes";

    /**
     * Set of types that support includes. Must all be text format.
     */
    private static final Set<String> INCLUDING_TYPES = new HashSet<String>();

    static {
        // Initialize including types.
        INCLUDING_TYPES.add("html");
        INCLUDING_TYPES.add("css");
        INCLUDING_TYPES.add("js");
    }

    /**
     * Allowable prefixes for includes. Must match pairwise with allowable suffixes.
     */
    private static final List<String[]> INCLUDES_DELIMITERS = new ArrayList<String[]>();

    static {
        // Initialize includes delimiters.
        INCLUDES_DELIMITERS.add(new String[] {"<!--INCLUDE(\"", "\")INCLUDE-->"}); // HTML
        INCLUDES_DELIMITERS.add(new String[] {"/*--INCLUDE(\"", "\")INCLUDE--*/"}); // CSS
        INCLUDES_DELIMITERS.add(new String[] {"//--INCLUDE(\"", "\")INCLUDE--//"}); // JavaScript
    }

    /**
     * Charset used to read and write text files. ISO-8859-1 maps every byte to exactly one
     * character and back, so file content passes through byte-for-byte no matter what the
     * platform default charset is. The include delimiters are pure ASCII, so they are still
     * recognized in any ASCII-compatible encoding such as UTF-8.
     */
    private static final Charset TEXT_CHARSET = Charset.forName("ISO-8859-1");

    /**
     * Platform-specific line separator.
     */
    private static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /**
     * Outputs usage error and exits program with error code.
     * @param explanation Explanation of error.
     */
    private static void usage(String explanation) {
        System.out.println(explanation);
        System.out.println(Includer.class.getSimpleName() + ": <source directory> <target directory>");
        System.exit(-1);
    }

    /**
     * Determines longest common prefix substring for a pair of paths. Used to shorten log messages.
     * @param sourcePath Source path.
     * @param targetPath Target path.
     * @return Longest common prefix; empty if the paths differ at the first character, and the
     * whole shorter path if it is a prefix of the longer one. Never null.
     */
    private static String longestCommonPrefix(String sourcePath, String targetPath) {
        int limit = Math.min(sourcePath.length(), targetPath.length());
        int matched = 0;
        while (matched < limit && sourcePath.charAt(matched) == targetPath.charAt(matched)) {
            matched++;
        }
        return sourcePath.substring(0, matched);
    }

    /**
     * Trims a common prefix back to the last file separator it contains, so that shortened
     * paths start at a directory boundary instead of in the middle of a name.
     * @param prefix Common prefix of two paths.
     * @return Prefix up to and including its last file separator, or empty if it has none.
     */
    private static String directoryPrefix(String prefix) {
        // lastIndexOf yields -1 when there is no separator, which makes the result empty.
        return prefix.substring(0, prefix.lastIndexOf(File.separatorChar) + 1);
    }

    /**
     * Shortens path by removing the longest prefix it has in common with another path.
     * @param path Path to shorten.
     * @param longestCommonPrefix Longest common prefix.
     * @return Shortened path, or the path unchanged if it does not start with the prefix.
     */
    private static String shorten(String path, String longestCommonPrefix) {
        if (longestCommonPrefix == null || !path.startsWith(longestCommonPrefix)) {
            return path;
        }
        return path.substring(longestCommonPrefix.length());
    }

    /**
     * Logs a progress message.
     * @param prefix Beginning of logged message.
     * @param source Source file or directory.
     * @param target Target file or directory.
     * @param longestCommonPrefix Longest common prefix.
     */
    private static void log(String prefix, File source, File target, String longestCommonPrefix) {
        String sourcePath = shorten(source.getAbsolutePath(), longestCommonPrefix);
        String targetPath = shorten(target.getAbsolutePath(), longestCommonPrefix);
        System.out.println(prefix + " copy: " + sourcePath + " => " + targetPath);
    }

    /**
     * Determines whether file name ends with a suffix that is one of the including types.
     * @param fileName File name to check.
     * @return True if file is an including type, false otherwise.
     */
    private static boolean isIncludingType(String fileName) {

        int dotIndex = fileName.lastIndexOf('.');

        if (dotIndex == -1) {
            return false;
        }

        String extension = fileName.substring(dotIndex + 1);

        return INCLUDING_TYPES.contains(extension);
    }

    /**
     * Extracts name of included file from current line if it is an include directive.
     * @param line Line.
     * @return Include file name, or null if current line is not an include directive.
     */
    private static String includeFileName(String line) {
        String trimmedLine = line.trim();
        for (String[] delimiters : INCLUDES_DELIMITERS) {
            String prefix = delimiters[0];
            String suffix = delimiters[1];
            if (trimmedLine.startsWith(prefix) && trimmedLine.endsWith(suffix)) {
                return trimmedLine.substring(prefix.length(), trimmedLine.length() - suffix.length());
            }
        }
        return null;
    }

    /**
     * Determines whether one file is the same as, or an ancestor directory of, another.
     * Both arguments should be canonical so that relative segments and links do not hide nesting.
     * @param ancestor Candidate ancestor.
     * @param descendant Candidate descendant.
     * @return True if ancestor equals descendant or one of its parents, false otherwise.
     */
    private static boolean isSameOrAncestor(File ancestor, File descendant) {
        for (File current = descendant; current != null; current = current.getParentFile()) {
            if (current.equals(ancestor)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Opens a text file for line-by-line reading using the pass-through charset.
     * @param file File to read.
     * @return Buffered reader over the file; the caller must close it.
     * @throws IOException Problem encountered opening the file.
     */
    private static BufferedReader openReader(File file) throws IOException {
        return new BufferedReader(new InputStreamReader(new FileInputStream(file), TEXT_CHARSET));
    }

    /**
     * Opens a text file for writing using the pass-through charset.
     * @param file File to write.
     * @return Buffered writer over the file; the caller must close it.
     * @throws IOException Problem encountered creating the file.
     */
    private static BufferedWriter openWriter(File file) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), TEXT_CHARSET));
    }

    /**
     * Copies a file that does not have includes. Does not use NIO's transferTo or other new mechanisms
     * because, depending on file size, OS, etc., performance can actually wind up worse.
     * @param sourceFile File to copy from.
     * @param targetFile File to copy to.
     * @param longestCommonPrefix Longest common prefix.
     * @throws IOException Problem encountered opening, reading, creating, writing, or closing files.
     */
    private static void simpleCopy(File sourceFile, File targetFile, String longestCommonPrefix) throws IOException {

        log("simple", sourceFile, targetFile, longestCommonPrefix);

        InputStream in = new FileInputStream(sourceFile);
        try {
            OutputStream out = new FileOutputStream(targetFile);
            try {
                byte[] buf = new byte[1024 * 10];
                int len;
                // read() signals end of stream with -1.
                while ((len = in.read(buf)) != -1) {
                    out.write(buf, 0, len);
                }
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Reads contents of include file into memory, expanding nested include directives, and caches
     * it so it doesn't have to be read again. For large sites this could be too much of a memory
     * hog, in which case an eviction strategy could be implemented (for example, keep the
     * most-often referenced files).
     * @param includesDir Directory that holds the include files.
     * @param includeFileName Name of include file.
     * @param includeFiles Cache of already included files, keyed by include file name.
     * @param inProgress Names of the include files currently being expanded, outermost first.
     * Used to detect cycles; it is left as it was found when this method returns.
     * @return Contents of include file with all nested includes expanded.
     * @throws IOException Problem encountered opening, reading, or closing include file, or an
     * include cycle was detected.
     */
    private static StringBuilder get(File includesDir, String includeFileName, Map<String, StringBuilder> includeFiles, List<String> inProgress) throws IOException {

        System.out.println("get: " + includeFileName);

        StringBuilder cached = includeFiles.get(includeFileName);
        if (cached != null) {
            return cached;
        }

        // A file that is still being expanded and is requested again includes itself, directly or not.
        if (inProgress.contains(includeFileName)) {
            throw new IOException("Include cycle detected: " + inProgress + " -> " + includeFileName);
        }

        StringBuilder includeFileContents = new StringBuilder();
        BufferedReader reader = openReader(new File(includesDir, includeFileName));
        inProgress.add(includeFileName);
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String subIncludeFileName = includeFileName(line);
                if (subIncludeFileName != null) {
                    includeFileContents.append(get(includesDir, subIncludeFileName, includeFiles, inProgress));
                } else {
                    includeFileContents.append(line);
                    includeFileContents.append(LINE_SEPARATOR);
                }
            }
        } finally {
            inProgress.remove(inProgress.size() - 1);
            reader.close();
        }

        includeFiles.put(includeFileName, includeFileContents);
        System.out.println("put: " + includeFileName);

        return includeFileContents;
    }

    /**
     * Copies a file that might have includes, replacing each include directive line with the
     * expanded contents of the named include file.
     * @param sourceFile File to copy from.
     * @param targetFile File to copy to.
     * @param includesDir Directory that holds the include files.
     * @param longestCommonPrefix Longest common prefix.
     * @param includeFiles Cache of already included files.
     * @throws IOException Problem encountered opening, reading, creating, writing, or closing files.
     */
    private static void includeCopy(File sourceFile, File targetFile, File includesDir, String longestCommonPrefix, Map<String, StringBuilder> includeFiles) throws IOException {

        log("file", sourceFile, targetFile, longestCommonPrefix);

        List<String> inProgress = new ArrayList<String>();

        BufferedReader reader = openReader(sourceFile);
        try {
            BufferedWriter writer = openWriter(targetFile);
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    String includeFileName = includeFileName(line);
                    if (includeFileName != null) {
                        writer.write(get(includesDir, includeFileName, includeFiles, inProgress).toString());
                    } else {
                        writer.write(line);
                        writer.newLine();
                    }
                }
            } finally {
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Copies a source directory to a target directory, recursively, resolving any included files it encounters.
     * Directories named INCLUDES_DIR are not copied.
     * @param sourceDir Directory to copy from.
     * @param targetDir Directory to copy to.
     * @param includesDir Directory that holds the include files.
     * @param longestCommonPrefix Longest common prefix. (For logging, to shorten messages.)
     * @param includeFiles Cache of already included files.
     * @throws IOException Problem encountered opening, reading, creating, writing, or closing files,
     * creating the target directory, or listing the source directory.
     */
    private static void copy(File sourceDir, File targetDir, File includesDir, String longestCommonPrefix, Map<String, StringBuilder> includeFiles) throws IOException {

        log("directory", sourceDir, targetDir, longestCommonPrefix);

        if (!targetDir.isDirectory() && !targetDir.mkdirs()) {
            throw new IOException("Could not create directory: " + targetDir.getAbsolutePath());
        }

        // listFiles() returns null rather than throwing when the directory cannot be read.
        File[] files = sourceDir.listFiles();
        if (files == null) {
            throw new IOException("Could not list directory: " + sourceDir.getAbsolutePath());
        }

        for (File file : files) {
            String name = file.getName();
            File targetFile = new File(targetDir, name);
            if (file.isDirectory()) {
                if (!INCLUDES_DIR.equals(name)) {
                    copy(file, targetFile, includesDir, longestCommonPrefix, includeFiles);
                }
            } else if (isIncludingType(name)) {
                includeCopy(file, targetFile, includesDir, longestCommonPrefix, includeFiles);
            } else {
                simpleCopy(file, targetFile, longestCommonPrefix);
            }
        }
    }

    /**
     * Main entry point for program.
     * @param args Command-line arguments: source directory, then target directory.
     * @throws IOException Problem encountered resolving the source or target directory.
     */
    public static void main(String[] args) throws IOException {

        // Validate the args.

        if (args == null) {
            usage("Null args");
        }

        if (args.length != 2) {
            usage("Wrong number of args");
        }

        String sourceDirName = args[0];

        if (sourceDirName == null) {
            usage("Null source directory");
        }

        if (sourceDirName.trim().length() == 0) {
            usage("Blank source directory");
        }

        String targetDirName = args[1];

        if (targetDirName == null) {
            usage("Null target directory");
        }

        if (targetDirName.trim().length() == 0) {
            usage("Blank target directory");
        }

        File source = new File(sourceDirName);
        File target = new File(targetDirName);

        // Compare canonical paths component by component. Comparing the raw strings would reject
        // siblings such as "site" and "site-staging" and would miss relative/absolute mixes.
        File canonicalSource = source.getCanonicalFile();
        File canonicalTarget = target.getCanonicalFile();

        if (isSameOrAncestor(canonicalTarget, canonicalSource)) {
            usage("Source directory child of target directory");
        }

        if (isSameOrAncestor(canonicalSource, canonicalTarget)) {
            usage("Target directory child of source directory");
        }

        if (!source.exists()) {
            usage("Non-existent source directory");
        }

        if (!source.isDirectory()) {
            usage("Non-directory source directory");
        }

        // Initialize and perform the copying.

        Map<String, StringBuilder> includeFiles = new HashMap<String, StringBuilder>();

        // Log messages use absolute paths, so the prefix to strip must come from absolute paths too.
        String commonPrefix = directoryPrefix(longestCommonPrefix(source.getAbsolutePath(), target.getAbsolutePath()));

        File includesDir = new File(source, INCLUDES_DIR);

        long start = System.currentTimeMillis();

        try {
            copy(source, target, includesDir, commonPrefix, includeFiles);
        } catch (Exception e) {
            System.out.println("Encountered problem copying source to target: " + e.getMessage());
            e.printStackTrace();
            System.exit(-1);
        }

        long end = System.currentTimeMillis();

        System.out.println("done in " + (end - start) + " ms");
    }
}
An example file set up to be processed by the utility looks like this:
<!--INCLUDE("top.html")INCLUDE-->

<body id="manual">

<div id="header" class="header">
<h1 class="header">User Manual</h1>
</div>

<!--INCLUDE("nav.html")INCLUDE-->

<div>
<p>TODO: Put safety instructions and user manual here.</p>
</div>

<!--INCLUDE("bottom.html")INCLUDE-->

No comments:

Post a Comment