package edu.byu.deg.resourcehandler.impl;

import edu.byu.deg.ontologyprojectcommon.IResource;
import edu.byu.deg.resourcehandler.AbstractHtmlResourceHandler;
import edu.byu.deg.resourcehandler.IHtmlResourceHandler;
import edu.byu.deg.resourcehandler.exception.InvalidResourceException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Locale;
import org.mortbay.html.StyleLink;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:edu/byu/deg/resourcehandler/impl/TidyHtmlResourceHandler.class */
/**
 * HTML resource handler that parses a resource with JTidy and records three
 * properties in the inherited property map: {@code "text"} (concatenated text
 * nodes), {@code "links"} (the link attribute values of all {@code <a>}
 * elements) and {@code "title"} (the text of the first {@code <title>}
 * element).
 */
public class TidyHtmlResourceHandler extends AbstractHtmlResourceHandler implements IHtmlResourceHandler {
    /** Parser instance; re-created on every call to {@link #handle(IResource)}. */
    private Tidy tidy;

    /**
     * Creates a handler for the given resource by delegating to the superclass
     * constructor.
     *
     * @param iResource the resource to handle
     * @throws InvalidResourceException if the superclass constructor rejects the resource
     */
    public TidyHtmlResourceHandler(IResource iResource) throws InvalidResourceException {
        super(iResource);
    }

    /**
     * Parses the resource's content into a DOM and stores the extracted text,
     * links and title in the property map.
     *
     * @param iResource the resource whose location is opened and parsed
     * @throws InvalidResourceException if the superclass handling fails, or if
     *         opening, parsing or extracting from the resource throws any exception
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // edu.byu.deg.resourcehandler.AbstractHtmlResourceHandler, edu.byu.deg.resourcehandler.AbstractResourceHandler
    public void handle(IResource iResource) throws InvalidResourceException {
        super.handle(iResource);
        this.tidy = new Tidy();
        // NOTE(review): 3 is presumably JTidy's UTF-8 encoding constant - confirm against Configuration.
        this.tidy.setCharEncoding(3);
        this.tidy.setQuiet(true);
        this.tidy.setShowWarnings(false);
        try {
            InputStream in = iResource.getLocation().openStream();
            try {
                Document parseDOM = this.tidy.parseDOM(in, null);
                addToPropertyMap("text", getTextContent(parseDOM));
                addToPropertyMap("links", constructLinkCollection(parseDOM.getElementsByTagName("a")));
                addToPropertyMap("title", getTitle(parseDOM.getElementsByTagName("title").item(0)));
            } finally {
                // Always release the stream, whether or not parsing succeeded.
                in.close();
            }
        } catch (Exception e) {
            // Only the String constructor of InvalidResourceException is known here,
            // so the message is forwarded and the cause cannot be chained.
            throw new InvalidResourceException(e.getLocalizedMessage());
        }
    }

    /**
     * Builds one {@code "name : value"} line per non-null property, skipping
     * the {@code "links"} property.
     *
     * @return the concatenated lines, each terminated by a newline
     */
    @Override // edu.byu.deg.resourcehandler.AbstractResourceHandler, edu.byu.deg.resourcehandler.IResourceHandler
    public String getAllIndexableText() {
        StringBuilder sb = new StringBuilder();
        for (String name : getProperties()) {
            if (name.equals("links")) {
                continue;
            }
            Object value = getProperty(name);
            if (value != null) {
                sb.append(name).append(" : ").append(value).append("\n");
            }
        }
        return sb.toString();
    }

    /**
     * Collects the value of the {@code StyleLink.HREF} attribute from every
     * node in the list that has one.
     *
     * @param nodeList the nodes to inspect
     * @return the attribute values in document order; nodes without the attribute are skipped
     */
    private Collection<String> constructLinkCollection(NodeList nodeList) {
        int count = nodeList.getLength();
        ArrayList<String> links = new ArrayList<String>(count);
        for (int i = 0; i < count; i++) {
            Node element = nodeList.item(i);
            if (element.getAttributes() == null) {
                // Only element nodes carry attributes; skip anything else defensively.
                continue;
            }
            // NOTE(review): StyleLink.HREF is presumably the "href" attribute name - confirm.
            Node href = element.getAttributes().getNamedItem(StyleLink.HREF);
            if (href != null) {
                links.add(href.getNodeValue());
            }
        }
        return links;
    }

    /**
     * Returns the value of the first {@link Text} child of the given node.
     *
     * @param node the title element, or {@code null} when the document has none
     * @return the text value, or an empty string when {@code node} is
     *         {@code null} or has no text child
     */
    private String getTitle(Node node) {
        if (node == null) {
            // A document without a <title> is still valid input; report an empty title.
            return "";
        }
        NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            if (child instanceof Text) {
                return child.getNodeValue();
            }
        }
        return "";
    }

    /**
     * Concatenates the text found beneath each top-level child of the document.
     *
     * @param document the parsed document
     * @return the collected text, each text node followed by a single space
     */
    private String getTextContent(Document document) {
        StringBuilder sb = new StringBuilder();
        NodeList childNodes = document.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            appendTextContent(childNodes.item(i), sb);
        }
        return sb.toString();
    }

    /**
     * Appends the node's own text (if it is a {@link Text} node with a non-null
     * value) and then recurses into its children, skipping comment nodes and
     * any child whose node name contains {@code "script"}.
     *
     * @param node the node to visit
     * @param sb the buffer receiving the text, each piece followed by a space
     */
    private void appendTextContent(Node node, StringBuilder sb) {
        if ((node instanceof Text) && node.getNodeValue() != null) {
            sb.append(node.getNodeValue()).append(" ");
        }
        NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node item = childNodes.item(i);
            if (item.getNodeType() == Node.COMMENT_NODE) {
                continue;
            }
            // Locale.ROOT keeps the match independent of the default locale's casing rules.
            if (item.getNodeName().toLowerCase(Locale.ROOT).contains("script")) {
                continue;
            }
            appendTextContent(item, sb);
        }
    }
}
