package edu.byu.deg.framework.DSP.fanout;

import edu.byu.deg.framework.DSP.heuristic.CombinedHeuristic;
import edu.byu.deg.framework.Document;
import edu.byu.deg.framework.DocumentStructureParser;
import edu.byu.deg.framework.UnrecognizedFormatException;
import edu.byu.deg.framework.document.DOMDocument;
import edu.byu.deg.util.DOMPrinter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.tidy.Tidy;

/* loaded from: input_file:edu/byu/deg/framework/DSP/fanout/FanoutRecordSeparator.class */
/**
 * Document structure parser that splits a DOM document into record sub-documents.
 *
 * <p>The node with the largest number of direct children (the "highest fanout" node) is located,
 * a separator tag name is obtained from {@link CombinedHeuristic}, and the children of that node
 * are grouped into {@code <record>} elements, a new record starting at every child whose
 * lower-cased node name equals the separator.
 */
public class FanoutRecordSeparator implements DocumentStructureParser {
    /** Tag name of the wrapper element created around each record. */
    public static final String SEPARATOR_TAG = "record";

    /** Banner written to standard output before and after each emitted record. */
    private static final String RECORD_BANNER = "***RECORD***";

    /** Input file used by {@link #main(String[])} when no path is supplied on the command line. */
    private static final String DEFAULT_INPUT = "c:/wwwroot/demo/default_data/defaulthtmlsrc/obit.html";

    /**
     * Shared Tidy instance, created by the first constructor call with warnings disabled and
     * quiet mode enabled. It is not read anywhere in this class.
     * NOTE(review): presumably consumed by other classes in this package - confirm before removing.
     */
    static Tidy tidy = null;

    /** Creates a separator, initializing the shared {@link #tidy} instance on first use. */
    public FanoutRecordSeparator() {
        if (tidy == null) {
            tidy = new Tidy();
            tidy.setShowWarnings(false);
            tidy.setQuiet(true);
        }
    }

    /**
     * Splits the given document into records and attaches them as sub-documents.
     *
     * @param document the document to split; must be a {@link DOMDocument}
     * @return the same document instance, after the records were added via
     *         {@code addSubDocuments}
     * @throws UnrecognizedFormatException if {@code document} is not a {@link DOMDocument} or
     *         has no root node
     */
    @Override // edu.byu.deg.framework.DocumentStructureParser
    public Document parse(Document document) throws UnrecognizedFormatException {
        if (!(document instanceof DOMDocument)) {
            throw new UnrecognizedFormatException("RecordSeparator cannot handle this document");
        }
        DOMDocument domDocument = (DOMDocument) document;
        Node root = domDocument.getRoot();
        if (root == null) {
            // Without this guard the fanout search below fails with a NullPointerException.
            throw new UnrecognizedFormatException("RecordSeparator cannot handle a document without a root node");
        }
        Node highestFanout = getHighestFanout(root);
        String separator = new CombinedHeuristic(highestFanout).getSeparator();
        domDocument.addSubDocuments(split(highestFanout, separator));
        return domDocument;
    }

    /**
     * Groups deep copies of the children of {@code node} into {@code <record>} elements.
     *
     * <p>A new record is started at every child (other than the first) whose node name,
     * lower-cased independently of the default locale, equals {@code str}. Each finished record
     * is also printed to standard output between two banner lines.
     *
     * @param node the node whose children are split
     * @param str  the separator tag name, compared against lower-cased node names; a value that
     *             matches no child yields a single record holding all children
     * @return the list of record elements, empty when {@code node} has no children
     */
    private static ArrayList split(Node node, String str) {
        ArrayList<Element> records = new ArrayList<Element>();
        // getOwnerDocument() is null for a Document node, so use the node itself as the factory then.
        org.w3c.dom.Document owner = node.getNodeType() == Node.DOCUMENT_NODE
                ? (org.w3c.dom.Document) node
                : node.getOwnerDocument();
        Element current = owner.createElement(SEPARATOR_TAG);
        boolean sawChild = false;
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            // Locale.ROOT keeps the comparison stable under locales with special casing rules
            // (e.g. Turkish dotless i). The sawChild test avoids emitting an empty leading record.
            if (sawChild && child.getNodeName().toLowerCase(Locale.ROOT).equals(str)) {
                emitRecord(records, current);
                current = owner.createElement(SEPARATOR_TAG);
            }
            current.appendChild(child.cloneNode(true));
            sawChild = true;
        }
        if (sawChild) {
            emitRecord(records, current);
        }
        return records;
    }

    /** Appends a finished record to the list and prints it between two banner lines. */
    private static void emitRecord(ArrayList<Element> records, Element record) {
        records.add(record);
        System.out.println(RECORD_BANNER);
        DOMPrinter.print(record);
        System.out.println(RECORD_BANNER);
    }

    /** Returns the number of direct children of {@code node}. */
    private static int getFanout(Node node) {
        return node.getChildNodes().getLength();
    }

    /**
     * Recursively finds the node with the most direct children in the subtree rooted at
     * {@code node}.
     *
     * <p>Only a strictly larger fanout replaces the current best, so on ties the ancestor wins
     * over its descendants and an earlier sibling subtree wins over a later one.
     *
     * @param node the root of the subtree to search
     * @return the node with the highest fanout; {@code node} itself when no descendant exceeds it
     */
    private static Node getHighestFanout(Node node) {
        Node best = node;
        int bestFanout = getFanout(node);
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            Node candidate = getHighestFanout(child);
            int candidateFanout = getFanout(candidate);
            if (candidateFanout > bestFanout) {
                bestFanout = candidateFanout;
                best = candidate;
            }
        }
        return best;
    }

    /**
     * Command-line driver: parses one document, prints the root of every sub-document, then
     * prints {@code done}. Any exception is reported via its stack trace.
     *
     * @param strArr optional arguments; the first one, when present, is the path of the document
     *               to parse, otherwise the built-in default path is used
     */
    public static void main(String[] strArr) {
        String path = (strArr != null && strArr.length > 0) ? strArr[0] : DEFAULT_INPUT;
        try {
            Iterator<?> subDocuments = new FanoutRecordSeparator().parse(new DOMDocument(path)).getSubDocuments();
            while (subDocuments.hasNext()) {
                DOMPrinter.print(((DOMDocument) subDocuments.next()).getRoot());
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        System.out.println("done");
    }
}
