package edu.byu.deg.ontos;

import edu.byu.deg.framework.ContentFilter;
import edu.byu.deg.framework.Document;
import edu.byu.deg.framework.document.DOMDocument;
import edu.byu.deg.framework.document.PlaintextDocument;
import edu.byu.deg.util.HTMLStripper;
import edu.byu.deg.util.TagInfo;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Iterator;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/byu/deg/ontos/HTMLFilter.class */
/**
 * {@link ContentFilter} that converts a document, and recursively each of its
 * sub-documents, into a {@link PlaintextDocument} whose whitespace runs have been
 * replaced by {@code TagInfo.SP}.
 */
public class HTMLFilter implements ContentFilter {
    private static final Logger LOGGER = Logger.getLogger(HTMLFilter.class.getName());

    /** Matches one or more consecutive whitespace characters; compiled once and reused. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("(\\s)+");

    /** Number of characters requested per read while draining a content stream. */
    private static final int READ_BUFFER_SIZE = 8192;

    private final HTMLStripper htmlCleaner = new HTMLStripper();

    /**
     * Builds a plaintext copy of {@code document}.
     *
     * <p>The copy receives the source document's URI. For a {@link DOMDocument} the
     * contents are taken from its DOM string; for any other document the content stream
     * is read in full and passed through the HTML stripper. Each run of whitespace in the
     * resulting contents is then replaced with {@code TagInfo.SP}. Every sub-document of
     * the source is filtered the same way and attached to the copy.
     *
     * <p>If an {@link IOException} occurs while obtaining or normalizing the contents,
     * the failure is logged and processing continues; the contents of the returned
     * document are then whatever had been set before the failure (possibly nothing).
     *
     * @param document the document to convert
     * @return a new {@link PlaintextDocument} mirroring {@code document} and its sub-documents
     */
    @Override // edu.byu.deg.framework.ContentFilter
    public Document filterDocument(Document document) {
        PlaintextDocument plaintextDocument = new PlaintextDocument();
        plaintextDocument.setURI(document.getURI());
        try {
            if (document instanceof DOMDocument) {
                plaintextDocument.setContents(((DOMDocument) document).getDOMString().toString());
            } else {
                // NOTE(review): the content stream is not closed here because its ownership
                // is not visible from this class - confirm whether Document expects callers
                // to close it.
                plaintextDocument.setContents(
                        this.htmlCleaner.stripHTML(stringFromStream(document.getContentStream())));
            }
            plaintextDocument.setContents(
                    WHITESPACE_RUN.matcher(plaintextDocument.getContentString()).replaceAll(TagInfo.SP));
        } catch (IOException e) {
            // Best effort: keep going so the sub-documents are still filtered, but leave a
            // trace instead of silently dropping the failure.
            LOGGER.log(Level.WARNING, "Unable to read contents of document " + document.getURI(), e);
        }
        Iterator<?> subDocuments = document.getSubDocuments();
        while (subDocuments.hasNext()) {
            plaintextDocument.addSubDocument(filterDocument((Document) subDocuments.next()));
        }
        return plaintextDocument;
    }

    /**
     * Reads {@code inputStream} until end of stream and returns everything read as a string.
     *
     * <p>Bytes are decoded with the platform default charset, since no charset is supplied
     * to the {@link InputStreamReader}. The stream is not closed by this method.
     *
     * @param inputStream the stream to drain
     * @return the decoded text; empty if the stream is already at end of stream
     * @throws IOException if reading from the stream fails
     */
    protected static String stringFromStream(InputStream inputStream) throws IOException {
        StringBuilder text = new StringBuilder();
        InputStreamReader reader = new InputStreamReader(inputStream);
        char[] chunk = new char[READ_BUFFER_SIZE];
        int count;
        // Read in chunks rather than one character per call; the resulting text is identical.
        while ((count = reader.read(chunk, 0, chunk.length)) != -1) {
            text.append(chunk, 0, count);
        }
        return text.toString();
    }
}
