package edu.byu.deg.ontos;

import edu.byu.deg.framework.ContentFilter;
import edu.byu.deg.framework.Document;
import edu.byu.deg.framework.DocumentRetriever;
import edu.byu.deg.framework.DocumentStructureParser;
import edu.byu.deg.framework.DocumentStructureRecognizer;
import edu.byu.deg.framework.ExtractionPlan;
import edu.byu.deg.framework.OntologyWriter;
import edu.byu.deg.framework.PlanExecutionException;
import edu.byu.deg.framework.RetrieverConfigurationException;
import edu.byu.deg.framework.UnrecognizedFormatException;
import edu.byu.deg.framework.ValueMapper;
import edu.byu.deg.framework.ValueRecognizer;
import edu.byu.deg.osmxwrappers.OSMXDocument;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Iterator;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/byu/deg/ontos/OntosExtractionPlan.class */
/**
 * Extraction plan that pulls documents from a {@link LocalDocumentRetriever} and pushes each one
 * through the components configured on the {@link OntosEngine}: structure recognition/parsing,
 * content filtering, value recognition, value mapping and, finally, ontology writing.
 *
 * <p>All collaborators are read from the engine's init parameters in the constructor. Every
 * collaborator other than the retriever is optional at the call sites in this class: a
 * {@code null} component is simply skipped.
 */
public class OntosExtractionPlan extends ExtractionPlan {
    /** Per-instance logger, named after the runtime class of the plan. */
    protected final Logger LOG;
    /** Source of the documents to process; must be a {@link LocalDocumentRetriever} for {@link #execute()}. */
    protected DocumentRetriever retriever;
    /** Optional recognizer used to choose a parser for each document. */
    protected DocumentStructureRecognizer dsr;
    /** Optional parser; replaced per document by the recognizer's choice when a recognizer is set. */
    protected DocumentStructureParser dsp;
    /** Optional filter applied to each document after parsing. */
    protected ContentFilter contentFilter;
    /** Optional component that finds values in each (sub-)document. */
    protected ValueRecognizer valueRecognizer;
    /** Optional component that generates value mappings for each (sub-)document. */
    protected ValueMapper valueMapper;
    /** Optional writer used to emit the data instance after each top-level document. */
    protected OntologyWriter ontologyWriter;
    /** Ontology document that is saved after every processed (sub-)document. */
    protected OSMXDocument ontology;
    /** The engine this plan was created for; source of all init parameters read in the constructor. */
    protected OntosEngine eng;
    /** Init-parameter key under which the ontology writer's output file name is looked up. */
    public static final String ONTOLOGY_WRITER_OUTPUT_FILENAME = "ontologyWriterOutputFile";

    /**
     * Creates a plan bound to the given engine and reads all collaborators from the engine's
     * init parameters.
     *
     * @param ontosEngine the engine supplying the configured components
     */
    public OntosExtractionPlan(OntosEngine ontosEngine) {
        super(ontosEngine);
        this.LOG = Logger.getLogger(getClass());
        this.eng = ontosEngine;
        this.retriever = (DocumentRetriever) this.eng.getInitParameter(OntosEngine.RETRIEVER_KEY);
        this.dsr = (DocumentStructureRecognizer) this.eng.getInitParameter(OntosEngine.STRUCTURE_RECOGNIZER_KEY);
        this.dsp = (DocumentStructureParser) this.eng.getInitParameter(OntosEngine.PARSER_KEY);
        this.contentFilter = (ContentFilter) this.eng.getInitParameter(OntosEngine.CONTENT_FILTER_KEY);
        this.valueRecognizer = (ValueRecognizer) this.eng.getInitParameter(OntosEngine.VALUE_RECOGNIZER_KEY);
        this.valueMapper = (ValueMapper) this.eng.getInitParameter(OntosEngine.VALUE_MAPPER_KEY);
        this.ontologyWriter = (OntologyWriter) this.eng.getInitParameter(OntosEngine.ONTOLOGY_WRITER_KEY);
        this.ontology = (OSMXDocument) this.eng.getInitParameter(OntosEngine.ONTOLOGY_KEY);
    }

    /**
     * Initializes the retriever, registers the configured retrieval path and pattern with it,
     * and processes every document it yields.
     *
     * @throws PlanExecutionException if the retriever is not a {@link LocalDocumentRetriever}
     *         (this includes a missing retriever), or if retriever configuration fails
     */
    @Override // edu.byu.deg.framework.ExtractionPlan
    public void execute() throws PlanExecutionException {
        if (!(this.retriever instanceof LocalDocumentRetriever)) {
            throw new PlanExecutionException("The OntosExtractionPlan currently requires a LocalDocumentRetriever");
        }
        try {
            this.retriever.initialize();
            String retrievalPath = (String) this.engine.getInitParameter(OntosEngine.RETRIEVAL_PATH);
            String retrievalPattern = (String) this.engine.getInitParameter(OntosEngine.RETRIEVAL_PATTERN);
            ((LocalDocumentRetriever) this.retriever).addLocations(retrievalPath, retrievalPattern);
            Iterator<Document> documents = this.retriever.retrieveDocuments();
            while (documents.hasNext()) {
                processDocument(documents.next());
            }
        } catch (RetrieverConfigurationException e) {
            throw new PlanExecutionException(e);
        }
    }

    /**
     * Runs one retrieved document through parsing, filtering, value extraction and writing.
     *
     * <p>A document whose format is not recognized by the parser is logged and skipped entirely.
     *
     * @param document the document as delivered by the retriever
     */
    private void processDocument(Document document) {
        if (this.dsr != null) {
            // The recognizer's choice overwrites the parser field, including any parser that was
            // configured through the engine.
            this.dsp = this.dsr.getDocumentParser(document);
        }
        if (this.dsp != null) {
            try {
                document = this.dsp.parse(document);
            } catch (UnrecognizedFormatException e) {
                this.LOG.warn("Unrecognized format", e);
                return;
            }
        }
        if (this.contentFilter != null) {
            document = this.contentFilter.filterDocument(document);
        }
        for (Document unit : collectExtractionUnits(document)) {
            recognizeAndMapValues(unit);
        }
        writeOntologyDataInstance();
    }

    /**
     * Returns the documents to extract from: the sub-documents of {@code document}, or the
     * document itself when it has none. The result is therefore never empty.
     */
    private ArrayList<Document> collectExtractionUnits(Document document) {
        ArrayList<Document> units = new ArrayList<Document>();
        Iterator<? extends Document> subDocuments = document.getSubDocuments();
        while (subDocuments.hasNext()) {
            units.add(subDocuments.next());
        }
        if (units.isEmpty()) {
            units.add(document);
        }
        return units;
    }

    /**
     * Runs the value recognizer and value mapper (each if configured) on one document, then saves
     * the ontology. Any failure is logged and swallowed so the remaining documents still run.
     */
    private void recognizeAndMapValues(Document unit) {
        this.LOG.info("Processing document " + unit.getURI());
        long start = System.currentTimeMillis();
        try {
            this.LOG.info("Running ValueRecognizer... t = " + (System.currentTimeMillis() - start));
            if (this.valueRecognizer != null) {
                this.valueRecognizer.findValues(unit);
            }
            this.LOG.info("Running ValueMapper... t = " + (System.currentTimeMillis() - start));
            if (this.valueMapper != null) {
                this.valueMapper.generateValueMappings(unit);
            }
            this.LOG.info("Saving the document... t = " + (System.currentTimeMillis() - start));
            this.ontology.saveDocument();
            this.LOG.info("Finished processing document in " + (System.currentTimeMillis() - start) + " ms");
        } catch (Exception e) {
            // Deliberate best-effort boundary: the logger receives the full stack trace, so no
            // separate printStackTrace() is needed.
            this.LOG.error("Could not process document: " + e.getMessage(), e);
        }
    }

    /**
     * Hands the ontology to the configured writer and writes the data instance to the file named
     * by the {@link #ONTOLOGY_WRITER_OUTPUT_FILENAME} init parameter. Does nothing when no writer
     * is configured. Failures are logged as warnings and not propagated.
     */
    private void writeOntologyDataInstance() {
        if (this.ontologyWriter == null) {
            return;
        }
        try {
            this.ontologyWriter.setOntology(this.ontology);
            FileWriter output = new FileWriter((String) this.engine.getInitParameter(ONTOLOGY_WRITER_OUTPUT_FILENAME));
            try {
                this.ontologyWriter.writeDataInstance(output);
            } finally {
                // Always release the file handle and flush buffered output; closing an
                // already-closed writer has no effect, so this is safe even if the ontology
                // writer closes it as well.
                output.close();
            }
        } catch (Exception e) {
            this.LOG.warn("Error writing data instance: " + e.getMessage(), e);
        }
    }
}
