package edu.byu.deg.ontos;

import edu.byu.deg.framework.ContentFilter;
import edu.byu.deg.framework.DataExtractionEngine;
import edu.byu.deg.framework.Document;
import edu.byu.deg.framework.DocumentRetriever;
import edu.byu.deg.framework.DocumentStructureParser;
import edu.byu.deg.framework.DocumentStructureRecognizer;
import edu.byu.deg.framework.ExtractionPlan;
import edu.byu.deg.framework.OntologyWriter;
import edu.byu.deg.framework.PlanExecutionException;
import edu.byu.deg.framework.RetrieverConfigurationException;
import edu.byu.deg.framework.UnrecognizedFormatException;
import edu.byu.deg.framework.ValueMapper;
import edu.byu.deg.framework.ValueRecognizer;
import edu.byu.deg.osmx.OSMXDocument;
import java.io.File;
import java.io.FileFilter;
import java.io.FileWriter;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Pattern;

/* loaded from: input_file:edu/byu/deg/ontos/OntosExtractionPlan.class */
/**
 * Extraction plan for the Ontos engine.
 *
 * <p>The plan pulls its collaborators (retriever, structure recognizer, parser,
 * content filter, value recognizer, value mapper, ontology writer and ontology)
 * from the engine's init parameters at construction time. {@link #execute()}
 * then retrieves documents and pushes each one through the configured stages.
 * Every stage other than the retriever is optional: a {@code null} collaborator
 * is skipped.
 */
public class OntosExtractionPlan extends ExtractionPlan {
    protected DocumentRetriever retriever;
    protected DocumentStructureRecognizer dsr;
    protected DocumentStructureParser dsp;
    protected ContentFilter contentFilter;
    protected ValueRecognizer valueRecognizer;
    protected ValueMapper valueMapper;
    protected OntologyWriter ontologyWriter;
    protected OSMXDocument ontology;
    protected OntosEngine eng;

    /** Engine init-parameter key holding the path of the file the ontology writer writes to. */
    public static final String ONTOLOGY_WRITER_OUTPUT_FILENAME = "ontologyWriterOutputFile";

    /**
     * Creates a plan bound to the given engine and reads all pipeline
     * collaborators from the engine's init parameters.
     *
     * @param ontosEngine the engine supplying the init parameters
     */
    public OntosExtractionPlan(OntosEngine ontosEngine) {
        super(ontosEngine);
        this.eng = ontosEngine;
        this.retriever = (DocumentRetriever) this.eng.getInitParameter(OntosEngine.RETRIEVER_KEY);
        this.dsr = (DocumentStructureRecognizer) this.eng.getInitParameter(OntosEngine.STRUCTURE_RECOGNIZER_KEY);
        this.dsp = (DocumentStructureParser) this.eng.getInitParameter(OntosEngine.PARSER_KEY);
        this.contentFilter = (ContentFilter) this.eng.getInitParameter(OntosEngine.CONTENT_FILTER_KEY);
        this.valueRecognizer = (ValueRecognizer) this.eng.getInitParameter(OntosEngine.VALUE_RECOGNIZER_KEY);
        this.valueMapper = (ValueMapper) this.eng.getInitParameter(OntosEngine.VALUE_MAPPER_KEY);
        this.ontologyWriter = (OntologyWriter) this.eng.getInitParameter(OntosEngine.ONTOLOGY_WRITER_KEY);
        this.ontology = (OSMXDocument) this.eng.getInitParameter(OntosEngine.ONTOLOGY_KEY);
    }

    /**
     * Initializes the retriever, optionally registers a retrieval location and
     * file-name filter taken from the engine's init parameters, and processes
     * every retrieved document.
     *
     * <p>When a retrieval path is configured the retriever is cast to
     * {@code LocalDocumentRetriever}; the retrieval pattern is interpreted as a
     * regular expression that must match the whole file name. If no pattern is
     * configured, every file name is accepted.
     *
     * @throws PlanExecutionException if the retriever reports a configuration problem
     */
    @Override // edu.byu.deg.framework.ExtractionPlan
    public void execute() throws PlanExecutionException {
        try {
            this.retriever.initialize();
            String retrievalPath = (String) this.engine.getInitParameter(OntosEngine.RETRIEVAL_PATH);
            String retrievalPattern = (String) this.engine.getInitParameter(OntosEngine.RETRIEVAL_PATTERN);
            if (retrievalPath != null) {
                ((LocalDocumentRetriever) this.retriever).setFileFilter(createFileNameFilter(retrievalPattern));
                try {
                    this.retriever.addDocumentLocation(retrievalPath);
                } catch (URISyntaxException e) {
                    // A bad location is not fatal: previously registered locations are still retrieved.
                    OntosEngine.getLogger().warning(new StringBuffer("Location ").append(retrievalPath).append(" is not a valid URI").toString());
                }
            }
            Iterator documents = this.retriever.retrieveDocuments();
            while (documents.hasNext()) {
                processDocument((Document) documents.next());
            }
        } catch (RetrieverConfigurationException e) {
            throw new PlanExecutionException(e);
        }
    }

    /**
     * Builds a filter accepting files whose name fully matches the given regular expression.
     *
     * @param regex the regular expression to match file names against, or
     *              {@code null} to accept every file name
     * @return a filter backed by a pattern that is compiled once
     */
    private static FileFilter createFileNameFilter(String regex) {
        // The default must be ".*": a bare "*" is not a valid regular expression
        // and makes Pattern.compile throw PatternSyntaxException.
        final Pattern namePattern = Pattern.compile(regex == null ? ".*" : regex);
        return new FileFilter() {
            @Override // java.io.FileFilter
            public boolean accept(File file) {
                return namePattern.matcher(file.getName()).matches();
            }
        };
    }

    /**
     * Runs one retrieved document through the pipeline: parse, filter, then
     * recognize and map values for each sub-document (or for the document
     * itself when it has no sub-documents), saving the ontology after each one.
     * Finally, if an ontology writer is configured, writes the data instance to
     * the configured output file.
     *
     * <p>Failures in a single sub-document or in the final write are logged and
     * do not abort the plan.
     *
     * @param document the document to process
     */
    private void processDocument(Document document) {
        if (this.dsr != null) {
            // The recognizer's choice replaces any previously configured parser.
            this.dsp = this.dsr.getDocumentParser(document);
        }
        if (this.dsp != null) {
            try {
                document = this.dsp.parse(document);
            } catch (UnrecognizedFormatException e) {
                // Skip documents the parser cannot handle, but leave a trace of why.
                DataExtractionEngine.getLogger().warning(new StringBuffer("Skipping document ").append(document.getURI()).append(" with unrecognized format: ").append(e.getMessage()).toString());
                return;
            }
        }
        if (this.contentFilter != null) {
            document = this.contentFilter.filterDocument(document);
        }

        ArrayList<Document> targets = new ArrayList<Document>();
        Iterator subDocuments = document.getSubDocuments();
        while (subDocuments.hasNext()) {
            targets.add((Document) subDocuments.next());
        }
        if (targets.isEmpty()) {
            // No sub-documents: treat the document itself as the single unit of work.
            targets.add(document);
        }

        for (Document target : targets) {
            DataExtractionEngine.getLogger().info(new StringBuffer("Processing document ").append(target.getURI()).toString());
            try {
                if (this.valueRecognizer != null) {
                    this.valueRecognizer.findValues(target);
                }
                if (this.valueMapper != null) {
                    this.valueMapper.generateValueMappings(target);
                }
                this.ontology.saveDocument();
            } catch (Exception e) {
                DataExtractionEngine.getLogger().severe(new StringBuffer("Could not process document: ").append(e.getMessage()).toString());
                e.printStackTrace();
            }
        }

        // 'targets' is never empty at this point, so only the writer needs checking.
        if (this.ontologyWriter == null) {
            return;
        }
        try {
            this.ontologyWriter.setOntology(this.ontology);
            String outputFile = (String) this.engine.getInitParameter(ONTOLOGY_WRITER_OUTPUT_FILENAME);
            if (outputFile == null) {
                DataExtractionEngine.getLogger().warning(new StringBuffer("Error writing data instance: init parameter ").append(ONTOLOGY_WRITER_OUTPUT_FILENAME).append(" is not set").toString());
                return;
            }
            FileWriter output = new FileWriter(outputFile);
            try {
                this.ontologyWriter.writeDataInstance(output);
            } finally {
                // Always release the file handle and flush buffered output.
                output.close();
            }
        } catch (Exception e) {
            DataExtractionEngine.getLogger().warning(new StringBuffer("Error writing data instance: ").append(e.getMessage()).toString());
            e.printStackTrace();
        }
    }
}
