package com.oxygenxml.smartautocomplete.core.openai;

import com.oxygenxml.smartautocomplete.plugin.MessagePresenter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Stream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.SimpleXmlSerializer;
import org.htmlcleaner.TagNode;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import ro.sync.basic.io.IOUtil;

/* loaded from: input_file:oxygen-smart-autocomplete-addon-1.0.0-SNAPSHOT/lib/oxygen-smart-autocomplete-addon-1.0.0-SNAPSHOT.jar:com/oxygenxml/smartautocomplete/core/openai/FolderTrainingSet.class */
/**
 * Collects the plain-text content of the files found under a folder.
 *
 * <p>Files are selected by extension: {@code .txt} and {@code .md} are read as UTF-8 text,
 * {@code .xml} files are parsed and reduced to their character data, {@code .html} files are
 * cleaned into XML first and then reduced the same way, and {@code .pdf} files are run through a
 * PDF text stripper. Files with any other extension are ignored.
 *
 * <p>The SAX reader is created lazily and cached in an instance field without any
 * synchronization.
 */
public class FolderTrainingSet {
    private static final Logger logger = LoggerFactory.getLogger(FolderTrainingSet.class.getName());

    /** Lazily created SAX reader, reused for every XML/HTML file handled by this instance. */
    private XMLReader xmlReader;

    /**
     * Returns the cached SAX reader, creating and configuring it on first use.
     *
     * <p>The reader has the secure-processing feature enabled and resolves every external entity
     * to an empty document, so DTDs and external entities are never fetched.
     *
     * @return the configured reader
     * @throws SAXException if the parser cannot be created or does not support the feature
     * @throws ParserConfigurationException if the parser factory cannot satisfy the configuration
     */
    private XMLReader getXmlReader() throws SAXException, ParserConfigurationException {
        if (this.xmlReader == null) {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setFeature("http://javax.xml.XMLConstants/feature/secure-processing", true);
            XMLReader reader = factory.newSAXParser().getXMLReader();
            // Hand out a fresh empty source per request so no stream state is shared between
            // entity resolutions.
            EntityResolver emptyResolver =
                    (publicId, systemId) -> new InputSource(new ByteArrayInputStream(new byte[0]));
            reader.setEntityResolver(emptyResolver);
            // Publish the reader only once it is fully configured.
            this.xmlReader = reader;
        }
        return this.xmlReader;
    }

    /**
     * Walks the given folder recursively and extracts the text of every supported file.
     *
     * @param file the root folder to walk
     * @param messagePresenter receives a status message for each file that cannot be read
     * @return a map from file URI (as a string) to extracted text, in traversal order
     * @throws IOException if the folder walk cannot be started
     */
    public Map<String, String> getFolderTextContent(File file, MessagePresenter messagePresenter) throws IOException {
        Map<String, String> contents = new LinkedHashMap<>();
        // Files.walk holds open directory handles; try-with-resources guarantees they are released.
        try (Stream<Path> paths = Files.walk(file.toPath())) {
            paths.filter(path -> path.toFile().isFile())
                    .forEach(path -> process(contents, path, messagePresenter));
        }
        return contents;
    }

    /**
     * Dispatches a single file to the extractor matching its (case-insensitive) extension.
     *
     * <p>An {@link IOException} raised while reading one file is logged at debug level and
     * reported through the message presenter; it does not interrupt the processing of other files.
     *
     * @param map the result map to add the extracted text to
     * @param path the file to process
     * @param messagePresenter receives the failure message, if any
     */
    private void process(Map<String, String> map, Path path, MessagePresenter messagePresenter) {
        String fileName = path.getFileName().toString().toLowerCase();
        try {
            if (fileName.endsWith(".txt") || fileName.endsWith(".md")) {
                processText(map, path);
            } else if (fileName.endsWith(".xml")) {
                processXml(map, path);
            } else if (fileName.endsWith(".html")) {
                processHtml(map, path);
            } else if (fileName.endsWith(".pdf")) {
                processPdf(map, path);
            }
        } catch (IOException e) {
            // SLF4J treats a trailing Throwable argument as the exception to log.
            logger.debug("Cannot read contents of {}", path, e);
            messagePresenter.updateMessageStatus("Cannot read contents of " + path + ": " + e.getMessage());
        }
    }

    /**
     * Parses an XML file and stores its character data under the file's URI.
     *
     * @param map the result map
     * @param path the XML file
     * @throws IOException if the file cannot be read or parsed; parser errors are wrapped
     */
    private void processXml(Map<String, String> map, Path path) throws IOException {
        String uri = path.toUri().toString();
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream input = new FileInputStream(path.toFile())) {
            InputSource inputSource = new InputSource(input);
            inputSource.setSystemId(uri);
            map.put(uri, parse(inputSource).toString());
        } catch (ParserConfigurationException | SAXException e) {
            throw new IOException(e);
        }
    }

    /**
     * Parses the given source and concatenates its character data, appending a single space after
     * each closing element so that text from adjacent elements does not run together.
     *
     * @param inputSource the XML source to parse
     * @return the accumulated text
     * @throws SAXException if the document is not well-formed or the reader cannot be created
     * @throws IOException if the source cannot be read
     * @throws ParserConfigurationException if the reader cannot be configured
     */
    private StringBuilder parse(InputSource inputSource) throws SAXException, IOException, ParserConfigurationException {
        final StringBuilder text = new StringBuilder();
        XMLReader reader = getXmlReader();
        reader.setContentHandler(new DefaultHandler() {
            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                text.append(ch, start, length);
            }

            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                text.append(' ');
            }
        });
        reader.parse(inputSource);
        return text;
    }

    /**
     * Cleans an HTML file into XML, parses that XML and stores its character data under the
     * file's URI.
     *
     * @param map the result map
     * @param path the HTML file
     * @throws IOException if the file cannot be read or the cleaned markup cannot be parsed
     */
    private void processHtml(Map<String, String> map, Path path) throws IOException {
        try {
            CleanerProperties cleanerProperties = new CleanerProperties();
            cleanerProperties.setTransResCharsToNCR(true);
            cleanerProperties.setKeepWhitespaceAndCommentsInHead(true);
            cleanerProperties.setAdvancedXmlEscape(true);
            cleanerProperties.setRecognizeUnicodeChars(false);
            cleanerProperties.setOmitDoctypeDeclaration(false);
            cleanerProperties.setIgnoreQuestAndExclam(false);
            TagNode root = new HtmlCleaner(cleanerProperties).clean(path.toFile());

            // Serialize the cleaned tree to an in-memory UTF-8 XML document and feed it to the
            // same SAX-based text extraction used for XML files.
            ByteArrayOutputStream xmlBytes = new ByteArrayOutputStream();
            new SimpleXmlSerializer(cleanerProperties).writeToStream(root, xmlBytes, StandardCharsets.UTF_8.name());
            InputSource inputSource = new InputSource(new ByteArrayInputStream(xmlBytes.toByteArray()));
            map.put(path.toUri().toString(), parse(inputSource).toString());
        } catch (ParserConfigurationException | SAXException e) {
            throw new IOException(e);
        }
    }

    /**
     * Reads a text file as UTF-8 and stores its content under the file's URI.
     *
     * @param map the result map
     * @param path the text file
     * @throws IOException if the file cannot be read
     */
    private static void processText(Map<String, String> map, Path path) throws IOException {
        map.put(path.toUri().toString(), IOUtil.readFile(path.toFile(), StandardCharsets.UTF_8.name()));
    }

    /**
     * Extracts the text of all pages of a PDF file and stores the filtered result under the
     * file's URI.
     *
     * @param map the result map
     * @param path the PDF file
     * @throws IOException if the document cannot be loaded or its text cannot be extracted
     */
    private static void processPdf(Map<String, String> map, Path path) throws IOException {
        // The document holds the underlying file open; close it once the text is extracted.
        try (PDDocument document = PDDocument.load(path.toFile())) {
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setStartPage(1);
            stripper.setEndPage(document.getNumberOfPages());
            map.put(path.toUri().toString(), filter(stripper.getText(document)));
        }
    }

    /**
     * Normalizes extracted PDF text: carriage returns become spaces and soft hyphens (U+00AD)
     * become regular hyphens.
     *
     * @param str the raw extracted text
     * @return the normalized text
     */
    private static String filter(String str) {
        return str.replace(StringUtils.CR, StringUtils.SPACE).replace("\u00ad", "-");
    }
}
