/*
 * Decompiled with CFR 0.152.
 */
package com.oxygenxml.positron.core.util.attach.pdf;

import com.oxygenxml.positron.api.connector.dto.ImageUrl;
import com.oxygenxml.positron.api.connector.dto.MessageContent;
import com.oxygenxml.positron.api.connector.dto.MessageImageUrlWithResolutionContent;
import com.oxygenxml.positron.api.connector.dto.MessageTextContent;
import com.oxygenxml.positron.core.util.SecurityUtils;
import com.oxygenxml.positron.core.util.attach.MessageAttachmentUtils;
import com.oxygenxml.positron.core.util.attach.pdf.FontAwareTextStripper;
import java.awt.image.BufferedImage;
import java.awt.image.RenderedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PdfDocumentUtils {
    /** Class logger; the visible code uses it at debug level when an embedded image cannot be extracted. */
    private static final Logger log = LoggerFactory.getLogger(PdfDocumentUtils.class);
    /**
     * Default page height. The same value (792.0f) appears as an inlined literal fallback in
     * {@code buildPageHeightMap}; this constant is not referenced by name in the visible code.
     */
    static final float DEFAULT_PAGE_HEIGHT = 792.0f;
    /**
     * Default font size. The same value (12.0f) appears as an inlined literal fallback in the font size
     * calculations and in {@code hasVisualParagraphBreak}; not referenced by name in the visible code.
     */
    private static final float DEFAULT_FONT_SIZE = 12.0f;
    /** Prefix of a line that is treated as a figure caption and therefore never scored as a heading. */
    private static final String FIGURE_LIKELY_START_CAPTION = "Figure ";
    /** Prefix of a line that is treated as a table caption and therefore never scored as a heading. */
    private static final String TABLE_LIKELY_START_CAPTION = "Table ";
    /**
     * Maximum number of rows in a detected table. {@code detectTable} uses the inlined literal 100;
     * this constant is not referenced by name in the visible code.
     */
    private static final int MAX_TABLE_ROWS = 100;
    /**
     * Lower-case fragments searched for (as substrings) in a lower-cased font name by
     * {@code isMonospaceFont}.
     * NOTE(review): short fragments such as "pre", "mono" and "code" match any font name containing them;
     * confirm that this breadth is intended.
     */
    private static final String[] MONOSPACE_FONT_NAME_FRAGS = new String[]{"courier", "consolas", "monaco", "menlo", "lucida console", "dejavu sans mono", "source code", "fira code", "fira mono", "mono", "code", "console", "fixed", "typewriter", "terminal", "pre", "monospace"};

    /**
     * Not instantiable: every member of this class is static.
     *
     * @throws UnsupportedOperationException always
     */
    private PdfDocumentUtils() {
        throw new UnsupportedOperationException("Instantiation of this utility class is not allowed!");
    }

    /**
     * Converts a PDF into message contents: one text content holding the Markdown rendition of the
     * document, followed by one image content per image returned by {@code extractImagesFromPDF}.
     *
     * @param filePath path passed to {@code MessageAttachmentUtils.addFileURLMarkers} together with the text
     * @param bytes    raw bytes of the PDF
     * @return the text content first, then the image contents in extraction order
     * @throws Exception if the document cannot be parsed or converted
     */
    public static List<MessageContent> getPDFText(String filePath, byte[] bytes) throws Exception {
        List<MessageContent> contents = new ArrayList<>();
        try (PDDocument document = PdfDocumentUtils.parsePDFDocument(bytes)) {
            String markdown = PdfDocumentUtils.convertToMarkdown(document);
            String markedText = MessageAttachmentUtils.addFileURLMarkers(filePath, markdown);
            contents.add(new MessageTextContent(markedText));

            Map<String, byte[]> imagesByHash = PdfDocumentUtils.extractImagesFromPDF(document);
            for (byte[] pngBytes : imagesByHash.values()) {
                MessageAttachmentUtils.ImageDetails details = MessageAttachmentUtils.getImageDetails("image.png", pngBytes);
                ImageUrl url = new ImageUrl(details.base64Representation);
                contents.add(new MessageImageUrlWithResolutionContent(url, details.resolution));
            }
        }
        return contents;
    }

    /**
     * Parses PDF bytes into a {@link PDDocument} using reflection, so that the same code works with two
     * different PDFBox API shapes.
     * <p>
     * The first attempt wraps the bytes in {@code org.apache.pdfbox.io.RandomAccessBuffer}, calls
     * {@code parse()} and then {@code getDocument()} (presumably the PDFBox 2.x API). If anything in that
     * attempt fails, the bytes are wrapped in {@code org.apache.pdfbox.io.RandomAccessReadBuffer} and the
     * value returned by {@code parse()} is used directly (presumably the PDFBox 3.x API).
     * <p>
     * When both attempts fail, the failure of the second attempt is thrown and the failure of the first
     * attempt is attached to it as a suppressed exception, so the root cause is not lost.
     *
     * @param bytes raw bytes of the PDF
     * @return the parsed document; the caller is responsible for closing it
     * @throws Exception if neither strategy can parse the bytes
     */
    public static PDDocument parsePDFDocument(byte[] bytes) throws Exception {
        Constructor<PDFParser> parserConstructor = PDFParser.class.getConstructor(Class.forName("org.apache.pdfbox.io.RandomAccessRead"));
        try {
            Class<?> bufferClass = Class.forName("org.apache.pdfbox.io.RandomAccessBuffer");
            // The byte[] is wrapped in an Object[] so it is passed as ONE constructor argument.
            Object buffer = bufferClass.getConstructor(byte[].class).newInstance(new Object[]{bytes});
            Object parser = parserConstructor.newInstance(buffer);
            // parse()/getDocument() are invoked reflectively because their signatures differ between APIs.
            parser.getClass().getMethod("parse", new Class[0]).invoke(parser, new Object[0]);
            COSDocument cosDoc = (COSDocument)parser.getClass().getMethod("getDocument", new Class[0]).invoke(parser, new Object[0]);
            return new PDDocument(cosDoc);
        }
        catch (Throwable firstFailure) {
            try {
                Class<?> bufferClass = Class.forName("org.apache.pdfbox.io.RandomAccessReadBuffer");
                Object buffer = bufferClass.getConstructor(byte[].class).newInstance(new Object[]{bytes});
                Object parser = parserConstructor.newInstance(buffer);
                return (PDDocument)parser.getClass().getMethod("parse", new Class[0]).invoke(parser, new Object[0]);
            }
            catch (Throwable secondFailure) {
                // Keep the reason the first strategy failed; otherwise only the fallback's error is visible.
                secondFailure.addSuppressed(firstFailure);
                throw secondFailure;
            }
        }
    }

    /**
     * Collects the images found in the resources of every page of the document.
     *
     * @param document the parsed PDF
     * @return PNG bytes keyed by the value of {@code SecurityUtils.hash}, in first-seen order; an image
     *         whose hash was already seen replaces the earlier entry but keeps its position
     * @throws NoSuchAlgorithmException declared by the per-page extraction helper
     */
    static Map<String, byte[]> extractImagesFromPDF(PDDocument document) throws NoSuchAlgorithmException {
        Map<String, byte[]> imagesByHash = new LinkedHashMap<>();
        for (PDPage page : document.getPages()) {
            Map<String, byte[]> pageImages = PdfDocumentUtils.getImagesFromResources(page.getResources());
            imagesByHash.putAll(pageImages);
        }
        return imagesByHash;
    }

    /**
     * Extracts every image XObject directly referenced by the given resources and encodes it as PNG.
     * <p>
     * Extraction is best-effort: an XObject that cannot be read, decoded, encoded or hashed is logged at
     * debug level and skipped, so one bad image does not prevent the others from being returned.
     *
     * @param resources page resources; may be {@code null}, in which case an empty map is returned
     * @return PNG bytes keyed by the value of {@code SecurityUtils.hash}, in iteration order
     * @throws NoSuchAlgorithmException kept for signature compatibility with existing callers
     */
    private static Map<String, byte[]> getImagesFromResources(PDResources resources) throws NoSuchAlgorithmException {
        Map<String, byte[]> images = new LinkedHashMap<>();
        if (resources == null) {
            // A page without a resource dictionary has no images to offer.
            return images;
        }
        Iterable<COSName> xObjectNames = resources.getXObjectNames();
        for (COSName xObjectName : xObjectNames) {
            try {
                PDXObject xObject = resources.getXObject(xObjectName);
                if (!(xObject instanceof PDImageXObject)) {
                    continue;
                }
                BufferedImage image = ((PDImageXObject)xObject).getImage();
                try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
                    // ImageIO.write returns false when no suitable writer exists; nothing was written then.
                    if (!ImageIO.write(image, "png", baos)) {
                        log.debug("No PNG writer available for PDF image XObject: " + xObjectName.getName());
                        continue;
                    }
                    byte[] byteArray = baos.toByteArray();
                    images.put(SecurityUtils.hash(byteArray), byteArray);
                }
            }
            catch (Exception ex) {
                log.debug("Skipping PDF image XObject that could not be extracted: " + xObjectName.getName(), ex);
            }
        }
        return images;
    }

    /**
     * Tells whether a font name looks like a monospaced font, i.e. whether its lower-cased form contains
     * one of the fragments in {@code MONOSPACE_FONT_NAME_FRAGS}.
     * <p>
     * Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM default locale
     * (with a Turkish default locale, "COURIER" would otherwise lower-case to a string that does not
     * contain "courier").
     *
     * @param fontName font name to test; may be {@code null} or empty
     * @return {@code true} if a known fragment is contained in the name, {@code false} otherwise
     */
    static boolean isMonospaceFont(String fontName) {
        if (fontName == null || fontName.isEmpty()) {
            return false;
        }
        String lowerFontName = fontName.toLowerCase(Locale.ROOT);
        for (String fragment : MONOSPACE_FONT_NAME_FRAGS) {
            if (lowerFontName.contains(fragment)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Decides whether a whole line should be treated as monospaced (code) text.
     * <p>
     * A line qualifies when its own font name is recognised as monospaced, or when at least half of its
     * non-blank text segments are flagged as monospaced.
     *
     * @param lineInfo the line to inspect
     * @return {@code true} if the line counts as monospaced
     */
    static boolean isLineMonospaced(TextLineInfo lineInfo) {
        if (lineInfo.fontName != null && PdfDocumentUtils.isMonospaceFont(lineInfo.fontName)) {
            return true;
        }
        List<TextSegment> parts = lineInfo.textSegments;
        if (parts == null || parts.isEmpty()) {
            return false;
        }
        int visibleParts = 0;
        int monospacedParts = 0;
        for (TextSegment part : parts) {
            boolean blank = part.text == null || part.text.trim().isEmpty();
            if (blank) {
                continue;
            }
            visibleParts++;
            if (part.isMonospaced) {
                monospacedParts++;
            }
        }
        // "At least half": compared as 2 * monospaced >= visible to stay in integer arithmetic.
        return visibleParts > 0 && monospacedParts * 2 >= visibleParts;
    }

    /**
     * Renders the extracted lines as Markdown.
     * <ul>
     *   <li>Lines recognised by {@code isTableOfContentsLine} are dropped.</li>
     *   <li>Lines scored as headings by {@code isLikelyHeading} become {@code "## "} headings.</li>
     *   <li>Consecutive monospaced lines are gathered into one fenced code block; blank lines inside the
     *       run are kept, and the block is closed by the next heading or regular line.</li>
     *   <li>Every other line is formatted with {@code formatTextWithMarkdown} and separated from the
     *       previous one by a single or double newline, as decided by {@code hasVisualParagraphBreak}.</li>
     * </ul>
     * The closing fence of a code block is always written on a line of its own.
     *
     * @param textLines   lines in reading order
     * @param tocEntries  table-of-contents entries used for heading detection
     * @param avgFontSize average font size; heading detection is skipped unless it is positive
     * @param maxFontSize maximum font size, passed through to heading detection
     * @return the Markdown text
     */
    static String convertTextLinesToMarkdownWithCodeBlocks(List<TextLineInfo> textLines, List<TocEntry> tocEntries, float avgFontSize, float maxFontSize) {
        StringBuilder markdown = new StringBuilder();
        StringBuilder codeBlockContent = new StringBuilder();
        TextLineInfo previousLineInfo = null;
        boolean inCodeBlock = false;
        int size = textLines.size();
        for (int i = 0; i < size; ++i) {
            TextLineInfo lineInfo = textLines.get(i);
            String trimmed = lineInfo.text.trim();
            if (PdfDocumentUtils.isTableOfContentsLine(trimmed)) {
                continue;
            }
            if (trimmed.isEmpty()) {
                if (inCodeBlock) {
                    // Blank lines inside a code run belong to the code block.
                    codeBlockContent.append("\n");
                    continue;
                }
                if (PdfDocumentUtils.hasVisibleText(previousLineInfo)) {
                    markdown.append("\n");
                }
                previousLineInfo = null;
                continue;
            }
            boolean isHeading = avgFontSize > 0.0f
                    && PdfDocumentUtils.isLikelyHeading(lineInfo, avgFontSize, maxFontSize, i, textLines, tocEntries);
            if (isHeading) {
                if (inCodeBlock) {
                    PdfDocumentUtils.closeCodeBlock(markdown, codeBlockContent, "\n\n");
                    inCodeBlock = false;
                }
                if (PdfDocumentUtils.hasVisibleText(previousLineInfo)) {
                    markdown.append("\n\n");
                }
                markdown.append("## ").append(trimmed).append("\n\n");
                previousLineInfo = null;
                continue;
            }
            if (PdfDocumentUtils.isLineMonospaced(lineInfo)) {
                if (!inCodeBlock) {
                    if (PdfDocumentUtils.hasVisibleText(previousLineInfo)) {
                        markdown.append("\n\n");
                    }
                    inCodeBlock = true;
                    codeBlockContent.setLength(0);
                }
                if (codeBlockContent.length() > 0) {
                    codeBlockContent.append("\n");
                }
                // The untrimmed text is used so indentation inside code is preserved.
                codeBlockContent.append(lineInfo.text);
                previousLineInfo = lineInfo;
                continue;
            }
            if (inCodeBlock) {
                PdfDocumentUtils.closeCodeBlock(markdown, codeBlockContent, "\n\n");
                inCodeBlock = false;
            }
            String formattedText = PdfDocumentUtils.formatTextWithMarkdown(lineInfo);
            if (previousLineInfo != null) {
                markdown.append(PdfDocumentUtils.hasVisualParagraphBreak(lineInfo, previousLineInfo) ? "\n\n" : "\n");
            }
            markdown.append(formattedText);
            previousLineInfo = lineInfo;
        }
        if (inCodeBlock) {
            PdfDocumentUtils.closeCodeBlock(markdown, codeBlockContent, "\n");
        }
        return markdown.toString();
    }

    /** Tells whether the given line exists and contains something other than whitespace. */
    private static boolean hasVisibleText(TextLineInfo lineInfo) {
        return lineInfo != null && lineInfo.text.trim().length() > 0;
    }

    /**
     * Writes the collected code as a fenced block followed by {@code afterFence}, then empties the buffer.
     * A newline is inserted before the closing fence when the code does not already end with one, because
     * a fence glued to the last code line is not recognised as a fence.
     */
    private static void closeCodeBlock(StringBuilder markdown, StringBuilder codeBlockContent, String afterFence) {
        markdown.append("```\n").append(codeBlockContent);
        int length = codeBlockContent.length();
        if (length > 0 && codeBlockContent.charAt(length - 1) != '\n') {
            markdown.append('\n');
        }
        markdown.append("```").append(afterFence);
        codeBlockContent.setLength(0);
    }

    /**
     * Runs the full text pipeline on a parsed document: extract lines with font information, drop
     * top-margin content, turn table regions into Markdown tables, collect table-of-contents entries,
     * compute font statistics and finally render everything as Markdown.
     *
     * @param pdDoc the parsed PDF
     * @return the Markdown rendition of the document text
     * @throws IOException if text extraction fails
     */
    private static String convertToMarkdown(PDDocument pdDoc) throws IOException {
        Map<Integer, Float> heightsByPage = PdfDocumentUtils.buildPageHeightMap(pdDoc);

        FontAwareTextStripper stripper = new FontAwareTextStripper();
        // The returned string is not used; the call populates the lines read back via getTextLines().
        stripper.getText(pdDoc);

        List<TextLineInfo> lines = PdfDocumentUtils.filterTopMarginContent(stripper.getTextLines(), heightsByPage);
        lines = PdfDocumentUtils.convertTablesToMarkdown(lines);

        List<TocEntry> toc = PdfDocumentUtils.extractTableOfContents(lines);
        float averageSize = PdfDocumentUtils.calculateAverageFontSize(lines);
        float largestSize = PdfDocumentUtils.calculateMaxFontSize(lines);
        return PdfDocumentUtils.convertTextLinesToMarkdownWithCodeBlocks(lines, toc, averageSize, largestSize);
    }

    /**
     * Maps each 1-based page number to the height of that page's media box.
     *
     * @param document the parsed PDF
     * @return page heights keyed by 1-based page number; 792.0f is used when a page has no media box
     */
    public static Map<Integer, Float> buildPageHeightMap(PDDocument document) {
        final float fallbackHeight = 792.0f;
        Map<Integer, Float> heightsByPage = new HashMap<>();
        int pageNumber = 1;
        for (PDPage page : document.getPages()) {
            PDRectangle mediaBox = page.getMediaBox();
            float height = fallbackHeight;
            if (mediaBox != null) {
                height = mediaBox.getHeight();
            }
            heightsByPage.put(pageNumber, height);
            pageNumber++;
        }
        return heightsByPage;
    }

    /**
     * Removes lines positioned in the top margin of their page.
     * <p>
     * The margin limit of a page is 10% of its height, capped at 100. A line is dropped when its
     * {@code yPosition} is strictly between 0 and that limit. Lines on a page with unknown height and
     * lines whose {@code yPosition} is 0 or negative are always kept.
     *
     * @param textLines   lines to filter
     * @param pageHeights page heights keyed by page number
     * @return a new list with the retained lines, in the original order
     */
    static List<TextLineInfo> filterTopMarginContent(List<TextLineInfo> textLines, Map<Integer, Float> pageHeights) {
        final float marginFraction = 0.1f;
        final float marginCap = 100.0f;
        List<TextLineInfo> kept = new ArrayList<>();
        for (TextLineInfo line : textLines) {
            Float height = pageHeights.get(line.pageNumber);
            boolean inTopMargin = false;
            if (height != null) {
                float limit = Math.min(height.floatValue() * marginFraction, marginCap);
                inTopMargin = line.yPosition > 0.0f && line.yPosition < limit;
            }
            if (!inTopMargin) {
                kept.add(line);
            }
        }
        return kept;
    }

    /**
     * Replaces every table region found by {@code detectTable} with a single synthetic line holding the
     * Markdown table (prefixed with a newline). A detected caption line is emitted just before the table.
     * All other lines are copied unchanged.
     * <p>
     * The synthetic line takes font size, font name, page number and {@code yPosition} from the first
     * table row and is neither bold nor italic.
     *
     * @param textLines lines in reading order
     * @return a new list in which table regions are collapsed into Markdown table lines
     */
    static List<TextLineInfo> convertTablesToMarkdown(List<TextLineInfo> textLines) {
        List<TextLineInfo> converted = new ArrayList<>();
        final int total = textLines.size();
        int index = 0;
        while (index < total) {
            TableRegion region = PdfDocumentUtils.detectTable(textLines, index);
            if (region == null) {
                converted.add(textLines.get(index));
                index++;
                continue;
            }
            String table = PdfDocumentUtils.formatTableAsMarkdown(region);
            if (region.caption != null) {
                converted.add(region.caption);
            }
            TextLineInfo firstRow = region.lines.get(0);
            TextLineInfo tableLine = new TextLineInfo("\n" + table, firstRow.fontSize, false, false, firstRow.fontName, firstRow.pageNumber);
            tableLine.yPosition = firstRow.yPosition;
            converted.add(tableLine);
            // Resume right after the last line consumed by the table.
            index = region.endIndex;
        }
        return converted;
    }

    /**
     * Tries to recognise a table that starts at {@code startIndex}.
     * <p>
     * A line matching {@code "Table <number>."} is taken as the caption and the table is searched for
     * after it. Blank lines before the first row are skipped. A row is a line with at least two segments
     * whose segment count differs by at most one from the first row's count. The table ends at the first
     * blank or non-row line, after 100 rows, or at the end of the input.
     *
     * @param textLines  lines in reading order
     * @param startIndex index at which to start looking
     * @return the detected region (at least two rows), or {@code null} if no table starts here
     */
    static TableRegion detectTable(List<TextLineInfo> textLines, int startIndex) {
        final int total = textLines.size();
        if (startIndex >= total) {
            return null;
        }
        TextLineInfo firstLine = textLines.get(startIndex);
        TextLineInfo captionLine = null;
        int tableStartIndex = startIndex;
        if (firstLine.text.trim().matches("^Table\\s+\\d+\\..*")) {
            captionLine = firstLine;
            tableStartIndex = startIndex + 1;
            if (tableStartIndex >= total) {
                return null;
            }
        }

        final int rowLimit = 100;
        List<TextLineInfo> rows = new ArrayList<>();
        int expectedColumns = -1;
        int index = tableStartIndex;
        while (index < total) {
            TextLineInfo candidate = textLines.get(index);
            if (candidate.text.trim().isEmpty()) {
                if (!rows.isEmpty()) {
                    // A blank line after at least one row terminates the table.
                    break;
                }
                // Leading blank lines are skipped and do not count towards the region.
                tableStartIndex = index + 1;
                index++;
                continue;
            }
            int columns = candidate.segments.size();
            boolean rowShaped = columns >= 2
                    && (expectedColumns == -1 || Math.abs(columns - expectedColumns) <= 1);
            if (!rowShaped) {
                if (rows.isEmpty()) {
                    return null;
                }
                break;
            }
            if (expectedColumns == -1) {
                expectedColumns = columns;
            }
            rows.add(candidate);
            if (rows.size() >= 2 && (rows.size() >= rowLimit || index == total - 1)) {
                break;
            }
            index++;
        }
        if (rows.size() < 2) {
            return null;
        }
        return new TableRegion(captionLine, rows, tableStartIndex + rows.size());
    }

    /**
     * Formats a table region as a Markdown pipe table: the first row is the header, followed by a
     * {@code ---} separator row and the remaining rows.
     * <p>
     * The column count is the largest segment count of any row, and shorter rows (including the header)
     * are padded with empty cells. Table detection tolerates rows that are one column wider than the
     * first row; sizing the table from the first row alone would leave such rows wider than the header.
     * Cell text is trimmed and a literal {@code |} inside a cell is escaped as {@code \|} so it is not
     * read as a column boundary.
     *
     * @param tableRegion the region to format
     * @return the Markdown table, each row terminated by a newline; empty if the region has no rows
     */
    static String formatTableAsMarkdown(TableRegion tableRegion) {
        int numColumns = 0;
        for (TextLineInfo line : tableRegion.lines) {
            numColumns = Math.max(numColumns, line.segments.size());
        }
        StringBuilder markdown = new StringBuilder();
        boolean headerWritten = false;
        for (TextLineInfo line : tableRegion.lines) {
            PdfDocumentUtils.appendTableRow(markdown, line.segments, numColumns);
            if (!headerWritten) {
                markdown.append("|");
                for (int i = 0; i < numColumns; ++i) {
                    markdown.append(" --- |");
                }
                markdown.append("\n");
                headerWritten = true;
            }
        }
        return markdown.toString();
    }

    /** Appends one table row with exactly {@code numColumns} cells, padding missing cells with empty text. */
    private static void appendTableRow(StringBuilder markdown, List<String> cells, int numColumns) {
        markdown.append("| ");
        for (int column = 0; column < numColumns; ++column) {
            String cell = column < cells.size() ? cells.get(column) : null;
            String text = cell == null ? "" : cell.trim().replace("|", "\\|");
            markdown.append(text).append(" | ");
        }
        markdown.append("\n");
    }

    /**
     * Applies inline Markdown formatting to a line. Lines that carry text segments are formatted segment
     * by segment, other lines are formatted as a whole. The result is then passed through the bullet
     * list conversion.
     *
     * @param lineInfo the line to format
     * @return the formatted text
     */
    static String formatTextWithMarkdown(TextLineInfo lineInfo) {
        boolean hasSegments = lineInfo.textSegments != null && !lineInfo.textSegments.isEmpty();
        String formatted;
        if (hasSegments) {
            formatted = PdfDocumentUtils.processSegments(lineInfo);
        } else {
            formatted = PdfDocumentUtils.processLineWithoutSegments(lineInfo);
        }
        return PdfDocumentUtils.convertPossibleListItem(formatted);
    }

    /**
     * Turns a line that starts (after leading whitespace) with the bullet character U+2022 into a
     * Markdown list item: the first bullet is replaced by {@code "- "} and a newline is appended.
     * Any other input, including {@code null} and blank text, is returned unchanged.
     *
     * @param result the formatted line
     * @return the list item, or the input itself when it is not a bullet line
     */
    private static String convertPossibleListItem(String result) {
        if (result == null) {
            return result;
        }
        String stripped = result.trim();
        if (stripped.isEmpty() || !stripped.startsWith("\u2022")) {
            return result;
        }
        int bulletAt = result.indexOf("\u2022");
        if (bulletAt < 0) {
            return result;
        }
        // Text after the bullet is kept verbatim, including any space that followed it.
        return result.substring(0, bulletAt) + "- " + result.substring(bulletAt + 1) + "\n";
    }

    /**
     * Splits text into its leading whitespace, its trailing whitespace and the content in between,
     * using {@link Character#isWhitespace(char)} to recognise whitespace. For text consisting only of
     * whitespace, everything ends up in the leading part.
     *
     * @param lineText text to split; must not be {@code null}
     * @return the three parts
     */
    private static LineExtractionInfo extractLineInfo(String lineText) {
        final int length = lineText.length();

        int contentStart = 0;
        while (contentStart < length && Character.isWhitespace(lineText.charAt(contentStart))) {
            contentStart++;
        }

        int contentEnd = length;
        while (contentEnd > contentStart && Character.isWhitespace(lineText.charAt(contentEnd - 1))) {
            contentEnd--;
        }

        String leading = lineText.substring(0, contentStart);
        String trailing = lineText.substring(contentEnd);
        String content = lineText.substring(contentStart, contentEnd);
        return new LineExtractionInfo(leading, trailing, content);
    }

    /**
     * Formats a line as a whole, based on the line-level font attributes. The content is wrapped in one
     * Markdown marker, chosen in this order of precedence: monospaced font (backtick), bold and italic
     * ({@code ***}), bold ({@code **}), italic ({@code *}). Leading and trailing whitespace stays
     * outside the markers. Null, blank and unstyled text is returned unchanged.
     *
     * @param lineInfo the line to format
     * @return the formatted text
     */
    private static String processLineWithoutSegments(TextLineInfo lineInfo) {
        String text = lineInfo.text;
        if (text == null || text.trim().isEmpty()) {
            return text;
        }
        LineExtractionInfo li = PdfDocumentUtils.extractLineInfo(text);
        String marker;
        if (lineInfo.fontName != null && PdfDocumentUtils.isMonospaceFont(lineInfo.fontName)) {
            marker = "`";
        } else if (lineInfo.isBold && lineInfo.isItalic) {
            marker = "***";
        } else if (lineInfo.isBold) {
            marker = "**";
        } else if (lineInfo.isItalic) {
            marker = "*";
        } else {
            return text;
        }
        return li.leadingWhitespace + marker + li.trimmedText + marker + li.trailingWhitespace;
    }

    /**
     * Formats a line segment by segment. The content of each segment is wrapped in one Markdown marker,
     * chosen in this order of precedence: monospaced (backtick), bold and italic ({@code ***}), bold
     * ({@code **}), italic ({@code *}). Leading and trailing whitespace of a segment stays outside the
     * markers. Blank and unstyled segments are copied unchanged.
     * <p>
     * Segments whose text is {@code null} are skipped instead of causing a
     * {@link NullPointerException}; this matches {@code isLineMonospaced}, which also tolerates them.
     *
     * @param lineInfo the line whose {@code textSegments} are formatted
     * @return the concatenated formatted segments
     */
    private static String processSegments(TextLineInfo lineInfo) {
        StringBuilder formatted = new StringBuilder();
        for (TextSegment segment : lineInfo.textSegments) {
            String text = segment.text;
            if (text == null) {
                continue;
            }
            String marker = null;
            if (segment.isMonospaced) {
                marker = "`";
            } else if (segment.isBold && segment.isItalic) {
                marker = "***";
            } else if (segment.isBold) {
                marker = "**";
            } else if (segment.isItalic) {
                marker = "*";
            }
            LineExtractionInfo li = PdfDocumentUtils.extractLineInfo(text);
            if (marker == null || li.trimmedText.isEmpty()) {
                // Nothing to emphasise: keep the segment exactly as extracted.
                formatted.append(text);
                continue;
            }
            formatted.append(li.leadingWhitespace).append(marker).append(li.trimmedText).append(marker).append(li.trailingWhitespace);
        }
        return formatted.toString();
    }

    /**
     * Decides whether two consecutive lines are separated by a paragraph break.
     * <p>
     * There is never a break when a line is missing, when the lines are on different pages, when both
     * have a {@code yPosition} of 0, or when their vertical distance is below 0.1. Otherwise a break is
     * reported when either line is bold, when the distance exceeds 2.5 times the average font size of
     * the two lines, or when it exceeds 2.0 times that size and the previous line ends with
     * {@code .}, {@code !} or {@code ?}. An average font size of 0 or less is replaced by 12.
     *
     * @param currentLineInfo  the line being appended
     * @param previousLineInfo the line appended before it
     * @return {@code true} if a paragraph break should separate the two lines
     */
    static boolean hasVisualParagraphBreak(TextLineInfo currentLineInfo, TextLineInfo previousLineInfo) {
        if (currentLineInfo == null || previousLineInfo == null) {
            return false;
        }
        boolean samePage = currentLineInfo.pageNumber == previousLineInfo.pageNumber;
        boolean bothUnpositioned = currentLineInfo.yPosition == 0.0f && previousLineInfo.yPosition == 0.0f;
        if (!samePage || bothUnpositioned) {
            return false;
        }
        float distance = Math.abs(currentLineInfo.yPosition - previousLineInfo.yPosition);
        if (distance < 0.1f) {
            return false;
        }
        float meanFontSize = (currentLineInfo.fontSize + previousLineInfo.fontSize) / 2.0f;
        if (meanFontSize <= 0.0f) {
            meanFontSize = 12.0f;
        }
        float relativeDistance = distance / meanFontSize;
        String previousText = previousLineInfo.text.trim();
        boolean previousEndsSentence = previousText.length() > 0
                && (previousText.endsWith(".") || previousText.endsWith("!") || previousText.endsWith("?"));
        if (previousLineInfo.isBold || currentLineInfo.isBold) {
            // Bold on either side forces a break regardless of spacing.
            return true;
        }
        return relativeDistance > 2.5f || (relativeDistance > 2.0f && previousEndsSentence);
    }

    /**
     * Scores a line with several heuristics and reports it as a heading when the score reaches 5.
     * <p>
     * Lines shorter than 3 or longer than 150 characters (trimmed) and lines starting with the table or
     * figure caption prefix are never headings. Points: matching table-of-contents entry +5; font size
     * above 1.3 times the average +3, else above 1.15 times +2; bold +2; title case +1; upper-case-only
     * text shorter than 80 characters +2; next line blank +1; not ending with {@code .}, {@code ,} or
     * {@code ;} +1; shorter than 60 characters +1.
     *
     * @param lineInfo    the line to score
     * @param avgFontSize average font size of the document
     * @param maxFontSize maximum font size of the document (not used by the current scoring)
     * @param lineIndex   index of {@code lineInfo} within {@code allLines}
     * @param allLines    all lines, used to look at the following line
     * @param tocEntries  table-of-contents entries to match against
     * @return {@code true} if the line is likely a heading
     */
    static boolean isLikelyHeading(TextLineInfo lineInfo, float avgFontSize, float maxFontSize, int lineIndex, List<TextLineInfo> allLines, List<TocEntry> tocEntries) {
        String candidate = lineInfo.text.trim();
        int length = candidate.length();
        if (length > 150 || length < 3) {
            return false;
        }
        if (candidate.startsWith(TABLE_LIKELY_START_CAPTION) || candidate.startsWith(FIGURE_LIKELY_START_CAPTION)) {
            return false;
        }

        int score = 0;
        if (PdfDocumentUtils.matchWithTocEntry(candidate, lineInfo.pageNumber, tocEntries) != null) {
            score += 5;
        }
        if (lineInfo.fontSize > avgFontSize * 1.3f) {
            score += 3;
        } else if (lineInfo.fontSize > avgFontSize * 1.15f) {
            score += 2;
        }
        if (lineInfo.isBold) {
            score += 2;
        }
        if (PdfDocumentUtils.isTitleCase(candidate)) {
            score += 1;
        }
        if (length < 80 && candidate.matches("^[A-Z][A-Z0-9\\s\\-:]+$")) {
            score += 2;
        }
        boolean hasNextLine = lineIndex < allLines.size() - 1;
        if (hasNextLine && allLines.get(lineIndex + 1).text.trim().isEmpty()) {
            score += 1;
        }
        char lastChar = candidate.charAt(length - 1);
        if (".,;".indexOf(lastChar) < 0) {
            score += 1;
        }
        if (length < 60) {
            score += 1;
        }
        return score >= 5;
    }

    /**
     * Tells whether text looks like a title: it has at least two whitespace-separated words and at
     * least 60% of them start with an upper-case letter. Leading non-letter characters of a word (for
     * example digits or an opening bracket) are ignored when looking for its first letter.
     *
     * @param text text to test; must not be {@code null}
     * @return {@code true} if the text is in title case by this measure
     */
    static boolean isTitleCase(String text) {
        String[] tokens = text.split("\\s+");
        if (tokens.length < 2) {
            return false;
        }
        int capitalized = 0;
        for (String token : tokens) {
            String letters = token.replaceFirst("^[^a-zA-Z]+", "");
            boolean startsUpper = letters.length() > 0 && Character.isUpperCase(letters.charAt(0));
            if (startsUpper) {
                capitalized++;
            }
        }
        double required = (double)tokens.length * 0.6;
        return (double)capitalized >= required;
    }

    /**
     * Collects the table-of-contents entries found in the given lines: every trimmed line recognised by
     * {@code isTableOfContentsLine} is parsed with {@code parseTocLine}, and successfully parsed entries
     * are returned in line order.
     *
     * @param textLines lines in reading order
     * @return the parsed entries; empty if there are none
     */
    static List<TocEntry> extractTableOfContents(List<TextLineInfo> textLines) {
        List<TocEntry> entries = new ArrayList<>();
        for (TextLineInfo info : textLines) {
            String candidate = info.text.trim();
            if (PdfDocumentUtils.isTableOfContentsLine(candidate)) {
                TocEntry parsed = PdfDocumentUtils.parseTocLine(candidate);
                if (parsed != null) {
                    entries.add(parsed);
                }
            }
        }
        return entries;
    }

    /**
     * Parses one table-of-contents line into a title and a page number.
     * <p>
     * Runs of three or more dots, dashes, underscores or whitespace characters are collapsed into a
     * single space; the last token must then be a decimal number, a Roman numeral or a single letter.
     * The page number is the decimal value, else the Roman numeral value when positive, else -1.
     *
     * @param line the line to parse; may be {@code null}
     * @return the entry, or {@code null} if the line is null, empty or not of the expected shape
     */
    static TocEntry parseTocLine(String line) {
        if (line == null || line.isEmpty()) {
            return null;
        }
        String withoutLeaders = line.replaceAll("[.\\-_\\s]{3,}", " ").trim();
        Matcher shape = Pattern.compile("^(.+?)\\s+([0-9]+|[ivxlcdmIVXLCDM]+|[a-zA-Z])\\s*$").matcher(withoutLeaders);
        if (!shape.matches()) {
            return null;
        }
        String title = shape.group(1).trim();
        String pageToken = shape.group(2).trim();
        int page;
        try {
            page = Integer.parseInt(pageToken);
        }
        catch (NumberFormatException notDecimal) {
            // Not a decimal number: try a Roman numeral; anything else means "page unknown".
            int romanValue = PdfDocumentUtils.convertRomanToInt(pageToken);
            page = romanValue > 0 ? romanValue : -1;
        }
        return new TocEntry(title, page);
    }

    /**
     * Tells whether a line looks like a table-of-contents entry, i.e. a title followed by leader
     * characters and a page reference.
     * <p>
     * Two checks are made. If the line contains a run of at least five leader symbols, the trimmed line
     * matches when it ends with three or more leader or whitespace characters followed by digits, Roman
     * numeral letters or a single letter. Otherwise, or if that fails, the line matches when it contains
     * eight or more consecutive leader or whitespace characters followed by an alphanumeric token at the
     * end.
     *
     * @param line the line to test; may be {@code null}
     * @return {@code true} if the line looks like a table-of-contents entry; always {@code false} for
     *         {@code null} or lines shorter than five characters
     */
    public static boolean isTableOfContentsLine(String line) {
        if (line == null || line.length() < 5) {
            return false;
        }
        if (PdfDocumentUtils.countMaxRepeatedSymbols(line) >= 5) {
            String trimmed = line.trim();
            String[] leaderTails = new String[]{
                ".*[._\\-\\s]{3,}\\s*[0-9]+\\s*$",
                ".*[._\\-\\s]{3,}\\s*[ivxlcdmIVXLCDM]+\\s*$",
                ".*[._\\-\\s]{3,}\\s*[a-zA-Z]\\s*$"
            };
            for (String regex : leaderTails) {
                if (trimmed.matches(regex)) {
                    return true;
                }
            }
        }
        return line.matches("^.+[\\s._\\-]{8,}\\s*[0-9ivxlcdmIVXLCDMA-Za-z]+\\s*$");
    }

    /**
     * Returns the length of the longest run of leader symbols ({@code .}, {@code _}, {@code -} or the
     * ellipsis character U+2026) in the line. Spaces inside a run neither count nor end it; any other
     * character ends the run.
     *
     * @param line the line to scan; must not be {@code null}
     * @return the number of leader symbols in the longest run, 0 if there are none
     */
    private static int countMaxRepeatedSymbols(String line) {
        int longestRun = 0;
        int currentRun = 0;
        for (char c : line.toCharArray()) {
            boolean leaderSymbol = c == '.' || c == '_' || c == '-' || c == '\u2026';
            if (leaderSymbol) {
                currentRun++;
            } else if (c != ' ' || currentRun == 0) {
                // A space only bridges a run that has already started; everything else ends it.
                longestRun = Math.max(longestRun, currentRun);
                currentRun = 0;
            }
        }
        return Math.max(longestRun, currentRun);
    }

    /**
     * Computes the mean font size of all non-blank lines.
     *
     * @param textLines lines to average
     * @return the mean font size, or 12.0f when there is no non-blank line
     */
    static float calculateAverageFontSize(List<TextLineInfo> textLines) {
        float total = 0.0f;
        int counted = 0;
        for (TextLineInfo line : textLines) {
            boolean blank = line.text.trim().isEmpty();
            if (!blank) {
                total += line.fontSize;
                counted++;
            }
        }
        // An empty list leaves counted at 0 and therefore also yields the default.
        if (counted == 0) {
            return 12.0f;
        }
        return total / (float)counted;
    }

    /**
     * Finds the largest font size among all non-blank lines.
     *
     * @param textLines lines to inspect
     * @return the largest font size, or 12.0f when no non-blank line has a positive font size
     */
    static float calculateMaxFontSize(List<TextLineInfo> textLines) {
        float largest = 0.0f;
        for (TextLineInfo line : textLines) {
            if (line.text.trim().isEmpty()) {
                continue;
            }
            if (line.fontSize > largest) {
                largest = line.fontSize;
            }
        }
        if (largest > 0.0f) {
            return largest;
        }
        return 12.0f;
    }

    /** Lower-case heading texts that denote a subsection; see {@code isSubsectionHeading}. */
    private static final String[] SUBSECTION_HEADINGS = new String[]{"related information", "related topics", "see also", "references", "reference", "further reading", "additional resources", "additional information", "more information", "notes", "note", "warning", "warnings", "tip", "tips", "example", "examples", "see", "also see", "related", "for more information", "for additional information", "additional notes", "footnotes", "appendix", "glossary", "index"};

    /**
     * Tells whether a heading is one of the well-known subsection headings ("See also", "Notes",
     * "Examples", ...). The comparison ignores case and surrounding whitespace. A heading matches when
     * it equals a known text or starts with a known text followed by {@code ":"}, {@code " -"} or
     * {@code " \u2014"} (space and em dash).
     * <p>
     * Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM default locale
     * (with a Turkish default locale, "INDEX" would otherwise not match "index").
     *
     * @param headingText heading to test; may be {@code null}
     * @return {@code true} if the heading is a known subsection heading; {@code false} for {@code null}
     *         or blank input
     */
    public static boolean isSubsectionHeading(String headingText) {
        if (headingText == null || headingText.trim().isEmpty()) {
            return false;
        }
        String lowerText = headingText.toLowerCase(Locale.ROOT).trim();
        for (String pattern : SUBSECTION_HEADINGS) {
            if (lowerText.equals(pattern)
                    || lowerText.startsWith(pattern + ":")
                    || lowerText.startsWith(pattern + " -")
                    || lowerText.startsWith(pattern + " \u2014")) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts a Roman numeral to its integer value, accepting upper- and lower-case letters.
     * <p>
     * The numeral is read from right to left; a symbol smaller than the one to its right is subtracted,
     * any other symbol is added. The input is not checked for being a well-formed numeral, so malformed
     * sequences of valid symbols still produce a number.
     * <p>
     * Case is folded per character with {@link Character#toUpperCase(char)}, which does not depend on
     * the JVM default locale (with a Turkish default locale, upper-casing the whole string would turn
     * {@code "i"} into a dotted capital I that is not a Roman symbol).
     *
     * @param roman the numeral; may be {@code null}
     * @return the value, or -1 if the input is {@code null}, empty or contains a non-Roman character
     */
    private static int convertRomanToInt(String roman) {
        if (roman == null || roman.isEmpty()) {
            return -1;
        }
        int result = 0;
        int prevValue = 0;
        for (int i = roman.length() - 1; i >= 0; --i) {
            int value = PdfDocumentUtils.romanSymbolValue(roman.charAt(i));
            if (value < 0) {
                return -1;
            }
            result += value < prevValue ? -value : value;
            prevValue = value;
        }
        return result;
    }

    /** Returns the value of a single Roman symbol in either case, or -1 if it is not a Roman symbol. */
    private static int romanSymbolValue(char symbol) {
        switch (Character.toUpperCase(symbol)) {
            case 'I':
                return 1;
            case 'V':
                return 5;
            case 'X':
                return 10;
            case 'L':
                return 50;
            case 'C':
                return 100;
            case 'D':
                return 500;
            case 'M':
                return 1000;
            default:
                return -1;
        }
    }

    /**
     * Finds the first table-of-contents entry whose normalised title equals the normalised text. An
     * entry with a known (positive) page number only matches when that number is within two pages of
     * {@code pageNumber}; an entry without a known page number matches on the title alone.
     *
     * @param text       text of the candidate heading
     * @param pageNumber page on which the candidate heading appears
     * @param tocEntries entries to search
     * @return the matching entry, or {@code null} if there is none
     */
    static TocEntry matchWithTocEntry(String text, int pageNumber, List<TocEntry> tocEntries) {
        if (tocEntries.isEmpty()) {
            return null;
        }
        String wanted = PdfDocumentUtils.normalizeForMatching(text);
        for (TocEntry candidate : tocEntries) {
            if (!wanted.equals(PdfDocumentUtils.normalizeForMatching(candidate.title))) {
                continue;
            }
            boolean pageUnknown = candidate.pageNumber <= 0;
            if (pageUnknown || Math.abs(pageNumber - candidate.pageNumber) <= 2) {
                return candidate;
            }
        }
        return null;
    }

    /**
     * Normalises text for tolerant comparison: removes every character that is not an ASCII letter, an
     * ASCII digit or whitespace, collapses whitespace runs into a single space, lower-cases the result
     * and trims it.
     * <p>
     * Lower-casing uses {@link Locale#ROOT} so the result does not depend on the JVM default locale
     * (with a Turkish default locale, {@code "I"} would otherwise become a dotless i).
     *
     * @param text text to normalise; may be {@code null}
     * @return the normalised text; the empty string for {@code null}
     */
    public static String normalizeForMatching(String text) {
        if (text == null) {
            return "";
        }
        return text.replaceAll("[^a-zA-Z0-9\\s]", "")
                .replaceAll("\\s+", " ")
                .toLowerCase(Locale.ROOT)
                .trim();
    }

    /**
     * One extracted line of text together with its font attributes and position. The text and font
     * attributes are fixed at construction; the position and the segment lists start out empty and are
     * filled in afterwards through the public fields.
     */
    public static class TextLineInfo {
        /** Text of the line as extracted (not trimmed). */
        public final String text;
        /** Font size of the line. */
        public final float fontSize;
        /** Whether the line as a whole is bold. */
        public final boolean isBold;
        /** Whether the line as a whole is italic. */
        public final boolean isItalic;
        /** Font name of the line; may be {@code null}. */
        public final String fontName;
        /** Number of the page holding the line; used as key into the page height map. */
        public final int pageNumber;
        /** Vertical position of the line; 0 until set. */
        public float yPosition = 0.0f;
        /** Horizontal positions; presumably one per segment - TODO confirm against the text stripper. */
        public List<Float> xPositions = new ArrayList<Float>();
        /** Column texts of the line; table detection treats a line with two or more as a table row. */
        public List<String> segments = new ArrayList<String>();
        /** Styled runs of the line, used for inline formatting and monospace detection. */
        public List<TextSegment> textSegments = new ArrayList<TextSegment>();

        /**
         * Creates a line with position 0 and empty position and segment lists.
         *
         * @param text       text of the line
         * @param fontSize   font size of the line
         * @param isBold     whether the line is bold
         * @param isItalic   whether the line is italic
         * @param fontName   font name; may be {@code null}
         * @param pageNumber number of the page holding the line
         */
        TextLineInfo(String text, float fontSize, boolean isBold, boolean isItalic, String fontName, int pageNumber) {
            this.text = text;
            this.fontName = fontName;
            this.fontSize = fontSize;
            this.isBold = isBold;
            this.isItalic = isItalic;
            this.pageNumber = pageNumber;
        }
    }

    /** A run of text within a line that shares the same style flags. */
    public static class TextSegment {
        /** Text of the run; consumers in this file check it for {@code null}. */
        String text;
        /** Whether the run is bold. */
        boolean isBold;
        /** Whether the run is italic. */
        boolean isItalic;
        /** Whether the run is set in a monospaced font. */
        boolean isMonospaced;

        /**
         * Creates a run with the given text and style flags.
         *
         * @param text         text of the run
         * @param isBold       whether the run is bold
         * @param isItalic     whether the run is italic
         * @param isMonospaced whether the run is monospaced
         */
        TextSegment(final String text, final boolean isBold, final boolean isItalic, final boolean isMonospaced) {
            this.text = text;
            this.isMonospaced = isMonospaced;
            this.isBold = isBold;
            this.isItalic = isItalic;
        }
    }

    /** A run of consecutive lines recognised as a table, with an optional caption line. */
    public static class TableRegion {
        /** Caption line preceding the table, or {@code null} if none was detected. */
        TextLineInfo caption;
        /** Table rows in order; the first row is rendered as the header. */
        List<TextLineInfo> lines;
        /** Index, in the scanned line list, of the first line after the table. */
        int endIndex;

        /**
         * Creates a region.
         *
         * @param caption  caption line, or {@code null}
         * @param lines    table rows
         * @param endIndex index of the first line after the table
         */
        TableRegion(final TextLineInfo caption, final List<TextLineInfo> lines, final int endIndex) {
            this.lines = lines;
            this.caption = caption;
            this.endIndex = endIndex;
        }
    }

    /** Text split into leading whitespace, content and trailing whitespace. */
    private static class LineExtractionInfo {
        /** Whitespace before the content. */
        private String leadingWhitespace;
        /** Whitespace after the content. */
        private String trailingWhitespace;
        /** Content without the surrounding whitespace. */
        private String trimmedText;

        /**
         * Creates the split result.
         *
         * @param leadingWhitespace  whitespace before the content
         * @param trailingWhitespace whitespace after the content
         * @param trimmedText        content without the surrounding whitespace
         */
        public LineExtractionInfo(final String leadingWhitespace, final String trailingWhitespace, final String trimmedText) {
            this.trimmedText = trimmedText;
            this.leadingWhitespace = leadingWhitespace;
            this.trailingWhitespace = trailingWhitespace;
        }
    }

    /** One entry of the table of contents. */
    public static class TocEntry {
        /** Title of the entry. */
        String title;
        /** Page number of the entry; -1 when the page reference could not be turned into a number. */
        int pageNumber;

        /**
         * Creates an entry.
         *
         * @param title      title of the entry
         * @param pageNumber page number, or -1 if unknown
         */
        TocEntry(final String title, final int pageNumber) {
            this.pageNumber = pageNumber;
            this.title = title;
        }
    }
}

