package com.caucho.web.search;

import com.caucho.util.CharBuffer;
import com.caucho.util.IntMap;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.xml.LooseHtml;
import com.caucho.xml.XmlUtil;
import com.caucho.xpath.XPath;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;

/* loaded from: input_file:com/caucho/web/search/Page.class */
/**
 * Result of analyzing one HTML document: its title, a short description,
 * a score for every indexed word, and the links found in the document.
 *
 * <p>Instances are produced by {@link #analyze}. Link analysis additionally
 * reports anchor-text scores for the link targets to the store obtained from
 * the owning {@code Spider}.
 */
public class Page {
    /** Score added for each word occurrence in the document title. */
    private static final int TITLE_SCORE = 200;
    /** Score added for each word occurrence in the {@code keywords} meta tag. */
    private static final int KEYWORD_SCORE = 100;
    /** Score added for each word occurrence in the {@code description} meta tag. */
    private static final int DESCRIPTION_SCORE = 10;
    // The heading scores below are declared but not referenced anywhere in this class.
    private static final int H1_SCORE = 50;
    private static final int H2_SCORE = 25;
    private static final int H3_SCORE = 10;
    private static final int H4_SCORE = 5;
    /** Score credited to a link target for each word of the anchor text. */
    private static final int A_SCORE = 10;
    /** Maximum number of characters kept in the description. */
    private static final int DESC_LENGTH = 255;

    /** Words that are never indexed. */
    static String[] stop = {"and", "are", "all", "also", "can", "for", "from", "has", "have", "how", "may", "might", "must", "not", "only", "other", "should", "the", "that", "this", "than", "there", "use", "you", "your", "was", "with", "were", "what", "when", "where", "who", "why"};
    /** Lookup table built from {@link #stop}; every stop word maps to 1. */
    private static final IntMap stopWords = new IntMap();

    static {
        for (int i = 0; i < stop.length; i++) {
            stopWords.put(stop[i], 1);
        }
    }

    private final Spider spider;
    private final Path path;
    private final String url;
    private String title;
    private String description;
    private final Document doc;
    /** Maps a normalized word (String) to its {@link Word} entry. */
    private final HashMap words = new HashMap();
    /** Link targets found by {@link #analyzeLinks()}; replaced on every call. */
    private ArrayList links = new ArrayList();

    /** A normalized word together with the score accumulated for it on one page. */
    public static class Word {
        String word;
        int score;

        Word(String str) {
            this.word = str;
        }

        /** Adds {@code i} to the accumulated score. */
        void addScore(int i) {
            this.score += i;
        }

        /** Returns the normalized word. */
        public String getWord() {
            return this.word;
        }

        /** Returns the accumulated score. */
        public int getScore() {
            return this.score;
        }
    }

    /**
     * Creates a page for an already parsed document. The URL is taken from
     * {@code path.getPath()}.
     */
    Page(Spider spider, Path path, Document document) {
        this.spider = spider;
        this.path = path;
        this.url = path.getPath();
        this.doc = document;
    }

    /**
     * Parses the stream as loosely formed HTML and analyzes title,
     * description, words and links, in that order.
     *
     * @param spider     owner used for link validation and score storage
     * @param path       location of the document; its parent resolves relative links
     * @param readStream source of the document; it is not closed here
     * @param str        content type of the document; only the media type is
     *                   examined, so parameters such as {@code ; charset=UTF-8}
     *                   and letter case are ignored
     * @return the analyzed page, or {@code null} when {@code str} is
     *         {@code null} or its media type is not {@code text/html}
     * @throws Exception if parsing or XPath evaluation fails
     */
    public static Page analyze(Spider spider, Path path, ReadStream readStream, String str) throws Exception {
        if (!isHtml(str)) {
            return null;
        }
        Page page = new Page(spider, path, new LooseHtml().parseDocument(readStream));
        page.analyzeTitle();
        page.analyzeDescription();
        page.analyzeWords();
        page.analyzeLinks();
        return page;
    }

    /**
     * Tells whether a content-type value denotes {@code text/html}. Anything
     * after the first {@code ';'} is ignored and the comparison is
     * case-insensitive, so a value carrying a charset parameter still matches.
     */
    private static boolean isHtml(String contentType) {
        if (contentType == null) {
            return false;
        }
        int semicolon = contentType.indexOf(';');
        String mediaType = semicolon >= 0 ? contentType.substring(0, semicolon) : contentType;
        return mediaType.trim().equalsIgnoreCase("text/html");
    }

    /** Returns the value of {@code path.getPath()} captured at construction. */
    public String getUrl() {
        return this.url;
    }

    /** Returns the document title, or the URL when the document has no title. */
    public String getTitle() {
        return this.title;
    }

    /** Returns the description, at most {@value #DESC_LENGTH} characters long. */
    public String getDescription() {
        return this.description;
    }

    /** Reads the title from {@code /html/head/title}, falling back to the URL. */
    private void analyzeTitle() throws Exception {
        this.title = XPath.evalString("/html/head/title", this.doc);
        if (this.title == null || this.title.equals("")) {
            this.title = this.url;
        }
    }

    /**
     * Determines the description. A non-empty {@code description} meta tag is
     * used (truncated to {@value #DESC_LENGTH} characters) and its words are
     * scored. Otherwise the description is assembled from the document's text
     * nodes, and in that case no words are scored here.
     */
    private void analyzeDescription() throws Exception {
        String meta = XPath.evalString("/html/head/meta[@name='description']/@content", this.doc);
        if (meta != null && meta.length() > 0) {
            this.description = meta.length() > DESC_LENGTH ? meta.substring(0, DESC_LENGTH) : meta;
            addParagraph(this.description, DESCRIPTION_SCORE);
            return;
        }

        Iterator texts = XPath.select("//text()", this.doc);
        CharBuffer summary = new CharBuffer();
        // The length is only checked between text nodes, so the buffer can
        // overshoot; it is truncated after the loop.
        while (texts.hasNext() && summary.length() < DESC_LENGTH) {
            Text text = (Text) texts.next();
            String parent = parentName(text);
            if (parent.equals("script") || parent.equals("style") || parent.equals("title")) {
                continue;
            }
            String value = text.getNodeValue();
            for (int i = 0; i < value.length(); i++) {
                char ch = value.charAt(i);
                // Apostrophes are dropped from the generated description.
                if (ch == '\'') {
                    continue;
                }
                // Collapse runs of whitespace: skip a whitespace character
                // that directly follows another one already in the buffer.
                boolean repeatedSpace = Character.isWhitespace(ch)
                        && summary.length() > 0
                        && Character.isWhitespace(summary.charAt(summary.length() - 1));
                if (!repeatedSpace) {
                    summary.append(ch);
                }
            }
        }
        if (summary.length() > DESC_LENGTH) {
            summary.setLength(DESC_LENGTH);
        }
        this.description = summary.toString();
    }

    /** Scores the words of the title, the keywords meta tag and all text nodes. */
    private void analyzeWords() throws Exception {
        addParagraph(XPath.evalString("/html/head/title", this.doc), TITLE_SCORE);
        addParagraph(XPath.evalString("/html/head/meta[@name='keywords']/@content", this.doc), KEYWORD_SCORE);
        addSelect("//text()", 1);
    }

    /**
     * Scores the words of every text node selected by an XPath expression,
     * skipping text whose parent is a {@code script} or {@code style} element.
     *
     * @param str XPath expression; every selected node is cast to {@code Text}
     * @param i   score added per word occurrence
     */
    private void addSelect(String str, int i) throws Exception {
        Iterator texts = XPath.select(str, this.doc);
        while (texts.hasNext()) {
            Text text = (Text) texts.next();
            String parent = parentName(text);
            if (!parent.equals("script") && !parent.equals("style")) {
                addParagraph(text.getNodeValue(), i);
            }
        }
    }

    /** Returns the node name of the parent of a text node. */
    private static String parentName(Text text) {
        return text.getParentNode().getNodeName();
    }

    /**
     * Splits a piece of text into words and adds {@code i} to the score of
     * each occurrence. A {@code null} text is ignored.
     */
    private void addParagraph(String str, int i) {
        if (str == null) {
            return;
        }
        ArrayList tokens = new ArrayList();
        getWords(tokens, str);
        for (int k = tokens.size() - 1; k >= 0; k--) {
            addWord((String) tokens.get(k), i);
        }
    }

    /**
     * Adds {@code i} to the score of a normalized word, creating its entry on
     * first use. Stop words never get an entry and are ignored.
     */
    private void addWord(String str, int i) {
        Word entry = (Word) this.words.get(str);
        if (entry == null) {
            if (stopWords.get(str) > 0) {
                return;
            }
            entry = new Word(str);
            this.words.put(str, entry);
        }
        entry.addScore(i);
    }

    /** Returns an iterator over the {@link Word} entries collected for this page. */
    public Iterator getWords() {
        return this.words.values().iterator();
    }

    /**
     * Collects the targets of all {@code a} elements and credits anchor-text
     * words to valid targets.
     *
     * <p>For each element the {@code href} attribute ({@code HREF} as a
     * fallback) is read, any fragment after {@code '#'} is removed, and a
     * non-empty result is resolved against the parent of this page's path and
     * recorded as a link. When the spider accepts the target, every word of
     * the anchor text adds {@value #A_SCORE} to the target's score in the
     * spider's store.
     *
     * @throws Exception if XPath evaluation or the store fails
     */
    public void analyzeLinks() throws Exception {
        Path parent = this.path.getParent();
        this.links = new ArrayList();
        ArrayList anchorWords = new ArrayList();
        Iterator anchors = XPath.select("//a", this.doc);
        while (anchors.hasNext()) {
            Element anchor = (Element) anchors.next();
            String href = anchor.getAttribute("href");
            // A missing attribute is treated as empty, whether the DOM
            // implementation reports it as null or as "".
            if (href == null || href.equals("")) {
                href = anchor.getAttribute("HREF");
            }
            if (href == null) {
                continue;
            }
            int fragment = href.indexOf('#');
            if (fragment >= 0) {
                href = href.substring(0, fragment);
            }
            if (href.length() == 0) {
                continue;
            }
            Path target = parent.lookup(href);
            this.links.add(target);
            if (!this.spider.isValidPage(target)) {
                continue;
            }
            String anchorText = XmlUtil.textValue(anchor);
            anchorWords.clear();
            getWords(anchorWords, anchorText);
            // NOTE(review): the int returned by getPage is presumably the store's
            // identifier for the target URL -- confirm against the store class.
            int page = this.spider.getStore().getPage(target.getURL());
            for (int k = anchorWords.size() - 1; k >= 0; k--) {
                this.spider.getStore().addScore(page, (String) anchorWords.get(k), A_SCORE);
            }
        }
    }

    /**
     * Appends the normalized words of a text to a list.
     *
     * <p>A word starts at a letter and continues over letters, digits,
     * {@code '-'}, {@code '_'}, and a {@code '.'} that is directly followed by
     * a letter or digit. Only words longer than two characters are kept.
     *
     * @param arrayList receives the words as {@code String}s, in text order
     * @param str       text to split; {@code null} yields no words
     */
    private void getWords(ArrayList arrayList, String str) {
        if (str == null) {
            return;
        }
        CharBuffer token = new CharBuffer();
        int length = str.length();
        int pos = 0;
        while (pos < length) {
            if (!Character.isLetter(str.charAt(pos))) {
                pos++;
                continue;
            }
            token.clear();
            // The first character is a letter, so at least one character is
            // always consumed and the outer loop makes progress.
            while (pos < length && isWordChar(str, pos)) {
                token.append(str.charAt(pos));
                pos++;
            }
            if (token.length() > 2) {
                arrayList.add(normalize(token));
            }
        }
    }

    /** Tells whether the character at {@code pos} may appear inside a word. */
    private static boolean isWordChar(String str, int pos) {
        char ch = str.charAt(pos);
        if (Character.isLetterOrDigit(ch) || ch == '-' || ch == '_') {
            return true;
        }
        // A dot belongs to the word only when it joins two parts, e.g. "java.util".
        return ch == '.' && pos + 1 < str.length() && Character.isLetterOrDigit(str.charAt(pos + 1));
    }

    /** Returns the lower-cased content of the buffer as a string. */
    private String normalize(CharBuffer charBuffer) {
        return charBuffer.toLowerCase().toString();
    }

    /** Returns an iterator over the link targets recorded by {@link #analyzeLinks()}. */
    public Iterator getLinks() throws Exception {
        return this.links.iterator();
    }
}
