/*
 * Decompiled with CFR 0.152.
 */
package com.rw.crawler;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.berkeley.BreadthCrawler;
import com.rw.crawler.EntryObject;
import com.wb.util.DbUtil;
import com.wb.util.SysUtil;
import com.xiaoleilu.hutool.util.StrUtil;
import com.xiaoleilu.hutool.util.URLUtil;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import jdk.nashorn.api.scripting.ScriptObjectMirror;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Crawler for the {@code http://bbs.gtja.net/hgwx/} site.
 *
 * <p>The crawl has two stages:
 * <ol>
 *   <li>The menu page ({@code menu.htm}) embeds a JavaScript variable named
 *       {@code menu}. It is evaluated with Nashorn and flattened into
 *       {@link EntryObject}s; every entry whose URL ends in {@code htm} is
 *       queued as a follow-up {@link CrawlDatum} carrying {@code id},
 *       {@code pid} and {@code title} as metadata.</li>
 *   <li>Each queued content page ({@code bookpath*.htm}) has its body HTML
 *       extracted, lightly cleaned, and inserted into the
 *       {@code WB_CRAWLER_GT} table.</li>
 * </ol>
 */
public class GTCrawler
extends BreadthCrawler {
    private static final Logger logger = LoggerFactory.getLogger(GTCrawler.class);

    /** Prefix prepended to the relative URLs found in the menu structure. */
    private static final String BASE_URL = "http://bbs.gtja.net/hgwx/";

    /** Seed page holding the JavaScript menu definition. */
    private static final String MENU_URL = "http://bbs.gtja.net/hgwx/menu.htm";

    /** Regex (as understood by {@code Page.matchUrl}) selecting content pages. */
    private static final String CONTENT_URL_REGEX = "http://bbs.gtja.net/hgwx/bookpath.*htm";

    /** Zero-based index of the {@code <script>} element that declares {@code menu}. */
    private static final int MENU_SCRIPT_INDEX = 2;

    /**
     * Creates the crawler and registers the menu page as both seed and URL filter.
     *
     * @param crawlPath directory passed through to {@link BreadthCrawler}
     * @param autoParse auto-parse flag passed through to {@link BreadthCrawler}
     */
    public GTCrawler(String crawlPath, boolean autoParse) {
        super(crawlPath, autoParse);
        this.addSeed(MENU_URL);
        this.addRegex(MENU_URL);
    }

    /**
     * Dispatches a fetched page to the menu or content handler based on its URL.
     * Pages matching neither pattern are ignored.
     *
     * @param page the fetched page
     * @param next collector for follow-up crawl tasks
     */
    public void visit(Page page, CrawlDatums next) {
        if (page.matchUrl(MENU_URL)) {
            this.visitMenu(page, next);
        } else if (page.matchUrl(CONTENT_URL_REGEX)) {
            this.visitContent(page);
        }
    }

    /** Evaluates the menu script and queues one crawl task per {@code *.htm} entry. */
    private void visitMenu(Page page, CrawlDatums next) {
        List<Element> scripts = page.select("script");
        if (scripts.size() <= MENU_SCRIPT_INDEX) {
            // Previously an unchecked get(2) here would throw IndexOutOfBoundsException.
            logger.error("Error crawler: menu page {} has only {} <script> element(s)", page.url(), scripts.size());
            return;
        }
        String data = scripts.get(MENU_SCRIPT_INDEX).data();
        // The script body is wrapped in an HTML comment (<!-- ... -->); strip the wrapper.
        String jsVar = StrUtil.removePrefix(StrUtil.removeSuffix(StrUtil.trim(data), "-->"), "<!--");

        ScriptEngine nashorn = new ScriptEngineManager().getEngineByName("nashorn");
        if (nashorn == null) {
            // getEngineByName returns null when the engine is not available in this JVM.
            logger.error("Error crawler: script engine 'nashorn' is not available");
            return;
        }

        try {
            // NOTE(review): this evaluates script text downloaded over plain HTTP.
            // Nashorn scripts can reach Java classes, so this trusts the remote site;
            // consider sandboxing (e.g. a ClassFilter) or parsing the data without eval.
            nashorn.eval(jsVar);
        } catch (ScriptException e) {
            logger.error("Error crawler: " + e.getMessage(), e);
            return;
        }

        Object menu = nashorn.get("menu");
        if (!(menu instanceof ScriptObjectMirror)) {
            logger.error("Error crawler: script on {} did not define a 'menu' object", page.url());
            return;
        }

        List<EntryObject> entryList = new ArrayList<EntryObject>();
        this.eachObject("", (ScriptObjectMirror) menu, entryList);

        int seq = 1;
        for (EntryObject eo : entryList) {
            String entryUrl = eo.getUrl();
            if (StrUtil.isBlank(entryUrl) || !StrUtil.endWithIgnoreCase(entryUrl, "htm")) {
                continue;
            }
            String seed = BASE_URL + URLUtil.encode(entryUrl, "utf-8");
            logger.debug("{}:{},id={}, title={}", seq++, seed, eo.getId(), eo.getTitle());
            CrawlDatum cd = new CrawlDatum(seed);
            cd.key(eo.getId());
            cd.meta("id", eo.getId());
            cd.meta("pid", eo.getPid());
            cd.meta("title", eo.getTitle());
            next.add(cd);
        }
    }

    /** Extracts, cleans and stores the body of a content page. */
    private void visitContent(Page page) {
        String id = page.meta("id");
        String pid = page.meta("pid");
        String title = page.meta("title");

        Element body = page.select("body").first();
        // A page without <body> is treated like an empty page instead of throwing.
        String content = body == null
                ? ""
                : StrUtil.trimStart(StrUtil.removePrefix(body.html().trim(), "\u201d"));

        if (StrUtil.isNotBlank(content)) {
            // Drop everything from the first <script tag onwards. The search is
            // case-insensitive and the cut only happens when the tag is really
            // present; the earlier code cut at indexOf("<script") even when that
            // was -1 (e.g. "<SCRIPT>" or the plain word "script" in the text).
            int scriptStart = indexOfIgnoreCase(content, "<script");
            if (scriptStart >= 0) {
                content = content.substring(0, scriptStart);
            }
            if (!StrUtil.containsIgnoreCase(content, "<table")) {
                // Pages without tables are flattened to text: paragraph ends and <br>
                // become carriage returns, &nbsp; is removed, remaining tags are stripped.
                // NOTE(review): "(S*?)" looks like a lost "\\S" escape; as written the
                // pattern still matches any <...> tag, so it is kept unchanged.
                content = content.replaceAll("<p[^>]*>", "")
                        .replace("</p>", "\r")
                        .replace("<br>", "\r")
                        .replace("&nbsp;", "")
                        .replaceAll("<(S*?)[^>]*>.*?|<.*? />", "");
            }
            logger.debug("URL={},ID={}, PID={}, TITLE={}, BODY={}", page.url(), id, pid, title, content);
            this.record(id, pid, title, content);
        } else {
            logger.debug("URL={},ID={}, PID={}, TITLE={}, BODY={}", page.url(), id, pid, title, content);
            this.record(id, pid, title, "");
        }
    }

    /**
     * Returns the index of the first case-insensitive occurrence of {@code needle}
     * in {@code text}, or {@code -1} if there is none.
     */
    private static int indexOfIgnoreCase(String text, String needle) {
        int lastStart = text.length() - needle.length();
        for (int i = 0; i <= lastStart; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Recursively flattens a script object into {@link EntryObject}s.
     *
     * <p>One entry is created for {@code o} itself with a fresh id from
     * {@code SysUtil.getId()} and the given parent id. Property {@code "1"} is
     * used as the title and property {@code "2"} as the URL; every property that
     * is itself a {@link ScriptObjectMirror} is processed recursively as a child.
     * Children are appended to {@code entryList} before their parent.
     *
     * @param pid       id of the parent entry (empty string for the root)
     * @param o         script object to flatten
     * @param entryList output list receiving the created entries
     */
    public void eachObject(String pid, ScriptObjectMirror o, List<EntryObject> entryList) {
        EntryObject eo = new EntryObject();
        String id = SysUtil.getId();
        eo.setId(id);
        eo.setPid(pid);
        for (Map.Entry<String, Object> entry : o.entrySet()) {
            String key = entry.getKey();
            Object value = entry.getValue();
            if (value instanceof ScriptObjectMirror) {
                this.eachObject(id, (ScriptObjectMirror) value, entryList);
            } else if ("1".equals(key)) {
                eo.setTitle(value.toString());
            } else if ("2".equals(key)) {
                eo.setUrl(value.toString());
            }
        }
        entryList.add(eo);
    }

    /**
     * Inserts one crawled page into {@code WB_CRAWLER_GT}.
     *
     * <p>Failures are logged and swallowed so that a single bad row does not
     * abort the crawl; the statement and connection are always released.
     *
     * @param id      entry id
     * @param pid     parent entry id
     * @param title   entry title
     * @param content cleaned page content (may be empty)
     */
    private void record(String id, String pid, String title, String content) {
        Connection conn = null;
        PreparedStatement st = null;
        try {
            conn = DbUtil.getConnection();
            st = conn.prepareStatement("insert into WB_CRAWLER_GT values(?,?,?,?)");
            st.setString(1, id);
            st.setString(2, pid);
            st.setString(3, title);
            st.setString(4, content);
            st.executeUpdate();
        } catch (Throwable e) {
            // Message text: "error storing crawled page content in the database".
            logger.error("\u7f51\u9875\u6293\u53d6\u5185\u5bb9\u5165\u5e93\u9519\u8bef:" + e.getMessage(), e);
        } finally {
            DbUtil.close((Object) st);
            DbUtil.close((Object) conn);
        }
    }

    /**
     * Runs the crawler with crawl path {@code "crawler"}, 100 threads and a
     * start argument of 3.
     *
     * @param args unused
     * @throws Exception if the crawler fails to start
     */
    public static void main(String[] args) throws Exception {
        GTCrawler crawler = new GTCrawler("crawler", true);
        crawler.setThreads(100);
        crawler.start(3);
    }
}

