package ir.webutils;

import ir.utilities.MoreMath;
import ir.utilities.MoreString;
import java.io.File;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;

/* loaded from: input_file:ir/webutils/YahooSpider.class */
/**
 * Random-walk crawler over a category hierarchy.
 *
 * <p>Each round starts at {@link #topCategoryLink}, repeatedly follows a randomly
 * chosen category link until a site link is randomly selected, then downloads
 * that site page and writes it into {@link #saveDir}. The crawl stops once
 * {@link #maxCount} pages have been written.
 *
 * <p>Command-line options understood by {@link #processArgs(String[])}:
 * <ul>
 *   <li>{@code -d <dir>}: destination directory (created if missing)</li>
 *   <li>{@code -c <n>}: maximum number of pages to save</li>
 *   <li>{@code -u <url>}: top category URL to start every walk from</li>
 *   <li>{@code -p <prefix>}: file name prefix for saved pages</li>
 *   <li>{@code -slow}: wait one second before every round</li>
 * </ul>
 */
public class YahooSpider {
    /** Category page every random walk starts from (set by {@code -u}). */
    protected Link topCategoryLink;
    /** Directory saved pages are written to (set by {@code -d}). */
    protected File saveDir;
    /** Prefix of every saved page's file name (set by {@code -p}). */
    protected String filePrefix = "P";
    /** Category links of the category currently being examined; may be null. */
    protected List<Link> categoryLinks = new LinkedList<>();
    /** Site links of the category currently being examined; may be null. */
    protected List<Link> siteLinks = new LinkedList<>();
    /** When true, the crawl waits one second before each round (set by {@code -slow}). */
    protected boolean slow = false;
    protected HTMLPageRetriever retriever = new HTMLPageRetriever();
    /** Number of pages saved so far. */
    protected int count = 0;
    /** Number of pages to save before the crawl stops (set by {@code -c}). */
    protected int maxCount = 10000;
    /** Cache: category page -> category links extracted from it. */
    public Map<Link, List<Link>> categoryLinksMap = new HashMap<>();
    /** Cache: category page -> site links extracted from it. */
    public Map<Link, List<Link>> siteLinksMap = new HashMap<>();
    /** Sites already picked, so that no site is downloaded twice. */
    protected HashSet<Link> visitedSites = new HashSet<>();
    protected Random random = new Random();

    /**
     * Parses the command line and runs the crawl.
     *
     * @param strArr command-line arguments, see {@link #processArgs(String[])}
     */
    public void go(String[] strArr) {
        processArgs(strArr);
        doCrawl();
    }

    /**
     * Applies the recognised command-line options to this spider.
     *
     * <p>Arguments that are null, empty, or not a recognised option are ignored.
     * The argument following {@code -d}, {@code -c}, {@code -u} or {@code -p} is
     * always consumed as that option's value.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if an option that needs a value is the last
     *         argument, or if an option handler rejects its value
     */
    public void processArgs(String[] strArr) {
        int i = 0;
        while (i < strArr.length) {
            String option = strArr[i];
            if (option != null) {
                // Non-option and unknown arguments fall through to default and are skipped.
                switch (option) {
                    case "-d":
                        handleDCommandLineOption(requireOptionValue(strArr, ++i, option));
                        break;
                    case "-c":
                        handleCCommandLineOption(requireOptionValue(strArr, ++i, option));
                        break;
                    case "-u":
                        handleUCommandLineOption(requireOptionValue(strArr, ++i, option));
                        break;
                    case "-p":
                        handlePCommandLineOption(requireOptionValue(strArr, ++i, option));
                        break;
                    case "-slow":
                        handleSlowCommandLineOption();
                        break;
                    default:
                        break;
                }
            }
            i++;
        }
    }

    /**
     * Returns the value that follows an option, failing with a clear message
     * instead of an index error when the option is the last argument.
     */
    private static String requireOptionValue(String[] args, int valueIndex, String option) {
        if (valueIndex >= args.length) {
            throw new IllegalArgumentException("Missing value for option " + option);
        }
        return args[valueIndex];
    }

    /**
     * Sets the destination directory, creating it (and any missing parents) if
     * it does not exist yet.
     *
     * @param str path of the destination directory
     * @throws IllegalArgumentException if the directory cannot be created
     */
    protected void handleDCommandLineOption(String str) {
        this.saveDir = new File(str);
        if (this.saveDir.exists()) {
            return;
        }
        // mkdirs rather than mkdir so that nested destination paths work too.
        if (!this.saveDir.mkdirs()) {
            throw new IllegalArgumentException("Failed to create directory " + this.saveDir.toString());
        }
        System.out.println("Created destination directory " + this.saveDir.toString());
    }

    /**
     * Sets the maximum number of pages to save.
     *
     * @param str decimal integer
     * @throws NumberFormatException if {@code str} is not a valid integer
     */
    protected void handleCCommandLineOption(String str) {
        this.maxCount = Integer.parseInt(str);
    }

    /**
     * Sets the top category every random walk starts from.
     *
     * @param str URL of the top category page
     */
    protected void handleUCommandLineOption(String str) {
        this.topCategoryLink = new Link(str);
    }

    /**
     * Sets the file name prefix used for saved pages.
     *
     * @param str the prefix
     */
    protected void handlePCommandLineOption(String str) {
        this.filePrefix = str;
    }

    /** Enables the one-second wait before every crawl round. */
    protected void handleSlowCommandLineOption() {
        this.slow = true;
    }

    /**
     * Runs crawl rounds until {@link #maxCount} pages have been saved.
     *
     * <p>Each round picks one site by a random walk from the top category. The
     * site is skipped if it was picked before or does not look like an HTML
     * page; otherwise it is downloaded and, when non-empty, saved.
     *
     * <p>If the thread is interrupted during the slow-mode wait, the interrupt
     * flag is restored and the crawl stops.
     *
     * <p>NOTE(review): a round that saves nothing does not advance {@code count},
     * so the loop only ends when enough distinct pages can actually be found.
     */
    public void doCrawl() {
        while (this.count < this.maxCount) {
            if (this.slow && !pauseBetweenRequests()) {
                return;
            }
            Link site = pickRandomSite();
            if (site == null) {
                System.out.println("Failed to find site");
                continue;
            }
            System.out.println("Picking Site: " + site);
            if (!this.visitedSites.add(site)) {
                System.out.println("Already picked site");
            } else if (linkToHTMLPage(site)) {
                fetchAndIndex(site);
            } else {
                System.out.println("Not HTML Page");
            }
        }
    }

    /**
     * Waits up to one second on this object's monitor.
     *
     * @return false if the thread was interrupted (interrupt flag restored)
     */
    private boolean pauseBetweenRequests() {
        synchronized (this) {
            try {
                wait(1000L);
            } catch (InterruptedException e) {
                // Do not swallow the interrupt: restore the flag and let the caller stop.
                Thread.currentThread().interrupt();
                return false;
            }
        }
        return true;
    }

    /**
     * Walks randomly down from the top category until a site link is chosen.
     *
     * @return the chosen site link, or null if a category with neither
     *         sub-categories nor sites was reached
     */
    private Link pickRandomSite() {
        Link category = this.topCategoryLink;
        Link site = null;
        while (site == null) {
            System.out.println("Trying Category: " + category);
            loadCategory(category);
            boolean noCategories = this.categoryLinks == null || this.categoryLinks.isEmpty();
            boolean noSites = this.siteLinks == null || this.siteLinks.isEmpty();
            if (noCategories && noSites) {
                System.out.println("No categories or sites");
                break;
            }
            if (noCategories) {
                site = getRandomLink(this.siteLinks);
            } else if (noSites) {
                category = getRandomLink(this.categoryLinks);
            } else if (this.random.nextBoolean()) {
                // Both kinds available: a coin flip decides between descending and picking.
                category = getRandomLink(this.categoryLinks);
            } else {
                site = getRandomLink(this.siteLinks);
            }
        }
        return site;
    }

    /**
     * Loads {@link #categoryLinks} and {@link #siteLinks} for a category, from
     * the caches when present, otherwise by downloading and parsing the page.
     * If the page may not be fetched, both lists are left null.
     */
    private void loadCategory(Link category) {
        this.categoryLinks = this.categoryLinksMap.get(category);
        if (this.categoryLinks != null) {
            this.siteLinks = this.siteLinksMap.get(category);
            return;
        }
        // Clear the previous category's sites so a failed fetch cannot leave
        // stale links behind that would be mistaken for this category's sites.
        this.siteLinks = null;
        try {
            HTMLPage page = this.retriever.getHTMLPage(category);
            this.categoryLinks = new YahooCategoryLinkExtractor(page).extractLinks();
            this.categoryLinksMap.put(category, this.categoryLinks);
            this.siteLinks = new YahooSiteLinkExtractor(page).extractLinks();
            this.siteLinksMap.put(category, this.siteLinks);
        } catch (PathDisallowedException e) {
            System.out.println(e);
        }
    }

    /** Downloads a site page and, if it is not empty, counts and saves it. */
    private void fetchAndIndex(Link site) {
        try {
            HTMLPage page = this.retriever.getHTMLPage(site);
            if (page.empty()) {
                System.out.println("No Page Found");
            } else {
                this.count++;
                System.out.println("Indexing(" + this.count + "): " + site);
                indexPage(page);
            }
        } catch (PathDisallowedException e) {
            System.out.println(e);
        }
    }

    /**
     * Returns a uniformly random element of a list.
     *
     * @param list non-empty list of links
     * @return one of the list's elements
     */
    protected Link getRandomLink(List<Link> list) {
        return list.get(this.random.nextInt(list.size()));
    }

    /**
     * Decides from the URL path's file extension whether a link is treated as
     * an HTML page: no extension, {@code html}, {@code htm} or {@code shtml}
     * (case-insensitive).
     *
     * @param link link to examine
     * @return true if the link should be downloaded as an HTML page
     */
    protected boolean linkToHTMLPage(Link link) {
        String fileExtension = MoreString.fileExtension(link.getURL().getPath());
        return fileExtension.equals("") || fileExtension.equalsIgnoreCase("html") || fileExtension.equalsIgnoreCase("htm") || fileExtension.equalsIgnoreCase("shtml");
    }

    /**
     * Writes a page into {@link #saveDir} under the name
     * {@code filePrefix + zero-padded count}.
     *
     * @param hTMLPage page to save
     */
    protected void indexPage(HTMLPage hTMLPage) {
        // Pad width is floor(log10(maxCount)) + 1, presumably the number of decimal
        // digits of maxCount. NOTE(review): a floating-point log can come out just
        // below an exact power of ten; confirm MoreMath.log handles that.
        hTMLPage.write(this.saveDir, this.filePrefix + MoreString.padWithZeros(this.count, ((int) Math.floor(MoreMath.log(this.maxCount, 10))) + 1));
    }

    /**
     * Command-line entry point.
     *
     * @param strArr command-line arguments, see {@link #processArgs(String[])}
     */
    public static void main(String[] strArr) {
        new YahooSpider().go(strArr);
    }
}
