package ir.webutils;

import ir.utilities.MoreMath;
import ir.utilities.MoreString;
import java.io.File;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;

/* JADX WARN: Classes with same name are omitted:
  input_file:ir/ir.jar:ir/webutils/Spider.class
 */
/* loaded from: input_file:ir/webutils/Spider.class */
/**
 * Sequential web crawler driven by command-line options.
 *
 * <p>Seed links are queued with {@code -u}; {@link #doCrawl()} then takes links from the
 * front of {@link #linksToVisit} and appends newly extracted links to the end, so pages
 * are visited in first-in, first-out order. Each page for which
 * {@code HTMLPage.indexAllowed()} is true is passed to {@link #indexPage(HTMLPage)}.
 *
 * <p>Recognised options: {@code -safe}, {@code -d <dir>}, {@code -c <maxCount>},
 * {@code -u <url>} and {@code -slow}.
 */
public class Spider {
    /** Directory passed to {@code HTMLPage.write}; set by the {@code -d} option. */
    protected File saveDir;

    /** Links already taken from the queue; (re)created at the start of each crawl. */
    protected HashSet<Link> visited;

    /** FIFO queue of links still to be tried. */
    protected List<Link> linksToVisit = new LinkedList<>();

    /** When true, the crawler waits up to one second before each link. */
    protected boolean slow = false;

    /** Fetches pages; replaced by a {@code SafeHTMLPageRetriever} when {@code -safe} is given. */
    protected HTMLPageRetriever retriever = new HTMLPageRetriever();

    /** Number of pages indexed so far. */
    protected int count = 0;

    /** Upper bound on the number of pages to index. */
    protected int maxCount = 10000;

    /**
     * Parses the command line and then runs the crawl.
     *
     * @param strArr command-line arguments
     */
    public void go(String[] strArr) {
        processArgs(strArr);
        doCrawl();
    }

    /**
     * Dispatches each recognised option to its handler. Arguments that do not start with
     * {@code '-'} (including empty or null entries) and unrecognised options are ignored.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if {@code -d}, {@code -c} or {@code -u} is the last
     *     argument and therefore has no value
     */
    public void processArgs(String[] strArr) {
        int i = 0;
        while (i < strArr.length) {
            String arg = strArr[i];
            // startsWith is safe on an empty string, unlike charAt(0).
            if (arg != null && arg.startsWith("-")) {
                if (arg.equals("-safe")) {
                    handleSafeCommandLineOption();
                } else if (arg.equals("-d")) {
                    i++;
                    handleDCommandLineOption(optionValue(strArr, i, arg));
                } else if (arg.equals("-c")) {
                    i++;
                    handleCCommandLineOption(optionValue(strArr, i, arg));
                } else if (arg.equals("-u")) {
                    i++;
                    handleUCommandLineOption(optionValue(strArr, i, arg));
                } else if (arg.equals("-slow")) {
                    handleSlowCommandLineOption();
                }
            }
            i++;
        }
    }

    /** Returns {@code args[index]}, or fails with a message naming the option that lacks a value. */
    private static String optionValue(String[] args, int index, String option) {
        if (index >= args.length) {
            throw new IllegalArgumentException("Missing value for option " + option);
        }
        return args[index];
    }

    /** Handles {@code -safe}: switches the retriever to a {@code SafeHTMLPageRetriever}. */
    public void handleSafeCommandLineOption() {
        this.retriever = new SafeHTMLPageRetriever();
    }

    /**
     * Handles {@code -d}: records the save directory, creating it (and any missing parent
     * directories) when it does not exist yet.
     *
     * @param str path of the destination directory
     * @throws IllegalArgumentException if the path exists but is not a directory, or the
     *     directory cannot be created
     */
    protected void handleDCommandLineOption(String str) {
        this.saveDir = new File(str);
        if (this.saveDir.exists()) {
            // Fail early rather than accepting a regular file as the destination.
            if (!this.saveDir.isDirectory()) {
                throw new IllegalArgumentException("Not a directory: " + this.saveDir.toString());
            }
            return;
        }
        if (!this.saveDir.mkdirs()) {
            throw new IllegalArgumentException("Failed to create directory " + this.saveDir.toString());
        }
        System.out.println("Created destination directory " + this.saveDir.toString());
    }

    /**
     * Handles {@code -c}: sets the maximum number of pages to index.
     *
     * @param str decimal integer
     * @throws NumberFormatException if {@code str} is not a valid integer
     */
    public void handleCCommandLineOption(String str) {
        this.maxCount = Integer.parseInt(str);
    }

    /**
     * Handles {@code -u}: appends a seed link to the queue.
     *
     * @param str URL of the page to start from
     */
    public void handleUCommandLineOption(String str) {
        this.linksToVisit.add(new Link(str));
    }

    /** Handles {@code -slow}: enables the pause before each link. */
    public void handleSlowCommandLineOption() {
        this.slow = true;
    }

    /**
     * Runs the crawl until the queue is empty or {@link #maxCount} pages have been indexed.
     *
     * <p>Terminates the JVM with status 0 when there are no links to visit. If the thread
     * is interrupted during the {@code -slow} pause, the interrupt flag is restored and the
     * crawl stops.
     */
    public void doCrawl() {
        if (this.linksToVisit.isEmpty()) {
            System.err.println("Exiting: No pages to visit.");
            System.exit(0);
        }
        this.visited = new HashSet<>();
        while (!this.linksToVisit.isEmpty() && this.count < this.maxCount) {
            if (this.slow && !pauseBeforeNextLink()) {
                System.err.println("Interrupted: stopping crawl.");
                return;
            }
            Link link = this.linksToVisit.remove(0);
            System.out.println("Trying: " + link);
            if (!this.visited.add(link)) {
                System.out.println("Already visited");
            } else if (linkToHTMLPage(link)) {
                visitPage(link);
            } else {
                System.out.println("Not HTML Page");
            }
        }
    }

    /**
     * Waits up to one second on this object's monitor.
     *
     * @return false if the thread was interrupted (the interrupt flag is restored)
     */
    private boolean pauseBeforeNextLink() {
        synchronized (this) {
            try {
                wait(1000L);
                return true;
            } catch (InterruptedException e) {
                // Preserve the interrupt for callers instead of silently discarding it.
                Thread.currentThread().interrupt();
                return false;
            }
        }
    }

    /** Retrieves one page, indexes it when allowed, and queues its outgoing links. */
    private void visitPage(Link link) {
        try {
            HTMLPage page = this.retriever.getHTMLPage(link);
            if (page.empty()) {
                System.out.println("No Page Found");
                return;
            }
            if (page.indexAllowed()) {
                this.count++;
                System.out.println("Indexing(" + this.count + "): " + link);
                indexPage(page);
            }
            // No point collecting more links once the page limit has been reached.
            if (this.count < this.maxCount) {
                this.linksToVisit.addAll(getNewLinks(page));
            }
        } catch (PathDisallowedException e) {
            System.out.println(e);
        }
    }

    /**
     * Decides from the URL path's file extension whether a link is treated as an HTML page.
     *
     * @param link link to test
     * @return true when the extension is empty, or is {@code html}, {@code htm} or
     *     {@code shtml} (case-insensitive)
     */
    public boolean linkToHTMLPage(Link link) {
        String fileExtension = MoreString.fileExtension(link.getURL().getPath());
        return fileExtension.equals("")
                || fileExtension.equalsIgnoreCase("html")
                || fileExtension.equalsIgnoreCase("htm")
                || fileExtension.equalsIgnoreCase("shtml");
    }

    /**
     * Extracts the links of a page with a {@code LinkExtractor}.
     *
     * @param hTMLPage page to scan
     * @return links found on the page
     */
    protected List<Link> getNewLinks(HTMLPage hTMLPage) {
        return new LinkExtractor(hTMLPage).extractLinks();
    }

    /**
     * Writes the page to {@link #saveDir} under the name {@code "P"} followed by the current
     * page count, zero-padded to the number of decimal digits in {@link #maxCount}.
     *
     * @param hTMLPage page to save
     */
    protected void indexPage(HTMLPage hTMLPage) {
        // Count digits directly: a floating-point log can round down at exact powers of
        // ten and is undefined for non-positive limits.
        int width = String.valueOf(Math.max(1, this.maxCount)).length();
        hTMLPage.write(this.saveDir, "P" + MoreString.padWithZeros(this.count, width));
    }

    /**
     * Command-line entry point.
     *
     * @param strArr command-line arguments, see {@link #processArgs(String[])}
     */
    public static void main(String[] strArr) {
        new Spider().go(strArr);
    }
}
