package ir.webutils;

import ir.utilities.Browser;
import ir.utilities.MoreString;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

/* JADX WARN: Classes with same name are omitted:
  input_file:ir/ir.jar:ir/webutils/BeamSearchSpider.class
 */
/* loaded from: input_file:ir/webutils/BeamSearchSpider.class */
/**
 * A spider that performs a beam search for a goal page.
 *
 * <p>Links waiting to be expanded are kept in {@code linksToVisit}. After each
 * page expansion the newly found links are scored by {@link #heuristic}, the
 * queue is sorted, and only the first {@link #beamSize} entries are kept. The
 * crawl stops as soon as a retrieved page satisfies {@link #goal}.
 *
 * <p>Command-line options handled by {@link #processArgs(String[])}:
 * {@code -safe}, {@code -c <value>}, {@code -u <url>}, {@code -w <strings>},
 * {@code -h <strings>}, {@code -b <size>} and {@code -slow}. The want/help
 * strings are split on {@code ';'}.
 */
public class BeamSearchSpider extends Spider {
    /** Test applied to every retrieved page; built from the {@code -w} want strings. */
    protected PageGoal goal;

    /** Scores candidate links; lazily created by the {@code -w} / {@code -h} handlers. */
    protected LinkHeuristic heuristic;

    /** Maximum number of links kept in the queue after each expansion. */
    protected int beamSize = 100;

    /** The page that satisfied {@link #goal}, or {@code null} if none was found. */
    protected HTMLPage goalPage = null;

    /**
     * Parses the arguments, echoes the search configuration, runs the crawl and
     * reports the result. When a goal page is found, the path leading to it is
     * printed and the page is handed to {@link Browser#display}.
     *
     * @param strArr command-line arguments (see class documentation)
     */
    @Override // ir.webutils.Spider
    public void go(String[] strArr) {
        processArgs(strArr);
        if (this.heuristic == null || this.heuristic.wantStrings == null) {
            System.out.println("Error: No want strings specified.");
            return;
        }
        System.out.print("\nSearch for: ");
        for (String want : this.heuristic.wantStrings) {
            System.out.print("\"" + want + "\" ");
        }
        System.out.print("\nHelped by: ");
        // helpStrings is only assigned by the -h handler in this class, so it may
        // be absent when -h was not supplied; print an empty list in that case.
        String[] helpStrings = this.heuristic.helpStrings;
        if (helpStrings != null) {
            for (String help : helpStrings) {
                System.out.print("\"" + help + "\" ");
            }
        }
        System.out.println("");
        doCrawl();
        if (this.goalPage == null) {
            System.out.println("\nGoal page not found");
            return;
        }
        System.out.println("\nGoal Page found.  Path from start URL is:");
        printPath(this.goalPage.getLink());
        Browser.display(this.goalPage.getLink().getURL().toString());
    }

    /**
     * Dispatches each recognised option to its handler. Arguments that do not
     * start with {@code '-'} and unrecognised options are ignored. The argument
     * following a value-taking option is always consumed as its value.
     *
     * @param strArr command-line arguments
     * @throws IllegalArgumentException if a value-taking option is the last argument
     */
    @Override // ir.webutils.Spider
    public void processArgs(String[] strArr) {
        for (int i = 0; i < strArr.length; i++) {
            String arg = strArr[i];
            // startsWith is safe for empty strings, unlike charAt(0).
            if (arg == null || !arg.startsWith("-")) {
                continue;
            }
            switch (arg) {
                case "-safe":
                    handleSafeCommandLineOption();
                    break;
                case "-c":
                    handleCCommandLineOption(optionValue(strArr, ++i));
                    break;
                case "-u":
                    handleUCommandLineOption(optionValue(strArr, ++i));
                    break;
                case "-w":
                    handleWCommandLineOption(optionValue(strArr, ++i));
                    break;
                case "-h":
                    handleHCommandLineOption(optionValue(strArr, ++i));
                    break;
                case "-b":
                    handleBCommandLineOption(optionValue(strArr, ++i));
                    break;
                case "-slow":
                    handleSlowCommandLineOption();
                    break;
                default:
                    // Unknown options are silently ignored.
                    break;
            }
        }
    }

    /**
     * Returns the value at {@code valueIndex}, failing with a descriptive message
     * when the preceding option was the final argument.
     */
    private static String optionValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            throw new IllegalArgumentException(
                    "Missing value for option " + args[valueIndex - 1]);
        }
        return args[valueIndex];
    }

    /**
     * Adds a start URL to the queue of links to visit.
     *
     * @param str the URL given after {@code -u}
     */
    /* JADX INFO: Access modifiers changed from: protected */
    @Override // ir.webutils.Spider
    public void handleUCommandLineOption(String str) {
        this.linksToVisit.add(new ScoredAnchoredLink(str));
    }

    /**
     * Sets the want strings: builds the page goal from them and stores them on
     * the heuristic (creating the heuristic if necessary).
     *
     * @param str want strings separated by {@code ';'}
     */
    protected void handleWCommandLineOption(String str) {
        String[] wantStrings = MoreString.segmentToArray(str, ';');
        this.goal = new PageGoal(wantStrings);
        if (this.heuristic == null) {
            this.heuristic = constructLinkHeuristic();
        }
        this.heuristic.wantStrings = wantStrings;
    }

    /**
     * Sets the help strings on the heuristic (creating the heuristic if necessary).
     *
     * @param str help strings separated by {@code ';'}
     */
    protected void handleHCommandLineOption(String str) {
        if (this.heuristic == null) {
            this.heuristic = constructLinkHeuristic();
        }
        this.heuristic.helpStrings = MoreString.segmentToArray(str, ';');
    }

    /**
     * Factory for the link heuristic; subclasses may override to supply another one.
     *
     * @return a new {@link LinkHeuristic}
     */
    protected LinkHeuristic constructLinkHeuristic() {
        return new LinkHeuristic();
    }

    /**
     * Sets the beam size.
     *
     * @param str decimal beam size given after {@code -b}
     * @throws NumberFormatException if {@code str} is not an integer
     * @throws IllegalArgumentException if the value is negative
     */
    protected void handleBCommandLineOption(String str) {
        int size = Integer.parseInt(str);
        // A negative size would only surface later as an IndexOutOfBoundsException
        // from subList() in the middle of a crawl; reject it up front.
        if (size < 0) {
            throw new IllegalArgumentException("Beam size must not be negative: " + size);
        }
        this.beamSize = size;
    }

    /**
     * Runs the beam search until the queue is empty, {@code maxCount} links have
     * been expanded, or a page satisfying {@link #goal} is retrieved (stored in
     * {@link #goalPage}). In slow mode the crawl waits up to one second before
     * each expansion; if the thread is interrupted during that wait the interrupt
     * status is restored and the crawl stops.
     */
    @Override // ir.webutils.Spider
    public void doCrawl() {
        this.visited = new HashSet<>();
        this.goalPage = null;
        while (this.linksToVisit.size() > 0 && this.count < this.maxCount) {
            if (this.slow && !pauseBeforeExpansion()) {
                return;
            }
            ScoredAnchoredLink current = (ScoredAnchoredLink) this.linksToVisit.remove(0);
            this.count++;
            System.out.println("\nExpanding(" + this.count + "): " + current + "\nScore: " + current.score);
            if (!this.visited.add(current)) {
                System.out.println("Already visited");
            } else if (linkToHTMLPage(current)) {
                try {
                    HTMLPage page = this.retriever.getHTMLPage(current);
                    if (page.empty()) {
                        System.out.println("No Page Found");
                    } else if (this.goal.satisfiedBy(page)) {
                        this.goalPage = page;
                        return;
                    } else if (this.count < this.maxCount) {
                        // No point extracting links if the loop is about to end anyway.
                        List<Link> newLinks = getNewLinks(page);
                        scoreLinks(newLinks, page);
                        this.linksToVisit.addAll(newLinks);
                        Collections.sort(this.linksToVisit);
                        // Prune the queue down to the beam: keep only the first beamSize links.
                        if (this.linksToVisit.size() > this.beamSize) {
                            this.linksToVisit.subList(this.beamSize, this.linksToVisit.size()).clear();
                        }
                    }
                } catch (PathDisallowedException e2) {
                    System.out.println(e2);
                }
            } else {
                System.out.println("Not HTML Page");
            }
        }
    }

    /**
     * Waits up to one second on this object's monitor.
     *
     * @return {@code true} if the wait ended normally, {@code false} if the thread
     *     was interrupted (the interrupt status is restored before returning)
     */
    private boolean pauseBeforeExpansion() {
        synchronized (this) {
            try {
                wait(1000L);
                return true;
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return false;
            }
        }
    }

    /**
     * Extracts the outgoing links of a page using a {@link ScoredAnchoredLinkExtractor}.
     *
     * @param hTMLPage the page to extract links from
     * @return the extracted links
     */
    @Override // ir.webutils.Spider
    protected List<Link> getNewLinks(HTMLPage hTMLPage) {
        return new ScoredAnchoredLinkExtractor(hTMLPage).extractLinks();
    }

    /**
     * Assigns each link the score computed by the heuristic.
     *
     * @param list links to score; every element must be a {@link ScoredAnchoredLink}
     * @param hTMLPage the page the links were extracted from
     */
    protected void scoreLinks(List<Link> list, HTMLPage hTMLPage) {
        for (Link link : list) {
            ScoredAnchoredLink scored = (ScoredAnchoredLink) link;
            scored.score = this.heuristic.scoreLink(scored, hTMLPage);
        }
    }

    /**
     * Prints the path ending at {@code link}.
     *
     * @param link the final link of the path; must be a {@link ScoredAnchoredLink}
     */
    void printPath(Link link) {
        printPath((ScoredAnchoredLink) link);
    }

    /**
     * Prints the chain of back links leading to the given link, earliest first,
     * one link per line indented by two spaces.
     *
     * @param scoredAnchoredLink the final link of the path
     */
    void printPath(ScoredAnchoredLink scoredAnchoredLink) {
        // Recurse first so that ancestors are printed before this link.
        if (scoredAnchoredLink.getBackLink() != null) {
            printPath(scoredAnchoredLink.getBackLink());
        }
        System.out.println("  " + scoredAnchoredLink);
    }

    /**
     * Command-line entry point: creates a spider and runs it with the given arguments.
     *
     * @param strArr command-line arguments (see class documentation)
     */
    public static void main(String[] strArr) {
        new BeamSearchSpider().go(strArr);
    }
}
