package edu.utexas.cs.tamerProject.agents;

import edu.utexas.cs.tamerProject.actSelect.ActionSelect;
import edu.utexas.cs.tamerProject.agents.tamer.HRew;
import edu.utexas.cs.tamerProject.experiment.LogTrainer;
import edu.utexas.cs.tamerProject.experiment.RecordHandler;
import edu.utexas.cs.tamerProject.featGen.FeatGen_Discretize;
import edu.utexas.cs.tamerProject.featGen.FeatGen_Mario;
import edu.utexas.cs.tamerProject.featGen.FeatGen_NoChange;
import edu.utexas.cs.tamerProject.featGen.FeatGen_RBFs;
import edu.utexas.cs.tamerProject.featGen.FeatGen_Tetris;
import edu.utexas.cs.tamerProject.featGen.FeatGenerator;
import edu.utexas.cs.tamerProject.modeling.IncGDLinearModel;
import edu.utexas.cs.tamerProject.modeling.RegressionModel;
import edu.utexas.cs.tamerProject.modeling.WekaModelPerActionModel;
import edu.utexas.cs.tamerProject.modeling.WekaModelWrap;
import edu.utexas.cs.tamerProject.params.Params;
import edu.utexas.cs.tamerProject.utilities.MutableDouble;
import edu.utexas.cs.tamerProject.utilities.Stopwatch;
import java.io.File;
import java.io.IOException;
import java.net.DatagramPacket;
import java.net.DatagramSocket;
import java.net.InetAddress;
import java.net.URL;
import java.net.URLClassLoader;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.rlcommunity.rlglue.codec.AgentInterface;
import org.rlcommunity.rlglue.codec.network.Network;
import org.rlcommunity.rlglue.codec.taskspec.TaskSpec;
import org.rlcommunity.rlglue.codec.taskspec.TaskSpecVRLGLUE3;
import org.rlcommunity.rlglue.codec.taskspec.ranges.DoubleRange;
import org.rlcommunity.rlglue.codec.taskspec.ranges.IntRange;
import org.rlcommunity.rlglue.codec.types.Action;
import org.rlcommunity.rlglue.codec.types.Observation;
import org.rlcommunity.rlglue.codec.types.Reward_observation_terminal;
import rlVizLib.general.ParameterHolder;
import rlVizLib.messaging.NotAnRLVizMessageException;
import rlVizLib.messaging.agent.AgentMessageParser;
import rlVizLib.messaging.agent.AgentMessages;
import rlVizLib.messaging.agentShell.TaskSpecResponsePayload;
import weka.core.TestInstances;

/* loaded from: input_file:edu/utexas/cs/tamerProject/agents/GeneralAgent.class */
public abstract class GeneralAgent implements AgentInterface {
    /** Step count that stepEndHelper() compares stepsThisEp against (as the literal 10000000) to detect an exhausted episode. */
    public static final int MAX_STEPS_SET_BY_EXP = 10000000;

    // --- Task description; populated by startInitHelper() from the task-spec string. ---
    protected TaskSpecVRLGLUE3 taskSpecObj;
    protected double discountFactorOfMDP;
    // Each range array is indexed [dimension][0 = min, 1 = max] (see getObsIntRanges() and siblings).
    protected int[][] theObsIntRanges;
    protected double[][] theObsDoubleRanges;
    protected int[][] theActIntRanges;
    protected double[][] theActDoubleRanges;
    protected DoubleRange theRewardRange;
    // Set by "-envName" in processPreInitArgs() and overwritten in startInitHelper() from the task spec's extra string.
    public String envName;

    // --- Per-step state. ---
    // currObsAndAct is recreated in stepStartHelper(); lastObsAndAct receives copies of the step's observation/action in stepEndHelper().
    public ObsAndAct currObsAndAct;
    public ObsAndAct lastObsAndAct;
    // stepStartHelper() and endHelper() copy hRewList into hRewThisStep and then clear hRewList.
    public ArrayList<HRew> hRewList;
    public ArrayList<HRew> hRewThisStep;
    // stepStartTime is read for logging but not assigned in the visible portion of this class — presumably set by subclasses; verify.
    protected double stepStartTime;
    protected double lastStepStartTime;

    // --- Agent role and training flags (defaults assigned in the ParameterHolder constructor). ---
    protected boolean isTopLevelAgent;
    public boolean enableGUI;
    // Reset to TRAINING_BY_DEFAULT in initRecords(); flipped by toggleInTrainSess().
    protected boolean inTrainSess;
    // Null until startInitHelper() creates it from the MDP discount factor (only if still null at that point).
    protected MutableDouble discountFactorForLearning;
    // When true, stepEndHelper() substitutes params.safeAction (if non-null) for the chosen action.
    public boolean safeActionOnly;
    public boolean TRAINING_BY_DEFAULT;
    protected boolean allowUserToggledTraining;

    // --- Learning components. ---
    public Params params;
    public RegressionModel model;
    public FeatGenerator featGen;
    public ActionSelect actSelector;

    // --- Counters; zeroed in initRecords(). ---
    public int stepsThisEp;
    public int totalSteps;
    public double rewThisEp;
    public double totalRew;
    public int currEpNum;

    // --- Logging. Nothing is written unless masterLogSwitch is true. ---
    public boolean masterLogSwitch;
    protected RecordHandler recHandler;
    protected boolean recordLog;
    protected String expName;
    protected String trainerName;
    protected String unique;
    // Paths below are derived in startInitHelper() (skipped when isApplet is true).
    protected String writeLogDir;
    protected String writeLogPath;
    protected boolean recordRew;
    protected String writeRewDir;
    protected String writeRewPath;

    // --- Training from a previously recorded log (see endInitHelper()). ---
    public boolean trainFromLog;
    public String trainLogPath;
    public int trainEpLimit;
    public boolean countTrainingEps;
    public int logTrainEpochs;

    // --- Run control. ---
    // agent_start(Observation) exits the JVM when currEpNum equals this value; the default of -1 never matches.
    public int numEpsBeforeStop;
    protected int numEpsBeforePause;
    // While true, startHelper() and stepStartHelper() block in a sleep loop.
    public boolean pause;
    protected boolean verbose;
    protected Random random;
    private int serverPort;
    private boolean startCalled;
    // Cleared by the "-noGlue" argument.
    protected boolean glue;
    protected boolean learningOnly;

    // --- Process-wide state. ---
    // True between stepStartHelper() and the end of stepEndHelper().
    public static boolean duringStepTransition = false;
    public static boolean isApplet = false;
    // Assigned from RecordHandler.getPresentWorkingDir() in startInitHelper() when not running as an applet.
    public static String RLLIBRARY_PATH = "";

    /**
     * Applies command-line style options that must take effect before initialization.
     *
     * <p>Recognized flags: {@code -expName}, {@code -unique}, {@code -trainLogPath} (and its
     * deprecated alias {@code -logPath}), {@code -trainEpLimit}, {@code -numEps},
     * {@code -epsBeforePause} and {@code -envName} each take the following array element as
     * their value and are ignored when they are the last element. {@code -rewLog},
     * {@code -fullLog}, {@code -noGlue} and {@code -trainByDefault} are plain switches.
     * Every element is examined, including elements already consumed as a value.
     *
     * @param strArr the arguments to scan
     * @throws NumberFormatException if a numeric option's value is not an integer
     */
    public void processPreInitArgs(String[] strArr) {
        System.out.println("\n[------Process pre-init args in " + getClass().getSimpleName() + "------] " + Arrays.toString(strArr));
        int idx = 0;
        while (idx < strArr.length) {
            final String flag = strArr[idx];
            final boolean hasValue = idx + 1 < strArr.length;
            final String value = hasValue ? strArr[idx + 1] : null;
            switch (flag) {
                case "-expName":
                    if (hasValue) {
                        this.expName = value;
                        System.out.println("this.expName set to: " + this.expName);
                    }
                    break;
                case "-unique":
                    if (hasValue) {
                        this.unique = value;
                        System.out.println("this.unique set to: " + this.unique);
                    }
                    break;
                case "-rewLog":
                    this.recordRew = true;
                    System.out.println("Recording reward per episode.");
                    break;
                case "-fullLog":
                    this.recordLog = true;
                    System.out.println("Recording full log (not just reward per episode).");
                    break;
                case "-trainLogPath":
                case "-logPath":
                    if (hasValue) {
                        this.trainLogPath = value;
                        this.trainFromLog = true;
                        System.out.println("this.trainFromLog set to: " + this.trainFromLog);
                        System.out.println("this.trainLogPath set to: " + this.trainLogPath);
                        // The old spelling still works but is reported on stderr.
                        if ("-logPath".equals(flag)) {
                            System.err.println("-logPath is no longer an accepted argument. Change whatever called it to -trainLogPath.");
                        }
                    }
                    break;
                case "-trainEpLimit":
                    if (hasValue) {
                        this.trainEpLimit = Integer.parseInt(value);
                        System.out.println("this.trainEpLimit set to: " + this.trainEpLimit);
                    }
                    break;
                case "-numEps":
                    if (hasValue) {
                        this.numEpsBeforeStop = Integer.parseInt(value);
                        System.out.println("this.numEpsBeforeStop set to: " + this.numEpsBeforeStop);
                    }
                    break;
                case "-epsBeforePause":
                    if (hasValue) {
                        this.numEpsBeforePause = Integer.parseInt(value);
                        System.out.println("this.numEpsBeforePause set to: " + this.numEpsBeforePause);
                    }
                    break;
                case "-envName":
                    if (hasValue) {
                        this.envName = value;
                        System.out.println("this.envName set to: " + this.envName);
                    }
                    break;
                case "-noGlue":
                    this.glue = false;
                    System.out.println("Communicating with environment using non-RLGlue interface.");
                    break;
                case "-trainByDefault":
                    this.TRAINING_BY_DEFAULT = true;
                    System.out.println("this.TRAINING_BY_DEFAULT set to: " + this.TRAINING_BY_DEFAULT);
                    break;
                default:
                    // Unknown tokens (including option values) are silently skipped.
                    break;
            }
            idx++;
        }
    }

    /**
     * No-op in this class. Presumably a hook for subclasses that need to handle
     * arguments after initialization — confirm against callers.
     *
     * @param strArr the arguments; not read here
     */
    public void processPostInitArgs(String[] strArr) {
    }

    /**
     * Creates an agent by delegating to the {@code ParameterHolder} constructor with
     * the holder returned by {@link #getDefaultParameters()}.
     */
    public GeneralAgent() {
        this(getDefaultParameters());
    }

    /**
     * Creates an agent and assigns the default value of every configurable field.
     *
     * @param parameterHolder accepted for signature compatibility; not read by this constructor
     */
    public GeneralAgent(ParameterHolder parameterHolder) {
        // Components that are created later, during initialization.
        taskSpecObj = null;
        discountFactorForLearning = null;

        // Role, GUI and training behaviour.
        isTopLevelAgent = true;
        enableGUI = true;
        safeActionOnly = false;
        TRAINING_BY_DEFAULT = false;
        allowUserToggledTraining = true;
        learningOnly = false;

        // Logging and experiment identity. "unique" embeds the trainer name, so trainerName is set first.
        masterLogSwitch = true;
        recordLog = false;
        recordRew = false;
        expName = "test";
        trainerName = "wbknox";
        unique = String.valueOf(trainerName) + "-tamerOnly-" + String.format("%f", Stopwatch.getWallTimeInSec());

        // Training from a recorded log.
        trainFromLog = false;
        trainLogPath = "";
        trainEpLimit = -1;
        countTrainingEps = false;
        logTrainEpochs = 1;

        // Run control.
        numEpsBeforeStop = -1;
        numEpsBeforePause = 0;
        pause = false;
        verbose = false;
        random = new Random();

        // Connection state.
        serverPort = 37564;
        startCalled = false;
        glue = true;
    }

    /**
     * Builds the default parameter set, which is a freshly constructed, unmodified holder.
     *
     * @return a new {@code ParameterHolder}
     */
    public static ParameterHolder getDefaultParameters() {
        final ParameterHolder defaults = new ParameterHolder();
        return defaults;
    }

    /**
     * Reads the discount factor used for learning.
     *
     * @return the value currently stored in {@code discountFactorForLearning}
     * @throws NullPointerException if the holder has not been created yet
     */
    public double getDiscountFactorForLearning() {
        final MutableDouble holder = discountFactorForLearning;
        return holder.getValue();
    }

    /**
     * Sets the discount factor used for learning.
     *
     * <p>The holder is {@code null} after construction and is only created by
     * {@code startInitHelper} when still {@code null}. When it is absent this method
     * creates it, so a value set before initialization no longer throws
     * {@link NullPointerException} and is kept by the later initialization.
     *
     * @param d the new discount factor
     */
    public void setDiscountFactorForLearning(double d) {
        if (this.discountFactorForLearning == null) {
            this.discountFactorForLearning = new MutableDouble(d);
        } else {
            // Mutate in place: other components may hold a reference to this same object.
            this.discountFactorForLearning.setValue(d);
        }
    }

    /** @return the current {@code isTopLevelAgent} flag */
    public boolean getIsTopLevelAgent() {
        return isTopLevelAgent;
    }

    /** @param z new value for the {@code isTopLevelAgent} flag */
    public void setIsTopLevelAgent(boolean z) {
        isTopLevelAgent = z;
    }

    /** @return whether per-episode reward recording is switched on */
    public boolean getRecordRew() {
        return recordRew;
    }

    /** @param z {@code true} to switch per-episode reward recording on */
    public void setRecordRew(boolean z) {
        recordRew = z;
    }

    /** @return whether full-log recording is switched on */
    public boolean getRecordLog() {
        return recordLog;
    }

    /** @param z {@code true} to switch full-log recording on */
    public void setRecordLog(boolean z) {
        recordLog = z;
    }

    /** @param str new run identifier, used in log file names */
    public void setUnique(String str) {
        unique = str;
    }

    /** @return the run identifier used in log file names */
    public String getUnique() {
        return unique;
    }

    /** @param str new experiment name, used as the log sub-directory name */
    public void setExpName(String str) {
        expName = str;
    }

    /** @return the experiment name */
    public String getExpName() {
        return expName;
    }

    /** @param z new value for the {@code allowUserToggledTraining} flag */
    public void setAllowUserToggledTraining(boolean z) {
        allowUserToggledTraining = z;
    }

    /** @return the record handler, or {@code null} if none has been assigned yet */
    public RecordHandler getRecHandler() {
        return recHandler;
    }

    /** @return whether a training session is currently active */
    public boolean getInTrainSess() {
        return inTrainSess;
    }

    /** @param z {@code true} to mark a training session as active */
    public void setInTrainSess(boolean z) {
        inTrainSess = z;
    }

    /**
     * Produces a fixed response payload; neither argument is inspected.
     *
     * @param parameterHolder ignored
     * @param str ignored
     * @return a new payload constructed with {@code false} and an empty string
     */
    public static TaskSpecResponsePayload isCompatible(ParameterHolder parameterHolder, String str) {
        final TaskSpecResponsePayload payload = new TaskSpecResponsePayload(false, "");
        return payload;
    }

    /**
     * Loads {@code params} for this agent's concrete class and the given key.
     *
     * @param str second argument passed to {@code Params.getParams}; {@code startInitHelper}
     *            passes the environment name in the same position
     */
    public void initParams(String str) {
        final String agentClassName = getClass().getName();
        params = Params.getParams(agentClassName, str);
    }

    /**
     * RL-Glue initialization entry point, to be implemented by each concrete agent.
     *
     * @param str the task-spec string
     */
    @Override // org.rlcommunity.rlglue.codec.AgentInterface
    public abstract void agent_init(String str);

    /**
     * Shared initialization routine: parses the task spec, creates the feature generator and
     * model unless the agent already has them, creates the record handler, and writes the
     * parameter headers of whichever logs are enabled.
     *
     * <p>Log directories are created with {@code mkdirs()} so a missing parent {@code data}
     * directory does not prevent logging. The reward log creates {@code writeRewDir}; it
     * previously created {@code writeLogDir} instead.
     *
     * <p>Decompiler note: access modifier was changed from {@code protected}.
     *
     * @param str the task-spec string
     * @param generalAgent the agent to initialize
     */
    public static void agent_init(String str, GeneralAgent generalAgent) {
        System.out.println("\n\n\n----Agent " + generalAgent.getClass().getName() + " is being initialized.----");
        generalAgent.startInitHelper(str);
        System.out.println("featGen before initialization: " + generalAgent.featGen);
        if (generalAgent.featGen == null) {
            generalAgent.featGen = generalAgent.getFeatGen(generalAgent.params);
        } else {
            System.out.println("Keeping a previously instantiated featGen!!!!!");
        }
        System.out.println("featGen after initialization: " + generalAgent.featGen);
        System.out.println("model: " + generalAgent.model);
        if (generalAgent.model == null) {
            generalAgent.setModel();
        } else {
            System.out.println("Keeping a previously instantiated model!!!!!");
        }
        System.out.println("model after initialization: " + generalAgent.model);
        generalAgent.recHandler = new RecordHandler(!isApplet);
        System.out.println(String.valueOf(generalAgent.getClass().getName()) + " masterLogSwitch: " + generalAgent.masterLogSwitch);
        if (generalAgent.masterLogSwitch) {
            if (generalAgent.recordLog) {
                System.out.println("Log base path: " + generalAgent.writeLogDir);
                if (generalAgent.recHandler.canWriteToFile) {
                    new File(generalAgent.writeLogDir).mkdirs();
                }
                generalAgent.recHandler.writeParamsToFullLog(generalAgent.writeLogPath, generalAgent.params);
            }
            if (generalAgent.recordRew) {
                System.out.println("Reward log base path: " + generalAgent.writeRewDir);
                if (generalAgent.recHandler.canWriteToFile) {
                    // Create the reward directory itself, not the full-log directory.
                    new File(generalAgent.writeRewDir).mkdirs();
                }
                System.out.println("agent.writeRewPath: " + generalAgent.writeRewPath);
                generalAgent.recHandler.writeParamsToRewLog(generalAgent.writeRewPath, generalAgent.params);
            }
        }
    }

    /**
     * First stage of initialization: derives log paths (skipped for applets), parses the
     * task spec, resolves the environment name and parameters, extracts the observation,
     * action and reward ranges, sets up the discount factors and resets the records.
     *
     * <p>The classpath listing no longer assumes the system class loader is a
     * {@link URLClassLoader}. On Java 9 and later it is not, and the former unconditional
     * cast threw {@link ClassCastException}. The listing now falls back to the
     * {@code java.class.path} system property.
     *
     * <p>Decompiler note: access modifier was changed from {@code protected}.
     *
     * @param str the task-spec string
     */
    public void startInitHelper(String str) {
        if (!isApplet) {
            System.out.println("unique: " + this.unique);
            RLLIBRARY_PATH = RecordHandler.getPresentWorkingDir();
            this.writeLogDir = String.valueOf(RLLIBRARY_PATH) + "/data/" + this.expName;
            this.writeLogPath = String.valueOf(this.writeLogDir) + "/recTraj-" + this.unique + ".log";
            this.writeRewDir = String.valueOf(RLLIBRARY_PATH) + "/data/" + this.expName;
            this.writeRewPath = String.valueOf(this.writeRewDir) + "/" + this.unique + ".rew";
            ClassLoader systemLoader = ClassLoader.getSystemClassLoader();
            if (systemLoader instanceof URLClassLoader) {
                for (URL url : ((URLClassLoader) systemLoader).getURLs()) {
                    System.out.println("Classpath: " + url.getFile());
                }
            } else {
                // Java 9+: the application class loader is no longer a URLClassLoader.
                for (String entry : System.getProperty("java.class.path", "").split(File.pathSeparator)) {
                    System.out.println("Classpath: " + entry);
                }
            }
        }
        this.taskSpecObj = new TaskSpecVRLGLUE3(str);
        System.out.println("Agent parsed the task spec.");
        this.envName = getEnvName(this.taskSpecObj.getExtraString());
        System.out.println("taskStr: " + this.taskSpecObj.toString());
        if (this.envName.equals("")) {
            // No environment name in the spec: substitute a hard-coded Mario task spec.
            this.taskSpecObj = new TaskSpecVRLGLUE3("VERSION RL-Glue-3.0 PROBLEMTYPE episodic DISCOUNTFACTOR 1.0 OBSERVATIONS INTS (0 1) (0 1) (0 1) (0 3) (0 1) DOUBLES (-5.0 15.0) (-5.0 15.0) (-5.0 15.0) (-5.0 15.0) (-5.0 15.0) (-5.0 15.0) (-5.0 15.0) (-5.0 15.0) ACTIONS INTS (-1 1) (0 1) (0 1)  REWARDS (-1.0 0.0)  EXTRA EnvName:Mario Revision:null");
            this.envName = getEnvName(this.taskSpecObj.getExtraString());
        }
        System.out.println("Environment name: " + this.envName);
        if (this.params == null) {
            this.params = Params.getParams(getClass().getName(), this.envName);
        }
        System.out.println("params: " + this.params.toOneLineStr());
        this.theObsIntRanges = getObsIntRanges(this.taskSpecObj);
        this.theObsDoubleRanges = getObsDoubleRanges(this.taskSpecObj);
        this.theActIntRanges = getActIntRanges(this.taskSpecObj);
        this.theActDoubleRanges = getActDoubleRanges(this.taskSpecObj);
        this.theRewardRange = this.taskSpecObj.getRewardRange();
        System.out.println("Reward range is: " + this.theRewardRange.getMin() + " to " + this.theRewardRange.getMax());
        this.discountFactorOfMDP = this.taskSpecObj.getDiscountFactor();
        // Keep a learning discount factor that was set before initialization.
        if (this.discountFactorForLearning == null) {
            this.discountFactorForLearning = new MutableDouble(this.discountFactorOfMDP);
        }
        System.out.println("Discount factor of MDP is: " + this.discountFactorOfMDP);
        System.out.println("Discount factor for learning is: " + this.discountFactorForLearning.getValue());
        this.currObsAndAct = new ObsAndAct();
        this.currObsAndAct.setAct(new Action(this.taskSpecObj.getNumDiscreteActionDims(), this.taskSpecObj.getNumContinuousActionDims()));
        initRecords();
    }

    /**
     * Extracts the integer observation ranges from a task spec, printing each one.
     *
     * @param taskSpecVRLGLUE3 the parsed task spec
     * @return one row per discrete observation dimension, as {@code {min, max}}
     */
    public static int[][] getObsIntRanges(TaskSpecVRLGLUE3 taskSpecVRLGLUE3) {
        final int dimCount = taskSpecVRLGLUE3.getNumDiscreteObsDims();
        System.out.println("Observations have " + dimCount + " integer dimensions");
        final int[][] bounds = new int[dimCount][2];
        for (int dim = 0; dim != dimCount; dim++) {
            final IntRange range = taskSpecVRLGLUE3.getDiscreteObservationRange(dim);
            final String label = "Observation integer " + dim + " range is: ";
            System.out.println(label + range.getMin() + " to " + range.getMax());
            final int[] row = bounds[dim];
            row[0] = range.getMin();
            row[1] = range.getMax();
        }
        return bounds;
    }

    /**
     * Extracts the continuous observation ranges from a task spec, printing each one.
     *
     * @param taskSpecVRLGLUE3 the parsed task spec
     * @return one row per continuous observation dimension, as {@code {min, max}}
     */
    public static double[][] getObsDoubleRanges(TaskSpecVRLGLUE3 taskSpecVRLGLUE3) {
        final int dimCount = taskSpecVRLGLUE3.getNumContinuousObsDims();
        System.out.println("Observations have " + dimCount + " double dimensions");
        final double[][] bounds = new double[dimCount][2];
        for (int dim = 0; dim != dimCount; dim++) {
            final DoubleRange range = taskSpecVRLGLUE3.getContinuousObservationRange(dim);
            final String label = "Observation double " + dim + " range is: ";
            System.out.println(label + range.getMin() + " to " + range.getMax());
            final double[] row = bounds[dim];
            row[0] = range.getMin();
            row[1] = range.getMax();
        }
        return bounds;
    }

    /**
     * Extracts the integer action ranges from a task spec, printing each one.
     *
     * @param taskSpecVRLGLUE3 the parsed task spec
     * @return one row per discrete action dimension, as {@code {min, max}}
     */
    public static int[][] getActIntRanges(TaskSpecVRLGLUE3 taskSpecVRLGLUE3) {
        final int dimCount = taskSpecVRLGLUE3.getNumDiscreteActionDims();
        System.out.println("Actions have " + dimCount + " integer dimensions");
        final int[][] bounds = new int[dimCount][2];
        for (int dim = 0; dim != dimCount; dim++) {
            final IntRange range = taskSpecVRLGLUE3.getDiscreteActionRange(dim);
            final String label = "Action integer " + dim + " range is: ";
            System.out.println(label + range.getMin() + " to " + range.getMax());
            final int[] row = bounds[dim];
            row[0] = range.getMin();
            row[1] = range.getMax();
        }
        return bounds;
    }

    /**
     * Extracts the continuous action ranges from a task spec, printing each one.
     *
     * @param taskSpecVRLGLUE3 the parsed task spec
     * @return one row per continuous action dimension, as {@code {min, max}}
     */
    public static double[][] getActDoubleRanges(TaskSpecVRLGLUE3 taskSpecVRLGLUE3) {
        final int dimCount = taskSpecVRLGLUE3.getNumContinuousActionDims();
        System.out.println("Actions have " + dimCount + " double dimensions");
        final double[][] bounds = new double[dimCount][2];
        for (int dim = 0; dim != dimCount; dim++) {
            final DoubleRange range = taskSpecVRLGLUE3.getContinuousActionRange(dim);
            final String label = "Action double " + dim + " range is: ";
            System.out.println(label + range.getMin() + " to " + range.getMax());
            final double[] row = bounds[dim];
            row[0] = range.getMin();
            row[1] = range.getMax();
        }
        return bounds;
    }

    /**
     * Final stage of initialization. Hands the environment transition model from
     * {@code params} to the action selector (and the reward model too, unless the selector
     * already has one), then trains from the recorded log when {@code trainFromLog} is set.
     *
     * <p>Decompiler note: access modifier was changed from {@code protected}.
     */
    public void endInitHelper() {
        final boolean haveSelector = this.actSelector != null;
        if (haveSelector) {
            this.actSelector.setEnvTransModel(this.params.envTransModel);
        }
        // Only fill in the reward model when the selector does not already have one.
        if (haveSelector && this.actSelector.getRewModel() == null) {
            this.actSelector.setRewModel(this.params.envRewModel);
        }
        if (!this.trainFromLog) {
            return;
        }
        LogTrainer.trainOnLog(this.trainLogPath, this, this.logTrainEpochs);
    }

    /**
     * Resets the run-wide and per-episode counters, restores the training flag to its
     * default, and replaces the previous-step holder and the human-reward list with
     * fresh empty instances.
     */
    public void initRecords() {
        // Run-wide totals.
        totalSteps = 0;
        totalRew = 0.0d;
        currEpNum = 0;
        // Per-episode totals.
        stepsThisEp = 0;
        rewThisEp = 0.0d;
        // Session state.
        inTrainSess = TRAINING_BY_DEFAULT;
        lastObsAndAct = new ObsAndAct();
        hRewList = new ArrayList<>();
    }

    /**
     * Instantiates the feature generator named by {@code params.featClass}, using this
     * agent's observation and action ranges.
     *
     * @param params supplies {@code featClass} and, for some generators, entries in
     *               {@code featGenParams}
     * @return the new generator, or {@code null} (after printing a notice) when the class
     *         name is not one of the supported generators
     */
    public FeatGenerator getFeatGen(Params params) {
        System.out.println("Creating feature generation class " + params.featClass + ".");
        final int[][] actInts = this.theActIntRanges;
        final double[][] actDoubles = this.theActDoubleRanges;
        switch (params.featClass) {
            case "FeatGen_Discretize": {
                final int bins = Integer.parseInt(params.featGenParams.get("numBinsPerDim"));
                return new FeatGen_Discretize(this.theObsIntRanges, this.theObsDoubleRanges, actInts, actDoubles, bins);
            }
            case "FeatGen_NoChange":
                return new FeatGen_NoChange(this.theObsIntRanges, this.theObsDoubleRanges, actInts, actDoubles);
            case "FeatGen_RBFs": {
                final int basisFcns = Integer.parseInt(params.featGenParams.get("basisFcnsPerDim"));
                final double relWidth = Double.parseDouble(params.featGenParams.get("relWidth"));
                final FeatGen_RBFs rbfs = new FeatGen_RBFs(this.theObsIntRanges, this.theObsDoubleRanges, actInts, actDoubles, basisFcns, relWidth);
                // Normalization bounds are applied only when both ends are configured.
                final boolean hasNormBounds = params.featGenParams.get("normMin") != null && params.featGenParams.get("normMax") != null;
                if (hasNormBounds) {
                    rbfs.setNormBounds(Float.parseFloat(params.featGenParams.get("normMin")), Float.parseFloat(params.featGenParams.get("normMax")));
                }
                if (params.featGenParams.get("biasFeatVal") != null) {
                    rbfs.setBiasFeatPerAct(Double.parseDouble(params.featGenParams.get("biasFeatVal")));
                }
                return rbfs;
            }
            case "FeatGen_Mario":
                return new FeatGen_Mario(this.theObsIntRanges, this.theObsDoubleRanges, actInts, actDoubles);
            case "FeatGen_Tetris":
                return new FeatGen_Tetris(this.theObsIntRanges, this.theObsDoubleRanges, actInts, actDoubles);
            default:
                System.out.println("The current code doesn't support class " + params.featClass + " for feature generation. Adding support might be trivial. (Printed from GeneralAgent.getFeatGen().)");
                return null;
        }
    }

    /**
     * Instantiates {@code model} according to {@code params.modelClass}.
     *
     * <p>For an unsupported class name a notice is printed and {@code model} is left
     * unchanged. In that case the optional sample-based biasing is skipped; it previously
     * dereferenced the still-null model and threw {@link NullPointerException}.
     */
    private void setModel() {
        System.out.println("Creating model class " + this.params.modelClass + " for " + getClass() + ".");
        if (this.params.modelClass.equals("IncGDLinearModel")) {
            System.out.println("featGen: " + this.featGen);
            this.model = new IncGDLinearModel(this.featGen.getNumFeatures(), this.params.stepSize, this.featGen, this.params.initWtsValue, this.params.modelAddsBiasFeat);
            ((IncGDLinearModel) this.model).setEligTraceParams(this.params.traceDecayFactor, this.discountFactorForLearning, this.params.traceType);
        } else if (this.params.modelClass.equals("WekaModelPerActionModel")) {
            this.model = new WekaModelPerActionModel(this.params.wekaModelName, this.featGen);
        } else if (this.params.modelClass.equals("WekaModel")) {
            this.model = new WekaModelWrap(this.featGen, this.params.wekaModelName);
        } else {
            System.out.println("The current code doesn't support class " + this.params.modelClass + " for modeling. Adding support might be trivial.");
        }
        // Bias only a model that actually exists.
        if (this.model != null && this.params.initModelWSamples) {
            this.model.biasWGenSamples(this.params.numBiasingSamples, this.params.initSampleValue, this.params.biasSampleWt);
        }
    }

    /**
     * Re-runs the first initialization stage, builds a new feature generator from
     * {@code params}, and attaches the given supplementary model to it.
     *
     * @param str the task-spec string passed to {@code startInitHelper}
     * @param regressionModel the supplementary model to attach
     * @param featGenerator the feature generator paired with that model; this argument
     *                      (not the newly built generator) is what the final message prints
     */
    public void addModelBasedFeat(String str, RegressionModel regressionModel, FeatGenerator featGenerator) {
        startInitHelper(str);
        final FeatGenerator rebuilt = getFeatGen(this.params);
        this.featGen = rebuilt;
        rebuilt.setSupplModel(regressionModel, featGenerator);
        final String agentClassName = getClass().getName();
        System.out.println("\n\nfeatGen in " + agentClassName + " after adding feature: " + featGenerator);
    }

    /**
     * RL-Glue episode start. Terminates the JVM once the configured number of episodes
     * has been completed; otherwise delegates to the timed overload with the current
     * time and no predetermined action.
     *
     * @param observation the first observation of the episode
     * @return the action returned by the timed overload
     */
    @Override
    public Action agent_start(Observation observation) {
        final boolean episodeQuotaReached = this.currEpNum == this.numEpsBeforeStop;
        if (episodeQuotaReached) {
            System.out.println("Finished " + this.numEpsBeforeStop + " episodes. Exiting.");
            System.exit(0);
        }
        final double startTime = Stopwatch.getComparableTimeInSec();
        return agent_start(observation, startTime, null);
    }

    /**
     * Timed episode start, implemented by each concrete agent.
     *
     * @param observation the first observation of the episode
     * @param d a timestamp; {@code agent_start(Observation)} passes {@code Stopwatch.getComparableTimeInSec()}
     * @param action {@code agent_start(Observation)} passes {@code null}; presumably a predetermined action otherwise — confirm in subclasses
     * @return the action to take
     */
    public abstract Action agent_start(Observation observation, double d, Action action);

    /**
     * Episode-start bookkeeping: advances the episode number, blocks while the agent is
     * paused (polling every two seconds and printing a marker each time), then resets the
     * per-episode counters and the previous-step holder.
     *
     * <p>Decompiler note: access modifier was changed from {@code protected}.
     */
    public void startHelper() {
        currEpNum += 1;
        // Busy-wait in 2 s slices until some other thread clears the pause flag.
        while (pause) {
            System.out.print("\n\n.\n\n");
            sleep(2000.0d);
        }
        stepsThisEp = 0;
        rewThisEp = 0.0d;
        lastObsAndAct = new ObsAndAct();
    }

    /**
     * RL-Glue step. Delegates to the timed overload using the current time.
     *
     * @param d the reward for the previous action
     * @param observation the new observation
     * @return the action returned by the timed overload
     */
    @Override
    public Action agent_step(double d, Observation observation) {
        final double now = Stopwatch.getComparableTimeInSec();
        return agent_step(d, observation, now);
    }

    /**
     * Timed step without a predetermined action.
     *
     * <p>The supplied timestamp is forwarded unchanged. Previously it was discarded and
     * the clock was read a second time, so a caller-provided time had no effect.
     *
     * @param d the reward for the previous action
     * @param observation the new observation
     * @param d2 the timestamp to forward to the four-argument overload
     * @return the action returned by the four-argument overload
     */
    public Action agent_step(double d, Observation observation, double d2) {
        return agent_step(d, observation, d2, null);
    }

    /**
     * Timed step, implemented by each concrete agent.
     *
     * @param d the reward for the previous action
     * @param observation the new observation
     * @param d2 a timestamp supplied by the caller
     * @param action the three-argument overload passes {@code null}; presumably a predetermined action otherwise — confirm in subclasses
     * @return the action to take
     */
    public abstract Action agent_step(double d, Observation observation, double d2, Action action);

    /**
     * Step-start bookkeeping: counts the step, raises {@code duringStepTransition}, blocks
     * while paused, accumulates the reward, snapshots and clears the pending human-reward
     * list, and starts a fresh current observation/action holder.
     *
     * <p>Decompiler note: access modifier was changed from {@code protected}.
     *
     * @param d the reward received for the previous action
     */
    public void stepStartHelper(double d) {
        totalSteps += 1;
        duringStepTransition = true;
        // Poll the pause flag every 2 s, printing a marker after each wait.
        while (pause) {
            sleep(2000.0d);
            System.out.print("GenAgent.stepStart()");
        }
        stepsThisEp += 1;
        totalRew = totalRew + d;
        rewThisEp = rewThisEp + d;
        // Copy first, then clear: hRewThisStep must not alias the list being emptied.
        final ArrayList<HRew> snapshot = new ArrayList<>(hRewList);
        hRewThisStep = snapshot;
        hRewList.clear();
        currObsAndAct = new ObsAndAct();
        if (!verbose) {
            return;
        }
        System.out.println("\nAgent on step: " + stepsThisEp);
    }

    /**
     * Step-end bookkeeping: enforces the safe-action override, logs the step, stores copies
     * of the action and observation as the previous step, closes the episode in the logs
     * when the step limit is reached, and lowers {@code duringStepTransition}.
     *
     * <p>Decompiler note: access modifier was changed from {@code protected}.
     *
     * @param d the reward received for the previous action
     * @param observation the observation for this step
     */
    public void stepEndHelper(double d, Observation observation) {
        if (this.safeActionOnly && this.inTrainSess) {
            System.err.println("Safe action mandated during training! This shouldn't happen!");
            System.exit(1);
        }
        if (this.safeActionOnly && this.params.safeAction != null) {
            this.currObsAndAct.setAct(this.params.safeAction);
        }

        logStep(this.writeLogPath, observation, this.currObsAndAct.getAct(), d, this.hRewThisStep, this.stepStartTime);

        // Store duplicates so later mutation of the current step cannot alter the history.
        this.lastObsAndAct.setAct(this.currObsAndAct.getAct().duplicate());
        this.lastObsAndAct.setObs(observation.duplicate());
        if (this.verbose) {
            System.out.println("act chosen: " + this.currObsAndAct.getAct().intArray[0]);
        }

        final boolean outOfSteps = this.stepsThisEp == MAX_STEPS_SET_BY_EXP;
        if (outOfSteps) {
            if (this.isTopLevelAgent) {
                System.out.println("At end of steps!!");
            }
            // A null observation/action pair makes logStep write the episode-end marker.
            logStep(this.writeLogPath, null, null, d, this.hRewThisStep, this.stepStartTime);
            if (this.recordRew && this.masterLogSwitch) {
                this.recHandler.writeLineToRewLog(this.writeRewPath, String.valueOf(this.rewThisEp), true);
            }
        }

        this.lastStepStartTime = this.stepStartTime;
        duringStepTransition = false;
    }

    /**
     * Timed episode end, implemented by each concrete agent.
     *
     * @param d the final reward of the episode
     * @param d2 a timestamp; {@code agent_end(double)} passes {@code Stopwatch.getComparableTimeInSec()}
     */
    public abstract void agent_end(double d, double d2);

    /**
     * RL-Glue episode end. Delegates to the timed overload using the current time.
     *
     * @param d the final reward of the episode
     */
    @Override
    public void agent_end(double d) {
        final double now = Stopwatch.getComparableTimeInSec();
        agent_end(d, now);
    }

    /**
     * Episode-end bookkeeping: warns about a NaN reward, writes the terminal log entry,
     * updates the counters and reward totals, snapshots and clears the human-reward list,
     * resets the current action, appends the episode reward to the reward log when that is
     * enabled, and reports the mean reward whenever the episode number is a multiple of 50.
     *
     * @param d the final reward of the episode
     */
    public void endHelper(double d) {
        if (Double.isNaN(d)) {
            System.err.println("Received NaN in agent_end()");
        }
        // Terminal entry is logged before the counters and hRewThisStep are updated.
        logStep(this.writeLogPath, null, null, d, this.hRewThisStep, this.stepStartTime);

        this.totalSteps += 1;
        this.stepsThisEp += 1;
        this.totalRew = this.totalRew + d;
        this.rewThisEp = this.rewThisEp + d;

        final ArrayList<HRew> snapshot = new ArrayList<>(this.hRewList);
        this.hRewThisStep = snapshot;
        this.hRewList.clear();
        this.currObsAndAct.setAct(new Action());

        final boolean writeRewardLine = this.recordRew && this.masterLogSwitch;
        if (writeRewardLine) {
            this.recHandler.writeLineToRewLog(this.writeRewPath, String.valueOf(this.rewThisEp), true);
        }
        if (this.verbose) {
            System.out.println("Steps in episode: " + this.stepsThisEp);
            System.out.println("Reward in episode: \t\t" + this.rewThisEp);
        }
        final boolean reportMean = this.currEpNum % 50 == 0;
        if (reportMean) {
            println("Mean reward after " + this.currEpNum + " episodes: " + (this.totalRew / this.currEpNum));
        }
    }

    /** RL-Glue cleanup callback; this class has nothing to release, so the body is empty. */
    @Override // org.rlcommunity.rlglue.codec.AgentInterface
    public void agent_cleanup() {
    }

    /**
     * Returns the model's prediction for an observation/action pair.
     *
     * @param observation the observation, may be {@code null}
     * @param action the action, may be {@code null}
     * @return the predicted label, or {@code 0.0} when either argument is {@code null}
     */
    public double getVal(Observation observation, Action action) {
        final boolean complete = observation != null && action != null;
        return complete ? this.model.predictLabel(observation, action) : 0.0d;
    }

    /**
     * Computes {@code discount * V(next) - V(current)}, where each state's value is the
     * model's prediction for the greedy action in that state.
     *
     * @param observation the current observation
     * @param observation2 the next observation
     * @return the change in state potential, or {@code 0.0} when either observation is {@code null}
     */
    public double getStatePotForTrans(Observation observation, Observation observation2) {
        if (observation == null) {
            return 0.0d;
        }
        if (observation2 == null) {
            return 0.0d;
        }
        // Evaluated in this order: discount, next-state value, current-state value.
        final double discount = this.discountFactorForLearning.getValue();
        final double nextPotential = this.model.predictLabel(this.featGen.getSAFeats(observation2, this.actSelector.greedyActSelect(observation2, null)));
        final double currPotential = this.model.predictLabel(this.featGen.getSAFeats(observation, this.actSelector.greedyActSelect(observation, null)));
        final double value = (discount * nextPotential) - currPotential;
        if (this.verbose) {
            System.out.println("changeInStatePot: " + value);
        }
        return value;
    }

    /**
     * Computes {@code discount * Q(next, nextAction) - Q(current, action)} from the model's
     * predictions. A {@code null} next observation contributes a next value of zero.
     *
     * @param observation the current observation
     * @param action the action taken in the current observation
     * @param observation2 the next observation, may be {@code null}
     * @param action2 the action for the next observation
     * @return the change in state-action potential, or {@code 0.0} when {@code observation} is {@code null}
     */
    public double getSAPotForTrans(Observation observation, Action action, Observation observation2, Action action2) {
        if (observation == null) {
            return 0.0d;
        }
        final double currPotential = this.model.predictLabel(observation, action);
        final double nextPotential = (observation2 == null) ? 0.0d : this.model.predictLabel(observation2, action2);
        final double value = (this.discountFactorForLearning.getValue() * nextPotential) - currPotential;
        if (this.verbose) {
            System.out.println("changeInSAPot: " + value);
        }
        return value;
    }

    /**
     * RL-Glue message handler. Parses the text as an RL-Viz agent message and lets the
     * message handle itself when it reports that it can.
     *
     * @param str the raw message
     * @return the automatic handler's reply; {@code null} when the message parsed but cannot
     *         be handled automatically; otherwise a fixed explanatory string for non-RL-Viz
     *         input or for any other failure
     */
    @Override
    public String agent_message(String str) {
        try {
            final AgentMessages parsed = AgentMessageParser.parseMessage(str);
            if (!parsed.canHandleAutomatically(this)) {
                return null;
            }
            return parsed.handleAutomatically(this);
        } catch (NotAnRLVizMessageException notRlViz) {
            System.err.println("Someone sent random agent a message that wasn't RL-Viz compatible");
            return "I only respond to RL-Viz messages!";
        } catch (Exception failure) {
            System.err.println("Exception while parsing message: " + failure);
            return "There was a problem with this message.";
        }
    }

    /**
     * Reacts to a key press. {@code 'T'} toggles the action selector's tree-search flag
     * (forcing it off again when there is no environment transition model); {@code 'C'}
     * asks the model to change its classifier. Other keys are ignored.
     *
     * @param c the key character
     */
    public void receiveKeyInput(char c) {
        switch (c) {
            case 'T':
                this.actSelector.setTreeSearchFlag(!this.actSelector.getTreeSearchFlag());
                if (this.actSelector.getEnvTransModel() == null) {
                    System.out.println("Can't do tree search. No environment model.");
                    this.actSelector.setTreeSearchFlag(false);
                }
                break;
            case 'C':
                this.model.changeClassifier();
                break;
            default:
                break;
        }
    }

    /** Flips the training-session flag and prints which state was entered. */
    public void toggleInTrainSess() {
        final boolean nowTraining = !inTrainSess;
        inTrainSess = nowTraining;
        System.out.println(nowTraining ? "---Starting training session.---" : "---Ending training session---");
    }

    /** Flips the pause flag and prints which state was entered. */
    public void togglePause() {
        final boolean nowPaused = !pause;
        pause = nowPaused;
        System.out.println(nowPaused ? "Agent paused." : "Agent unpaused.");
    }

    /**
     * Sleeps for the given number of milliseconds, truncated to an {@code int}.
     * An interruption ends the sleep early and is reported on stderr; it is not rethrown
     * and the interrupt status is not restored.
     *
     * @param d the duration in milliseconds
     */
    public static void sleep(double d) {
        final int wholeMillis = (int) d;
        try {
            TimeUnit.MILLISECONDS.sleep(wholeMillis);
        } catch (InterruptedException interrupted) {
            System.err.println("Exception while trying to sleep: " + interrupted);
        }
    }

    /**
     * Extracts the environment name from a task spec's extra string, e.g.
     * {@code "EnvName:Mario Revision:null"} yields {@code "Mario"}.
     *
     * <p>The string is split on single spaces and each token starting with
     * {@code "EnvName:"} is split on {@code ':'}; the second piece is the name. When several
     * tokens match, the last one wins. A {@code null} input, or a matching token with
     * nothing after the colon, no longer throws and simply contributes no name.
     *
     * @param str the extra string, may be {@code null}
     * @return the environment name, or an empty string when none is present
     */
    public static String getEnvName(String str) {
        String envName = "";
        if (str == null) {
            return envName;
        }
        // A single space is the separator (the decompiler had substituted an unrelated
        // library constant with the same value).
        for (String token : str.split(" ")) {
            if (!token.startsWith("EnvName:")) {
                continue;
            }
            String[] pieces = token.split(":");
            // "EnvName:" alone splits into one piece; there is no name to read.
            if (pieces.length > 1) {
                envName = pieces[1];
            }
        }
        return envName;
    }

    /** @param z {@code false} to suppress all log writing regardless of the individual log flags */
    public void setMasterLogSwitch(boolean z) {
        masterLogSwitch = z;
    }

    /**
     * Sanity-checks an observation against the dimensions declared in the task spec and
     * reports any mismatch on stderr. Nothing is thrown for a mismatch.
     *
     * <p>A {@code null} observation is reported and the method returns; it previously went
     * on to dereference the observation and threw {@link NullPointerException}.
     *
     * @param observation the observation to check, may be {@code null}
     */
    public void checkObs(Observation observation) {
        if (observation == null) {
            System.err.println("Observation is null.");
            // Nothing further can be checked without an observation.
            return;
        }
        if (observation.intArray.length != this.theObsIntRanges.length) {
            System.err.println("Unexpected number of observation integers: " + observation.intArray.length + ". Expected: " + this.theObsIntRanges.length);
        }
        if (observation.doubleArray.length != this.theObsDoubleRanges.length) {
            System.err.println("Unexpected number of observation doubles: " + observation.doubleArray.length + ". Expected: " + this.theObsDoubleRanges.length);
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Copies this agent's last observation and last action into another agent.
     *
     * <p>Each non-null value is duplicated before being stored in the target, so the two
     * agents do not share the same object; a null value is copied as {@code null}.
     *
     * @param generalAgent the agent whose {@code lastObsAndAct} is overwritten
     */
    public void overwriteLastObsAndAct(GeneralAgent generalAgent) {
        if (this.lastObsAndAct.obsIsNull()) {
            generalAgent.lastObsAndAct.setObs(null);
        } else {
            generalAgent.lastObsAndAct.setObs(this.lastObsAndAct.getObs().duplicate());
        }

        if (this.lastObsAndAct.actIsNull()) {
            generalAgent.lastObsAndAct.setAct(null);
        } else {
            generalAgent.lastObsAndAct.setAct(this.lastObsAndAct.getAct().duplicate());
        }
    }

    /**
     * Writes one time step to the record handler, if logging is enabled.
     *
     * <p>Nothing is written unless both {@code recordLog} and {@code masterLogSwitch} are
     * true. After the time step is written, a {@code null} observation additionally
     * triggers an end-of-episode record.
     *
     * @param str         passed through to the record handler as its first argument
     * @param observation the observation for this step; {@code null} also writes an episode end
     * @param action      the action for this step
     * @param d           passed through to {@code writeTimeStep} (fourth argument)
     * @param arrayList   the {@link HRew} entries passed through to {@code writeTimeStep}
     * @param d2          passed through to {@code writeTimeStep} (sixth argument)
     */
    protected void logStep(String str, Observation observation, Action action, double d, ArrayList<HRew> arrayList, double d2) {
        boolean loggingEnabled = this.recordLog && this.masterLogSwitch;
        if (!loggingEnabled) {
            return;
        }
        this.recHandler.writeTimeStep(str, observation, action, d, arrayList, d2, this.inTrainSess);
        if (observation != null) {
            return;
        }
        this.recHandler.writeEpEnd(str);
    }

    /**
     * Returns an end-of-run information string; this implementation always returns the
     * empty string.
     *
     * <p>NOTE(review): presumably a hook for subclasses to override with real content;
     * confirm against the subclasses.
     *
     * @return an empty string
     */
    public String makeEndInfoStr() {
        return "";
    }

    /**
     * Looks up the agent image resource relative to this object's runtime class.
     *
     * @return the URL of {@code /images/randomagent.png}, or {@code null} if the resource
     *         cannot be found
     */
    public URL getImageURL() {
        final String imagePath = "/images/randomagent.png";
        return getClass().getResource(imagePath);
    }

    /**
     * Runs the agent stand-alone: builds a task spec for the configured environment,
     * initializes the agent with it, and then serves time-step messages received over UDP.
     *
     * <p>When {@code envName} is {@code null} it defaults to {@code "CarStop"}. Only
     * {@code "CarStop"} and {@code "FuelWorld"} are supported; any other name prints an
     * error and terminates the JVM with exit status 1.
     *
     * <p>After {@code agent_init}, a datagram socket is bound to {@code serverPort} and each
     * received packet is handed to {@link #processTimeStepMessage}. An {@link IOException}
     * during receive/processing is printed and the loop continues; any other exception ends
     * the loop, is printed, and the method returns. The socket is closed on the way out.
     */
    public void runSelf() {
        TaskSpecVRLGLUE3 taskSpecVRLGLUE3 = new TaskSpecVRLGLUE3();
        if (this.envName == null) {
            this.envName = "CarStop";
        }
        if (this.envName.equals("CarStop")) {
            taskSpecVRLGLUE3.setEpisodic();
            taskSpecVRLGLUE3.setDiscountFactor(1.0d);
            taskSpecVRLGLUE3.addContinuousObservation(new DoubleRange(-50.0d, 100.0d));
            taskSpecVRLGLUE3.addContinuousObservation(new DoubleRange(0.0d, 12.0d));
            taskSpecVRLGLUE3.addDiscreteAction(new IntRange(-1, 1));
            taskSpecVRLGLUE3.setRewardRange(new DoubleRange(-500.0d, 1000.0d));
        } else if (this.envName.equals("FuelWorld")) {
            // Only the episodic flag and discount are set for FuelWorld; no ranges are declared here.
            taskSpecVRLGLUE3.setEpisodic();
            taskSpecVRLGLUE3.setDiscountFactor(1.0d);
        } else {
            System.err.println("Environment " + this.envName + " not supported. Agent exiting.");
            System.exit(1);
        }
        taskSpecVRLGLUE3.setExtra("EnvName:" + this.envName);
        String taskSpec = taskSpecVRLGLUE3.toTaskSpec();
        TaskSpec.checkTaskSpec(taskSpec);
        agent_init(taskSpec);

        DatagramSocket datagramSocket = null;
        try {
            datagramSocket = new DatagramSocket(this.serverPort);
            while (true) {
                // A fresh buffer per packet so no bytes from a previous message linger.
                byte[] bArr = new byte[1024];
                DatagramPacket datagramPacket = new DatagramPacket(bArr, bArr.length);
                try {
                    datagramSocket.receive(datagramPacket);
                    processTimeStepMessage(datagramSocket, datagramPacket);
                } catch (IOException e) {
                    System.err.println(e);
                }
            }
        } catch (Exception e2) {
            System.err.println(e2);
        } finally {
            // Release the bound port when the loop is abandoned (e.g. on a malformed message).
            if (datagramSocket != null) {
                datagramSocket.close();
            }
        }
    }

    /**
     * Parses one comma-separated time-step message and lets the agent respond to it.
     *
     * <p>The message layout is {@code reward,obs_0,...,obs_k,terminal}: the first field is
     * the reward (parsed as a float), the last field is an integer where any non-zero value
     * means terminal, and every field in between becomes one double observation value.
     * The parsed data is wrapped in a {@link Reward_observation_terminal} and passed to
     * {@link #respondWithAction} with no externally supplied action.
     *
     * @param datagramSocket the socket the packet arrived on (not used by this method)
     * @param datagramPacket the received packet containing the message text
     * @throws IllegalArgumentException if the message has fewer than two fields
     * @throws NumberFormatException    if a field cannot be parsed as a number
     */
    public void processTimeStepMessage(DatagramSocket datagramSocket, DatagramPacket datagramPacket) {
        // Decode only the bytes that were actually received, not the whole backing buffer.
        String message = new String(datagramPacket.getData(), datagramPacket.getOffset(), datagramPacket.getLength());
        String[] fields = message.split(",");
        if (fields.length < 2) {
            throw new IllegalArgumentException("Malformed time-step message (expected reward,obs...,terminal): " + message.trim());
        }

        float reward = Float.parseFloat(fields[0]);

        // Observation values are parsed as floats (then widened), matching the wire precision.
        int numObsValues = fields.length - 2;
        float[] obsValues = new float[numObsValues];
        for (int i = 0; i < numObsValues; i++) {
            obsValues[i] = Float.parseFloat(fields[i + 1]);
        }

        // trim() also strips padding/newline characters that may follow the last field.
        boolean terminal = Integer.parseInt(fields[fields.length - 1].trim()) != 0;

        Observation observation = new Observation(0, numObsValues, 0);
        for (int i = 0; i < numObsValues; i++) {
            observation.setDouble(i, obsValues[i]);
        }

        Reward_observation_terminal reward_observation_terminal = new Reward_observation_terminal();
        reward_observation_terminal.setObservation(observation);
        reward_observation_terminal.setTerminal(terminal);
        reward_observation_terminal.setReward(reward);
        respondWithAction(reward_observation_terminal, null);
    }

    /**
     * Advances the agent by one time step and, unless an action was supplied by the caller,
     * sends the agent's chosen action out via {@link #sendAction}.
     *
     * <p>Three cases, in order:
     * <ul>
     *   <li>No episode is in progress ({@code startCalled} is false): {@code agent_start} is
     *       called and {@code startCalled} becomes true.</li>
     *   <li>The step is terminal: {@code agent_end} is called with the reward and
     *       {@code startCalled} is reset to false.</li>
     *   <li>Otherwise: {@code agent_step} is called with the reward and observation.</li>
     * </ul>
     *
     * @param reward_observation_terminal the reward, observation and terminal flag for this step
     * @param action an externally chosen action, or {@code null}; when non-null the agent's
     *               returned action is not sent
     */
    public void respondWithAction(Reward_observation_terminal reward_observation_terminal, Action action) {
        final boolean actionSupplied = action != null;

        if (!this.startCalled) {
            Action firstAction = agent_start(reward_observation_terminal.getObservation(), Stopwatch.getComparableTimeInSec(), action);
            if (!actionSupplied) {
                sendAction(firstAction);
            }
            this.startCalled = true;
            return;
        }

        if (reward_observation_terminal.isTerminal()) {
            agent_end(reward_observation_terminal.getReward());
            this.startCalled = false;
            if (this.envName.equals("FuelWorld")) {
                // NOTE(review): the returned weights are discarded here; confirm whether this call is still needed.
                ((IncGDLinearModel) this.model).getWeights();
            }
            return;
        }

        Action nextAction = agent_step(reward_observation_terminal.getReward(), reward_observation_terminal.getObservation(), Stopwatch.getComparableTimeInSec(), action);
        if (!actionSupplied) {
            sendAction(nextAction);
        }
    }

    /**
     * Appends a new {@link HRew} to {@code hRewList}, built from the given value and the
     * current {@code Stopwatch.getComparableTimeInSec()} reading.
     *
     * @param d the value passed as the first constructor argument of the new {@link HRew}
     */
    public void addHRew(double d) {
        HRew timestamped = new HRew(d, Stopwatch.getComparableTimeInSec());
        this.hRewList.add(timestamped);
    }

    /**
     * Sends an action's first integer as a 4-byte datagram to {@code Network.kDefaultHost}
     * on port {@code serverPort + 1}.
     *
     * <p>The integer is written with {@link ByteBuffer#putInt(int)} using the buffer's
     * default byte order. Sending is best-effort: any failure is reported on
     * {@code System.err} and otherwise ignored. The temporary socket is always closed.
     *
     * @param action the action whose integer at index 0 is transmitted
     */
    public void sendAction(Action action) {
        DatagramSocket socket = null;
        try {
            InetAddress host = InetAddress.getByName(Network.kDefaultHost);
            ByteBuffer payload = ByteBuffer.allocate(4);
            payload.putInt(action.getInt(0));
            socket = new DatagramSocket();
            socket.send(new DatagramPacket(payload.array(), payload.capacity(), host, this.serverPort + 1));
        } catch (Exception e) {
            // Keep sending best-effort, but do not hide the failure.
            System.err.println("Exception while sending action: " + e);
        } finally {
            // One socket is opened per call; close it so sockets are not leaked on every step.
            if (socket != null) {
                socket.close();
            }
        }
    }

    /**
     * Prints a line to standard output, but only when {@code verbose} is enabled.
     *
     * @param str the text to print
     */
    public void println(String str) {
        if (!this.verbose) {
            return;
        }
        System.out.println(str);
    }
}
