package org.encog.ml.world.learning.mdp;

import java.util.Iterator;
import org.encog.ml.world.Action;
import org.encog.ml.world.State;
import org.encog.ml.world.SuccessorState;
import org.encog.ml.world.World;
import org.encog.neural.flat.FlatNetwork;

/* loaded from: classes2.dex */
/**
 * Performs in-place value updates over the states of a {@link World}.
 *
 * <p>For every non-goal state, element {@code 0} of the state's policy-value
 * array is set to the state's reward plus the best discounted expected value
 * over all actions of the world. For a goal state it is set to the reward
 * alone.
 *
 * <p>Values are written directly into each state's policy-value array, so a
 * state processed later in a sweep reads whatever its successors currently
 * hold, including values already rewritten earlier in the same sweep.
 */
public class ValueIteration extends MarkovDecisionProcess {
    /** Factor applied to the expected successor value of each action. */
    private final double discountFactor;

    /**
     * Creates the learner for the given world.
     *
     * @param world the world whose states are updated; passed to the superclass
     * @param d     the discount factor applied to expected successor values
     */
    public ValueIteration(World world, double d) {
        super(world);
        this.discountFactor = d;
    }

    /**
     * Recomputes element {@code 0} of the policy-value array of one state.
     *
     * <p>Goal states receive their reward. Any other state receives its reward
     * plus the maximum, over all actions of the world, of the discount factor
     * times the probability-weighted sum of the successors' current values.
     *
     * <p>Note: if the world reports no actions, the maximum stays at
     * {@link Double#NEGATIVE_INFINITY} and that is what gets stored (plus the
     * reward).
     *
     * @param state the state to update
     */
    public void calculateValue(State state) {
        if (getWorld().isGoalState(state)) {
            state.getPolicyValue()[0] = state.getReward();
            return;
        }

        double best = Double.NEGATIVE_INFINITY;
        for (Action action : getWorld().getActions()) {
            // Expected value of taking this action from the given state.
            double expected = 0.0;
            for (SuccessorState successor : getWorld().getProbability().determineSuccessorStates(state, action)) {
                expected += successor.getState().getPolicyValue()[0] * successor.getProbability();
            }
            best = Math.max(best, this.discountFactor * expected);
        }
        state.getPolicyValue()[0] = best + state.getReward();
    }

    /**
     * Runs one sweep: calls {@link #calculateValue(State)} on every state of
     * the world, in the order the world returns them.
     */
    public void iteration() {
        for (State state : getWorld().getStates()) {
            calculateValue(state);
        }
    }
}
