package org.encog.ml.world.learning.q;

import org.encog.ml.world.Action;
import org.encog.ml.world.State;
import org.encog.ml.world.World;

/* loaded from: classes2.dex */
/**
 * Applies a one-step temporal-difference update to the policy values held by a {@link World}.
 *
 * <p>Each call to {@link #learn(State, Action, State, Action)} moves the stored value of one
 * state/action pair toward {@code reward + discountRate * value(state2, action2)}, by the
 * fraction given by the learning rate.
 */
public class QLearning {
    /** World whose policy values are read and overwritten by {@link #learn}. */
    private final World world;

    /** Fraction of the temporal-difference error applied on each update. */
    private final double learningRate;

    /** Multiplier applied to the successor pair's policy value. */
    private final double discountRate;

    /**
     * Creates a learner bound to the given world.
     *
     * @param world the world that stores the policy values
     * @param d the learning rate
     * @param d2 the discount rate
     */
    public QLearning(World world, double d, double d2) {
        this.world = world;
        this.learningRate = d;
        this.discountRate = d2;
    }

    /**
     * Updates the policy value of {@code (state, action)} from the observed transition.
     *
     * <p>The new value is
     * {@code q + learningRate * (state.getReward() + discountRate * qNext - q)}, where
     * {@code q} is the current value of {@code (state, action)} and {@code qNext} is the
     * current value of {@code (state2, action2)}. The successor value is taken from exactly
     * the pair supplied; this method does not search for a best successor action itself.
     *
     * @param state the state the action was taken in; its reward is used for the update
     * @param action the action taken in {@code state}
     * @param state2 the successor state
     * @param action2 the action whose value in {@code state2} is used as the bootstrap estimate
     */
    public void learn(State state, Action action, State state2, Action action2) {
        // Read both values before writing, so the update sees the pre-update table
        // even when the two state/action pairs are the same.
        final double currentValue = world.getPolicyValue(state, action);
        final double successorValue = world.getPolicyValue(state2, action2);

        final double target = state.getReward() + discountRate * successorValue;
        final double tdError = target - currentValue;

        world.setPolicyValue(state, action, currentValue + learningRate * tdError);
    }
}
